1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/ADT/STLExtras.h" 18 #include "llvm/Support/Debug.h" 19 #include "llvm/Support/raw_ostream.h" 20 21 #define DEBUG_TYPE "format-parser" 22 23 namespace clang { 24 namespace format { 25 26 class FormatTokenSource { 27 public: 28 virtual ~FormatTokenSource() {} 29 virtual FormatToken *getNextToken() = 0; 30 31 virtual unsigned getPosition() = 0; 32 virtual FormatToken *setPosition(unsigned Position) = 0; 33 }; 34 35 namespace { 36 37 class ScopedDeclarationState { 38 public: 39 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 40 bool MustBeDeclaration) 41 : Line(Line), Stack(Stack) { 42 Line.MustBeDeclaration = MustBeDeclaration; 43 Stack.push_back(MustBeDeclaration); 44 } 45 ~ScopedDeclarationState() { 46 Stack.pop_back(); 47 if (!Stack.empty()) 48 Line.MustBeDeclaration = Stack.back(); 49 else 50 Line.MustBeDeclaration = true; 51 } 52 53 private: 54 UnwrappedLine &Line; 55 std::vector<bool> &Stack; 56 }; 57 58 class ScopedMacroState : public FormatTokenSource { 59 public: 60 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 61 FormatToken *&ResetToken) 62 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 63 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 64 Token(nullptr) { 65 TokenSource = this; 66 Line.Level = 0; 67 Line.InPPDirective = true; 68 } 69 70 ~ScopedMacroState() override { 71 TokenSource = PreviousTokenSource; 72 ResetToken = Token; 73 Line.InPPDirective = false; 74 Line.Level = PreviousLineLevel; 75 } 76 77 FormatToken *getNextToken() override { 78 // The \c UnwrappedLineParser guards against this by never calling 79 // \c getNextToken() after it has encountered the first eof token. 80 assert(!eof()); 81 Token = PreviousTokenSource->getNextToken(); 82 if (eof()) 83 return getFakeEOF(); 84 return Token; 85 } 86 87 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 88 89 FormatToken *setPosition(unsigned Position) override { 90 Token = PreviousTokenSource->setPosition(Position); 91 return Token; 92 } 93 94 private: 95 bool eof() { return Token && Token->HasUnescapedNewline; } 96 97 FormatToken *getFakeEOF() { 98 static bool EOFInitialized = false; 99 static FormatToken FormatTok; 100 if (!EOFInitialized) { 101 FormatTok.Tok.startToken(); 102 FormatTok.Tok.setKind(tok::eof); 103 EOFInitialized = true; 104 } 105 return &FormatTok; 106 } 107 108 UnwrappedLine &Line; 109 FormatTokenSource *&TokenSource; 110 FormatToken *&ResetToken; 111 unsigned PreviousLineLevel; 112 FormatTokenSource *PreviousTokenSource; 113 114 FormatToken *Token; 115 }; 116 117 } // end anonymous namespace 118 119 class ScopedLineState { 120 public: 121 ScopedLineState(UnwrappedLineParser &Parser, 122 bool SwitchToPreprocessorLines = false) 123 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 124 if (SwitchToPreprocessorLines) 125 Parser.CurrentLines = &Parser.PreprocessorDirectives; 126 else if (!Parser.Line->Tokens.empty()) 127 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 128 PreBlockLine = std::move(Parser.Line); 129 Parser.Line = llvm::make_unique<UnwrappedLine>(); 130 Parser.Line->Level = PreBlockLine->Level; 131 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 132 } 133 134 ~ScopedLineState() { 135 if (!Parser.Line->Tokens.empty()) { 136 Parser.addUnwrappedLine(); 137 } 138 assert(Parser.Line->Tokens.empty()); 139 Parser.Line = std::move(PreBlockLine); 140 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 141 Parser.MustBreakBeforeNextToken = true; 142 Parser.CurrentLines = OriginalLines; 143 } 144 145 private: 146 UnwrappedLineParser &Parser; 147 148 std::unique_ptr<UnwrappedLine> PreBlockLine; 149 SmallVectorImpl<UnwrappedLine> *OriginalLines; 150 }; 151 152 class CompoundStatementIndenter { 153 public: 154 CompoundStatementIndenter(UnwrappedLineParser *Parser, 155 const FormatStyle &Style, unsigned &LineLevel) 156 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 157 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) { 158 Parser->addUnwrappedLine(); 159 } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 160 Parser->addUnwrappedLine(); 161 ++LineLevel; 162 } 163 } 164 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 165 166 private: 167 unsigned &LineLevel; 168 unsigned OldLineLevel; 169 }; 170 171 namespace { 172 173 class IndexedTokenSource : public FormatTokenSource { 174 public: 175 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 176 : Tokens(Tokens), Position(-1) {} 177 178 FormatToken *getNextToken() override { 179 ++Position; 180 return Tokens[Position]; 181 } 182 183 unsigned getPosition() override { 184 assert(Position >= 0); 185 return Position; 186 } 187 188 FormatToken *setPosition(unsigned P) override { 189 Position = P; 190 return Tokens[Position]; 191 } 192 193 void reset() { Position = -1; } 194 195 private: 196 ArrayRef<FormatToken *> Tokens; 197 int Position; 198 }; 199 200 } // end anonymous namespace 201 202 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 203 const AdditionalKeywords &Keywords, 204 ArrayRef<FormatToken *> Tokens, 205 UnwrappedLineConsumer &Callback) 206 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 207 CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr), 208 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {} 209 210 void UnwrappedLineParser::reset() { 211 PPBranchLevel = -1; 212 Line.reset(new UnwrappedLine); 213 CommentsBeforeNextToken.clear(); 214 FormatTok = nullptr; 215 MustBreakBeforeNextToken = false; 216 PreprocessorDirectives.clear(); 217 CurrentLines = &Lines; 218 DeclarationScopeStack.clear(); 219 PPStack.clear(); 220 } 221 222 void UnwrappedLineParser::parse() { 223 IndexedTokenSource TokenSource(AllTokens); 224 do { 225 DEBUG(llvm::dbgs() << "----\n"); 226 reset(); 227 Tokens = &TokenSource; 228 TokenSource.reset(); 229 230 readToken(); 231 parseFile(); 232 // Create line with eof token. 233 pushToken(FormatTok); 234 addUnwrappedLine(); 235 236 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 237 E = Lines.end(); 238 I != E; ++I) { 239 Callback.consumeUnwrappedLine(*I); 240 } 241 Callback.finishRun(); 242 Lines.clear(); 243 while (!PPLevelBranchIndex.empty() && 244 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 245 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 246 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 247 } 248 if (!PPLevelBranchIndex.empty()) { 249 ++PPLevelBranchIndex.back(); 250 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 251 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 252 } 253 } while (!PPLevelBranchIndex.empty()); 254 255 } 256 257 void UnwrappedLineParser::parseFile() { 258 // The top-level context in a file always has declarations, except for pre- 259 // processor directives and JavaScript files. 260 bool MustBeDeclaration = 261 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript; 262 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 263 MustBeDeclaration); 264 parseLevel(/*HasOpeningBrace=*/false); 265 // Make sure to format the remaining tokens. 266 flushComments(true); 267 addUnwrappedLine(); 268 } 269 270 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 271 bool SwitchLabelEncountered = false; 272 do { 273 switch (FormatTok->Tok.getKind()) { 274 case tok::comment: 275 nextToken(); 276 addUnwrappedLine(); 277 break; 278 case tok::l_brace: 279 // FIXME: Add parameter whether this can happen - if this happens, we must 280 // be in a non-declaration context. 281 parseBlock(/*MustBeDeclaration=*/false); 282 addUnwrappedLine(); 283 break; 284 case tok::r_brace: 285 if (HasOpeningBrace) 286 return; 287 nextToken(); 288 addUnwrappedLine(); 289 break; 290 case tok::kw_default: 291 case tok::kw_case: 292 if (!SwitchLabelEncountered && 293 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 294 ++Line->Level; 295 SwitchLabelEncountered = true; 296 parseStructuralElement(); 297 break; 298 default: 299 parseStructuralElement(); 300 break; 301 } 302 } while (!eof()); 303 } 304 305 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) { 306 // We'll parse forward through the tokens until we hit 307 // a closing brace or eof - note that getNextToken() will 308 // parse macros, so this will magically work inside macro 309 // definitions, too. 310 unsigned StoredPosition = Tokens->getPosition(); 311 FormatToken *Tok = FormatTok; 312 // Keep a stack of positions of lbrace tokens. We will 313 // update information about whether an lbrace starts a 314 // braced init list or a different block during the loop. 315 SmallVector<FormatToken *, 8> LBraceStack; 316 assert(Tok->Tok.is(tok::l_brace)); 317 do { 318 // Get next none-comment token. 319 FormatToken *NextTok; 320 unsigned ReadTokens = 0; 321 do { 322 NextTok = Tokens->getNextToken(); 323 ++ReadTokens; 324 } while (NextTok->is(tok::comment)); 325 326 switch (Tok->Tok.getKind()) { 327 case tok::l_brace: 328 Tok->BlockKind = BK_Unknown; 329 LBraceStack.push_back(Tok); 330 break; 331 case tok::r_brace: 332 if (!LBraceStack.empty()) { 333 if (LBraceStack.back()->BlockKind == BK_Unknown) { 334 bool ProbablyBracedList = false; 335 if (Style.Language == FormatStyle::LK_Proto) { 336 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 337 } else { 338 // Using OriginalColumn to distinguish between ObjC methods and 339 // binary operators is a bit hacky. 340 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 341 NextTok->OriginalColumn == 0; 342 343 // If there is a comma, semicolon or right paren after the closing 344 // brace, we assume this is a braced initializer list. Note that 345 // regardless how we mark inner braces here, we will overwrite the 346 // BlockKind later if we parse a braced list (where all blocks 347 // inside are by default braced lists), or when we explicitly detect 348 // blocks (for example while parsing lambdas). 349 // 350 // We exclude + and - as they can be ObjC visibility modifiers. 351 ProbablyBracedList = 352 NextTok->isOneOf(tok::comma, tok::period, tok::colon, 353 tok::r_paren, tok::r_square, tok::l_brace, 354 tok::l_paren, tok::ellipsis) || 355 (NextTok->is(tok::semi) && 356 (!ExpectClassBody || LBraceStack.size() != 1)) || 357 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 358 } 359 if (ProbablyBracedList) { 360 Tok->BlockKind = BK_BracedInit; 361 LBraceStack.back()->BlockKind = BK_BracedInit; 362 } else { 363 Tok->BlockKind = BK_Block; 364 LBraceStack.back()->BlockKind = BK_Block; 365 } 366 } 367 LBraceStack.pop_back(); 368 } 369 break; 370 case tok::at: 371 case tok::semi: 372 case tok::kw_if: 373 case tok::kw_while: 374 case tok::kw_for: 375 case tok::kw_switch: 376 case tok::kw_try: 377 case tok::kw___try: 378 if (!LBraceStack.empty()) 379 LBraceStack.back()->BlockKind = BK_Block; 380 break; 381 default: 382 break; 383 } 384 Tok = NextTok; 385 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 386 // Assume other blocks for all unclosed opening braces. 387 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 388 if (LBraceStack[i]->BlockKind == BK_Unknown) 389 LBraceStack[i]->BlockKind = BK_Block; 390 } 391 392 FormatTok = Tokens->setPosition(StoredPosition); 393 } 394 395 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 396 bool MunchSemi) { 397 assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected"); 398 unsigned InitialLevel = Line->Level; 399 nextToken(); 400 401 addUnwrappedLine(); 402 403 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 404 MustBeDeclaration); 405 if (AddLevel) 406 ++Line->Level; 407 parseLevel(/*HasOpeningBrace=*/true); 408 409 if (!FormatTok->Tok.is(tok::r_brace)) { 410 Line->Level = InitialLevel; 411 return; 412 } 413 414 nextToken(); // Munch the closing brace. 415 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 416 nextToken(); 417 Line->Level = InitialLevel; 418 } 419 420 static bool isGoogScope(const UnwrappedLine &Line) { 421 // FIXME: Closure-library specific stuff should not be hard-coded but be 422 // configurable. 423 if (Line.Tokens.size() < 4) 424 return false; 425 auto I = Line.Tokens.begin(); 426 if (I->Tok->TokenText != "goog") 427 return false; 428 ++I; 429 if (I->Tok->isNot(tok::period)) 430 return false; 431 ++I; 432 if (I->Tok->TokenText != "scope") 433 return false; 434 ++I; 435 return I->Tok->is(tok::l_paren); 436 } 437 438 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 439 const FormatToken &InitialToken) { 440 switch (Style.BreakBeforeBraces) { 441 case FormatStyle::BS_Linux: 442 return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class); 443 case FormatStyle::BS_Allman: 444 case FormatStyle::BS_GNU: 445 return true; 446 default: 447 return false; 448 } 449 } 450 451 void UnwrappedLineParser::parseChildBlock() { 452 FormatTok->BlockKind = BK_Block; 453 nextToken(); 454 { 455 bool GoogScope = 456 Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line); 457 ScopedLineState LineState(*this); 458 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 459 /*MustBeDeclaration=*/false); 460 Line->Level += GoogScope ? 0 : 1; 461 parseLevel(/*HasOpeningBrace=*/true); 462 flushComments(isOnNewLine(*FormatTok)); 463 Line->Level -= GoogScope ? 0 : 1; 464 } 465 nextToken(); 466 } 467 468 void UnwrappedLineParser::parsePPDirective() { 469 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 470 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 471 nextToken(); 472 473 if (!FormatTok->Tok.getIdentifierInfo()) { 474 parsePPUnknown(); 475 return; 476 } 477 478 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 479 case tok::pp_define: 480 parsePPDefine(); 481 return; 482 case tok::pp_if: 483 parsePPIf(/*IfDef=*/false); 484 break; 485 case tok::pp_ifdef: 486 case tok::pp_ifndef: 487 parsePPIf(/*IfDef=*/true); 488 break; 489 case tok::pp_else: 490 parsePPElse(); 491 break; 492 case tok::pp_elif: 493 parsePPElIf(); 494 break; 495 case tok::pp_endif: 496 parsePPEndIf(); 497 break; 498 default: 499 parsePPUnknown(); 500 break; 501 } 502 } 503 504 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 505 if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) 506 PPStack.push_back(PP_Unreachable); 507 else 508 PPStack.push_back(PP_Conditional); 509 } 510 511 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 512 ++PPBranchLevel; 513 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 514 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 515 PPLevelBranchIndex.push_back(0); 516 PPLevelBranchCount.push_back(0); 517 } 518 PPChainBranchIndex.push(0); 519 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 520 conditionalCompilationCondition(Unreachable || Skip); 521 } 522 523 void UnwrappedLineParser::conditionalCompilationAlternative() { 524 if (!PPStack.empty()) 525 PPStack.pop_back(); 526 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 527 if (!PPChainBranchIndex.empty()) 528 ++PPChainBranchIndex.top(); 529 conditionalCompilationCondition( 530 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 531 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 532 } 533 534 void UnwrappedLineParser::conditionalCompilationEnd() { 535 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 536 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 537 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 538 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 539 } 540 } 541 // Guard against #endif's without #if. 542 if (PPBranchLevel > 0) 543 --PPBranchLevel; 544 if (!PPChainBranchIndex.empty()) 545 PPChainBranchIndex.pop(); 546 if (!PPStack.empty()) 547 PPStack.pop_back(); 548 } 549 550 void UnwrappedLineParser::parsePPIf(bool IfDef) { 551 nextToken(); 552 bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && 553 FormatTok->Tok.getLiteralData() != nullptr && 554 StringRef(FormatTok->Tok.getLiteralData(), 555 FormatTok->Tok.getLength()) == "0") || 556 FormatTok->Tok.is(tok::kw_false); 557 conditionalCompilationStart(!IfDef && IsLiteralFalse); 558 parsePPUnknown(); 559 } 560 561 void UnwrappedLineParser::parsePPElse() { 562 conditionalCompilationAlternative(); 563 parsePPUnknown(); 564 } 565 566 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 567 568 void UnwrappedLineParser::parsePPEndIf() { 569 conditionalCompilationEnd(); 570 parsePPUnknown(); 571 } 572 573 void UnwrappedLineParser::parsePPDefine() { 574 nextToken(); 575 576 if (FormatTok->Tok.getKind() != tok::identifier) { 577 parsePPUnknown(); 578 return; 579 } 580 nextToken(); 581 if (FormatTok->Tok.getKind() == tok::l_paren && 582 FormatTok->WhitespaceRange.getBegin() == 583 FormatTok->WhitespaceRange.getEnd()) { 584 parseParens(); 585 } 586 addUnwrappedLine(); 587 Line->Level = 1; 588 589 // Errors during a preprocessor directive can only affect the layout of the 590 // preprocessor directive, and thus we ignore them. An alternative approach 591 // would be to use the same approach we use on the file level (no 592 // re-indentation if there was a structural error) within the macro 593 // definition. 594 parseFile(); 595 } 596 597 void UnwrappedLineParser::parsePPUnknown() { 598 do { 599 nextToken(); 600 } while (!eof()); 601 addUnwrappedLine(); 602 } 603 604 // Here we blacklist certain tokens that are not usually the first token in an 605 // unwrapped line. This is used in attempt to distinguish macro calls without 606 // trailing semicolons from other constructs split to several lines. 607 static bool tokenCanStartNewLine(const clang::Token &Tok) { 608 // Semicolon can be a null-statement, l_square can be a start of a macro or 609 // a C++11 attribute, but this doesn't seem to be common. 610 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 611 Tok.isNot(tok::l_square) && 612 // Tokens that can only be used as binary operators and a part of 613 // overloaded operator names. 614 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 615 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 616 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 617 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 618 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 619 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 620 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 621 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 622 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 623 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 624 Tok.isNot(tok::lesslessequal) && 625 // Colon is used in labels, base class lists, initializer lists, 626 // range-based for loops, ternary operator, but should never be the 627 // first token in an unwrapped line. 628 Tok.isNot(tok::colon) && 629 // 'noexcept' is a trailing annotation. 630 Tok.isNot(tok::kw_noexcept); 631 } 632 633 void UnwrappedLineParser::parseStructuralElement() { 634 assert(!FormatTok->Tok.is(tok::l_brace)); 635 switch (FormatTok->Tok.getKind()) { 636 case tok::at: 637 nextToken(); 638 if (FormatTok->Tok.is(tok::l_brace)) { 639 parseBracedList(); 640 break; 641 } 642 switch (FormatTok->Tok.getObjCKeywordID()) { 643 case tok::objc_public: 644 case tok::objc_protected: 645 case tok::objc_package: 646 case tok::objc_private: 647 return parseAccessSpecifier(); 648 case tok::objc_interface: 649 case tok::objc_implementation: 650 return parseObjCInterfaceOrImplementation(); 651 case tok::objc_protocol: 652 return parseObjCProtocol(); 653 case tok::objc_end: 654 return; // Handled by the caller. 655 case tok::objc_optional: 656 case tok::objc_required: 657 nextToken(); 658 addUnwrappedLine(); 659 return; 660 case tok::objc_try: 661 // This branch isn't strictly necessary (the kw_try case below would 662 // do this too after the tok::at is parsed above). But be explicit. 663 parseTryCatch(); 664 return; 665 default: 666 break; 667 } 668 break; 669 case tok::kw_asm: 670 nextToken(); 671 if (FormatTok->is(tok::l_brace)) { 672 FormatTok->Type = TT_InlineASMBrace; 673 nextToken(); 674 while (FormatTok && FormatTok->isNot(tok::eof)) { 675 if (FormatTok->is(tok::r_brace)) { 676 FormatTok->Type = TT_InlineASMBrace; 677 nextToken(); 678 addUnwrappedLine(); 679 break; 680 } 681 FormatTok->Finalized = true; 682 nextToken(); 683 } 684 } 685 break; 686 case tok::kw_namespace: 687 parseNamespace(); 688 return; 689 case tok::kw_inline: 690 nextToken(); 691 if (FormatTok->Tok.is(tok::kw_namespace)) { 692 parseNamespace(); 693 return; 694 } 695 break; 696 case tok::kw_public: 697 case tok::kw_protected: 698 case tok::kw_private: 699 if (Style.Language == FormatStyle::LK_Java || 700 Style.Language == FormatStyle::LK_JavaScript) 701 nextToken(); 702 else 703 parseAccessSpecifier(); 704 return; 705 case tok::kw_if: 706 parseIfThenElse(); 707 return; 708 case tok::kw_for: 709 case tok::kw_while: 710 parseForOrWhileLoop(); 711 return; 712 case tok::kw_do: 713 parseDoWhile(); 714 return; 715 case tok::kw_switch: 716 parseSwitch(); 717 return; 718 case tok::kw_default: 719 nextToken(); 720 parseLabel(); 721 return; 722 case tok::kw_case: 723 parseCaseLabel(); 724 return; 725 case tok::kw_try: 726 case tok::kw___try: 727 parseTryCatch(); 728 return; 729 case tok::kw_extern: 730 nextToken(); 731 if (FormatTok->Tok.is(tok::string_literal)) { 732 nextToken(); 733 if (FormatTok->Tok.is(tok::l_brace)) { 734 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 735 addUnwrappedLine(); 736 return; 737 } 738 } 739 break; 740 case tok::kw_export: 741 if (Style.Language == FormatStyle::LK_JavaScript) { 742 parseJavaScriptEs6ImportExport(); 743 return; 744 } 745 break; 746 case tok::identifier: 747 if (FormatTok->is(TT_ForEachMacro)) { 748 parseForOrWhileLoop(); 749 return; 750 } 751 if (Style.Language == FormatStyle::LK_JavaScript && 752 FormatTok->is(Keywords.kw_import)) { 753 parseJavaScriptEs6ImportExport(); 754 return; 755 } 756 if (FormatTok->is(Keywords.kw_signals)) { 757 nextToken(); 758 if (FormatTok->is(tok::colon)) { 759 nextToken(); 760 addUnwrappedLine(); 761 } 762 return; 763 } 764 // In all other cases, parse the declaration. 765 break; 766 default: 767 break; 768 } 769 do { 770 switch (FormatTok->Tok.getKind()) { 771 case tok::at: 772 nextToken(); 773 if (FormatTok->Tok.is(tok::l_brace)) 774 parseBracedList(); 775 break; 776 case tok::kw_enum: 777 parseEnum(); 778 break; 779 case tok::kw_typedef: 780 nextToken(); 781 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 782 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 783 parseEnum(); 784 break; 785 case tok::kw_struct: 786 case tok::kw_union: 787 case tok::kw_class: 788 parseRecord(); 789 // A record declaration or definition is always the start of a structural 790 // element. 791 break; 792 case tok::period: 793 nextToken(); 794 // In Java, classes have an implicit static member "class". 795 if (Style.Language == FormatStyle::LK_Java && FormatTok && 796 FormatTok->is(tok::kw_class)) 797 nextToken(); 798 break; 799 case tok::semi: 800 nextToken(); 801 addUnwrappedLine(); 802 return; 803 case tok::r_brace: 804 addUnwrappedLine(); 805 return; 806 case tok::l_paren: 807 parseParens(); 808 break; 809 case tok::caret: 810 nextToken(); 811 if (FormatTok->Tok.isAnyIdentifier() || 812 FormatTok->isSimpleTypeSpecifier()) 813 nextToken(); 814 if (FormatTok->is(tok::l_paren)) 815 parseParens(); 816 if (FormatTok->is(tok::l_brace)) 817 parseChildBlock(); 818 break; 819 case tok::l_brace: 820 if (!tryToParseBracedList()) { 821 // A block outside of parentheses must be the last part of a 822 // structural element. 823 // FIXME: Figure out cases where this is not true, and add projections 824 // for them (the one we know is missing are lambdas). 825 if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) 826 addUnwrappedLine(); 827 FormatTok->Type = TT_FunctionLBrace; 828 parseBlock(/*MustBeDeclaration=*/false); 829 addUnwrappedLine(); 830 return; 831 } 832 // Otherwise this was a braced init list, and the structural 833 // element continues. 834 break; 835 case tok::kw_try: 836 // We arrive here when parsing function-try blocks. 837 parseTryCatch(); 838 return; 839 case tok::identifier: { 840 // Parse function literal unless 'function' is the first token in a line 841 // in which case this should be treated as a free-standing function. 842 if (Style.Language == FormatStyle::LK_JavaScript && 843 FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) { 844 tryToParseJSFunction(); 845 break; 846 } 847 if ((Style.Language == FormatStyle::LK_JavaScript || 848 Style.Language == FormatStyle::LK_Java) && 849 FormatTok->is(Keywords.kw_interface)) { 850 parseRecord(); 851 break; 852 } 853 854 StringRef Text = FormatTok->TokenText; 855 nextToken(); 856 if (Line->Tokens.size() == 1 && 857 // JS doesn't have macros, and within classes colons indicate fields, 858 // not labels. 859 Style.Language != FormatStyle::LK_JavaScript) { 860 if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) { 861 parseLabel(); 862 return; 863 } 864 // Recognize function-like macro usages without trailing semicolon as 865 // well as free-standing macros like Q_OBJECT. 866 bool FunctionLike = FormatTok->is(tok::l_paren); 867 if (FunctionLike) 868 parseParens(); 869 870 bool FollowedByNewline = 871 CommentsBeforeNextToken.empty() 872 ? FormatTok->NewlinesBefore > 0 873 : CommentsBeforeNextToken.front()->NewlinesBefore > 0; 874 875 if (FollowedByNewline && 876 (Text.size() >= 5 || FunctionLike) && 877 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 878 addUnwrappedLine(); 879 return; 880 } 881 } 882 break; 883 } 884 case tok::equal: 885 nextToken(); 886 if (FormatTok->Tok.is(tok::l_brace)) { 887 parseBracedList(); 888 } 889 break; 890 case tok::l_square: 891 parseSquare(); 892 break; 893 case tok::kw_new: 894 parseNew(); 895 break; 896 default: 897 nextToken(); 898 break; 899 } 900 } while (!eof()); 901 } 902 903 bool UnwrappedLineParser::tryToParseLambda() { 904 // FIXME: This is a dirty way to access the previous token. Find a better 905 // solution. 906 if (!Line->Tokens.empty() && 907 (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator, 908 tok::kw_new, tok::kw_delete) || 909 Line->Tokens.back().Tok->closesScope() || 910 Line->Tokens.back().Tok->isSimpleTypeSpecifier())) { 911 nextToken(); 912 return false; 913 } 914 assert(FormatTok->is(tok::l_square)); 915 FormatToken &LSquare = *FormatTok; 916 if (!tryToParseLambdaIntroducer()) 917 return false; 918 919 while (FormatTok->isNot(tok::l_brace)) { 920 if (FormatTok->isSimpleTypeSpecifier()) { 921 nextToken(); 922 continue; 923 } 924 switch (FormatTok->Tok.getKind()) { 925 case tok::l_brace: 926 break; 927 case tok::l_paren: 928 parseParens(); 929 break; 930 case tok::amp: 931 case tok::star: 932 case tok::kw_const: 933 case tok::comma: 934 case tok::less: 935 case tok::greater: 936 case tok::identifier: 937 case tok::coloncolon: 938 case tok::kw_mutable: 939 nextToken(); 940 break; 941 case tok::arrow: 942 FormatTok->Type = TT_TrailingReturnArrow; 943 nextToken(); 944 break; 945 default: 946 return true; 947 } 948 } 949 LSquare.Type = TT_LambdaLSquare; 950 parseChildBlock(); 951 return true; 952 } 953 954 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 955 nextToken(); 956 if (FormatTok->is(tok::equal)) { 957 nextToken(); 958 if (FormatTok->is(tok::r_square)) { 959 nextToken(); 960 return true; 961 } 962 if (FormatTok->isNot(tok::comma)) 963 return false; 964 nextToken(); 965 } else if (FormatTok->is(tok::amp)) { 966 nextToken(); 967 if (FormatTok->is(tok::r_square)) { 968 nextToken(); 969 return true; 970 } 971 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { 972 return false; 973 } 974 if (FormatTok->is(tok::comma)) 975 nextToken(); 976 } else if (FormatTok->is(tok::r_square)) { 977 nextToken(); 978 return true; 979 } 980 do { 981 if (FormatTok->is(tok::amp)) 982 nextToken(); 983 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) 984 return false; 985 nextToken(); 986 if (FormatTok->is(tok::ellipsis)) 987 nextToken(); 988 if (FormatTok->is(tok::comma)) { 989 nextToken(); 990 } else if (FormatTok->is(tok::r_square)) { 991 nextToken(); 992 return true; 993 } else { 994 return false; 995 } 996 } while (!eof()); 997 return false; 998 } 999 1000 void UnwrappedLineParser::tryToParseJSFunction() { 1001 nextToken(); 1002 1003 // Consume function name. 1004 if (FormatTok->is(tok::identifier)) 1005 nextToken(); 1006 1007 if (FormatTok->isNot(tok::l_paren)) 1008 return; 1009 nextToken(); 1010 while (FormatTok->isNot(tok::l_brace)) { 1011 // Err on the side of caution in order to avoid consuming the full file in 1012 // case of incomplete code. 1013 if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren, 1014 tok::comment)) 1015 return; 1016 nextToken(); 1017 } 1018 parseChildBlock(); 1019 } 1020 1021 bool UnwrappedLineParser::tryToParseBracedList() { 1022 if (FormatTok->BlockKind == BK_Unknown) 1023 calculateBraceTypes(); 1024 assert(FormatTok->BlockKind != BK_Unknown); 1025 if (FormatTok->BlockKind == BK_Block) 1026 return false; 1027 parseBracedList(); 1028 return true; 1029 } 1030 1031 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { 1032 bool HasError = false; 1033 nextToken(); 1034 1035 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1036 // replace this by using parseAssigmentExpression() inside. 1037 do { 1038 if (Style.Language == FormatStyle::LK_JavaScript && 1039 FormatTok->is(Keywords.kw_function)) { 1040 tryToParseJSFunction(); 1041 continue; 1042 } 1043 switch (FormatTok->Tok.getKind()) { 1044 case tok::caret: 1045 nextToken(); 1046 if (FormatTok->is(tok::l_brace)) { 1047 parseChildBlock(); 1048 } 1049 break; 1050 case tok::l_square: 1051 tryToParseLambda(); 1052 break; 1053 case tok::l_brace: 1054 // Assume there are no blocks inside a braced init list apart 1055 // from the ones we explicitly parse out (like lambdas). 1056 FormatTok->BlockKind = BK_BracedInit; 1057 parseBracedList(); 1058 break; 1059 case tok::r_paren: 1060 // JavaScript can just have free standing methods and getters/setters in 1061 // object literals. Detect them by a "{" following ")". 1062 if (Style.Language == FormatStyle::LK_JavaScript) { 1063 nextToken(); 1064 if (FormatTok->is(tok::l_brace)) 1065 parseChildBlock(); 1066 break; 1067 } 1068 nextToken(); 1069 break; 1070 case tok::r_brace: 1071 nextToken(); 1072 return !HasError; 1073 case tok::semi: 1074 HasError = true; 1075 if (!ContinueOnSemicolons) 1076 return !HasError; 1077 nextToken(); 1078 break; 1079 case tok::comma: 1080 nextToken(); 1081 break; 1082 default: 1083 nextToken(); 1084 break; 1085 } 1086 } while (!eof()); 1087 return false; 1088 } 1089 1090 void UnwrappedLineParser::parseParens() { 1091 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1092 nextToken(); 1093 do { 1094 switch (FormatTok->Tok.getKind()) { 1095 case tok::l_paren: 1096 parseParens(); 1097 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1098 parseChildBlock(); 1099 break; 1100 case tok::r_paren: 1101 nextToken(); 1102 return; 1103 case tok::r_brace: 1104 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1105 return; 1106 case tok::l_square: 1107 tryToParseLambda(); 1108 break; 1109 case tok::l_brace: 1110 if (!tryToParseBracedList()) 1111 parseChildBlock(); 1112 break; 1113 case tok::at: 1114 nextToken(); 1115 if (FormatTok->Tok.is(tok::l_brace)) 1116 parseBracedList(); 1117 break; 1118 case tok::identifier: 1119 if (Style.Language == FormatStyle::LK_JavaScript && 1120 FormatTok->is(Keywords.kw_function)) 1121 tryToParseJSFunction(); 1122 else 1123 nextToken(); 1124 break; 1125 default: 1126 nextToken(); 1127 break; 1128 } 1129 } while (!eof()); 1130 } 1131 1132 void UnwrappedLineParser::parseSquare() { 1133 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1134 if (tryToParseLambda()) 1135 return; 1136 do { 1137 switch (FormatTok->Tok.getKind()) { 1138 case tok::l_paren: 1139 parseParens(); 1140 break; 1141 case tok::r_square: 1142 nextToken(); 1143 return; 1144 case tok::r_brace: 1145 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1146 return; 1147 case tok::l_square: 1148 parseSquare(); 1149 break; 1150 case tok::l_brace: { 1151 if (!tryToParseBracedList()) 1152 parseChildBlock(); 1153 break; 1154 } 1155 case tok::at: 1156 nextToken(); 1157 if (FormatTok->Tok.is(tok::l_brace)) 1158 parseBracedList(); 1159 break; 1160 default: 1161 nextToken(); 1162 break; 1163 } 1164 } while (!eof()); 1165 } 1166 1167 void UnwrappedLineParser::parseIfThenElse() { 1168 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1169 nextToken(); 1170 if (FormatTok->Tok.is(tok::l_paren)) 1171 parseParens(); 1172 bool NeedsUnwrappedLine = false; 1173 if (FormatTok->Tok.is(tok::l_brace)) { 1174 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1175 parseBlock(/*MustBeDeclaration=*/false); 1176 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1177 Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 1178 addUnwrappedLine(); 1179 } else { 1180 NeedsUnwrappedLine = true; 1181 } 1182 } else { 1183 addUnwrappedLine(); 1184 ++Line->Level; 1185 parseStructuralElement(); 1186 --Line->Level; 1187 } 1188 if (FormatTok->Tok.is(tok::kw_else)) { 1189 if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) 1190 addUnwrappedLine(); 1191 nextToken(); 1192 if (FormatTok->Tok.is(tok::l_brace)) { 1193 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1194 parseBlock(/*MustBeDeclaration=*/false); 1195 addUnwrappedLine(); 1196 } else if (FormatTok->Tok.is(tok::kw_if)) { 1197 parseIfThenElse(); 1198 } else { 1199 addUnwrappedLine(); 1200 ++Line->Level; 1201 parseStructuralElement(); 1202 --Line->Level; 1203 } 1204 } else if (NeedsUnwrappedLine) { 1205 addUnwrappedLine(); 1206 } 1207 } 1208 1209 void UnwrappedLineParser::parseTryCatch() { 1210 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1211 nextToken(); 1212 bool NeedsUnwrappedLine = false; 1213 if (FormatTok->is(tok::colon)) { 1214 // We are in a function try block, what comes is an initializer list. 1215 nextToken(); 1216 while (FormatTok->is(tok::identifier)) { 1217 nextToken(); 1218 if (FormatTok->is(tok::l_paren)) 1219 parseParens(); 1220 if (FormatTok->is(tok::comma)) 1221 nextToken(); 1222 } 1223 } 1224 // Parse try with resource. 1225 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1226 parseParens(); 1227 } 1228 if (FormatTok->is(tok::l_brace)) { 1229 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1230 parseBlock(/*MustBeDeclaration=*/false); 1231 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1232 Style.BreakBeforeBraces == FormatStyle::BS_GNU || 1233 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { 1234 addUnwrappedLine(); 1235 } else { 1236 NeedsUnwrappedLine = true; 1237 } 1238 } else if (!FormatTok->is(tok::kw_catch)) { 1239 // The C++ standard requires a compound-statement after a try. 1240 // If there's none, we try to assume there's a structuralElement 1241 // and try to continue. 1242 addUnwrappedLine(); 1243 ++Line->Level; 1244 parseStructuralElement(); 1245 --Line->Level; 1246 } 1247 while (1) { 1248 if (FormatTok->is(tok::at)) 1249 nextToken(); 1250 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1251 tok::kw___finally) || 1252 ((Style.Language == FormatStyle::LK_Java || 1253 Style.Language == FormatStyle::LK_JavaScript) && 1254 FormatTok->is(Keywords.kw_finally)) || 1255 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1256 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1257 break; 1258 nextToken(); 1259 while (FormatTok->isNot(tok::l_brace)) { 1260 if (FormatTok->is(tok::l_paren)) { 1261 parseParens(); 1262 continue; 1263 } 1264 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1265 return; 1266 nextToken(); 1267 } 1268 NeedsUnwrappedLine = false; 1269 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1270 parseBlock(/*MustBeDeclaration=*/false); 1271 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1272 Style.BreakBeforeBraces == FormatStyle::BS_GNU || 1273 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { 1274 addUnwrappedLine(); 1275 } else { 1276 NeedsUnwrappedLine = true; 1277 } 1278 } 1279 if (NeedsUnwrappedLine) { 1280 addUnwrappedLine(); 1281 } 1282 } 1283 1284 void UnwrappedLineParser::parseNamespace() { 1285 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1286 1287 const FormatToken &InitialToken = *FormatTok; 1288 nextToken(); 1289 if (FormatTok->Tok.is(tok::identifier)) 1290 nextToken(); 1291 if (FormatTok->Tok.is(tok::l_brace)) { 1292 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1293 addUnwrappedLine(); 1294 1295 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1296 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1297 DeclarationScopeStack.size() > 1); 1298 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1299 // Munch the semicolon after a namespace. This is more common than one would 1300 // think. Puttin the semicolon into its own line is very ugly. 1301 if (FormatTok->Tok.is(tok::semi)) 1302 nextToken(); 1303 addUnwrappedLine(); 1304 } 1305 // FIXME: Add error handling. 1306 } 1307 1308 void UnwrappedLineParser::parseNew() { 1309 assert(FormatTok->is(tok::kw_new) && "'new' expected"); 1310 nextToken(); 1311 if (Style.Language != FormatStyle::LK_Java) 1312 return; 1313 1314 // In Java, we can parse everything up to the parens, which aren't optional. 1315 do { 1316 // There should not be a ;, { or } before the new's open paren. 1317 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace)) 1318 return; 1319 1320 // Consume the parens. 1321 if (FormatTok->is(tok::l_paren)) { 1322 parseParens(); 1323 1324 // If there is a class body of an anonymous class, consume that as child. 1325 if (FormatTok->is(tok::l_brace)) 1326 parseChildBlock(); 1327 return; 1328 } 1329 nextToken(); 1330 } while (!eof()); 1331 } 1332 1333 void UnwrappedLineParser::parseForOrWhileLoop() { 1334 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) && 1335 "'for', 'while' or foreach macro expected"); 1336 nextToken(); 1337 if (FormatTok->Tok.is(tok::l_paren)) 1338 parseParens(); 1339 if (FormatTok->Tok.is(tok::l_brace)) { 1340 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1341 parseBlock(/*MustBeDeclaration=*/false); 1342 addUnwrappedLine(); 1343 } else { 1344 addUnwrappedLine(); 1345 ++Line->Level; 1346 parseStructuralElement(); 1347 --Line->Level; 1348 } 1349 } 1350 1351 void UnwrappedLineParser::parseDoWhile() { 1352 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1353 nextToken(); 1354 if (FormatTok->Tok.is(tok::l_brace)) { 1355 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1356 parseBlock(/*MustBeDeclaration=*/false); 1357 if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) 1358 addUnwrappedLine(); 1359 } else { 1360 addUnwrappedLine(); 1361 ++Line->Level; 1362 parseStructuralElement(); 1363 --Line->Level; 1364 } 1365 1366 // FIXME: Add error handling. 1367 if (!FormatTok->Tok.is(tok::kw_while)) { 1368 addUnwrappedLine(); 1369 return; 1370 } 1371 1372 nextToken(); 1373 parseStructuralElement(); 1374 } 1375 1376 void UnwrappedLineParser::parseLabel() { 1377 nextToken(); 1378 unsigned OldLineLevel = Line->Level; 1379 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1380 --Line->Level; 1381 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1382 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1383 parseBlock(/*MustBeDeclaration=*/false); 1384 if (FormatTok->Tok.is(tok::kw_break)) { 1385 // "break;" after "}" on its own line only for BS_Allman and BS_GNU 1386 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1387 Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 1388 addUnwrappedLine(); 1389 } 1390 parseStructuralElement(); 1391 } 1392 addUnwrappedLine(); 1393 } else { 1394 if (FormatTok->is(tok::semi)) 1395 nextToken(); 1396 addUnwrappedLine(); 1397 } 1398 Line->Level = OldLineLevel; 1399 } 1400 1401 void UnwrappedLineParser::parseCaseLabel() { 1402 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1403 // FIXME: fix handling of complex expressions here. 1404 do { 1405 nextToken(); 1406 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1407 parseLabel(); 1408 } 1409 1410 void UnwrappedLineParser::parseSwitch() { 1411 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1412 nextToken(); 1413 if (FormatTok->Tok.is(tok::l_paren)) 1414 parseParens(); 1415 if (FormatTok->Tok.is(tok::l_brace)) { 1416 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1417 parseBlock(/*MustBeDeclaration=*/false); 1418 addUnwrappedLine(); 1419 } else { 1420 addUnwrappedLine(); 1421 ++Line->Level; 1422 parseStructuralElement(); 1423 --Line->Level; 1424 } 1425 } 1426 1427 void UnwrappedLineParser::parseAccessSpecifier() { 1428 nextToken(); 1429 // Understand Qt's slots. 1430 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots)) 1431 nextToken(); 1432 // Otherwise, we don't know what it is, and we'd better keep the next token. 1433 if (FormatTok->Tok.is(tok::colon)) 1434 nextToken(); 1435 addUnwrappedLine(); 1436 } 1437 1438 void UnwrappedLineParser::parseEnum() { 1439 // Won't be 'enum' for NS_ENUMs. 1440 if (FormatTok->Tok.is(tok::kw_enum)) 1441 nextToken(); 1442 1443 // Eat up enum class ... 1444 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1445 nextToken(); 1446 while (FormatTok->Tok.getIdentifierInfo() || 1447 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1448 tok::greater, tok::comma, tok::question)) { 1449 nextToken(); 1450 // We can have macros or attributes in between 'enum' and the enum name. 1451 if (FormatTok->is(tok::l_paren)) 1452 parseParens(); 1453 if (FormatTok->is(tok::identifier)) 1454 nextToken(); 1455 } 1456 1457 // Just a declaration or something is wrong. 1458 if (FormatTok->isNot(tok::l_brace)) 1459 return; 1460 FormatTok->BlockKind = BK_Block; 1461 1462 if (Style.Language == FormatStyle::LK_Java) { 1463 // Java enums are different. 1464 parseJavaEnumBody(); 1465 return; 1466 } 1467 1468 // Parse enum body. 1469 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1470 if (HasError) { 1471 if (FormatTok->is(tok::semi)) 1472 nextToken(); 1473 addUnwrappedLine(); 1474 } 1475 1476 // We fall through to parsing a structural element afterwards, so that in 1477 // enum A {} n, m; 1478 // "} n, m;" will end up in one unwrapped line. 1479 } 1480 1481 void UnwrappedLineParser::parseJavaEnumBody() { 1482 // Determine whether the enum is simple, i.e. does not have a semicolon or 1483 // constants with class bodies. Simple enums can be formatted like braced 1484 // lists, contracted to a single line, etc. 1485 unsigned StoredPosition = Tokens->getPosition(); 1486 bool IsSimple = true; 1487 FormatToken *Tok = Tokens->getNextToken(); 1488 while (Tok) { 1489 if (Tok->is(tok::r_brace)) 1490 break; 1491 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1492 IsSimple = false; 1493 break; 1494 } 1495 // FIXME: This will also mark enums with braces in the arguments to enum 1496 // constants as "not simple". This is probably fine in practice, though. 1497 Tok = Tokens->getNextToken(); 1498 } 1499 FormatTok = Tokens->setPosition(StoredPosition); 1500 1501 if (IsSimple) { 1502 parseBracedList(); 1503 addUnwrappedLine(); 1504 return; 1505 } 1506 1507 // Parse the body of a more complex enum. 1508 // First add a line for everything up to the "{". 1509 nextToken(); 1510 addUnwrappedLine(); 1511 ++Line->Level; 1512 1513 // Parse the enum constants. 1514 while (FormatTok) { 1515 if (FormatTok->is(tok::l_brace)) { 1516 // Parse the constant's class body. 1517 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1518 /*MunchSemi=*/false); 1519 } else if (FormatTok->is(tok::l_paren)) { 1520 parseParens(); 1521 } else if (FormatTok->is(tok::comma)) { 1522 nextToken(); 1523 addUnwrappedLine(); 1524 } else if (FormatTok->is(tok::semi)) { 1525 nextToken(); 1526 addUnwrappedLine(); 1527 break; 1528 } else if (FormatTok->is(tok::r_brace)) { 1529 addUnwrappedLine(); 1530 break; 1531 } else { 1532 nextToken(); 1533 } 1534 } 1535 1536 // Parse the class body after the enum's ";" if any. 1537 parseLevel(/*HasOpeningBrace=*/true); 1538 nextToken(); 1539 --Line->Level; 1540 addUnwrappedLine(); 1541 } 1542 1543 void UnwrappedLineParser::parseRecord() { 1544 const FormatToken &InitialToken = *FormatTok; 1545 nextToken(); 1546 1547 1548 // The actual identifier can be a nested name specifier, and in macros 1549 // it is often token-pasted. 1550 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash, 1551 tok::kw___attribute, tok::kw___declspec, 1552 tok::kw_alignas) || 1553 ((Style.Language == FormatStyle::LK_Java || 1554 Style.Language == FormatStyle::LK_JavaScript) && 1555 FormatTok->isOneOf(tok::period, tok::comma))) { 1556 bool IsNonMacroIdentifier = 1557 FormatTok->is(tok::identifier) && 1558 FormatTok->TokenText != FormatTok->TokenText.upper(); 1559 nextToken(); 1560 // We can have macros or attributes in between 'class' and the class name. 1561 if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren)) 1562 parseParens(); 1563 } 1564 1565 // Note that parsing away template declarations here leads to incorrectly 1566 // accepting function declarations as record declarations. 1567 // In general, we cannot solve this problem. Consider: 1568 // class A<int> B() {} 1569 // which can be a function definition or a class definition when B() is a 1570 // macro. If we find enough real-world cases where this is a problem, we 1571 // can parse for the 'template' keyword in the beginning of the statement, 1572 // and thus rule out the record production in case there is no template 1573 // (this would still leave us with an ambiguity between template function 1574 // and class declarations). 1575 if (FormatTok->isOneOf(tok::colon, tok::less)) { 1576 while (!eof()) { 1577 if (FormatTok->is(tok::l_brace)) { 1578 calculateBraceTypes(/*ExpectClassBody=*/true); 1579 if (!tryToParseBracedList()) 1580 break; 1581 } 1582 if (FormatTok->Tok.is(tok::semi)) 1583 return; 1584 nextToken(); 1585 } 1586 } 1587 if (FormatTok->Tok.is(tok::l_brace)) { 1588 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1589 addUnwrappedLine(); 1590 1591 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1592 /*MunchSemi=*/false); 1593 } 1594 // We fall through to parsing a structural element afterwards, so 1595 // class A {} n, m; 1596 // will end up in one unwrapped line. 1597 // This does not apply for Java and JavaScript. 1598 if (Style.Language == FormatStyle::LK_Java || 1599 Style.Language == FormatStyle::LK_JavaScript) 1600 addUnwrappedLine(); 1601 } 1602 1603 void UnwrappedLineParser::parseObjCProtocolList() { 1604 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 1605 do 1606 nextToken(); 1607 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 1608 nextToken(); // Skip '>'. 1609 } 1610 1611 void UnwrappedLineParser::parseObjCUntilAtEnd() { 1612 do { 1613 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 1614 nextToken(); 1615 addUnwrappedLine(); 1616 break; 1617 } 1618 if (FormatTok->is(tok::l_brace)) { 1619 parseBlock(/*MustBeDeclaration=*/false); 1620 // In ObjC interfaces, nothing should be following the "}". 1621 addUnwrappedLine(); 1622 } else if (FormatTok->is(tok::r_brace)) { 1623 // Ignore stray "}". parseStructuralElement doesn't consume them. 1624 nextToken(); 1625 addUnwrappedLine(); 1626 } else { 1627 parseStructuralElement(); 1628 } 1629 } while (!eof()); 1630 } 1631 1632 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 1633 nextToken(); 1634 nextToken(); // interface name 1635 1636 // @interface can be followed by either a base class, or a category. 1637 if (FormatTok->Tok.is(tok::colon)) { 1638 nextToken(); 1639 nextToken(); // base class name 1640 } else if (FormatTok->Tok.is(tok::l_paren)) 1641 // Skip category, if present. 1642 parseParens(); 1643 1644 if (FormatTok->Tok.is(tok::less)) 1645 parseObjCProtocolList(); 1646 1647 if (FormatTok->Tok.is(tok::l_brace)) { 1648 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1649 Style.BreakBeforeBraces == FormatStyle::BS_GNU) 1650 addUnwrappedLine(); 1651 parseBlock(/*MustBeDeclaration=*/true); 1652 } 1653 1654 // With instance variables, this puts '}' on its own line. Without instance 1655 // variables, this ends the @interface line. 1656 addUnwrappedLine(); 1657 1658 parseObjCUntilAtEnd(); 1659 } 1660 1661 void UnwrappedLineParser::parseObjCProtocol() { 1662 nextToken(); 1663 nextToken(); // protocol name 1664 1665 if (FormatTok->Tok.is(tok::less)) 1666 parseObjCProtocolList(); 1667 1668 // Check for protocol declaration. 1669 if (FormatTok->Tok.is(tok::semi)) { 1670 nextToken(); 1671 return addUnwrappedLine(); 1672 } 1673 1674 addUnwrappedLine(); 1675 parseObjCUntilAtEnd(); 1676 } 1677 1678 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 1679 assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export)); 1680 nextToken(); 1681 1682 // Consume the "default" in "export default class/function". 1683 if (FormatTok->is(tok::kw_default)) 1684 nextToken(); 1685 1686 // Consume "function" and "default function", so that these get parsed as 1687 // free-standing JS functions, i.e. do not require a trailing semicolon. 1688 if (FormatTok->is(Keywords.kw_function)) { 1689 nextToken(); 1690 return; 1691 } 1692 1693 if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, Keywords.kw_var)) 1694 return; // Fall through to parsing the corresponding structure. 1695 1696 if (FormatTok->is(tok::l_brace)) { 1697 FormatTok->BlockKind = BK_Block; 1698 parseBracedList(); 1699 } 1700 1701 while (!eof() && FormatTok->isNot(tok::semi) && 1702 FormatTok->isNot(tok::l_brace)) { 1703 nextToken(); 1704 } 1705 } 1706 1707 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 1708 StringRef Prefix = "") { 1709 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" 1710 << (Line.InPPDirective ? " MACRO" : "") << ": "; 1711 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 1712 E = Line.Tokens.end(); 1713 I != E; ++I) { 1714 llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] "; 1715 } 1716 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 1717 E = Line.Tokens.end(); 1718 I != E; ++I) { 1719 const UnwrappedLineNode &Node = *I; 1720 for (SmallVectorImpl<UnwrappedLine>::const_iterator 1721 I = Node.Children.begin(), 1722 E = Node.Children.end(); 1723 I != E; ++I) { 1724 printDebugInfo(*I, "\nChild: "); 1725 } 1726 } 1727 llvm::dbgs() << "\n"; 1728 } 1729 1730 void UnwrappedLineParser::addUnwrappedLine() { 1731 if (Line->Tokens.empty()) 1732 return; 1733 DEBUG({ 1734 if (CurrentLines == &Lines) 1735 printDebugInfo(*Line); 1736 }); 1737 CurrentLines->push_back(*Line); 1738 Line->Tokens.clear(); 1739 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 1740 for (SmallVectorImpl<UnwrappedLine>::iterator 1741 I = PreprocessorDirectives.begin(), 1742 E = PreprocessorDirectives.end(); 1743 I != E; ++I) { 1744 CurrentLines->push_back(*I); 1745 } 1746 PreprocessorDirectives.clear(); 1747 } 1748 } 1749 1750 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 1751 1752 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 1753 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 1754 FormatTok.NewlinesBefore > 0; 1755 } 1756 1757 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 1758 bool JustComments = Line->Tokens.empty(); 1759 for (SmallVectorImpl<FormatToken *>::const_iterator 1760 I = CommentsBeforeNextToken.begin(), 1761 E = CommentsBeforeNextToken.end(); 1762 I != E; ++I) { 1763 if (isOnNewLine(**I) && JustComments) 1764 addUnwrappedLine(); 1765 pushToken(*I); 1766 } 1767 if (NewlineBeforeNext && JustComments) 1768 addUnwrappedLine(); 1769 CommentsBeforeNextToken.clear(); 1770 } 1771 1772 void UnwrappedLineParser::nextToken() { 1773 if (eof()) 1774 return; 1775 flushComments(isOnNewLine(*FormatTok)); 1776 pushToken(FormatTok); 1777 readToken(); 1778 } 1779 1780 void UnwrappedLineParser::readToken() { 1781 bool CommentsInCurrentLine = true; 1782 do { 1783 FormatTok = Tokens->getNextToken(); 1784 assert(FormatTok); 1785 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 1786 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 1787 // If there is an unfinished unwrapped line, we flush the preprocessor 1788 // directives only after that unwrapped line was finished later. 1789 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 1790 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 1791 // Comments stored before the preprocessor directive need to be output 1792 // before the preprocessor directive, at the same level as the 1793 // preprocessor directive, as we consider them to apply to the directive. 1794 flushComments(isOnNewLine(*FormatTok)); 1795 parsePPDirective(); 1796 } 1797 while (FormatTok->Type == TT_ConflictStart || 1798 FormatTok->Type == TT_ConflictEnd || 1799 FormatTok->Type == TT_ConflictAlternative) { 1800 if (FormatTok->Type == TT_ConflictStart) { 1801 conditionalCompilationStart(/*Unreachable=*/false); 1802 } else if (FormatTok->Type == TT_ConflictAlternative) { 1803 conditionalCompilationAlternative(); 1804 } else if (FormatTok->Type == TT_ConflictEnd) { 1805 conditionalCompilationEnd(); 1806 } 1807 FormatTok = Tokens->getNextToken(); 1808 FormatTok->MustBreakBefore = true; 1809 } 1810 1811 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 1812 !Line->InPPDirective) { 1813 continue; 1814 } 1815 1816 if (!FormatTok->Tok.is(tok::comment)) 1817 return; 1818 if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) { 1819 CommentsInCurrentLine = false; 1820 } 1821 if (CommentsInCurrentLine) { 1822 pushToken(FormatTok); 1823 } else { 1824 CommentsBeforeNextToken.push_back(FormatTok); 1825 } 1826 } while (!eof()); 1827 } 1828 1829 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 1830 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 1831 if (MustBreakBeforeNextToken) { 1832 Line->Tokens.back().Tok->MustBreakBefore = true; 1833 MustBreakBeforeNextToken = false; 1834 } 1835 } 1836 1837 } // end namespace format 1838 } // end namespace clang 1839