1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-parser" 17 18 #include "UnwrappedLineParser.h" 19 #include "llvm/Support/Debug.h" 20 21 namespace clang { 22 namespace format { 23 24 class FormatTokenSource { 25 public: 26 virtual ~FormatTokenSource() {} 27 virtual FormatToken *getNextToken() = 0; 28 29 virtual unsigned getPosition() = 0; 30 virtual FormatToken *setPosition(unsigned Position) = 0; 31 }; 32 33 namespace { 34 35 class ScopedDeclarationState { 36 public: 37 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 38 bool MustBeDeclaration) 39 : Line(Line), Stack(Stack) { 40 Line.MustBeDeclaration = MustBeDeclaration; 41 Stack.push_back(MustBeDeclaration); 42 } 43 ~ScopedDeclarationState() { 44 Stack.pop_back(); 45 if (!Stack.empty()) 46 Line.MustBeDeclaration = Stack.back(); 47 else 48 Line.MustBeDeclaration = true; 49 } 50 51 private: 52 UnwrappedLine &Line; 53 std::vector<bool> &Stack; 54 }; 55 56 class ScopedMacroState : public FormatTokenSource { 57 public: 58 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 59 FormatToken *&ResetToken, bool &StructuralError) 60 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 61 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 62 StructuralError(StructuralError), 63 PreviousStructuralError(StructuralError), Token(NULL) { 64 TokenSource = this; 65 Line.Level = 0; 66 Line.InPPDirective = true; 67 } 68 69 ~ScopedMacroState() { 70 TokenSource = PreviousTokenSource; 71 ResetToken = Token; 72 Line.InPPDirective = false; 73 Line.Level = PreviousLineLevel; 74 StructuralError = PreviousStructuralError; 75 } 76 77 virtual FormatToken *getNextToken() { 78 // The \c UnwrappedLineParser guards against this by never calling 79 // \c getNextToken() after it has encountered the first eof token. 80 assert(!eof()); 81 Token = PreviousTokenSource->getNextToken(); 82 if (eof()) 83 return getFakeEOF(); 84 return Token; 85 } 86 87 virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); } 88 89 virtual FormatToken *setPosition(unsigned Position) { 90 Token = PreviousTokenSource->setPosition(Position); 91 return Token; 92 } 93 94 private: 95 bool eof() { return Token && Token->HasUnescapedNewline; } 96 97 FormatToken *getFakeEOF() { 98 static bool EOFInitialized = false; 99 static FormatToken FormatTok; 100 if (!EOFInitialized) { 101 FormatTok.Tok.startToken(); 102 FormatTok.Tok.setKind(tok::eof); 103 EOFInitialized = true; 104 } 105 return &FormatTok; 106 } 107 108 UnwrappedLine &Line; 109 FormatTokenSource *&TokenSource; 110 FormatToken *&ResetToken; 111 unsigned PreviousLineLevel; 112 FormatTokenSource *PreviousTokenSource; 113 bool &StructuralError; 114 bool PreviousStructuralError; 115 116 FormatToken *Token; 117 }; 118 119 } // end anonymous namespace 120 121 class ScopedLineState { 122 public: 123 ScopedLineState(UnwrappedLineParser &Parser, 124 bool SwitchToPreprocessorLines = false) 125 : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) { 126 if (SwitchToPreprocessorLines) 127 Parser.CurrentLines = &Parser.PreprocessorDirectives; 128 PreBlockLine = Parser.Line.take(); 129 Parser.Line.reset(new UnwrappedLine()); 130 Parser.Line->Level = PreBlockLine->Level; 131 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 132 } 133 134 ~ScopedLineState() { 135 if (!Parser.Line->Tokens.empty()) { 136 Parser.addUnwrappedLine(); 137 } 138 assert(Parser.Line->Tokens.empty()); 139 Parser.Line.reset(PreBlockLine); 140 Parser.MustBreakBeforeNextToken = true; 141 if (SwitchToPreprocessorLines) 142 Parser.CurrentLines = &Parser.Lines; 143 } 144 145 private: 146 UnwrappedLineParser &Parser; 147 const bool SwitchToPreprocessorLines; 148 149 UnwrappedLine *PreBlockLine; 150 }; 151 152 namespace { 153 154 class IndexedTokenSource : public FormatTokenSource { 155 public: 156 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 157 : Tokens(Tokens), Position(-1) {} 158 159 virtual FormatToken *getNextToken() { 160 ++Position; 161 return Tokens[Position]; 162 } 163 164 virtual unsigned getPosition() { 165 assert(Position >= 0); 166 return Position; 167 } 168 169 virtual FormatToken *setPosition(unsigned P) { 170 Position = P; 171 return Tokens[Position]; 172 } 173 174 private: 175 ArrayRef<FormatToken *> Tokens; 176 int Position; 177 }; 178 179 } // end anonymous namespace 180 181 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 182 ArrayRef<FormatToken *> Tokens, 183 UnwrappedLineConsumer &Callback) 184 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 185 CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL), 186 Callback(Callback), AllTokens(Tokens) {} 187 188 bool UnwrappedLineParser::parse() { 189 DEBUG(llvm::dbgs() << "----\n"); 190 IndexedTokenSource TokenSource(AllTokens); 191 Tokens = &TokenSource; 192 readToken(); 193 parseFile(); 194 for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end(); 195 I != E; ++I) { 196 Callback.consumeUnwrappedLine(*I); 197 } 198 199 // Create line with eof token. 200 pushToken(FormatTok); 201 Callback.consumeUnwrappedLine(*Line); 202 return StructuralError; 203 } 204 205 void UnwrappedLineParser::parseFile() { 206 ScopedDeclarationState DeclarationState( 207 *Line, DeclarationScopeStack, 208 /*MustBeDeclaration=*/ !Line->InPPDirective); 209 parseLevel(/*HasOpeningBrace=*/false); 210 // Make sure to format the remaining tokens. 211 flushComments(true); 212 addUnwrappedLine(); 213 } 214 215 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 216 bool SwitchLabelEncountered = false; 217 do { 218 switch (FormatTok->Tok.getKind()) { 219 case tok::comment: 220 nextToken(); 221 addUnwrappedLine(); 222 break; 223 case tok::l_brace: 224 // FIXME: Add parameter whether this can happen - if this happens, we must 225 // be in a non-declaration context. 226 parseBlock(/*MustBeDeclaration=*/false); 227 addUnwrappedLine(); 228 break; 229 case tok::r_brace: 230 if (HasOpeningBrace) 231 return; 232 StructuralError = true; 233 nextToken(); 234 addUnwrappedLine(); 235 break; 236 case tok::kw_default: 237 case tok::kw_case: 238 if (!SwitchLabelEncountered && 239 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 240 ++Line->Level; 241 SwitchLabelEncountered = true; 242 parseStructuralElement(); 243 break; 244 default: 245 parseStructuralElement(); 246 break; 247 } 248 } while (!eof()); 249 } 250 251 void UnwrappedLineParser::calculateBraceTypes() { 252 // We'll parse forward through the tokens until we hit 253 // a closing brace or eof - note that getNextToken() will 254 // parse macros, so this will magically work inside macro 255 // definitions, too. 256 unsigned StoredPosition = Tokens->getPosition(); 257 unsigned Position = StoredPosition; 258 FormatToken *Tok = FormatTok; 259 // Keep a stack of positions of lbrace tokens. We will 260 // update information about whether an lbrace starts a 261 // braced init list or a different block during the loop. 262 SmallVector<FormatToken *, 8> LBraceStack; 263 assert(Tok->Tok.is(tok::l_brace)); 264 do { 265 // Get next none-comment token. 266 FormatToken *NextTok; 267 unsigned ReadTokens = 0; 268 do { 269 NextTok = Tokens->getNextToken(); 270 ++ReadTokens; 271 } while (NextTok->is(tok::comment)); 272 273 switch (Tok->Tok.getKind()) { 274 case tok::l_brace: 275 LBraceStack.push_back(Tok); 276 break; 277 case tok::r_brace: 278 if (!LBraceStack.empty()) { 279 if (LBraceStack.back()->BlockKind == BK_Unknown) { 280 // If there is a comma, semicolon or right paren after the closing 281 // brace, we assume this is a braced initializer list. Note that 282 // regardless how we mark inner braces here, we will overwrite the 283 // BlockKind later if we parse a braced list (where all blocks inside 284 // are by default braced lists), or when we explicitly detect blocks 285 // (for example while parsing lambdas). 286 // 287 // We exclude + and - as they can be ObjC visibility modifiers. 288 if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren, 289 tok::l_brace, tok::colon) || 290 (NextTok->isBinaryOperator() && 291 !NextTok->isOneOf(tok::plus, tok::minus))) { 292 Tok->BlockKind = BK_BracedInit; 293 LBraceStack.back()->BlockKind = BK_BracedInit; 294 } else { 295 Tok->BlockKind = BK_Block; 296 LBraceStack.back()->BlockKind = BK_Block; 297 } 298 } 299 LBraceStack.pop_back(); 300 } 301 break; 302 case tok::semi: 303 case tok::kw_if: 304 case tok::kw_while: 305 case tok::kw_for: 306 case tok::kw_switch: 307 case tok::kw_try: 308 if (!LBraceStack.empty()) 309 LBraceStack.back()->BlockKind = BK_Block; 310 break; 311 default: 312 break; 313 } 314 Tok = NextTok; 315 Position += ReadTokens; 316 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 317 // Assume other blocks for all unclosed opening braces. 318 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 319 if (LBraceStack[i]->BlockKind == BK_Unknown) 320 LBraceStack[i]->BlockKind = BK_Block; 321 } 322 323 FormatTok = Tokens->setPosition(StoredPosition); 324 } 325 326 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel) { 327 assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected"); 328 unsigned InitialLevel = Line->Level; 329 nextToken(); 330 331 addUnwrappedLine(); 332 333 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 334 MustBeDeclaration); 335 if (AddLevel) 336 ++Line->Level; 337 parseLevel(/*HasOpeningBrace=*/true); 338 339 if (!FormatTok->Tok.is(tok::r_brace)) { 340 Line->Level = InitialLevel; 341 StructuralError = true; 342 return; 343 } 344 345 nextToken(); // Munch the closing brace. 346 Line->Level = InitialLevel; 347 } 348 349 void UnwrappedLineParser::parseChildBlock() { 350 FormatTok->BlockKind = BK_Block; 351 nextToken(); 352 { 353 ScopedLineState LineState(*this); 354 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 355 /*MustBeDeclaration=*/false); 356 Line->Level += 1; 357 parseLevel(/*HasOpeningBrace=*/true); 358 Line->Level -= 1; 359 } 360 nextToken(); 361 } 362 363 void UnwrappedLineParser::parsePPDirective() { 364 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 365 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); 366 nextToken(); 367 368 if (FormatTok->Tok.getIdentifierInfo() == NULL) { 369 parsePPUnknown(); 370 return; 371 } 372 373 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 374 case tok::pp_define: 375 parsePPDefine(); 376 return; 377 case tok::pp_if: 378 parsePPIf(); 379 break; 380 case tok::pp_ifdef: 381 case tok::pp_ifndef: 382 parsePPIfdef(); 383 break; 384 case tok::pp_else: 385 parsePPElse(); 386 break; 387 case tok::pp_elif: 388 parsePPElIf(); 389 break; 390 case tok::pp_endif: 391 parsePPEndIf(); 392 break; 393 default: 394 parsePPUnknown(); 395 break; 396 } 397 } 398 399 void UnwrappedLineParser::pushPPConditional() { 400 if (!PPStack.empty() && PPStack.back() == PP_Unreachable) 401 PPStack.push_back(PP_Unreachable); 402 else 403 PPStack.push_back(PP_Conditional); 404 } 405 406 void UnwrappedLineParser::parsePPIf() { 407 nextToken(); 408 if ((FormatTok->Tok.isLiteral() && 409 StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) == 410 "0") || 411 FormatTok->Tok.is(tok::kw_false)) { 412 PPStack.push_back(PP_Unreachable); 413 } else { 414 pushPPConditional(); 415 } 416 parsePPUnknown(); 417 } 418 419 void UnwrappedLineParser::parsePPIfdef() { 420 pushPPConditional(); 421 parsePPUnknown(); 422 } 423 424 void UnwrappedLineParser::parsePPElse() { 425 if (!PPStack.empty()) 426 PPStack.pop_back(); 427 pushPPConditional(); 428 parsePPUnknown(); 429 } 430 431 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 432 433 void UnwrappedLineParser::parsePPEndIf() { 434 if (!PPStack.empty()) 435 PPStack.pop_back(); 436 parsePPUnknown(); 437 } 438 439 void UnwrappedLineParser::parsePPDefine() { 440 nextToken(); 441 442 if (FormatTok->Tok.getKind() != tok::identifier) { 443 parsePPUnknown(); 444 return; 445 } 446 nextToken(); 447 if (FormatTok->Tok.getKind() == tok::l_paren && 448 FormatTok->WhitespaceRange.getBegin() == 449 FormatTok->WhitespaceRange.getEnd()) { 450 parseParens(); 451 } 452 addUnwrappedLine(); 453 Line->Level = 1; 454 455 // Errors during a preprocessor directive can only affect the layout of the 456 // preprocessor directive, and thus we ignore them. An alternative approach 457 // would be to use the same approach we use on the file level (no 458 // re-indentation if there was a structural error) within the macro 459 // definition. 460 parseFile(); 461 } 462 463 void UnwrappedLineParser::parsePPUnknown() { 464 do { 465 nextToken(); 466 } while (!eof()); 467 addUnwrappedLine(); 468 } 469 470 // Here we blacklist certain tokens that are not usually the first token in an 471 // unwrapped line. This is used in attempt to distinguish macro calls without 472 // trailing semicolons from other constructs split to several lines. 473 bool tokenCanStartNewLine(clang::Token Tok) { 474 // Semicolon can be a null-statement, l_square can be a start of a macro or 475 // a C++11 attribute, but this doesn't seem to be common. 476 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 477 Tok.isNot(tok::l_square) && 478 // Tokens that can only be used as binary operators and a part of 479 // overloaded operator names. 480 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 481 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 482 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 483 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 484 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 485 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 486 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 487 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 488 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 489 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 490 Tok.isNot(tok::lesslessequal) && 491 // Colon is used in labels, base class lists, initializer lists, 492 // range-based for loops, ternary operator, but should never be the 493 // first token in an unwrapped line. 494 Tok.isNot(tok::colon); 495 } 496 497 void UnwrappedLineParser::parseStructuralElement() { 498 assert(!FormatTok->Tok.is(tok::l_brace)); 499 switch (FormatTok->Tok.getKind()) { 500 case tok::at: 501 nextToken(); 502 if (FormatTok->Tok.is(tok::l_brace)) { 503 parseBracedList(); 504 break; 505 } 506 switch (FormatTok->Tok.getObjCKeywordID()) { 507 case tok::objc_public: 508 case tok::objc_protected: 509 case tok::objc_package: 510 case tok::objc_private: 511 return parseAccessSpecifier(); 512 case tok::objc_interface: 513 case tok::objc_implementation: 514 return parseObjCInterfaceOrImplementation(); 515 case tok::objc_protocol: 516 return parseObjCProtocol(); 517 case tok::objc_end: 518 return; // Handled by the caller. 519 case tok::objc_optional: 520 case tok::objc_required: 521 nextToken(); 522 addUnwrappedLine(); 523 return; 524 default: 525 break; 526 } 527 break; 528 case tok::kw_namespace: 529 parseNamespace(); 530 return; 531 case tok::kw_inline: 532 nextToken(); 533 if (FormatTok->Tok.is(tok::kw_namespace)) { 534 parseNamespace(); 535 return; 536 } 537 break; 538 case tok::kw_public: 539 case tok::kw_protected: 540 case tok::kw_private: 541 parseAccessSpecifier(); 542 return; 543 case tok::kw_if: 544 parseIfThenElse(); 545 return; 546 case tok::kw_for: 547 case tok::kw_while: 548 parseForOrWhileLoop(); 549 return; 550 case tok::kw_do: 551 parseDoWhile(); 552 return; 553 case tok::kw_switch: 554 parseSwitch(); 555 return; 556 case tok::kw_default: 557 nextToken(); 558 parseLabel(); 559 return; 560 case tok::kw_case: 561 parseCaseLabel(); 562 return; 563 case tok::kw_return: 564 parseReturn(); 565 return; 566 case tok::kw_extern: 567 nextToken(); 568 if (FormatTok->Tok.is(tok::string_literal)) { 569 nextToken(); 570 if (FormatTok->Tok.is(tok::l_brace)) { 571 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 572 addUnwrappedLine(); 573 return; 574 } 575 } 576 // In all other cases, parse the declaration. 577 break; 578 default: 579 break; 580 } 581 do { 582 switch (FormatTok->Tok.getKind()) { 583 case tok::at: 584 nextToken(); 585 if (FormatTok->Tok.is(tok::l_brace)) 586 parseBracedList(); 587 break; 588 case tok::kw_enum: 589 parseEnum(); 590 break; 591 case tok::kw_struct: 592 case tok::kw_union: 593 case tok::kw_class: 594 parseRecord(); 595 // A record declaration or definition is always the start of a structural 596 // element. 597 break; 598 case tok::semi: 599 nextToken(); 600 addUnwrappedLine(); 601 return; 602 case tok::r_brace: 603 addUnwrappedLine(); 604 return; 605 case tok::l_paren: 606 parseParens(); 607 break; 608 case tok::caret: 609 nextToken(); 610 if (FormatTok->is(tok::l_brace)) { 611 parseChildBlock(); 612 } 613 break; 614 case tok::l_brace: 615 if (!tryToParseBracedList()) { 616 // A block outside of parentheses must be the last part of a 617 // structural element. 618 // FIXME: Figure out cases where this is not true, and add projections 619 // for them (the one we know is missing are lambdas). 620 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || 621 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup || 622 Style.BreakBeforeBraces == FormatStyle::BS_Allman) 623 addUnwrappedLine(); 624 parseBlock(/*MustBeDeclaration=*/false); 625 addUnwrappedLine(); 626 return; 627 } 628 // Otherwise this was a braced init list, and the structural 629 // element continues. 630 break; 631 case tok::identifier: { 632 StringRef Text = FormatTok->TokenText; 633 nextToken(); 634 if (Line->Tokens.size() == 1) { 635 if (FormatTok->Tok.is(tok::colon)) { 636 parseLabel(); 637 return; 638 } 639 // Recognize function-like macro usages without trailing semicolon. 640 if (FormatTok->Tok.is(tok::l_paren)) { 641 parseParens(); 642 if (FormatTok->HasUnescapedNewline && 643 tokenCanStartNewLine(FormatTok->Tok)) { 644 addUnwrappedLine(); 645 return; 646 } 647 } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 && 648 Text == Text.upper()) { 649 // Recognize free-standing macros like Q_OBJECT. 650 addUnwrappedLine(); 651 return; 652 } 653 } 654 break; 655 } 656 case tok::equal: 657 nextToken(); 658 if (FormatTok->Tok.is(tok::l_brace)) { 659 parseBracedList(); 660 } 661 break; 662 case tok::l_square: 663 tryToParseLambda(); 664 break; 665 default: 666 nextToken(); 667 break; 668 } 669 } while (!eof()); 670 } 671 672 void UnwrappedLineParser::tryToParseLambda() { 673 if (!tryToParseLambdaIntroducer()) { 674 return; 675 } 676 if (FormatTok->is(tok::l_paren)) { 677 parseParens(); 678 } 679 680 while (FormatTok->isNot(tok::l_brace)) { 681 switch (FormatTok->Tok.getKind()) { 682 case tok::l_brace: 683 break; 684 return; 685 case tok::l_paren: 686 parseParens(); 687 break; 688 case tok::semi: 689 case tok::equal: 690 case tok::eof: 691 return; 692 default: 693 nextToken(); 694 break; 695 } 696 } 697 parseChildBlock(); 698 } 699 700 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 701 nextToken(); 702 if (FormatTok->is(tok::equal)) { 703 nextToken(); 704 if (FormatTok->is(tok::r_square)) return true; 705 if (FormatTok->isNot(tok::comma)) return false; 706 nextToken(); 707 } else if (FormatTok->is(tok::amp)) { 708 nextToken(); 709 if (FormatTok->is(tok::r_square)) return true; 710 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { 711 return false; 712 } 713 if (FormatTok->is(tok::comma)) nextToken(); 714 } else if (FormatTok->is(tok::r_square)) { 715 nextToken(); 716 return true; 717 } 718 do { 719 if (FormatTok->is(tok::amp)) nextToken(); 720 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) return false; 721 nextToken(); 722 if (FormatTok->is(tok::comma)) { 723 nextToken(); 724 } else if (FormatTok->is(tok::r_square)) { 725 nextToken(); 726 return true; 727 } else { 728 return false; 729 } 730 } while (!eof()); 731 return false; 732 } 733 734 bool UnwrappedLineParser::tryToParseBracedList() { 735 if (FormatTok->BlockKind == BK_Unknown) 736 calculateBraceTypes(); 737 assert(FormatTok->BlockKind != BK_Unknown); 738 if (FormatTok->BlockKind == BK_Block) 739 return false; 740 parseBracedList(); 741 return true; 742 } 743 744 void UnwrappedLineParser::parseBracedList() { 745 nextToken(); 746 747 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 748 // replace this by using parseAssigmentExpression() inside. 749 do { 750 // FIXME: When we start to support lambdas, we'll want to parse them away 751 // here, otherwise our bail-out scenarios below break. The better solution 752 // might be to just implement a more or less complete expression parser. 753 switch (FormatTok->Tok.getKind()) { 754 case tok::caret: 755 nextToken(); 756 if (FormatTok->is(tok::l_brace)) { 757 parseChildBlock(); 758 } 759 break; 760 case tok::l_square: 761 tryToParseLambda(); 762 break; 763 case tok::l_brace: 764 // Assume there are no blocks inside a braced init list apart 765 // from the ones we explicitly parse out (like lambdas). 766 FormatTok->BlockKind = BK_BracedInit; 767 parseBracedList(); 768 break; 769 case tok::r_brace: 770 nextToken(); 771 return; 772 case tok::semi: 773 // Probably a missing closing brace. Bail out. 774 return; 775 case tok::comma: 776 nextToken(); 777 break; 778 default: 779 nextToken(); 780 break; 781 } 782 } while (!eof()); 783 } 784 785 void UnwrappedLineParser::parseReturn() { 786 nextToken(); 787 788 do { 789 switch (FormatTok->Tok.getKind()) { 790 case tok::l_brace: 791 parseBracedList(); 792 if (FormatTok->Tok.isNot(tok::semi)) { 793 // Assume missing ';'. 794 addUnwrappedLine(); 795 return; 796 } 797 break; 798 case tok::l_paren: 799 parseParens(); 800 break; 801 case tok::r_brace: 802 // Assume missing ';'. 803 addUnwrappedLine(); 804 return; 805 case tok::semi: 806 nextToken(); 807 addUnwrappedLine(); 808 return; 809 case tok::l_square: 810 tryToParseLambda(); 811 break; 812 default: 813 nextToken(); 814 break; 815 } 816 } while (!eof()); 817 } 818 819 void UnwrappedLineParser::parseParens() { 820 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 821 nextToken(); 822 do { 823 switch (FormatTok->Tok.getKind()) { 824 case tok::l_paren: 825 parseParens(); 826 break; 827 case tok::r_paren: 828 nextToken(); 829 return; 830 case tok::r_brace: 831 // A "}" inside parenthesis is an error if there wasn't a matching "{". 832 return; 833 case tok::l_brace: { 834 if (!tryToParseBracedList()) { 835 parseChildBlock(); 836 } 837 break; 838 } 839 case tok::at: 840 nextToken(); 841 if (FormatTok->Tok.is(tok::l_brace)) 842 parseBracedList(); 843 break; 844 default: 845 nextToken(); 846 break; 847 } 848 } while (!eof()); 849 } 850 851 void UnwrappedLineParser::parseIfThenElse() { 852 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 853 nextToken(); 854 if (FormatTok->Tok.is(tok::l_paren)) 855 parseParens(); 856 bool NeedsUnwrappedLine = false; 857 if (FormatTok->Tok.is(tok::l_brace)) { 858 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 859 addUnwrappedLine(); 860 parseBlock(/*MustBeDeclaration=*/false); 861 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 862 addUnwrappedLine(); 863 else 864 NeedsUnwrappedLine = true; 865 } else { 866 addUnwrappedLine(); 867 ++Line->Level; 868 parseStructuralElement(); 869 --Line->Level; 870 } 871 if (FormatTok->Tok.is(tok::kw_else)) { 872 nextToken(); 873 if (FormatTok->Tok.is(tok::l_brace)) { 874 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 875 addUnwrappedLine(); 876 parseBlock(/*MustBeDeclaration=*/false); 877 addUnwrappedLine(); 878 } else if (FormatTok->Tok.is(tok::kw_if)) { 879 parseIfThenElse(); 880 } else { 881 addUnwrappedLine(); 882 ++Line->Level; 883 parseStructuralElement(); 884 --Line->Level; 885 } 886 } else if (NeedsUnwrappedLine) { 887 addUnwrappedLine(); 888 } 889 } 890 891 void UnwrappedLineParser::parseNamespace() { 892 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 893 nextToken(); 894 if (FormatTok->Tok.is(tok::identifier)) 895 nextToken(); 896 if (FormatTok->Tok.is(tok::l_brace)) { 897 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || 898 Style.BreakBeforeBraces == FormatStyle::BS_Allman) 899 addUnwrappedLine(); 900 901 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 902 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 903 DeclarationScopeStack.size() > 1); 904 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 905 // Munch the semicolon after a namespace. This is more common than one would 906 // think. Puttin the semicolon into its own line is very ugly. 907 if (FormatTok->Tok.is(tok::semi)) 908 nextToken(); 909 addUnwrappedLine(); 910 } 911 // FIXME: Add error handling. 912 } 913 914 void UnwrappedLineParser::parseForOrWhileLoop() { 915 assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) && 916 "'for' or 'while' expected"); 917 nextToken(); 918 if (FormatTok->Tok.is(tok::l_paren)) 919 parseParens(); 920 if (FormatTok->Tok.is(tok::l_brace)) { 921 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 922 addUnwrappedLine(); 923 parseBlock(/*MustBeDeclaration=*/false); 924 addUnwrappedLine(); 925 } else { 926 addUnwrappedLine(); 927 ++Line->Level; 928 parseStructuralElement(); 929 --Line->Level; 930 } 931 } 932 933 void UnwrappedLineParser::parseDoWhile() { 934 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 935 nextToken(); 936 if (FormatTok->Tok.is(tok::l_brace)) { 937 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 938 addUnwrappedLine(); 939 parseBlock(/*MustBeDeclaration=*/false); 940 } else { 941 addUnwrappedLine(); 942 ++Line->Level; 943 parseStructuralElement(); 944 --Line->Level; 945 } 946 947 // FIXME: Add error handling. 948 if (!FormatTok->Tok.is(tok::kw_while)) { 949 addUnwrappedLine(); 950 return; 951 } 952 953 nextToken(); 954 parseStructuralElement(); 955 } 956 957 void UnwrappedLineParser::parseLabel() { 958 nextToken(); 959 unsigned OldLineLevel = Line->Level; 960 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 961 --Line->Level; 962 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 963 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 964 addUnwrappedLine(); 965 parseBlock(/*MustBeDeclaration=*/false); 966 if (FormatTok->Tok.is(tok::kw_break)) { 967 // "break;" after "}" on its own line only for BS_Allman 968 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 969 addUnwrappedLine(); 970 parseStructuralElement(); 971 } 972 } 973 addUnwrappedLine(); 974 Line->Level = OldLineLevel; 975 } 976 977 void UnwrappedLineParser::parseCaseLabel() { 978 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 979 // FIXME: fix handling of complex expressions here. 980 do { 981 nextToken(); 982 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 983 parseLabel(); 984 } 985 986 void UnwrappedLineParser::parseSwitch() { 987 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 988 nextToken(); 989 if (FormatTok->Tok.is(tok::l_paren)) 990 parseParens(); 991 if (FormatTok->Tok.is(tok::l_brace)) { 992 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 993 addUnwrappedLine(); 994 parseBlock(/*MustBeDeclaration=*/false); 995 addUnwrappedLine(); 996 } else { 997 addUnwrappedLine(); 998 ++Line->Level; 999 parseStructuralElement(); 1000 --Line->Level; 1001 } 1002 } 1003 1004 void UnwrappedLineParser::parseAccessSpecifier() { 1005 nextToken(); 1006 // Otherwise, we don't know what it is, and we'd better keep the next token. 1007 if (FormatTok->Tok.is(tok::colon)) 1008 nextToken(); 1009 addUnwrappedLine(); 1010 } 1011 1012 void UnwrappedLineParser::parseEnum() { 1013 nextToken(); 1014 // Eat up enum class ... 1015 if (FormatTok->Tok.is(tok::kw_class) || 1016 FormatTok->Tok.is(tok::kw_struct)) 1017 nextToken(); 1018 if (FormatTok->Tok.is(tok::identifier) || 1019 FormatTok->Tok.is(tok::kw___attribute) || 1020 FormatTok->Tok.is(tok::kw___declspec)) { 1021 nextToken(); 1022 // We can have macros or attributes in between 'enum' and the enum name. 1023 if (FormatTok->Tok.is(tok::l_paren)) { 1024 parseParens(); 1025 } 1026 if (FormatTok->Tok.is(tok::identifier)) 1027 nextToken(); 1028 } 1029 bool HasError = false; 1030 if (FormatTok->Tok.is(tok::l_brace)) { 1031 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) 1032 addUnwrappedLine(); 1033 nextToken(); 1034 addUnwrappedLine(); 1035 ++Line->Level; 1036 do { 1037 switch (FormatTok->Tok.getKind()) { 1038 case tok::l_paren: 1039 parseParens(); 1040 break; 1041 case tok::r_brace: 1042 addUnwrappedLine(); 1043 nextToken(); 1044 --Line->Level; 1045 if (HasError) { 1046 if (FormatTok->is(tok::semi)) 1047 nextToken(); 1048 addUnwrappedLine(); 1049 } 1050 return; 1051 case tok::semi: 1052 HasError = true; 1053 nextToken(); 1054 addUnwrappedLine(); 1055 break; 1056 case tok::comma: 1057 nextToken(); 1058 addUnwrappedLine(); 1059 break; 1060 default: 1061 nextToken(); 1062 break; 1063 } 1064 } while (!eof()); 1065 } 1066 // We fall through to parsing a structural element afterwards, so that in 1067 // enum A {} n, m; 1068 // "} n, m;" will end up in one unwrapped line. 1069 } 1070 1071 void UnwrappedLineParser::parseRecord() { 1072 nextToken(); 1073 if (FormatTok->Tok.is(tok::identifier) || 1074 FormatTok->Tok.is(tok::kw___attribute) || 1075 FormatTok->Tok.is(tok::kw___declspec)) { 1076 nextToken(); 1077 // We can have macros or attributes in between 'class' and the class name. 1078 if (FormatTok->Tok.is(tok::l_paren)) { 1079 parseParens(); 1080 } 1081 // The actual identifier can be a nested name specifier, and in macros 1082 // it is often token-pasted. 1083 while (FormatTok->Tok.is(tok::identifier) || 1084 FormatTok->Tok.is(tok::coloncolon) || 1085 FormatTok->Tok.is(tok::hashhash)) 1086 nextToken(); 1087 1088 // Note that parsing away template declarations here leads to incorrectly 1089 // accepting function declarations as record declarations. 1090 // In general, we cannot solve this problem. Consider: 1091 // class A<int> B() {} 1092 // which can be a function definition or a class definition when B() is a 1093 // macro. If we find enough real-world cases where this is a problem, we 1094 // can parse for the 'template' keyword in the beginning of the statement, 1095 // and thus rule out the record production in case there is no template 1096 // (this would still leave us with an ambiguity between template function 1097 // and class declarations). 1098 if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) { 1099 while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) { 1100 if (FormatTok->Tok.is(tok::semi)) 1101 return; 1102 nextToken(); 1103 } 1104 } 1105 } 1106 if (FormatTok->Tok.is(tok::l_brace)) { 1107 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || 1108 Style.BreakBeforeBraces == FormatStyle::BS_Allman) 1109 addUnwrappedLine(); 1110 1111 parseBlock(/*MustBeDeclaration=*/true); 1112 } 1113 // We fall through to parsing a structural element afterwards, so 1114 // class A {} n, m; 1115 // will end up in one unwrapped line. 1116 } 1117 1118 void UnwrappedLineParser::parseObjCProtocolList() { 1119 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 1120 do 1121 nextToken(); 1122 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 1123 nextToken(); // Skip '>'. 1124 } 1125 1126 void UnwrappedLineParser::parseObjCUntilAtEnd() { 1127 do { 1128 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 1129 nextToken(); 1130 addUnwrappedLine(); 1131 break; 1132 } 1133 if (FormatTok->is(tok::l_brace)) { 1134 parseBlock(/*MustBeDeclaration=*/false); 1135 // In ObjC interfaces, nothing should be following the "}". 1136 addUnwrappedLine(); 1137 } else { 1138 parseStructuralElement(); 1139 } 1140 } while (!eof()); 1141 } 1142 1143 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 1144 nextToken(); 1145 nextToken(); // interface name 1146 1147 // @interface can be followed by either a base class, or a category. 1148 if (FormatTok->Tok.is(tok::colon)) { 1149 nextToken(); 1150 nextToken(); // base class name 1151 } else if (FormatTok->Tok.is(tok::l_paren)) 1152 // Skip category, if present. 1153 parseParens(); 1154 1155 if (FormatTok->Tok.is(tok::less)) 1156 parseObjCProtocolList(); 1157 1158 // If instance variables are present, keep the '{' on the first line too. 1159 if (FormatTok->Tok.is(tok::l_brace)) 1160 parseBlock(/*MustBeDeclaration=*/true); 1161 1162 // With instance variables, this puts '}' on its own line. Without instance 1163 // variables, this ends the @interface line. 1164 addUnwrappedLine(); 1165 1166 parseObjCUntilAtEnd(); 1167 } 1168 1169 void UnwrappedLineParser::parseObjCProtocol() { 1170 nextToken(); 1171 nextToken(); // protocol name 1172 1173 if (FormatTok->Tok.is(tok::less)) 1174 parseObjCProtocolList(); 1175 1176 // Check for protocol declaration. 1177 if (FormatTok->Tok.is(tok::semi)) { 1178 nextToken(); 1179 return addUnwrappedLine(); 1180 } 1181 1182 addUnwrappedLine(); 1183 parseObjCUntilAtEnd(); 1184 } 1185 1186 void UnwrappedLineParser::addUnwrappedLine() { 1187 if (Line->Tokens.empty()) 1188 return; 1189 DEBUG({ 1190 llvm::dbgs() << "Line(" << Line->Level << ")" 1191 << (Line->InPPDirective ? " MACRO" : "") << ": "; 1192 for (std::list<FormatToken *>::iterator I = Line->Tokens.begin(), 1193 E = Line->Tokens.end(); 1194 I != E; ++I) { 1195 llvm::dbgs() << (*I)->Tok.getName() << " "; 1196 } 1197 llvm::dbgs() << "\n"; 1198 }); 1199 CurrentLines->push_back(*Line); 1200 Line->Tokens.clear(); 1201 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 1202 for (std::vector<UnwrappedLine>::iterator 1203 I = PreprocessorDirectives.begin(), 1204 E = PreprocessorDirectives.end(); 1205 I != E; ++I) { 1206 CurrentLines->push_back(*I); 1207 } 1208 PreprocessorDirectives.clear(); 1209 } 1210 } 1211 1212 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 1213 1214 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 1215 bool JustComments = Line->Tokens.empty(); 1216 for (SmallVectorImpl<FormatToken *>::const_iterator 1217 I = CommentsBeforeNextToken.begin(), 1218 E = CommentsBeforeNextToken.end(); 1219 I != E; ++I) { 1220 if ((*I)->NewlinesBefore && JustComments) { 1221 addUnwrappedLine(); 1222 } 1223 pushToken(*I); 1224 } 1225 if (NewlineBeforeNext && JustComments) { 1226 addUnwrappedLine(); 1227 } 1228 CommentsBeforeNextToken.clear(); 1229 } 1230 1231 void UnwrappedLineParser::nextToken() { 1232 if (eof()) 1233 return; 1234 flushComments(FormatTok->NewlinesBefore > 0); 1235 pushToken(FormatTok); 1236 readToken(); 1237 } 1238 1239 void UnwrappedLineParser::readToken() { 1240 bool CommentsInCurrentLine = true; 1241 do { 1242 FormatTok = Tokens->getNextToken(); 1243 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 1244 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 1245 // If there is an unfinished unwrapped line, we flush the preprocessor 1246 // directives only after that unwrapped line was finished later. 1247 bool SwitchToPreprocessorLines = 1248 !Line->Tokens.empty() && CurrentLines == &Lines; 1249 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 1250 // Comments stored before the preprocessor directive need to be output 1251 // before the preprocessor directive, at the same level as the 1252 // preprocessor directive, as we consider them to apply to the directive. 1253 flushComments(FormatTok->NewlinesBefore > 0); 1254 parsePPDirective(); 1255 } 1256 1257 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 1258 !Line->InPPDirective) { 1259 continue; 1260 } 1261 1262 if (!FormatTok->Tok.is(tok::comment)) 1263 return; 1264 if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) { 1265 CommentsInCurrentLine = false; 1266 } 1267 if (CommentsInCurrentLine) { 1268 pushToken(FormatTok); 1269 } else { 1270 CommentsBeforeNextToken.push_back(FormatTok); 1271 } 1272 } while (!eof()); 1273 } 1274 1275 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 1276 Line->Tokens.push_back(Tok); 1277 if (MustBreakBeforeNextToken) { 1278 Line->Tokens.back()->MustBreakBefore = true; 1279 MustBreakBeforeNextToken = false; 1280 } 1281 } 1282 1283 } // end namespace format 1284 } // end namespace clang 1285