1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-parser" 17 18 #include "UnwrappedLineParser.h" 19 #include "llvm/Support/Debug.h" 20 21 namespace clang { 22 namespace format { 23 24 class FormatTokenSource { 25 public: 26 virtual ~FormatTokenSource() {} 27 virtual FormatToken *getNextToken() = 0; 28 29 virtual unsigned getPosition() = 0; 30 virtual FormatToken *setPosition(unsigned Position) = 0; 31 }; 32 33 namespace { 34 35 class ScopedDeclarationState { 36 public: 37 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 38 bool MustBeDeclaration) 39 : Line(Line), Stack(Stack) { 40 Line.MustBeDeclaration = MustBeDeclaration; 41 Stack.push_back(MustBeDeclaration); 42 } 43 ~ScopedDeclarationState() { 44 Stack.pop_back(); 45 if (!Stack.empty()) 46 Line.MustBeDeclaration = Stack.back(); 47 else 48 Line.MustBeDeclaration = true; 49 } 50 51 private: 52 UnwrappedLine &Line; 53 std::vector<bool> &Stack; 54 }; 55 56 class ScopedMacroState : public FormatTokenSource { 57 public: 58 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 59 FormatToken *&ResetToken, bool &StructuralError) 60 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 61 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 62 StructuralError(StructuralError), 63 PreviousStructuralError(StructuralError), Token(NULL) { 64 TokenSource = this; 65 Line.Level = 0; 66 Line.InPPDirective = true; 67 } 68 69 ~ScopedMacroState() { 70 TokenSource = PreviousTokenSource; 71 ResetToken = Token; 72 Line.InPPDirective = false; 73 Line.Level = PreviousLineLevel; 74 StructuralError = PreviousStructuralError; 75 } 76 77 virtual FormatToken *getNextToken() { 78 // The \c UnwrappedLineParser guards against this by never calling 79 // \c getNextToken() after it has encountered the first eof token. 80 assert(!eof()); 81 Token = PreviousTokenSource->getNextToken(); 82 if (eof()) 83 return getFakeEOF(); 84 return Token; 85 } 86 87 virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); } 88 89 virtual FormatToken *setPosition(unsigned Position) { 90 Token = PreviousTokenSource->setPosition(Position); 91 return Token; 92 } 93 94 private: 95 bool eof() { return Token && Token->HasUnescapedNewline; } 96 97 FormatToken *getFakeEOF() { 98 static bool EOFInitialized = false; 99 static FormatToken FormatTok; 100 if (!EOFInitialized) { 101 FormatTok.Tok.startToken(); 102 FormatTok.Tok.setKind(tok::eof); 103 EOFInitialized = true; 104 } 105 return &FormatTok; 106 } 107 108 UnwrappedLine &Line; 109 FormatTokenSource *&TokenSource; 110 FormatToken *&ResetToken; 111 unsigned PreviousLineLevel; 112 FormatTokenSource *PreviousTokenSource; 113 bool &StructuralError; 114 bool PreviousStructuralError; 115 116 FormatToken *Token; 117 }; 118 119 } // end anonymous namespace 120 121 class ScopedLineState { 122 public: 123 ScopedLineState(UnwrappedLineParser &Parser, 124 bool SwitchToPreprocessorLines = false) 125 : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) { 126 if (SwitchToPreprocessorLines) 127 Parser.CurrentLines = &Parser.PreprocessorDirectives; 128 PreBlockLine = Parser.Line.take(); 129 Parser.Line.reset(new UnwrappedLine()); 130 Parser.Line->Level = PreBlockLine->Level; 131 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 132 } 133 134 ~ScopedLineState() { 135 if (!Parser.Line->Tokens.empty()) { 136 Parser.addUnwrappedLine(); 137 } 138 assert(Parser.Line->Tokens.empty()); 139 Parser.Line.reset(PreBlockLine); 140 Parser.MustBreakBeforeNextToken = true; 141 if (SwitchToPreprocessorLines) 142 Parser.CurrentLines = &Parser.Lines; 143 } 144 145 private: 146 UnwrappedLineParser &Parser; 147 const bool SwitchToPreprocessorLines; 148 149 UnwrappedLine *PreBlockLine; 150 }; 151 152 namespace { 153 154 class IndexedTokenSource : public FormatTokenSource { 155 public: 156 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 157 : Tokens(Tokens), Position(-1) {} 158 159 virtual FormatToken *getNextToken() { 160 ++Position; 161 return Tokens[Position]; 162 } 163 164 virtual unsigned getPosition() { 165 assert(Position >= 0); 166 return Position; 167 } 168 169 virtual FormatToken *setPosition(unsigned P) { 170 Position = P; 171 return Tokens[Position]; 172 } 173 174 private: 175 ArrayRef<FormatToken *> Tokens; 176 int Position; 177 }; 178 179 } // end anonymous namespace 180 181 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 182 ArrayRef<FormatToken *> Tokens, 183 UnwrappedLineConsumer &Callback) 184 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 185 CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL), 186 Callback(Callback), AllTokens(Tokens) {} 187 188 bool UnwrappedLineParser::parse() { 189 DEBUG(llvm::dbgs() << "----\n"); 190 IndexedTokenSource TokenSource(AllTokens); 191 Tokens = &TokenSource; 192 readToken(); 193 parseFile(); 194 for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end(); 195 I != E; ++I) { 196 Callback.consumeUnwrappedLine(*I); 197 } 198 199 // Create line with eof token. 200 pushToken(FormatTok); 201 Callback.consumeUnwrappedLine(*Line); 202 return StructuralError; 203 } 204 205 void UnwrappedLineParser::parseFile() { 206 ScopedDeclarationState DeclarationState( 207 *Line, DeclarationScopeStack, 208 /*MustBeDeclaration=*/ !Line->InPPDirective); 209 parseLevel(/*HasOpeningBrace=*/false); 210 // Make sure to format the remaining tokens. 211 flushComments(true); 212 addUnwrappedLine(); 213 } 214 215 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 216 do { 217 switch (FormatTok->Tok.getKind()) { 218 case tok::comment: 219 nextToken(); 220 addUnwrappedLine(); 221 break; 222 case tok::l_brace: 223 // FIXME: Add parameter whether this can happen - if this happens, we must 224 // be in a non-declaration context. 225 parseBlock(/*MustBeDeclaration=*/false); 226 addUnwrappedLine(); 227 break; 228 case tok::r_brace: 229 if (HasOpeningBrace) 230 return; 231 StructuralError = true; 232 nextToken(); 233 addUnwrappedLine(); 234 break; 235 default: 236 parseStructuralElement(); 237 break; 238 } 239 } while (!eof()); 240 } 241 242 void UnwrappedLineParser::calculateBraceTypes() { 243 // We'll parse forward through the tokens until we hit 244 // a closing brace or eof - note that getNextToken() will 245 // parse macros, so this will magically work inside macro 246 // definitions, too. 247 unsigned StoredPosition = Tokens->getPosition(); 248 unsigned Position = StoredPosition; 249 FormatToken *Tok = FormatTok; 250 // Keep a stack of positions of lbrace tokens. We will 251 // update information about whether an lbrace starts a 252 // braced init list or a different block during the loop. 253 SmallVector<FormatToken *, 8> LBraceStack; 254 assert(Tok->Tok.is(tok::l_brace)); 255 do { 256 // Get next none-comment token. 257 FormatToken *NextTok; 258 unsigned ReadTokens = 0; 259 do { 260 NextTok = Tokens->getNextToken(); 261 ++ReadTokens; 262 } while (NextTok->is(tok::comment)); 263 264 switch (Tok->Tok.getKind()) { 265 case tok::l_brace: 266 LBraceStack.push_back(Tok); 267 break; 268 case tok::r_brace: 269 if (!LBraceStack.empty()) { 270 if (LBraceStack.back()->BlockKind == BK_Unknown) { 271 // If there is a comma, semicolon or right paren after the closing 272 // brace, we assume this is a braced initializer list. 273 274 // FIXME: Note that this currently works only because we do not 275 // use the brace information while inside a braced init list. 276 // Thus, if the parent is a braced init list, we consider all 277 // brace blocks inside it braced init list. That works good enough 278 // for now, but we will need to fix it to correctly handle lambdas. 279 if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren, 280 tok::l_brace, tok::colon)) { 281 Tok->BlockKind = BK_BracedInit; 282 LBraceStack.back()->BlockKind = BK_BracedInit; 283 } else { 284 Tok->BlockKind = BK_Block; 285 LBraceStack.back()->BlockKind = BK_Block; 286 } 287 } 288 LBraceStack.pop_back(); 289 } 290 break; 291 case tok::semi: 292 case tok::kw_if: 293 case tok::kw_while: 294 case tok::kw_for: 295 case tok::kw_switch: 296 case tok::kw_try: 297 if (!LBraceStack.empty()) 298 LBraceStack.back()->BlockKind = BK_Block; 299 break; 300 default: 301 break; 302 } 303 Tok = NextTok; 304 Position += ReadTokens; 305 } while (Tok->Tok.isNot(tok::eof)); 306 // Assume other blocks for all unclosed opening braces. 307 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 308 if (LBraceStack[i]->BlockKind == BK_Unknown) 309 LBraceStack[i]->BlockKind = BK_Block; 310 } 311 FormatTok = Tokens->setPosition(StoredPosition); 312 } 313 314 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, 315 unsigned AddLevels) { 316 assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected"); 317 nextToken(); 318 319 addUnwrappedLine(); 320 321 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 322 MustBeDeclaration); 323 Line->Level += AddLevels; 324 parseLevel(/*HasOpeningBrace=*/true); 325 326 if (!FormatTok->Tok.is(tok::r_brace)) { 327 Line->Level -= AddLevels; 328 StructuralError = true; 329 return; 330 } 331 332 nextToken(); // Munch the closing brace. 333 Line->Level -= AddLevels; 334 } 335 336 void UnwrappedLineParser::parsePPDirective() { 337 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 338 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); 339 nextToken(); 340 341 if (FormatTok->Tok.getIdentifierInfo() == NULL) { 342 parsePPUnknown(); 343 return; 344 } 345 346 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 347 case tok::pp_define: 348 parsePPDefine(); 349 return; 350 case tok::pp_if: 351 parsePPIf(); 352 break; 353 case tok::pp_ifdef: 354 case tok::pp_ifndef: 355 parsePPIfdef(); 356 break; 357 case tok::pp_else: 358 parsePPElse(); 359 break; 360 case tok::pp_elif: 361 parsePPElIf(); 362 break; 363 case tok::pp_endif: 364 parsePPEndIf(); 365 break; 366 default: 367 parsePPUnknown(); 368 break; 369 } 370 } 371 372 void UnwrappedLineParser::pushPPConditional() { 373 if (!PPStack.empty() && PPStack.back() == PP_Unreachable) 374 PPStack.push_back(PP_Unreachable); 375 else 376 PPStack.push_back(PP_Conditional); 377 } 378 379 void UnwrappedLineParser::parsePPIf() { 380 nextToken(); 381 if ((FormatTok->Tok.isLiteral() && 382 StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) == 383 "0") || 384 FormatTok->Tok.is(tok::kw_false)) { 385 PPStack.push_back(PP_Unreachable); 386 } else { 387 pushPPConditional(); 388 } 389 parsePPUnknown(); 390 } 391 392 void UnwrappedLineParser::parsePPIfdef() { 393 pushPPConditional(); 394 parsePPUnknown(); 395 } 396 397 void UnwrappedLineParser::parsePPElse() { 398 if (!PPStack.empty()) 399 PPStack.pop_back(); 400 pushPPConditional(); 401 parsePPUnknown(); 402 } 403 404 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 405 406 void UnwrappedLineParser::parsePPEndIf() { 407 if (!PPStack.empty()) 408 PPStack.pop_back(); 409 parsePPUnknown(); 410 } 411 412 void UnwrappedLineParser::parsePPDefine() { 413 nextToken(); 414 415 if (FormatTok->Tok.getKind() != tok::identifier) { 416 parsePPUnknown(); 417 return; 418 } 419 nextToken(); 420 if (FormatTok->Tok.getKind() == tok::l_paren && 421 FormatTok->WhitespaceRange.getBegin() == 422 FormatTok->WhitespaceRange.getEnd()) { 423 parseParens(); 424 } 425 addUnwrappedLine(); 426 Line->Level = 1; 427 428 // Errors during a preprocessor directive can only affect the layout of the 429 // preprocessor directive, and thus we ignore them. An alternative approach 430 // would be to use the same approach we use on the file level (no 431 // re-indentation if there was a structural error) within the macro 432 // definition. 433 parseFile(); 434 } 435 436 void UnwrappedLineParser::parsePPUnknown() { 437 do { 438 nextToken(); 439 } while (!eof()); 440 addUnwrappedLine(); 441 } 442 443 // Here we blacklist certain tokens that are not usually the first token in an 444 // unwrapped line. This is used in attempt to distinguish macro calls without 445 // trailing semicolons from other constructs split to several lines. 446 bool tokenCanStartNewLine(clang::Token Tok) { 447 // Semicolon can be a null-statement, l_square can be a start of a macro or 448 // a C++11 attribute, but this doesn't seem to be common. 449 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 450 Tok.isNot(tok::l_square) && 451 // Tokens that can only be used as binary operators and a part of 452 // overloaded operator names. 453 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 454 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 455 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 456 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 457 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 458 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 459 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 460 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 461 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 462 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 463 Tok.isNot(tok::lesslessequal) && 464 // Colon is used in labels, base class lists, initializer lists, 465 // range-based for loops, ternary operator, but should never be the 466 // first token in an unwrapped line. 467 Tok.isNot(tok::colon); 468 } 469 470 void UnwrappedLineParser::parseStructuralElement() { 471 assert(!FormatTok->Tok.is(tok::l_brace)); 472 switch (FormatTok->Tok.getKind()) { 473 case tok::at: 474 nextToken(); 475 if (FormatTok->Tok.is(tok::l_brace)) { 476 parseBracedList(); 477 break; 478 } 479 switch (FormatTok->Tok.getObjCKeywordID()) { 480 case tok::objc_public: 481 case tok::objc_protected: 482 case tok::objc_package: 483 case tok::objc_private: 484 return parseAccessSpecifier(); 485 case tok::objc_interface: 486 case tok::objc_implementation: 487 return parseObjCInterfaceOrImplementation(); 488 case tok::objc_protocol: 489 return parseObjCProtocol(); 490 case tok::objc_end: 491 return; // Handled by the caller. 492 case tok::objc_optional: 493 case tok::objc_required: 494 nextToken(); 495 addUnwrappedLine(); 496 return; 497 default: 498 break; 499 } 500 break; 501 case tok::kw_namespace: 502 parseNamespace(); 503 return; 504 case tok::kw_inline: 505 nextToken(); 506 if (FormatTok->Tok.is(tok::kw_namespace)) { 507 parseNamespace(); 508 return; 509 } 510 break; 511 case tok::kw_public: 512 case tok::kw_protected: 513 case tok::kw_private: 514 parseAccessSpecifier(); 515 return; 516 case tok::kw_if: 517 parseIfThenElse(); 518 return; 519 case tok::kw_for: 520 case tok::kw_while: 521 parseForOrWhileLoop(); 522 return; 523 case tok::kw_do: 524 parseDoWhile(); 525 return; 526 case tok::kw_switch: 527 parseSwitch(); 528 return; 529 case tok::kw_default: 530 nextToken(); 531 parseLabel(); 532 return; 533 case tok::kw_case: 534 parseCaseLabel(); 535 return; 536 case tok::kw_return: 537 parseReturn(); 538 return; 539 case tok::kw_extern: 540 nextToken(); 541 if (FormatTok->Tok.is(tok::string_literal)) { 542 nextToken(); 543 if (FormatTok->Tok.is(tok::l_brace)) { 544 parseBlock(/*MustBeDeclaration=*/true, 0); 545 addUnwrappedLine(); 546 return; 547 } 548 } 549 // In all other cases, parse the declaration. 550 break; 551 default: 552 break; 553 } 554 do { 555 switch (FormatTok->Tok.getKind()) { 556 case tok::at: 557 nextToken(); 558 if (FormatTok->Tok.is(tok::l_brace)) 559 parseBracedList(); 560 break; 561 case tok::kw_enum: 562 parseEnum(); 563 break; 564 case tok::kw_struct: 565 case tok::kw_union: 566 case tok::kw_class: 567 parseRecord(); 568 // A record declaration or definition is always the start of a structural 569 // element. 570 break; 571 case tok::semi: 572 nextToken(); 573 addUnwrappedLine(); 574 return; 575 case tok::r_brace: 576 addUnwrappedLine(); 577 return; 578 case tok::l_paren: 579 parseParens(); 580 break; 581 case tok::l_brace: 582 if (!tryToParseBracedList()) { 583 // A block outside of parentheses must be the last part of a 584 // structural element. 585 // FIXME: Figure out cases where this is not true, and add projections 586 // for them (the one we know is missing are lambdas). 587 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux || 588 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) 589 addUnwrappedLine(); 590 parseBlock(/*MustBeDeclaration=*/false); 591 addUnwrappedLine(); 592 return; 593 } 594 // Otherwise this was a braced init list, and the structural 595 // element continues. 596 break; 597 case tok::identifier: { 598 StringRef Text = FormatTok->TokenText; 599 nextToken(); 600 if (Line->Tokens.size() == 1) { 601 if (FormatTok->Tok.is(tok::colon)) { 602 parseLabel(); 603 return; 604 } 605 // Recognize function-like macro usages without trailing semicolon. 606 if (FormatTok->Tok.is(tok::l_paren)) { 607 parseParens(); 608 if (FormatTok->HasUnescapedNewline && 609 tokenCanStartNewLine(FormatTok->Tok)) { 610 addUnwrappedLine(); 611 return; 612 } 613 } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 && 614 Text == Text.upper()) { 615 // Recognize free-standing macros like Q_OBJECT. 616 addUnwrappedLine(); 617 return; 618 } 619 } 620 break; 621 } 622 case tok::equal: 623 nextToken(); 624 if (FormatTok->Tok.is(tok::l_brace)) { 625 parseBracedList(); 626 } 627 break; 628 default: 629 nextToken(); 630 break; 631 } 632 } while (!eof()); 633 } 634 635 bool UnwrappedLineParser::tryToParseBracedList() { 636 if (FormatTok->BlockKind == BK_Unknown) 637 calculateBraceTypes(); 638 assert(FormatTok->BlockKind != BK_Unknown); 639 if (FormatTok->BlockKind == BK_Block) 640 return false; 641 parseBracedList(); 642 return true; 643 } 644 645 void UnwrappedLineParser::parseBracedList() { 646 nextToken(); 647 648 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 649 // replace this by using parseAssigmentExpression() inside. 650 do { 651 // FIXME: When we start to support lambdas, we'll want to parse them away 652 // here, otherwise our bail-out scenarios below break. The better solution 653 // might be to just implement a more or less complete expression parser. 654 switch (FormatTok->Tok.getKind()) { 655 case tok::l_brace: 656 parseBracedList(); 657 break; 658 case tok::r_brace: 659 nextToken(); 660 return; 661 case tok::semi: 662 // Probably a missing closing brace. Bail out. 663 return; 664 case tok::comma: 665 nextToken(); 666 break; 667 default: 668 nextToken(); 669 break; 670 } 671 } while (!eof()); 672 } 673 674 void UnwrappedLineParser::parseReturn() { 675 nextToken(); 676 677 do { 678 switch (FormatTok->Tok.getKind()) { 679 case tok::l_brace: 680 parseBracedList(); 681 if (FormatTok->Tok.isNot(tok::semi)) { 682 // Assume missing ';'. 683 addUnwrappedLine(); 684 return; 685 } 686 break; 687 case tok::l_paren: 688 parseParens(); 689 break; 690 case tok::r_brace: 691 // Assume missing ';'. 692 addUnwrappedLine(); 693 return; 694 case tok::semi: 695 nextToken(); 696 addUnwrappedLine(); 697 return; 698 default: 699 nextToken(); 700 break; 701 } 702 } while (!eof()); 703 } 704 705 void UnwrappedLineParser::parseParens() { 706 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 707 nextToken(); 708 do { 709 switch (FormatTok->Tok.getKind()) { 710 case tok::l_paren: 711 parseParens(); 712 break; 713 case tok::r_paren: 714 nextToken(); 715 return; 716 case tok::r_brace: 717 // A "}" inside parenthesis is an error if there wasn't a matching "{". 718 return; 719 case tok::l_brace: { 720 if (!tryToParseBracedList()) { 721 nextToken(); 722 { 723 ScopedLineState LineState(*this); 724 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 725 /*MustBeDeclaration=*/false); 726 Line->Level += 1; 727 parseLevel(/*HasOpeningBrace=*/true); 728 Line->Level -= 1; 729 } 730 nextToken(); 731 } 732 break; 733 } 734 case tok::at: 735 nextToken(); 736 if (FormatTok->Tok.is(tok::l_brace)) 737 parseBracedList(); 738 break; 739 default: 740 nextToken(); 741 break; 742 } 743 } while (!eof()); 744 } 745 746 void UnwrappedLineParser::parseIfThenElse() { 747 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 748 nextToken(); 749 if (FormatTok->Tok.is(tok::l_paren)) 750 parseParens(); 751 bool NeedsUnwrappedLine = false; 752 if (FormatTok->Tok.is(tok::l_brace)) { 753 parseBlock(/*MustBeDeclaration=*/false); 754 NeedsUnwrappedLine = true; 755 } else { 756 addUnwrappedLine(); 757 ++Line->Level; 758 parseStructuralElement(); 759 --Line->Level; 760 } 761 if (FormatTok->Tok.is(tok::kw_else)) { 762 nextToken(); 763 if (FormatTok->Tok.is(tok::l_brace)) { 764 parseBlock(/*MustBeDeclaration=*/false); 765 addUnwrappedLine(); 766 } else if (FormatTok->Tok.is(tok::kw_if)) { 767 parseIfThenElse(); 768 } else { 769 addUnwrappedLine(); 770 ++Line->Level; 771 parseStructuralElement(); 772 --Line->Level; 773 } 774 } else if (NeedsUnwrappedLine) { 775 addUnwrappedLine(); 776 } 777 } 778 779 void UnwrappedLineParser::parseNamespace() { 780 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 781 nextToken(); 782 if (FormatTok->Tok.is(tok::identifier)) 783 nextToken(); 784 if (FormatTok->Tok.is(tok::l_brace)) { 785 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux) 786 addUnwrappedLine(); 787 788 parseBlock(/*MustBeDeclaration=*/true, 0); 789 // Munch the semicolon after a namespace. This is more common than one would 790 // think. Puttin the semicolon into its own line is very ugly. 791 if (FormatTok->Tok.is(tok::semi)) 792 nextToken(); 793 addUnwrappedLine(); 794 } 795 // FIXME: Add error handling. 796 } 797 798 void UnwrappedLineParser::parseForOrWhileLoop() { 799 assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) && 800 "'for' or 'while' expected"); 801 nextToken(); 802 if (FormatTok->Tok.is(tok::l_paren)) 803 parseParens(); 804 if (FormatTok->Tok.is(tok::l_brace)) { 805 parseBlock(/*MustBeDeclaration=*/false); 806 addUnwrappedLine(); 807 } else { 808 addUnwrappedLine(); 809 ++Line->Level; 810 parseStructuralElement(); 811 --Line->Level; 812 } 813 } 814 815 void UnwrappedLineParser::parseDoWhile() { 816 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 817 nextToken(); 818 if (FormatTok->Tok.is(tok::l_brace)) { 819 parseBlock(/*MustBeDeclaration=*/false); 820 } else { 821 addUnwrappedLine(); 822 ++Line->Level; 823 parseStructuralElement(); 824 --Line->Level; 825 } 826 827 // FIXME: Add error handling. 828 if (!FormatTok->Tok.is(tok::kw_while)) { 829 addUnwrappedLine(); 830 return; 831 } 832 833 nextToken(); 834 parseStructuralElement(); 835 } 836 837 void UnwrappedLineParser::parseLabel() { 838 if (FormatTok->Tok.isNot(tok::colon)) 839 return; 840 nextToken(); 841 unsigned OldLineLevel = Line->Level; 842 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 843 --Line->Level; 844 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 845 parseBlock(/*MustBeDeclaration=*/false); 846 if (FormatTok->Tok.is(tok::kw_break)) 847 parseStructuralElement(); // "break;" after "}" goes on the same line. 848 } 849 addUnwrappedLine(); 850 Line->Level = OldLineLevel; 851 } 852 853 void UnwrappedLineParser::parseCaseLabel() { 854 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 855 // FIXME: fix handling of complex expressions here. 856 do { 857 nextToken(); 858 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 859 parseLabel(); 860 } 861 862 void UnwrappedLineParser::parseSwitch() { 863 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 864 nextToken(); 865 if (FormatTok->Tok.is(tok::l_paren)) 866 parseParens(); 867 if (FormatTok->Tok.is(tok::l_brace)) { 868 parseBlock(/*MustBeDeclaration=*/false, Style.IndentCaseLabels ? 2 : 1); 869 addUnwrappedLine(); 870 } else { 871 addUnwrappedLine(); 872 Line->Level += (Style.IndentCaseLabels ? 2 : 1); 873 parseStructuralElement(); 874 Line->Level -= (Style.IndentCaseLabels ? 2 : 1); 875 } 876 } 877 878 void UnwrappedLineParser::parseAccessSpecifier() { 879 nextToken(); 880 // Otherwise, we don't know what it is, and we'd better keep the next token. 881 if (FormatTok->Tok.is(tok::colon)) 882 nextToken(); 883 addUnwrappedLine(); 884 } 885 886 void UnwrappedLineParser::parseEnum() { 887 nextToken(); 888 if (FormatTok->Tok.is(tok::identifier) || 889 FormatTok->Tok.is(tok::kw___attribute) || 890 FormatTok->Tok.is(tok::kw___declspec)) { 891 nextToken(); 892 // We can have macros or attributes in between 'enum' and the enum name. 893 if (FormatTok->Tok.is(tok::l_paren)) { 894 parseParens(); 895 } 896 if (FormatTok->Tok.is(tok::identifier)) 897 nextToken(); 898 } 899 if (FormatTok->Tok.is(tok::l_brace)) { 900 nextToken(); 901 addUnwrappedLine(); 902 ++Line->Level; 903 do { 904 switch (FormatTok->Tok.getKind()) { 905 case tok::l_paren: 906 parseParens(); 907 break; 908 case tok::r_brace: 909 addUnwrappedLine(); 910 nextToken(); 911 --Line->Level; 912 return; 913 case tok::comma: 914 nextToken(); 915 addUnwrappedLine(); 916 break; 917 default: 918 nextToken(); 919 break; 920 } 921 } while (!eof()); 922 } 923 // We fall through to parsing a structural element afterwards, so that in 924 // enum A {} n, m; 925 // "} n, m;" will end up in one unwrapped line. 926 } 927 928 void UnwrappedLineParser::parseRecord() { 929 nextToken(); 930 if (FormatTok->Tok.is(tok::identifier) || 931 FormatTok->Tok.is(tok::kw___attribute) || 932 FormatTok->Tok.is(tok::kw___declspec)) { 933 nextToken(); 934 // We can have macros or attributes in between 'class' and the class name. 935 if (FormatTok->Tok.is(tok::l_paren)) { 936 parseParens(); 937 } 938 // The actual identifier can be a nested name specifier, and in macros 939 // it is often token-pasted. 940 while (FormatTok->Tok.is(tok::identifier) || 941 FormatTok->Tok.is(tok::coloncolon) || 942 FormatTok->Tok.is(tok::hashhash)) 943 nextToken(); 944 945 // Note that parsing away template declarations here leads to incorrectly 946 // accepting function declarations as record declarations. 947 // In general, we cannot solve this problem. Consider: 948 // class A<int> B() {} 949 // which can be a function definition or a class definition when B() is a 950 // macro. If we find enough real-world cases where this is a problem, we 951 // can parse for the 'template' keyword in the beginning of the statement, 952 // and thus rule out the record production in case there is no template 953 // (this would still leave us with an ambiguity between template function 954 // and class declarations). 955 if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) { 956 while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) { 957 if (FormatTok->Tok.is(tok::semi)) 958 return; 959 nextToken(); 960 } 961 } 962 } 963 if (FormatTok->Tok.is(tok::l_brace)) { 964 if (Style.BreakBeforeBraces == FormatStyle::BS_Linux) 965 addUnwrappedLine(); 966 967 parseBlock(/*MustBeDeclaration=*/true); 968 } 969 // We fall through to parsing a structural element afterwards, so 970 // class A {} n, m; 971 // will end up in one unwrapped line. 972 } 973 974 void UnwrappedLineParser::parseObjCProtocolList() { 975 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 976 do 977 nextToken(); 978 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 979 nextToken(); // Skip '>'. 980 } 981 982 void UnwrappedLineParser::parseObjCUntilAtEnd() { 983 do { 984 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 985 nextToken(); 986 addUnwrappedLine(); 987 break; 988 } 989 parseStructuralElement(); 990 } while (!eof()); 991 } 992 993 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 994 nextToken(); 995 nextToken(); // interface name 996 997 // @interface can be followed by either a base class, or a category. 998 if (FormatTok->Tok.is(tok::colon)) { 999 nextToken(); 1000 nextToken(); // base class name 1001 } else if (FormatTok->Tok.is(tok::l_paren)) 1002 // Skip category, if present. 1003 parseParens(); 1004 1005 if (FormatTok->Tok.is(tok::less)) 1006 parseObjCProtocolList(); 1007 1008 // If instance variables are present, keep the '{' on the first line too. 1009 if (FormatTok->Tok.is(tok::l_brace)) 1010 parseBlock(/*MustBeDeclaration=*/true); 1011 1012 // With instance variables, this puts '}' on its own line. Without instance 1013 // variables, this ends the @interface line. 1014 addUnwrappedLine(); 1015 1016 parseObjCUntilAtEnd(); 1017 } 1018 1019 void UnwrappedLineParser::parseObjCProtocol() { 1020 nextToken(); 1021 nextToken(); // protocol name 1022 1023 if (FormatTok->Tok.is(tok::less)) 1024 parseObjCProtocolList(); 1025 1026 // Check for protocol declaration. 1027 if (FormatTok->Tok.is(tok::semi)) { 1028 nextToken(); 1029 return addUnwrappedLine(); 1030 } 1031 1032 addUnwrappedLine(); 1033 parseObjCUntilAtEnd(); 1034 } 1035 1036 void UnwrappedLineParser::addUnwrappedLine() { 1037 if (Line->Tokens.empty()) 1038 return; 1039 DEBUG({ 1040 llvm::dbgs() << "Line(" << Line->Level << ")" 1041 << (Line->InPPDirective ? " MACRO" : "") << ": "; 1042 for (std::list<FormatToken *>::iterator I = Line->Tokens.begin(), 1043 E = Line->Tokens.end(); 1044 I != E; ++I) { 1045 llvm::dbgs() << (*I)->Tok.getName() << " "; 1046 } 1047 llvm::dbgs() << "\n"; 1048 }); 1049 CurrentLines->push_back(*Line); 1050 Line->Tokens.clear(); 1051 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 1052 for (std::vector<UnwrappedLine>::iterator 1053 I = PreprocessorDirectives.begin(), 1054 E = PreprocessorDirectives.end(); 1055 I != E; ++I) { 1056 CurrentLines->push_back(*I); 1057 } 1058 PreprocessorDirectives.clear(); 1059 } 1060 } 1061 1062 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 1063 1064 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 1065 bool JustComments = Line->Tokens.empty(); 1066 for (SmallVectorImpl<FormatToken *>::const_iterator 1067 I = CommentsBeforeNextToken.begin(), 1068 E = CommentsBeforeNextToken.end(); 1069 I != E; ++I) { 1070 if ((*I)->NewlinesBefore && JustComments) { 1071 addUnwrappedLine(); 1072 } 1073 pushToken(*I); 1074 } 1075 if (NewlineBeforeNext && JustComments) { 1076 addUnwrappedLine(); 1077 } 1078 CommentsBeforeNextToken.clear(); 1079 } 1080 1081 void UnwrappedLineParser::nextToken() { 1082 if (eof()) 1083 return; 1084 flushComments(FormatTok->NewlinesBefore > 0); 1085 pushToken(FormatTok); 1086 readToken(); 1087 } 1088 1089 void UnwrappedLineParser::readToken() { 1090 bool CommentsInCurrentLine = true; 1091 do { 1092 FormatTok = Tokens->getNextToken(); 1093 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 1094 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 1095 // If there is an unfinished unwrapped line, we flush the preprocessor 1096 // directives only after that unwrapped line was finished later. 1097 bool SwitchToPreprocessorLines = 1098 !Line->Tokens.empty() && CurrentLines == &Lines; 1099 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 1100 // Comments stored before the preprocessor directive need to be output 1101 // before the preprocessor directive, at the same level as the 1102 // preprocessor directive, as we consider them to apply to the directive. 1103 flushComments(FormatTok->NewlinesBefore > 0); 1104 parsePPDirective(); 1105 } 1106 1107 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 1108 !Line->InPPDirective) { 1109 continue; 1110 } 1111 1112 if (!FormatTok->Tok.is(tok::comment)) 1113 return; 1114 if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) { 1115 CommentsInCurrentLine = false; 1116 } 1117 if (CommentsInCurrentLine) { 1118 pushToken(FormatTok); 1119 } else { 1120 CommentsBeforeNextToken.push_back(FormatTok); 1121 } 1122 } while (!eof()); 1123 } 1124 1125 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 1126 Line->Tokens.push_back(Tok); 1127 if (MustBreakBeforeNextToken) { 1128 Line->Tokens.back()->MustBreakBefore = true; 1129 MustBreakBeforeNextToken = false; 1130 } 1131 } 1132 1133 } // end namespace format 1134 } // end namespace clang 1135