//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "format-parser"

#include "UnwrappedLineParser.h"
#include "llvm/Support/Debug.h"

namespace clang {
namespace format {

/// \brief Abstract interface for a stream of \c FormatToken pointers whose
/// read position can be queried and reset.
///
/// The two implementations in this file are \c IndexedTokenSource (reads from
/// an array) and \c ScopedMacroState (wraps another source while a
/// preprocessor directive is being parsed).
class FormatTokenSource {
public:
  virtual ~FormatTokenSource() {}

  /// \brief Advances the source and returns the token it now points at.
  virtual FormatToken *getNextToken() = 0;

  /// \brief Returns a value that can later be passed to \c setPosition().
  virtual unsigned getPosition() = 0;

  /// \brief Moves the source to \p Position and returns the token there.
  virtual FormatToken *setPosition(unsigned Position) = 0;
};

/// \brief Scope guard that tracks whether the current line must be a
/// declaration.
///
/// The constructor stores \p MustBeDeclaration in \c Line.MustBeDeclaration and
/// pushes it onto \p Stack. The destructor pops that entry and sets
/// \c Line.MustBeDeclaration to the new top of the stack, or to \c true if the
/// stack has become empty.
class ScopedDeclarationState {
public:
  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
                         bool MustBeDeclaration)
      : Line(Line), Stack(Stack) {
    Line.MustBeDeclaration = MustBeDeclaration;
    Stack.push_back(MustBeDeclaration);
  }
  ~ScopedDeclarationState() {
    Stack.pop_back();
    if (!Stack.empty())
      Line.MustBeDeclaration = Stack.back();
    else
      Line.MustBeDeclaration = true;
  }

private:
  UnwrappedLine &Line;
  std::vector<bool> &Stack;
};

/// \brief Token source and scope guard used while a preprocessor directive is
/// parsed.
///
/// On construction this object installs itself as the parser's token source,
/// resets \c Line.Level to 0 and sets \c Line.InPPDirective. Tokens are
/// forwarded from the previous source until one with \c HasUnescapedNewline
/// set is read; from then on a fake eof token is reported instead, which makes
/// the parser's eof-terminated loops stop there.
///
/// On destruction the previous token source and line level are restored,
/// \c Line.InPPDirective is cleared, \c ResetToken is set to the last token
/// read from the underlying source, and \c StructuralError is reset to the
/// value it had on construction (so errors flagged inside the directive are
/// discarded).
class ScopedMacroState : public FormatTokenSource {
public:
  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
                   FormatToken *&ResetToken, bool &StructuralError)
      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
        StructuralError(StructuralError),
        PreviousStructuralError(StructuralError), Token(NULL) {
    TokenSource = this;
    Line.Level = 0;
    Line.InPPDirective = true;
  }

  ~ScopedMacroState() {
    TokenSource = PreviousTokenSource;
    // Hand the last real token (the one hidden behind the fake eof, if any)
    // back to the parser.
    ResetToken = Token;
    Line.InPPDirective = false;
    Line.Level = PreviousLineLevel;
    StructuralError = PreviousStructuralError;
  }

  virtual FormatToken *getNextToken() {
    // The \c UnwrappedLineParser guards against this by never calling
    // \c getNextToken() after it has encountered the first eof token.
    assert(!eof());
    Token = PreviousTokenSource->getNextToken();
    if (eof())
      return getFakeEOF();
    return Token;
  }

  virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); }

  virtual FormatToken *setPosition(unsigned Position) {
    Token = PreviousTokenSource->setPosition(Position);
    return Token;
  }

private:
  /// \brief True once a token with \c HasUnescapedNewline set has been read
  /// from the underlying source. False before the first token is read.
  bool eof() { return Token && Token->HasUnescapedNewline; }

  /// \brief Returns a function-local static token of kind \c tok::eof,
  /// initialising it on first use.
  FormatToken *getFakeEOF() {
    static bool EOFInitialized = false;
    static FormatToken FormatTok;
    if (!EOFInitialized) {
      FormatTok.Tok.startToken();
      FormatTok.Tok.setKind(tok::eof);
      EOFInitialized = true;
    }
    return &FormatTok;
  }

  UnwrappedLine &Line;
  FormatTokenSource *&TokenSource;
  FormatToken *&ResetToken;
  unsigned PreviousLineLevel;
  FormatTokenSource *PreviousTokenSource;
  bool &StructuralError;
  bool PreviousStructuralError;

  /// Last token obtained from \c PreviousTokenSource; NULL until the first
  /// call to \c getNextToken() or \c setPosition().
  FormatToken *Token;
};

/// \brief Scope guard that temporarily replaces the parser's current line with
/// a fresh, empty one.
///
/// The new line inherits \c Level and \c InPPDirective from the line it
/// replaces. If \p SwitchToPreprocessorLines is set, lines added while the
/// guard is alive go to \c Parser.PreprocessorDirectives instead of the
/// current line list.
///
/// On destruction any tokens still on the temporary line are emitted through
/// \c addUnwrappedLine(), the original line is put back,
/// \c Parser.MustBreakBeforeNextToken is set, and (if it was switched)
/// \c Parser.CurrentLines is pointed at \c Parser.Lines.
class ScopedLineState {
public:
  ScopedLineState(UnwrappedLineParser &Parser,
                  bool SwitchToPreprocessorLines = false)
      : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) {
    if (SwitchToPreprocessorLines)
      Parser.CurrentLines = &Parser.PreprocessorDirectives;
    // Take the line out of the parser; it is handed back in the destructor.
    PreBlockLine = Parser.Line.take();
    Parser.Line.reset(new UnwrappedLine());
    Parser.Line->Level = PreBlockLine->Level;
    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
  }

  ~ScopedLineState() {
    if (!Parser.Line->Tokens.empty()) {
      Parser.addUnwrappedLine();
    }
    assert(Parser.Line->Tokens.empty());
    Parser.Line.reset(PreBlockLine);
    Parser.MustBreakBeforeNextToken = true;
    if (SwitchToPreprocessorLines)
      Parser.CurrentLines = &Parser.Lines;
  }

private:
  UnwrappedLineParser &Parser;
  const bool SwitchToPreprocessorLines;

  /// The line that was current when the guard was created.
  UnwrappedLine *PreBlockLine;
};

/// \brief Token source that walks an array of tokens by index.
///
/// \c Position starts at -1 so that the first \c getNextToken() call returns
/// \c Tokens[0]. No bounds checking is done here; callers must stop asking for
/// tokens once they have seen the last one.
class IndexedTokenSource : public FormatTokenSource {
public:
  IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
      : Tokens(Tokens), Position(-1) {}

  virtual FormatToken *getNextToken() {
    ++Position;
    return Tokens[Position];
  }

  virtual unsigned getPosition() {
    // Only valid after at least one token has been read.
    assert(Position >= 0);
    return Position;
  }

  virtual FormatToken *setPosition(unsigned P) {
    Position = P;
    return Tokens[Position];
  }

private:
  ArrayRef<FormatToken *> Tokens;
  int Position;
};

/// \brief Creates a parser over \p Tokens that reports lines to \p Callback.
///
/// \c LBraces gets one \c BS_Unknown entry per token; entries are filled in
/// later by \c calculateBraceTypes().
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL),
      Callback(Callback), AllTokens(Tokens) {
  LBraces.resize(Tokens.size(), BS_Unknown);
}

/// \brief Parses all tokens and passes the resulting lines to the callback.
///
/// After the regular lines, one more line containing only the current (eof)
/// token is passed to the callback.
///
/// \returns the value of \c StructuralError after parsing.
bool UnwrappedLineParser::parse() {
  DEBUG(llvm::dbgs() << "----\n");
  IndexedTokenSource TokenSource(AllTokens);
  Tokens = &TokenSource;
  readToken();
  parseFile();
  for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end();
       I != E; ++I) {
    Callback.consumeUnwrappedLine(*I);
  }

  // Create line with eof token.
  pushToken(FormatTok);
  Callback.consumeUnwrappedLine(*Line);
  return StructuralError;
}

/// \brief Parses a top-level sequence of structural elements up to eof.
///
/// Also used by \c parsePPDefine() for the body of a macro definition, in
/// which case "eof" is the fake eof produced by \c ScopedMacroState and the
/// declaration state is "not a declaration".
void UnwrappedLineParser::parseFile() {
  ScopedDeclarationState DeclarationState(
      *Line, DeclarationScopeStack,
      /*MustBeDeclaration=*/ !Line->InPPDirective);
  parseLevel(/*HasOpeningBrace=*/false);
  // Make sure to format the remaining tokens.
  flushComments(true);
  addUnwrappedLine();
}

/// \brief Parses structural elements until eof or, if \p HasOpeningBrace is
/// set, until an unconsumed '}' is the current token.
///
/// A '}' seen while \p HasOpeningBrace is false sets \c StructuralError and is
/// put on a line of its own.
void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace)
        return;
      StructuralError = true;
      nextToken();
      addUnwrappedLine();
      break;
    default:
      parseStructuralElement();
      break;
    }
  } while (!eof());
}

/// \brief Classifies the '{' at the current position, and any braces nested
/// in it, as \c BS_BracedInit or \c BS_Block in \c LBraces.
///
/// Scans forward from the current token until eof and then resets the token
/// source to the position it had on entry. Must be called with the current
/// token being '{'.
void UnwrappedLineParser::calculateBraceTypes() {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  unsigned Position = StoredPosition;
  FormatToken *Tok = FormatTok;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<unsigned, 8> LBraceStack;
  assert(Tok->Tok.is(tok::l_brace));
  do {
    FormatToken *NextTok = Tokens->getNextToken();
    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      LBraceStack.push_back(Position);
      break;
    case tok::r_brace:
      if (!LBraceStack.empty()) {
        if (LBraces[LBraceStack.back()] == BS_Unknown) {
          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.

          // FIXME: Note that this currently works only because we do not
          // use the brace information while inside a braced init list.
          // Thus, if the parent is a braced init list, we consider all
          // brace blocks inside it braced init list. That works well enough
          // for now, but we will need to fix it to correctly handle lambdas.
          if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren,
                               tok::l_brace, tok::colon))
            LBraces[LBraceStack.back()] = BS_BracedInit;
          else
            LBraces[LBraceStack.back()] = BS_Block;
        }
        LBraceStack.pop_back();
      }
      break;
    // A semicolon or one of these statement keywords directly inside a brace
    // marks the innermost open brace as a block.
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
      if (!LBraceStack.empty())
        LBraces[LBraceStack.back()] = BS_Block;
      break;
    default:
      break;
    }
    Tok = NextTok;
    ++Position;
  } while (Tok->Tok.isNot(tok::eof));
  // Assume other blocks for all unclosed opening braces.
  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
    if (LBraces[LBraceStack[i]] == BS_Unknown)
      LBraces[LBraceStack[i]] = BS_Block;
  }
  FormatTok = Tokens->setPosition(StoredPosition);
}

/// \brief Parses a brace-delimited block starting at the current '{'.
///
/// The '{' ends the current unwrapped line. The contents are parsed with
/// \c Line->Level raised by \p AddLevels. If the block is not closed by '}',
/// \c StructuralError is set and nothing further is consumed.
///
/// \param MustBeDeclaration declaration state to use for lines in the block.
/// \param AddLevels amount by which the indent level is raised inside.
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
                                     unsigned AddLevels) {
  assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
  nextToken();

  addUnwrappedLine();

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  Line->Level += AddLevels;
  parseLevel(/*HasOpeningBrace=*/true);

  if (!FormatTok->Tok.is(tok::r_brace)) {
    Line->Level -= AddLevels;
    StructuralError = true;
    return;
  }

  nextToken(); // Munch the closing brace.
  Line->Level -= AddLevels;
}

/// \brief Parses one preprocessor directive starting at the current '#'.
///
/// A \c ScopedMacroState is active for the duration, so "eof" below means the
/// end of the directive. Dispatches on the directive keyword; anything that
/// is not an identifier or not specially handled goes to \c parsePPUnknown().
void UnwrappedLineParser::parsePPDirective() {
  assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
  ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
  nextToken();

  if (FormatTok->Tok.getIdentifierInfo() == NULL) {
    parsePPUnknown();
    return;
  }

  switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
  case tok::pp_define:
    parsePPDefine();
    return;
  case tok::pp_if:
    parsePPIf();
    break;
  case tok::pp_ifdef:
  case tok::pp_ifndef:
    parsePPIfdef();
    break;
  case tok::pp_else:
    parsePPElse();
    break;
  case tok::pp_elif:
    parsePPElIf();
    break;
  case tok::pp_endif:
    parsePPEndIf();
    break;
  default:
    parsePPUnknown();
    break;
  }
}

/// \brief Pushes a new entry on \c PPStack for a conditional branch.
///
/// The entry is \c PP_Unreachable if the enclosing branch already is,
/// otherwise \c PP_Conditional.
void UnwrappedLineParser::pushPPConditional() {
  if (!PPStack.empty() && PPStack.back() == PP_Unreachable)
    PPStack.push_back(PP_Unreachable);
  else
    PPStack.push_back(PP_Conditional);
}

/// \brief Handles "#if". A condition starting with the literal "0" or with
/// the keyword \c false marks the branch as \c PP_Unreachable.
void UnwrappedLineParser::parsePPIf() {
  nextToken();
  if ((FormatTok->Tok.isLiteral() &&
       StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) ==
           "0") ||
      FormatTok->Tok.is(tok::kw_false)) {
    PPStack.push_back(PP_Unreachable);
  } else {
    pushPPConditional();
  }
  parsePPUnknown();
}

/// \brief Handles "#ifdef" and "#ifndef": opens a conditional branch.
void UnwrappedLineParser::parsePPIfdef() {
  pushPPConditional();
  parsePPUnknown();
}

/// \brief Handles "#else": replaces the innermost \c PPStack entry (if any)
/// with a fresh one from \c pushPPConditional().
void UnwrappedLineParser::parsePPElse() {
  if (!PPStack.empty())
    PPStack.pop_back();
  pushPPConditional();
  parsePPUnknown();
}

/// \brief Handles "#elif" exactly like "#else".
void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }

/// \brief Handles "#endif": drops the innermost \c PPStack entry, if any.
void UnwrappedLineParser::parsePPEndIf() {
  if (!PPStack.empty())
    PPStack.pop_back();
  parsePPUnknown();
}

/// \brief Handles "#define".
///
/// The macro name and, for function-like macros, the parameter list form one
/// unwrapped line; the macro body is then parsed like a file at indent
/// level 1. If no identifier follows "define", the directive is treated as
/// unknown.
void UnwrappedLineParser::parsePPDefine() {
  nextToken();

  if (FormatTok->Tok.getKind() != tok::identifier) {
    parsePPUnknown();
    return;
  }
  nextToken();
  // Only a '(' with an empty whitespace range before it is parsed as a
  // parameter list.
  if (FormatTok->Tok.getKind() == tok::l_paren &&
      FormatTok->WhitespaceRange.getBegin() ==
          FormatTok->WhitespaceRange.getEnd()) {
    parseParens();
  }
  addUnwrappedLine();
  Line->Level = 1;

  // Errors during a preprocessor directive can only affect the layout of the
  // preprocessor directive, and thus we ignore them. An alternative approach
  // would be to use the same approach we use on the file level (no
  // re-indentation if there was a structural error) within the macro
  // definition.
  parseFile();
}

/// \brief Consumes all tokens up to eof and emits them as one unwrapped line.
void UnwrappedLineParser::parsePPUnknown() {
  do {
    nextToken();
  } while (!eof());
  addUnwrappedLine();
}

// Here we blacklist certain tokens that are not usually the first token in an
// unwrapped line. This is used in attempt to distinguish macro calls without
// trailing semicolons from other constructs split to several lines.
bool tokenCanStartNewLine(clang::Token Tok) {
  // Semicolon can be a null-statement, l_square can be a start of a macro or
  // a C++11 attribute, but this doesn't seem to be common.
  return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
         Tok.isNot(tok::l_square) &&
         // Tokens that can only be used as binary operators and a part of
         // overloaded operator names.
         Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
         Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
         Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
         Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
         Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
         Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
         Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
         Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
         Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
         Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
         Tok.isNot(tok::lesslessequal) &&
         // Colon is used in labels, base class lists, initializer lists,
         // range-based for loops, ternary operator, but should never be the
         // first token in an unwrapped line.
         Tok.isNot(tok::colon);
}

/// \brief Parses one structural element (statement, declaration, label, ...)
/// starting at the current token, which must not be '{'.
///
/// The first switch dispatches constructs that are recognised by their leading
/// token. Everything else falls into the loop below, which consumes tokens
/// until something ends the element (';', an unconsumed '}', a block, a
/// label, a macro-like identifier, or eof).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->Tok.is(tok::l_brace));
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    if (FormatTok->Tok.is(tok::l_brace)) {
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    default:
      break;
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    parseSwitch();
    return;
  case tok::kw_default:
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    parseCaseLabel();
    return;
  case tok::kw_return:
    parseReturn();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern' followed by a string literal and '{': the block is parsed
    // without adding an indent level.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBlock(/*MustBeDeclaration=*/true, 0);
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace))
        parseBracedList();
      break;
    case tok::kw_enum:
      parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      parseRecord();
      // A record declaration or definition is always the start of a structural
      // element.
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' itself is left for the caller.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
            Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
          addUnwrappedLine();
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::identifier: {
      StringRef Text = FormatTok->TokenText;
      nextToken();
      // The checks below only apply when the identifier is the sole token on
      // the line so far.
      if (Line->Tokens.size() == 1) {
        if (FormatTok->Tok.is(tok::colon)) {
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon.
        if (FormatTok->Tok.is(tok::l_paren)) {
          parseParens();
          if (FormatTok->HasUnescapedNewline &&
              tokenCanStartNewLine(FormatTok->Tok)) {
            addUnwrappedLine();
            return;
          }
        } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 &&
                   Text == Text.upper()) {
          // Recognize free-standing macros like Q_OBJECT.
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBracedList();
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// \brief Parses the '{' at the current position as a braced list unless it
/// has been classified as a block.
///
/// Runs \c calculateBraceTypes() first if the brace is still unclassified.
///
/// \returns false, consuming nothing, if the brace is a \c BS_Block; true
/// after parsing the braced list otherwise.
bool UnwrappedLineParser::tryToParseBracedList() {
  if (LBraces[Tokens->getPosition()] == BS_Unknown)
    calculateBraceTypes();
  assert(LBraces[Tokens->getPosition()] != BS_Unknown);
  if (LBraces[Tokens->getPosition()] == BS_Block)
    return false;
  parseBracedList();
  return true;
}

/// \brief Consumes the current token (callers invoke this on '{') and then
/// everything up to and including the matching '}', recursing for nested
/// braces.
///
/// Stops early, without consuming it, at a ';'.
void UnwrappedLineParser::parseBracedList() {
  nextToken();

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    // FIXME: When we start to support lambdas, we'll want to parse them away
    // here, otherwise our bail-out scenarios below break. The better solution
    // might be to just implement a more or less complete expression parser.
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      parseBracedList();
      break;
    case tok::r_brace:
      nextToken();
      return;
    case tok::semi:
      // Probably a missing closing brace. Bail out.
      return;
    case tok::comma:
      nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// \brief Parses a return statement starting at the current 'return' token.
///
/// The line normally ends after ';'. It also ends, assuming a missing ';', at
/// an unconsumed '}' or after a braced list that is not followed by ';'.
void UnwrappedLineParser::parseReturn() {
  nextToken();

  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      parseBracedList();
      if (FormatTok->Tok.isNot(tok::semi)) {
        // Assume missing ';'.
        addUnwrappedLine();
        return;
      }
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::r_brace:
      // Assume missing ';'.
      addUnwrappedLine();
      return;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// \brief Parses a parenthesized token sequence starting at the current '('
/// up to and including the matching ')'.
///
/// A '{' inside the parentheses that is classified as a block has its
/// contents parsed into separate unwrapped lines one level deeper, while the
/// surrounding line is kept aside by a \c ScopedLineState.
void UnwrappedLineParser::parseParens() {
  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      break;
    case tok::r_paren:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return;
    case tok::l_brace: {
      if (!tryToParseBracedList()) {
        nextToken();
        {
          ScopedLineState LineState(*this);
          ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                                  /*MustBeDeclaration=*/false);
          Line->Level += 1;
          parseLevel(/*HasOpeningBrace=*/true);
          Line->Level -= 1;
        }
        nextToken();
      }
      break;
    }
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace))
        parseBracedList();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}

/// \brief Parses an if statement, including any 'else' / 'else if' chain.
///
/// A braced body is parsed as a block; an unbraced body is parsed as a single
/// structural element one level deeper. After a braced 'if' body the line is
/// only ended here when no 'else' follows.
void UnwrappedLineParser::parseIfThenElse() {
  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  bool NeedsUnwrappedLine = false;
  if (FormatTok->Tok.is(tok::l_brace)) {
    parseBlock(/*MustBeDeclaration=*/false);
    NeedsUnwrappedLine = true;
  } else {
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  if (FormatTok->Tok.is(tok::kw_else)) {
    nextToken();
    if (FormatTok->Tok.is(tok::l_brace)) {
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
    } else if (FormatTok->Tok.is(tok::kw_if)) {
      parseIfThenElse();
    } else {
      addUnwrappedLine();
      ++Line->Level;
      parseStructuralElement();
      --Line->Level;
    }
  } else if (NeedsUnwrappedLine) {
    addUnwrappedLine();
  }
}

/// \brief Parses a namespace definition starting at the 'namespace' keyword.
///
/// The body is parsed without adding an indent level. If no '{' follows the
/// optional name, nothing more is consumed and no line is emitted here.
void UnwrappedLineParser::parseNamespace() {
  assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
  nextToken();
  if (FormatTok->Tok.is(tok::identifier))
    nextToken();
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (Style.BreakBeforeBraces == FormatStyle::BS_Linux)
      addUnwrappedLine();

    parseBlock(/*MustBeDeclaration=*/true, 0);
    // Munch the semicolon after a namespace. This is more common than one would
    // think. Putting the semicolon into its own line is very ugly.
    if (FormatTok->Tok.is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  // FIXME: Add error handling.
}

/// \brief Parses a for or while loop: optional parenthesized header followed
/// by either a block or a single structural element one level deeper.
void UnwrappedLineParser::parseForOrWhileLoop() {
  assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) &&
         "'for' or 'while' expected");
  nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  if (FormatTok->Tok.is(tok::l_brace)) {
    parseBlock(/*MustBeDeclaration=*/false);
    addUnwrappedLine();
  } else {
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
}

/// \brief Parses a do-while loop.
///
/// After a braced body no line break is forced, so a following 'while' stays
/// on the line of the closing brace. If 'while' does not follow the body, the
/// current line is simply ended.
void UnwrappedLineParser::parseDoWhile() {
  assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
  nextToken();
  if (FormatTok->Tok.is(tok::l_brace)) {
    parseBlock(/*MustBeDeclaration=*/false);
  } else {
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }

  // FIXME: Add error handling.
  if (!FormatTok->Tok.is(tok::kw_while)) {
    addUnwrappedLine();
    return;
  }

  nextToken();
  parseStructuralElement();
}

/// \brief Finishes a label whose ':' is the current token; does nothing if the
/// current token is not ':'.
///
/// The label line is emitted one level less indented than the current level,
/// provided the level is above 0 (above 1 inside a preprocessor directive).
/// The previous level is restored afterwards.
void UnwrappedLineParser::parseLabel() {
  if (FormatTok->Tok.isNot(tok::colon))
    return;
  nextToken();
  unsigned OldLineLevel = Line->Level;
  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
    --Line->Level;
  // A block directly after the label (with no pending comments in between) is
  // parsed as part of the label.
  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
    parseBlock(/*MustBeDeclaration=*/false);
    if (FormatTok->Tok.is(tok::kw_break))
      parseStructuralElement(); // "break;" after "}" goes on the same line.
  }
  addUnwrappedLine();
  Line->Level = OldLineLevel;
}

/// \brief Parses a case label: consumes tokens up to the first ':' (or eof)
/// and hands over to \c parseLabel().
void UnwrappedLineParser::parseCaseLabel() {
  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
  // FIXME: fix handling of complex expressions here.
  do {
    nextToken();
  } while (!eof() && !FormatTok->Tok.is(tok::colon));
  parseLabel();
}

/// \brief Parses a switch statement.
///
/// The body is indented by 2 levels if \c Style.IndentCaseLabels is set and by
/// 1 level otherwise.
void UnwrappedLineParser::parseSwitch() {
  assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
  nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  if (FormatTok->Tok.is(tok::l_brace)) {
    parseBlock(/*MustBeDeclaration=*/false, Style.IndentCaseLabels ? 2 : 1);
    addUnwrappedLine();
  } else {
    addUnwrappedLine();
    Line->Level += (Style.IndentCaseLabels ? 2 : 1);
    parseStructuralElement();
    Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
  }
}

/// \brief Consumes an access specifier keyword and, if present, the ':' after
/// it, then ends the line.
void UnwrappedLineParser::parseAccessSpecifier() {
  nextToken();
  // Otherwise, we don't know what it is, and we'd better keep the next token.
  if (FormatTok->Tok.is(tok::colon))
    nextToken();
  addUnwrappedLine();
}

/// \brief Parses an enum starting at the 'enum' keyword.
///
/// Inside the braces every ',' ends an unwrapped line, and the enumerators are
/// placed one level deeper. The closing '}' is consumed but its line is left
/// open for the caller.
void UnwrappedLineParser::parseEnum() {
  nextToken();
  if (FormatTok->Tok.is(tok::identifier) ||
      FormatTok->Tok.is(tok::kw___attribute) ||
      FormatTok->Tok.is(tok::kw___declspec)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->Tok.is(tok::l_paren)) {
      parseParens();
    }
    if (FormatTok->Tok.is(tok::identifier))
      nextToken();
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    nextToken();
    addUnwrappedLine();
    ++Line->Level;
    do {
      switch (FormatTok->Tok.getKind()) {
      case tok::l_paren:
        parseParens();
        break;
      case tok::r_brace:
        addUnwrappedLine();
        nextToken();
        --Line->Level;
        return;
      case tok::comma:
        nextToken();
        addUnwrappedLine();
        break;
      default:
        nextToken();
        break;
      }
    } while (!eof());
  }
  // We fall through to parsing a structural element afterwards, so that in
  // enum A {} n, m;
  // "} n, m;" will end up in one unwrapped line.
}

/// \brief Parses a class, struct or union starting at its keyword.
///
/// Returns early, with the ';' still unconsumed, if a ';' is found while
/// skipping a base-class list or template argument list. A body, if present,
/// is parsed as a declaration block.
void UnwrappedLineParser::parseRecord() {
  nextToken();
  if (FormatTok->Tok.is(tok::identifier) ||
      FormatTok->Tok.is(tok::kw___attribute) ||
      FormatTok->Tok.is(tok::kw___declspec)) {
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (FormatTok->Tok.is(tok::l_paren)) {
      parseParens();
    }
    // The actual identifier can be a nested name specifier, and in macros
    // it is often token-pasted.
    while (FormatTok->Tok.is(tok::identifier) ||
           FormatTok->Tok.is(tok::coloncolon) ||
           FormatTok->Tok.is(tok::hashhash))
      nextToken();

    // Note that parsing away template declarations here leads to incorrectly
    // accepting function declarations as record declarations.
    // In general, we cannot solve this problem. Consider:
    // class A<int> B() {}
    // which can be a function definition or a class definition when B() is a
    // macro. If we find enough real-world cases where this is a problem, we
    // can parse for the 'template' keyword in the beginning of the statement,
    // and thus rule out the record production in case there is no template
    // (this would still leave us with an ambiguity between template function
    // and class declarations).
    if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
      while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
        if (FormatTok->Tok.is(tok::semi))
          return;
        nextToken();
      }
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (Style.BreakBeforeBraces == FormatStyle::BS_Linux)
      addUnwrappedLine();

    parseBlock(/*MustBeDeclaration=*/true);
  }
  // We fall through to parsing a structural element afterwards, so
  // class A {} n, m;
  // will end up in one unwrapped line.
}

/// \brief Consumes an Objective-C protocol list from the current '<' through
/// the next '>' (or up to eof).
void UnwrappedLineParser::parseObjCProtocolList() {
  assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
  do
    nextToken();
  while (!eof() && FormatTok->Tok.isNot(tok::greater));
  nextToken(); // Skip '>'.
}

/// \brief Parses structural elements until an "@end" keyword is the current
/// token (which is consumed and ends the line) or eof is reached.
void UnwrappedLineParser::parseObjCUntilAtEnd() {
  do {
    if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
      nextToken();
      addUnwrappedLine();
      break;
    }
    parseStructuralElement();
  } while (!eof());
}

/// \brief Parses an "@interface" or "@implementation" section, starting at
/// the keyword, up to and including the matching "@end".
void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  nextToken();
  nextToken(); // interface name

  // @interface can be followed by either a base class, or a category.
  if (FormatTok->Tok.is(tok::colon)) {
    nextToken();
    nextToken(); // base class name
  } else if (FormatTok->Tok.is(tok::l_paren))
    // Skip category, if present.
    parseParens();

  if (FormatTok->Tok.is(tok::less))
    parseObjCProtocolList();

  // If instance variables are present, keep the '{' on the first line too.
  if (FormatTok->Tok.is(tok::l_brace))
    parseBlock(/*MustBeDeclaration=*/true);

  // With instance variables, this puts '}' on its own line. Without instance
  // variables, this ends the @interface line.
  addUnwrappedLine();

  parseObjCUntilAtEnd();
}

/// \brief Parses an "@protocol" declaration or definition, starting at the
/// keyword. A definition runs up to and including the matching "@end".
void UnwrappedLineParser::parseObjCProtocol() {
  nextToken();
  nextToken(); // protocol name

  if (FormatTok->Tok.is(tok::less))
    parseObjCProtocolList();

  // Check for protocol declaration.
  if (FormatTok->Tok.is(tok::semi)) {
    nextToken();
    return addUnwrappedLine();
  }

  addUnwrappedLine();
  parseObjCUntilAtEnd();
}

/// \brief Appends a copy of the current line to \c CurrentLines and clears its
/// tokens. Does nothing if the current line has no tokens.
///
/// When lines are being added to \c Lines, any preprocessor directive lines
/// collected in \c PreprocessorDirectives are appended right after the line
/// and the collection is emptied.
void UnwrappedLineParser::addUnwrappedLine() {
  if (Line->Tokens.empty())
    return;
  DEBUG({
    llvm::dbgs() << "Line(" << Line->Level << ")"
                 << (Line->InPPDirective ? " MACRO" : "") << ": ";
    for (std::list<FormatToken *>::iterator I = Line->Tokens.begin(),
                                            E = Line->Tokens.end();
         I != E; ++I) {
      llvm::dbgs() << (*I)->Tok.getName() << " ";
    }
    llvm::dbgs() << "\n";
  });
  CurrentLines->push_back(*Line);
  Line->Tokens.clear();
  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
    for (std::vector<UnwrappedLine>::iterator
             I = PreprocessorDirectives.begin(),
             E = PreprocessorDirectives.end();
         I != E; ++I) {
      CurrentLines->push_back(*I);
    }
    PreprocessorDirectives.clear();
  }
}

/// \brief True if the current token is of kind \c tok::eof. Inside a
/// preprocessor directive this may be the fake eof from \c ScopedMacroState.
bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }

/// \brief Moves all comments stored in \c CommentsBeforeNextToken onto the
/// current line.
///
/// If the line held no tokens when this was called, the line is ended before
/// each comment that has newlines before it, and once more at the end if
/// \p NewlineBeforeNext is set.
void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
  // Evaluated once, before any comment is pushed.
  bool JustComments = Line->Tokens.empty();
  for (SmallVectorImpl<FormatToken *>::const_iterator
           I = CommentsBeforeNextToken.begin(),
           E = CommentsBeforeNextToken.end();
       I != E; ++I) {
    if ((*I)->NewlinesBefore && JustComments) {
      addUnwrappedLine();
    }
    pushToken(*I);
  }
  if (NewlineBeforeNext && JustComments) {
    addUnwrappedLine();
  }
  CommentsBeforeNextToken.clear();
}

/// \brief Adds the current token to the current line (after any pending
/// comments) and reads the next one. Does nothing at eof.
void UnwrappedLineParser::nextToken() {
  if (eof())
    return;
  flushComments(FormatTok->NewlinesBefore > 0);
  pushToken(FormatTok);
  readToken();
}

/// \brief Reads tokens from the token source until \c FormatTok is a token
/// the parser should look at (a non-comment token, or eof).
///
/// Along the way:
///  - a '#' that starts a line (or the file) outside a preprocessor directive
///    causes the whole directive to be parsed via \c parsePPDirective();
///  - tokens inside a \c PP_Unreachable branch are skipped;
///  - comments are either pushed onto the current line or stored in
///    \c CommentsBeforeNextToken, depending on whether a comment with a
///    newline before it has been seen yet during this call.
void UnwrappedLineParser::readToken() {
  bool CommentsInCurrentLine = true;
  do {
    FormatTok = Tokens->getNextToken();
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines =
          !Line->Tokens.empty() && CurrentLines == &Lines;
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(FormatTok->NewlinesBefore > 0);
      parsePPDirective();
    }

    // Drop tokens that belong to an unreachable preprocessor branch.
    if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    if (!FormatTok->Tok.is(tok::comment))
      return;
    if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) {
      CommentsInCurrentLine = false;
    }
    if (CommentsInCurrentLine) {
      pushToken(FormatTok);
    } else {
      CommentsBeforeNextToken.push_back(FormatTok);
    }
  } while (!eof());
}

/// \brief Appends \p Tok to the current line. If \c MustBreakBeforeNextToken
/// is set, marks the token with \c MustBreakBefore and clears the flag.
void UnwrappedLineParser::pushToken(FormatToken *Tok) {
  Line->Tokens.push_back(Tok);
  if (MustBreakBeforeNextToken) {
    Line->Tokens.back()->MustBreakBefore = true;
    MustBreakBeforeNextToken = false;
  }
}

} // end namespace format
} // end namespace clang