1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 /// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15 /// where it can be used to format real code. 16 /// 17 //===----------------------------------------------------------------------===// 18 19 #include "UnwrappedLineParser.h" 20 #include "clang/Basic/Diagnostic.h" 21 #include "llvm/Support/raw_ostream.h" 22 23 // Uncomment to get debug output from the UnwrappedLineParser. 24 // Use in combination with --gtest_filter=*TestName* to limit the output to a 25 // single test. 26 // #define UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT 27 28 namespace clang { 29 namespace format { 30 31 class ScopedMacroState : public FormatTokenSource { 32 public: 33 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 34 FormatToken &ResetToken) 35 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 36 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) { 37 TokenSource = this; 38 Line.Level = 0; 39 Line.InPPDirective = true; 40 } 41 42 ~ScopedMacroState() { 43 TokenSource = PreviousTokenSource; 44 ResetToken = Token; 45 Line.InPPDirective = false; 46 Line.Level = PreviousLineLevel; 47 } 48 49 virtual FormatToken getNextToken() { 50 // The \c UnwrappedLineParser guards against this by never calling 51 // \c getNextToken() after it has encountered the first eof token. 52 assert(!eof()); 53 Token = PreviousTokenSource->getNextToken(); 54 if (eof()) 55 return createEOF(); 56 return Token; 57 } 58 59 private: 60 bool eof() { 61 return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline; 62 } 63 64 FormatToken createEOF() { 65 FormatToken FormatTok; 66 FormatTok.Tok.startToken(); 67 FormatTok.Tok.setKind(tok::eof); 68 return FormatTok; 69 } 70 71 UnwrappedLine &Line; 72 FormatTokenSource *&TokenSource; 73 FormatToken &ResetToken; 74 unsigned PreviousLineLevel; 75 FormatTokenSource *PreviousTokenSource; 76 77 FormatToken Token; 78 }; 79 80 class ScopedLineState { 81 public: 82 ScopedLineState(UnwrappedLineParser &Parser) : Parser(Parser) { 83 PreBlockLine = Parser.Line.take(); 84 Parser.Line.reset(new UnwrappedLine(*PreBlockLine)); 85 assert(Parser.LastInCurrentLine == NULL || 86 Parser.LastInCurrentLine->Children.empty()); 87 PreBlockLastToken = Parser.LastInCurrentLine; 88 PreBlockRootTokenInitialized = Parser.RootTokenInitialized; 89 Parser.RootTokenInitialized = false; 90 Parser.LastInCurrentLine = NULL; 91 } 92 93 ~ScopedLineState() { 94 if (Parser.RootTokenInitialized) { 95 Parser.addUnwrappedLine(); 96 } 97 assert(!Parser.RootTokenInitialized); 98 Parser.Line.reset(PreBlockLine); 99 Parser.RootTokenInitialized = PreBlockRootTokenInitialized; 100 Parser.LastInCurrentLine = PreBlockLastToken; 101 assert(Parser.LastInCurrentLine == NULL || 102 Parser.LastInCurrentLine->Children.empty()); 103 Parser.MustBreakBeforeNextToken = true; 104 } 105 106 private: 107 UnwrappedLineParser &Parser; 108 109 UnwrappedLine *PreBlockLine; 110 FormatToken* PreBlockLastToken; 111 bool PreBlockRootTokenInitialized; 112 }; 113 114 UnwrappedLineParser::UnwrappedLineParser( 115 clang::DiagnosticsEngine &Diag, const FormatStyle &Style, 116 FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback) 117 : Line(new UnwrappedLine), RootTokenInitialized(false), 118 LastInCurrentLine(NULL), MustBreakBeforeNextToken(false), Diag(Diag), 119 Style(Style), Tokens(&Tokens), Callback(Callback) { 120 } 121 122 bool UnwrappedLineParser::parse() { 123 #ifdef UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT 124 llvm::errs() << "----\n"; 125 #endif 126 readToken(); 127 return parseFile(); 128 } 129 130 bool UnwrappedLineParser::parseFile() { 131 bool Error = parseLevel(/*HasOpeningBrace=*/false); 132 // Make sure to format the remaining tokens. 133 addUnwrappedLine(); 134 return Error; 135 } 136 137 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 138 bool Error = false; 139 do { 140 switch (FormatTok.Tok.getKind()) { 141 case tok::comment: 142 nextToken(); 143 addUnwrappedLine(); 144 break; 145 case tok::l_brace: 146 Error |= parseBlock(); 147 addUnwrappedLine(); 148 break; 149 case tok::r_brace: 150 if (HasOpeningBrace) { 151 return false; 152 } else { 153 Diag.Report(FormatTok.Tok.getLocation(), 154 Diag.getCustomDiagID(clang::DiagnosticsEngine::Error, 155 "unexpected '}'")); 156 Error = true; 157 nextToken(); 158 addUnwrappedLine(); 159 } 160 break; 161 default: 162 parseStructuralElement(); 163 break; 164 } 165 } while (!eof()); 166 return Error; 167 } 168 169 bool UnwrappedLineParser::parseBlock(unsigned AddLevels) { 170 assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected"); 171 nextToken(); 172 173 if (!FormatTok.Tok.is(tok::r_brace)) { 174 addUnwrappedLine(); 175 176 Line->Level += AddLevels; 177 parseLevel(/*HasOpeningBrace=*/true); 178 Line->Level -= AddLevels; 179 180 if (!FormatTok.Tok.is(tok::r_brace)) 181 return true; 182 183 } 184 nextToken(); // Munch the closing brace. 185 return false; 186 } 187 188 void UnwrappedLineParser::parsePPDirective() { 189 assert(FormatTok.Tok.is(tok::hash) && "'#' expected"); 190 ScopedMacroState MacroState(*Line, Tokens, FormatTok); 191 nextToken(); 192 193 if (FormatTok.Tok.getIdentifierInfo() == NULL) { 194 addUnwrappedLine(); 195 return; 196 } 197 198 switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) { 199 case tok::pp_define: 200 parsePPDefine(); 201 break; 202 default: 203 parsePPUnknown(); 204 break; 205 } 206 } 207 208 void UnwrappedLineParser::parsePPDefine() { 209 nextToken(); 210 211 if (FormatTok.Tok.getKind() != tok::identifier) { 212 parsePPUnknown(); 213 return; 214 } 215 nextToken(); 216 if (FormatTok.Tok.getKind() == tok::l_paren) { 217 parseParens(); 218 } 219 addUnwrappedLine(); 220 Line->Level = 1; 221 222 // Errors during a preprocessor directive can only affect the layout of the 223 // preprocessor directive, and thus we ignore them. An alternative approach 224 // would be to use the same approach we use on the file level (no 225 // re-indentation if there was a structural error) within the macro 226 // definition. 227 parseFile(); 228 } 229 230 void UnwrappedLineParser::parsePPUnknown() { 231 do { 232 nextToken(); 233 } while (!eof()); 234 addUnwrappedLine(); 235 } 236 237 void UnwrappedLineParser::parseComments() { 238 // Consume leading line comments, e.g. for branches without compounds. 239 while (FormatTok.Tok.is(tok::comment)) { 240 nextToken(); 241 addUnwrappedLine(); 242 } 243 } 244 245 void UnwrappedLineParser::parseStructuralElement() { 246 assert(!FormatTok.Tok.is(tok::l_brace)); 247 parseComments(); 248 249 int TokenNumber = 0; 250 switch (FormatTok.Tok.getKind()) { 251 case tok::at: 252 nextToken(); 253 switch (FormatTok.Tok.getObjCKeywordID()) { 254 case tok::objc_public: 255 case tok::objc_protected: 256 case tok::objc_package: 257 case tok::objc_private: 258 return parseAccessSpecifier(); 259 case tok::objc_interface: 260 case tok::objc_implementation: 261 return parseObjCInterfaceOrImplementation(); 262 case tok::objc_protocol: 263 return parseObjCProtocol(); 264 case tok::objc_end: 265 return; // Handled by the caller. 266 case tok::objc_optional: 267 case tok::objc_required: 268 nextToken(); 269 addUnwrappedLine(); 270 return; 271 default: 272 break; 273 } 274 break; 275 case tok::kw_namespace: 276 parseNamespace(); 277 return; 278 case tok::kw_inline: 279 nextToken(); 280 TokenNumber++; 281 if (FormatTok.Tok.is(tok::kw_namespace)) { 282 parseNamespace(); 283 return; 284 } 285 break; 286 case tok::kw_public: 287 case tok::kw_protected: 288 case tok::kw_private: 289 parseAccessSpecifier(); 290 return; 291 case tok::kw_if: 292 parseIfThenElse(); 293 return; 294 case tok::kw_for: 295 case tok::kw_while: 296 parseForOrWhileLoop(); 297 return; 298 case tok::kw_do: 299 parseDoWhile(); 300 return; 301 case tok::kw_switch: 302 parseSwitch(); 303 return; 304 case tok::kw_default: 305 nextToken(); 306 parseLabel(); 307 return; 308 case tok::kw_case: 309 parseCaseLabel(); 310 return; 311 default: 312 break; 313 } 314 do { 315 ++TokenNumber; 316 switch (FormatTok.Tok.getKind()) { 317 case tok::kw_enum: 318 parseEnum(); 319 return; 320 case tok::kw_struct: // fallthrough 321 case tok::kw_union: // fallthrough 322 case tok::kw_class: 323 parseStructClassOrBracedList(); 324 return; 325 case tok::semi: 326 nextToken(); 327 addUnwrappedLine(); 328 return; 329 case tok::l_paren: 330 parseParens(); 331 break; 332 case tok::l_brace: 333 // A block outside of parentheses must be the last part of a 334 // structural element. 335 // FIXME: Figure out cases where this is not true, and add projections for 336 // them (the one we know is missing are lambdas). 337 parseBlock(); 338 addUnwrappedLine(); 339 return; 340 case tok::identifier: 341 nextToken(); 342 if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) { 343 parseLabel(); 344 return; 345 } 346 break; 347 case tok::equal: 348 nextToken(); 349 if (FormatTok.Tok.is(tok::l_brace)) { 350 parseBracedList(); 351 } 352 break; 353 default: 354 nextToken(); 355 break; 356 } 357 } while (!eof()); 358 } 359 360 void UnwrappedLineParser::parseBracedList() { 361 nextToken(); 362 363 do { 364 switch (FormatTok.Tok.getKind()) { 365 case tok::l_brace: 366 parseBracedList(); 367 break; 368 case tok::r_brace: 369 nextToken(); 370 return; 371 default: 372 nextToken(); 373 break; 374 } 375 } while (!eof()); 376 } 377 378 void UnwrappedLineParser::parseParens() { 379 assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected."); 380 nextToken(); 381 do { 382 switch (FormatTok.Tok.getKind()) { 383 case tok::l_paren: 384 parseParens(); 385 break; 386 case tok::r_paren: 387 nextToken(); 388 return; 389 case tok::l_brace: 390 { 391 nextToken(); 392 ScopedLineState LineState(*this); 393 Line->Level += 1; 394 parseLevel(/*HasOpeningBrace=*/true); 395 Line->Level -= 1; 396 } 397 break; 398 default: 399 nextToken(); 400 break; 401 } 402 } while (!eof()); 403 } 404 405 void UnwrappedLineParser::parseIfThenElse() { 406 assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected"); 407 nextToken(); 408 if (FormatTok.Tok.is(tok::l_paren)) 409 parseParens(); 410 bool NeedsUnwrappedLine = false; 411 if (FormatTok.Tok.is(tok::l_brace)) { 412 parseBlock(); 413 NeedsUnwrappedLine = true; 414 } else { 415 addUnwrappedLine(); 416 ++Line->Level; 417 parseStructuralElement(); 418 --Line->Level; 419 } 420 if (FormatTok.Tok.is(tok::kw_else)) { 421 nextToken(); 422 if (FormatTok.Tok.is(tok::l_brace)) { 423 parseBlock(); 424 addUnwrappedLine(); 425 } else if (FormatTok.Tok.is(tok::kw_if)) { 426 parseIfThenElse(); 427 } else { 428 addUnwrappedLine(); 429 ++Line->Level; 430 parseStructuralElement(); 431 --Line->Level; 432 } 433 } else if (NeedsUnwrappedLine) { 434 addUnwrappedLine(); 435 } 436 } 437 438 void UnwrappedLineParser::parseNamespace() { 439 assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected"); 440 nextToken(); 441 if (FormatTok.Tok.is(tok::identifier)) 442 nextToken(); 443 if (FormatTok.Tok.is(tok::l_brace)) { 444 parseBlock(0); 445 addUnwrappedLine(); 446 } 447 // FIXME: Add error handling. 448 } 449 450 void UnwrappedLineParser::parseForOrWhileLoop() { 451 assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) && 452 "'for' or 'while' expected"); 453 nextToken(); 454 if (FormatTok.Tok.is(tok::l_paren)) 455 parseParens(); 456 if (FormatTok.Tok.is(tok::l_brace)) { 457 parseBlock(); 458 addUnwrappedLine(); 459 } else { 460 addUnwrappedLine(); 461 ++Line->Level; 462 parseStructuralElement(); 463 --Line->Level; 464 } 465 } 466 467 void UnwrappedLineParser::parseDoWhile() { 468 assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected"); 469 nextToken(); 470 if (FormatTok.Tok.is(tok::l_brace)) { 471 parseBlock(); 472 } else { 473 addUnwrappedLine(); 474 ++Line->Level; 475 parseStructuralElement(); 476 --Line->Level; 477 } 478 479 // FIXME: Add error handling. 480 if (!FormatTok.Tok.is(tok::kw_while)) { 481 addUnwrappedLine(); 482 return; 483 } 484 485 nextToken(); 486 parseStructuralElement(); 487 } 488 489 void UnwrappedLineParser::parseLabel() { 490 // FIXME: remove all asserts. 491 assert(FormatTok.Tok.is(tok::colon) && "':' expected"); 492 nextToken(); 493 unsigned OldLineLevel = Line->Level; 494 if (Line->Level > 0) 495 --Line->Level; 496 if (FormatTok.Tok.is(tok::l_brace)) { 497 parseBlock(); 498 } 499 addUnwrappedLine(); 500 Line->Level = OldLineLevel; 501 } 502 503 void UnwrappedLineParser::parseCaseLabel() { 504 assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected"); 505 // FIXME: fix handling of complex expressions here. 506 do { 507 nextToken(); 508 } while (!eof() && !FormatTok.Tok.is(tok::colon)); 509 parseLabel(); 510 } 511 512 void UnwrappedLineParser::parseSwitch() { 513 assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected"); 514 nextToken(); 515 if (FormatTok.Tok.is(tok::l_paren)) 516 parseParens(); 517 if (FormatTok.Tok.is(tok::l_brace)) { 518 parseBlock(Style.IndentCaseLabels ? 2 : 1); 519 addUnwrappedLine(); 520 } else { 521 addUnwrappedLine(); 522 Line->Level += (Style.IndentCaseLabels ? 2 : 1); 523 parseStructuralElement(); 524 Line->Level -= (Style.IndentCaseLabels ? 2 : 1); 525 } 526 } 527 528 void UnwrappedLineParser::parseAccessSpecifier() { 529 nextToken(); 530 // Otherwise, we don't know what it is, and we'd better keep the next token. 531 if (FormatTok.Tok.is(tok::colon)) 532 nextToken(); 533 addUnwrappedLine(); 534 } 535 536 void UnwrappedLineParser::parseEnum() { 537 bool HasContents = false; 538 do { 539 switch (FormatTok.Tok.getKind()) { 540 case tok::l_brace: 541 nextToken(); 542 addUnwrappedLine(); 543 ++Line->Level; 544 parseComments(); 545 break; 546 case tok::l_paren: 547 parseParens(); 548 break; 549 case tok::comma: 550 nextToken(); 551 addUnwrappedLine(); 552 parseComments(); 553 break; 554 case tok::r_brace: 555 if (HasContents) 556 addUnwrappedLine(); 557 --Line->Level; 558 nextToken(); 559 break; 560 case tok::semi: 561 nextToken(); 562 addUnwrappedLine(); 563 return; 564 default: 565 HasContents = true; 566 nextToken(); 567 break; 568 } 569 } while (!eof()); 570 } 571 572 void UnwrappedLineParser::parseStructClassOrBracedList() { 573 nextToken(); 574 do { 575 switch (FormatTok.Tok.getKind()) { 576 case tok::l_brace: 577 // FIXME: Think about how to resolve the error handling here. 578 parseBlock(); 579 parseStructuralElement(); 580 return; 581 case tok::semi: 582 nextToken(); 583 addUnwrappedLine(); 584 return; 585 case tok::equal: 586 nextToken(); 587 if (FormatTok.Tok.is(tok::l_brace)) { 588 parseBracedList(); 589 } 590 break; 591 default: 592 nextToken(); 593 break; 594 } 595 } while (!eof()); 596 } 597 598 void UnwrappedLineParser::parseObjCProtocolList() { 599 assert(FormatTok.Tok.is(tok::less) && "'<' expected."); 600 do 601 nextToken(); 602 while (!eof() && FormatTok.Tok.isNot(tok::greater)); 603 nextToken(); // Skip '>'. 604 } 605 606 void UnwrappedLineParser::parseObjCUntilAtEnd() { 607 do { 608 if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) { 609 nextToken(); 610 addUnwrappedLine(); 611 break; 612 } 613 parseStructuralElement(); 614 } while (!eof()); 615 } 616 617 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 618 nextToken(); 619 nextToken(); // interface name 620 621 // @interface can be followed by either a base class, or a category. 622 if (FormatTok.Tok.is(tok::colon)) { 623 nextToken(); 624 nextToken(); // base class name 625 } else if (FormatTok.Tok.is(tok::l_paren)) 626 // Skip category, if present. 627 parseParens(); 628 629 if (FormatTok.Tok.is(tok::less)) 630 parseObjCProtocolList(); 631 632 // If instance variables are present, keep the '{' on the first line too. 633 if (FormatTok.Tok.is(tok::l_brace)) 634 parseBlock(); 635 636 // With instance variables, this puts '}' on its own line. Without instance 637 // variables, this ends the @interface line. 638 addUnwrappedLine(); 639 640 parseObjCUntilAtEnd(); 641 } 642 643 void UnwrappedLineParser::parseObjCProtocol() { 644 nextToken(); 645 nextToken(); // protocol name 646 647 if (FormatTok.Tok.is(tok::less)) 648 parseObjCProtocolList(); 649 650 // Check for protocol declaration. 651 if (FormatTok.Tok.is(tok::semi)) { 652 nextToken(); 653 return addUnwrappedLine(); 654 } 655 656 addUnwrappedLine(); 657 parseObjCUntilAtEnd(); 658 } 659 660 void UnwrappedLineParser::addUnwrappedLine() { 661 if (!RootTokenInitialized) 662 return; 663 // Consume trailing comments. 664 while (!eof() && FormatTok.NewlinesBefore == 0 && 665 FormatTok.Tok.is(tok::comment)) { 666 nextToken(); 667 } 668 #ifdef UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT 669 FormatToken* NextToken = &Line->RootToken; 670 llvm::errs() << "Line: "; 671 while (NextToken) { 672 llvm::errs() << NextToken->Tok.getName() << " "; 673 NextToken = NextToken->Children.empty() ? NULL : &NextToken->Children[0]; 674 } 675 llvm::errs() << "\n"; 676 #endif 677 Callback.consumeUnwrappedLine(*Line); 678 RootTokenInitialized = false; 679 LastInCurrentLine = NULL; 680 } 681 682 bool UnwrappedLineParser::eof() const { 683 return FormatTok.Tok.is(tok::eof); 684 } 685 686 void UnwrappedLineParser::nextToken() { 687 if (eof()) 688 return; 689 if (RootTokenInitialized) { 690 assert(LastInCurrentLine->Children.empty()); 691 LastInCurrentLine->Children.push_back(FormatTok); 692 LastInCurrentLine = &LastInCurrentLine->Children.back(); 693 } else { 694 Line->RootToken = FormatTok; 695 RootTokenInitialized = true; 696 LastInCurrentLine = &Line->RootToken; 697 } 698 if (MustBreakBeforeNextToken) { 699 LastInCurrentLine->MustBreakBefore = true; 700 MustBreakBeforeNextToken = false; 701 } 702 readToken(); 703 } 704 705 void UnwrappedLineParser::readToken() { 706 FormatTok = Tokens->getNextToken(); 707 while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) && 708 ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) || 709 FormatTok.IsFirst)) { 710 ScopedLineState BlockState(*this); 711 parsePPDirective(); 712 } 713 } 714 715 } // end namespace format 716 } // end namespace clang 717