1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/Support/Debug.h" 18 19 #define DEBUG_TYPE "format-parser" 20 21 namespace clang { 22 namespace format { 23 24 class FormatTokenSource { 25 public: 26 virtual ~FormatTokenSource() {} 27 virtual FormatToken *getNextToken() = 0; 28 29 virtual unsigned getPosition() = 0; 30 virtual FormatToken *setPosition(unsigned Position) = 0; 31 }; 32 33 namespace { 34 35 class ScopedDeclarationState { 36 public: 37 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 38 bool MustBeDeclaration) 39 : Line(Line), Stack(Stack) { 40 Line.MustBeDeclaration = MustBeDeclaration; 41 Stack.push_back(MustBeDeclaration); 42 } 43 ~ScopedDeclarationState() { 44 Stack.pop_back(); 45 if (!Stack.empty()) 46 Line.MustBeDeclaration = Stack.back(); 47 else 48 Line.MustBeDeclaration = true; 49 } 50 51 private: 52 UnwrappedLine &Line; 53 std::vector<bool> &Stack; 54 }; 55 56 class ScopedMacroState : public FormatTokenSource { 57 public: 58 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 59 FormatToken *&ResetToken, bool &StructuralError) 60 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 61 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 62 StructuralError(StructuralError), 63 PreviousStructuralError(StructuralError), Token(nullptr) { 64 TokenSource = this; 65 Line.Level = 0; 66 Line.InPPDirective = true; 67 } 68 69 ~ScopedMacroState() { 70 TokenSource = PreviousTokenSource; 71 ResetToken = Token; 72 Line.InPPDirective = false; 73 Line.Level = PreviousLineLevel; 74 StructuralError = PreviousStructuralError; 75 } 76 77 FormatToken *getNextToken() override { 78 // The \c UnwrappedLineParser guards against this by never calling 79 // \c getNextToken() after it has encountered the first eof token. 80 assert(!eof()); 81 Token = PreviousTokenSource->getNextToken(); 82 if (eof()) 83 return getFakeEOF(); 84 return Token; 85 } 86 87 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 88 89 FormatToken *setPosition(unsigned Position) override { 90 Token = PreviousTokenSource->setPosition(Position); 91 return Token; 92 } 93 94 private: 95 bool eof() { return Token && Token->HasUnescapedNewline; } 96 97 FormatToken *getFakeEOF() { 98 static bool EOFInitialized = false; 99 static FormatToken FormatTok; 100 if (!EOFInitialized) { 101 FormatTok.Tok.startToken(); 102 FormatTok.Tok.setKind(tok::eof); 103 EOFInitialized = true; 104 } 105 return &FormatTok; 106 } 107 108 UnwrappedLine &Line; 109 FormatTokenSource *&TokenSource; 110 FormatToken *&ResetToken; 111 unsigned PreviousLineLevel; 112 FormatTokenSource *PreviousTokenSource; 113 bool &StructuralError; 114 bool PreviousStructuralError; 115 116 FormatToken *Token; 117 }; 118 119 } // end anonymous namespace 120 121 class ScopedLineState { 122 public: 123 ScopedLineState(UnwrappedLineParser &Parser, 124 bool SwitchToPreprocessorLines = false) 125 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 126 if (SwitchToPreprocessorLines) 127 Parser.CurrentLines = &Parser.PreprocessorDirectives; 128 else if (!Parser.Line->Tokens.empty()) 129 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 130 PreBlockLine = std::move(Parser.Line); 131 Parser.Line = llvm::make_unique<UnwrappedLine>(); 132 Parser.Line->Level = PreBlockLine->Level; 133 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 134 } 135 136 ~ScopedLineState() { 137 if (!Parser.Line->Tokens.empty()) { 138 Parser.addUnwrappedLine(); 139 } 140 assert(Parser.Line->Tokens.empty()); 141 Parser.Line = std::move(PreBlockLine); 142 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 143 Parser.MustBreakBeforeNextToken = true; 144 Parser.CurrentLines = OriginalLines; 145 } 146 147 private: 148 UnwrappedLineParser &Parser; 149 150 std::unique_ptr<UnwrappedLine> PreBlockLine; 151 SmallVectorImpl<UnwrappedLine> *OriginalLines; 152 }; 153 154 class CompoundStatementIndenter { 155 public: 156 CompoundStatementIndenter(UnwrappedLineParser *Parser, 157 const FormatStyle &Style, unsigned &LineLevel) 158 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 159 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) { 160 Parser->addUnwrappedLine(); 161 } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 162 Parser->addUnwrappedLine(); 163 ++LineLevel; 164 } 165 } 166 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 167 168 private: 169 unsigned &LineLevel; 170 unsigned OldLineLevel; 171 }; 172 173 namespace { 174 175 class IndexedTokenSource : public FormatTokenSource { 176 public: 177 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 178 : Tokens(Tokens), Position(-1) {} 179 180 FormatToken *getNextToken() override { 181 ++Position; 182 return Tokens[Position]; 183 } 184 185 unsigned getPosition() override { 186 assert(Position >= 0); 187 return Position; 188 } 189 190 FormatToken *setPosition(unsigned P) override { 191 Position = P; 192 return Tokens[Position]; 193 } 194 195 void reset() { Position = -1; } 196 197 private: 198 ArrayRef<FormatToken *> Tokens; 199 int Position; 200 }; 201 202 } // end anonymous namespace 203 204 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 205 const AdditionalKeywords &Keywords, 206 ArrayRef<FormatToken *> Tokens, 207 UnwrappedLineConsumer &Callback) 208 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 209 CurrentLines(&Lines), StructuralError(false), Style(Style), 210 Keywords(Keywords), Tokens(nullptr), Callback(Callback), 211 AllTokens(Tokens), PPBranchLevel(-1) {} 212 213 void UnwrappedLineParser::reset() { 214 PPBranchLevel = -1; 215 Line.reset(new UnwrappedLine); 216 CommentsBeforeNextToken.clear(); 217 FormatTok = nullptr; 218 MustBreakBeforeNextToken = false; 219 PreprocessorDirectives.clear(); 220 CurrentLines = &Lines; 221 DeclarationScopeStack.clear(); 222 StructuralError = false; 223 PPStack.clear(); 224 } 225 226 bool UnwrappedLineParser::parse() { 227 IndexedTokenSource TokenSource(AllTokens); 228 do { 229 DEBUG(llvm::dbgs() << "----\n"); 230 reset(); 231 Tokens = &TokenSource; 232 TokenSource.reset(); 233 234 readToken(); 235 parseFile(); 236 // Create line with eof token. 237 pushToken(FormatTok); 238 addUnwrappedLine(); 239 240 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 241 E = Lines.end(); 242 I != E; ++I) { 243 Callback.consumeUnwrappedLine(*I); 244 } 245 Callback.finishRun(); 246 Lines.clear(); 247 while (!PPLevelBranchIndex.empty() && 248 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 249 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 250 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 251 } 252 if (!PPLevelBranchIndex.empty()) { 253 ++PPLevelBranchIndex.back(); 254 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 255 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 256 } 257 } while (!PPLevelBranchIndex.empty()); 258 259 return StructuralError; 260 } 261 262 void UnwrappedLineParser::parseFile() { 263 ScopedDeclarationState DeclarationState( 264 *Line, DeclarationScopeStack, 265 /*MustBeDeclaration=*/ !Line->InPPDirective); 266 parseLevel(/*HasOpeningBrace=*/false); 267 // Make sure to format the remaining tokens. 268 flushComments(true); 269 addUnwrappedLine(); 270 } 271 272 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 273 bool SwitchLabelEncountered = false; 274 do { 275 switch (FormatTok->Tok.getKind()) { 276 case tok::comment: 277 nextToken(); 278 addUnwrappedLine(); 279 break; 280 case tok::l_brace: 281 // FIXME: Add parameter whether this can happen - if this happens, we must 282 // be in a non-declaration context. 283 parseBlock(/*MustBeDeclaration=*/false); 284 addUnwrappedLine(); 285 break; 286 case tok::r_brace: 287 if (HasOpeningBrace) 288 return; 289 StructuralError = true; 290 nextToken(); 291 addUnwrappedLine(); 292 break; 293 case tok::kw_default: 294 case tok::kw_case: 295 if (!SwitchLabelEncountered && 296 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 297 ++Line->Level; 298 SwitchLabelEncountered = true; 299 parseStructuralElement(); 300 break; 301 default: 302 parseStructuralElement(); 303 break; 304 } 305 } while (!eof()); 306 } 307 308 void UnwrappedLineParser::calculateBraceTypes() { 309 // We'll parse forward through the tokens until we hit 310 // a closing brace or eof - note that getNextToken() will 311 // parse macros, so this will magically work inside macro 312 // definitions, too. 313 unsigned StoredPosition = Tokens->getPosition(); 314 FormatToken *Tok = FormatTok; 315 // Keep a stack of positions of lbrace tokens. We will 316 // update information about whether an lbrace starts a 317 // braced init list or a different block during the loop. 318 SmallVector<FormatToken *, 8> LBraceStack; 319 assert(Tok->Tok.is(tok::l_brace)); 320 do { 321 // Get next none-comment token. 322 FormatToken *NextTok; 323 unsigned ReadTokens = 0; 324 do { 325 NextTok = Tokens->getNextToken(); 326 ++ReadTokens; 327 } while (NextTok->is(tok::comment)); 328 329 switch (Tok->Tok.getKind()) { 330 case tok::l_brace: 331 LBraceStack.push_back(Tok); 332 break; 333 case tok::r_brace: 334 if (!LBraceStack.empty()) { 335 if (LBraceStack.back()->BlockKind == BK_Unknown) { 336 bool ProbablyBracedList = false; 337 if (Style.Language == FormatStyle::LK_Proto) { 338 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 339 } else { 340 // Using OriginalColumn to distinguish between ObjC methods and 341 // binary operators is a bit hacky. 342 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 343 NextTok->OriginalColumn == 0; 344 345 // If there is a comma, semicolon or right paren after the closing 346 // brace, we assume this is a braced initializer list. Note that 347 // regardless how we mark inner braces here, we will overwrite the 348 // BlockKind later if we parse a braced list (where all blocks 349 // inside are by default braced lists), or when we explicitly detect 350 // blocks (for example while parsing lambdas). 351 // 352 // We exclude + and - as they can be ObjC visibility modifiers. 353 ProbablyBracedList = 354 NextTok->isOneOf(tok::comma, tok::semi, tok::period, tok::colon, 355 tok::r_paren, tok::r_square, tok::l_brace, 356 tok::l_paren, tok::ellipsis) || 357 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 358 } 359 if (ProbablyBracedList) { 360 Tok->BlockKind = BK_BracedInit; 361 LBraceStack.back()->BlockKind = BK_BracedInit; 362 } else { 363 Tok->BlockKind = BK_Block; 364 LBraceStack.back()->BlockKind = BK_Block; 365 } 366 } 367 LBraceStack.pop_back(); 368 } 369 break; 370 case tok::at: 371 case tok::semi: 372 case tok::kw_if: 373 case tok::kw_while: 374 case tok::kw_for: 375 case tok::kw_switch: 376 case tok::kw_try: 377 case tok::kw___try: 378 if (!LBraceStack.empty()) 379 LBraceStack.back()->BlockKind = BK_Block; 380 break; 381 default: 382 break; 383 } 384 Tok = NextTok; 385 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 386 // Assume other blocks for all unclosed opening braces. 387 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 388 if (LBraceStack[i]->BlockKind == BK_Unknown) 389 LBraceStack[i]->BlockKind = BK_Block; 390 } 391 392 FormatTok = Tokens->setPosition(StoredPosition); 393 } 394 395 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 396 bool MunchSemi) { 397 assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected"); 398 unsigned InitialLevel = Line->Level; 399 nextToken(); 400 401 addUnwrappedLine(); 402 403 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 404 MustBeDeclaration); 405 if (AddLevel) 406 ++Line->Level; 407 parseLevel(/*HasOpeningBrace=*/true); 408 409 if (!FormatTok->Tok.is(tok::r_brace)) { 410 Line->Level = InitialLevel; 411 StructuralError = true; 412 return; 413 } 414 415 nextToken(); // Munch the closing brace. 416 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 417 nextToken(); 418 Line->Level = InitialLevel; 419 } 420 421 static bool IsGoogScope(const UnwrappedLine &Line) { 422 // FIXME: Closure-library specific stuff should not be hard-coded but be 423 // configurable. 424 if (Line.Tokens.size() < 4) 425 return false; 426 auto I = Line.Tokens.begin(); 427 if (I->Tok->TokenText != "goog") 428 return false; 429 ++I; 430 if (I->Tok->isNot(tok::period)) 431 return false; 432 ++I; 433 if (I->Tok->TokenText != "scope") 434 return false; 435 ++I; 436 return I->Tok->is(tok::l_paren); 437 } 438 439 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 440 const FormatToken &InitialToken) { 441 switch (Style.BreakBeforeBraces) { 442 case FormatStyle::BS_Linux: 443 return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class); 444 case FormatStyle::BS_Allman: 445 case FormatStyle::BS_GNU: 446 return true; 447 default: 448 return false; 449 } 450 } 451 452 void UnwrappedLineParser::parseChildBlock() { 453 FormatTok->BlockKind = BK_Block; 454 nextToken(); 455 { 456 bool GoogScope = 457 Style.Language == FormatStyle::LK_JavaScript && IsGoogScope(*Line); 458 ScopedLineState LineState(*this); 459 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 460 /*MustBeDeclaration=*/false); 461 Line->Level += GoogScope ? 0 : 1; 462 parseLevel(/*HasOpeningBrace=*/true); 463 Line->Level -= GoogScope ? 0 : 1; 464 } 465 nextToken(); 466 } 467 468 void UnwrappedLineParser::parsePPDirective() { 469 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 470 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); 471 nextToken(); 472 473 if (!FormatTok->Tok.getIdentifierInfo()) { 474 parsePPUnknown(); 475 return; 476 } 477 478 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 479 case tok::pp_define: 480 parsePPDefine(); 481 return; 482 case tok::pp_if: 483 parsePPIf(/*IfDef=*/false); 484 break; 485 case tok::pp_ifdef: 486 case tok::pp_ifndef: 487 parsePPIf(/*IfDef=*/true); 488 break; 489 case tok::pp_else: 490 parsePPElse(); 491 break; 492 case tok::pp_elif: 493 parsePPElIf(); 494 break; 495 case tok::pp_endif: 496 parsePPEndIf(); 497 break; 498 default: 499 parsePPUnknown(); 500 break; 501 } 502 } 503 504 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 505 if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) 506 PPStack.push_back(PP_Unreachable); 507 else 508 PPStack.push_back(PP_Conditional); 509 } 510 511 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 512 ++PPBranchLevel; 513 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 514 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 515 PPLevelBranchIndex.push_back(0); 516 PPLevelBranchCount.push_back(0); 517 } 518 PPChainBranchIndex.push(0); 519 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 520 conditionalCompilationCondition(Unreachable || Skip); 521 } 522 523 void UnwrappedLineParser::conditionalCompilationAlternative() { 524 if (!PPStack.empty()) 525 PPStack.pop_back(); 526 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 527 if (!PPChainBranchIndex.empty()) 528 ++PPChainBranchIndex.top(); 529 conditionalCompilationCondition( 530 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 531 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 532 } 533 534 void UnwrappedLineParser::conditionalCompilationEnd() { 535 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 536 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 537 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 538 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 539 } 540 } 541 // Guard against #endif's without #if. 542 if (PPBranchLevel > 0) 543 --PPBranchLevel; 544 if (!PPChainBranchIndex.empty()) 545 PPChainBranchIndex.pop(); 546 if (!PPStack.empty()) 547 PPStack.pop_back(); 548 } 549 550 void UnwrappedLineParser::parsePPIf(bool IfDef) { 551 nextToken(); 552 bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && 553 FormatTok->Tok.getLiteralData() != nullptr && 554 StringRef(FormatTok->Tok.getLiteralData(), 555 FormatTok->Tok.getLength()) == "0") || 556 FormatTok->Tok.is(tok::kw_false); 557 conditionalCompilationStart(!IfDef && IsLiteralFalse); 558 parsePPUnknown(); 559 } 560 561 void UnwrappedLineParser::parsePPElse() { 562 conditionalCompilationAlternative(); 563 parsePPUnknown(); 564 } 565 566 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 567 568 void UnwrappedLineParser::parsePPEndIf() { 569 conditionalCompilationEnd(); 570 parsePPUnknown(); 571 } 572 573 void UnwrappedLineParser::parsePPDefine() { 574 nextToken(); 575 576 if (FormatTok->Tok.getKind() != tok::identifier) { 577 parsePPUnknown(); 578 return; 579 } 580 nextToken(); 581 if (FormatTok->Tok.getKind() == tok::l_paren && 582 FormatTok->WhitespaceRange.getBegin() == 583 FormatTok->WhitespaceRange.getEnd()) { 584 parseParens(); 585 } 586 addUnwrappedLine(); 587 Line->Level = 1; 588 589 // Errors during a preprocessor directive can only affect the layout of the 590 // preprocessor directive, and thus we ignore them. An alternative approach 591 // would be to use the same approach we use on the file level (no 592 // re-indentation if there was a structural error) within the macro 593 // definition. 594 parseFile(); 595 } 596 597 void UnwrappedLineParser::parsePPUnknown() { 598 do { 599 nextToken(); 600 } while (!eof()); 601 addUnwrappedLine(); 602 } 603 604 // Here we blacklist certain tokens that are not usually the first token in an 605 // unwrapped line. This is used in attempt to distinguish macro calls without 606 // trailing semicolons from other constructs split to several lines. 607 bool tokenCanStartNewLine(clang::Token Tok) { 608 // Semicolon can be a null-statement, l_square can be a start of a macro or 609 // a C++11 attribute, but this doesn't seem to be common. 610 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 611 Tok.isNot(tok::l_square) && 612 // Tokens that can only be used as binary operators and a part of 613 // overloaded operator names. 614 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 615 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 616 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 617 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 618 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 619 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 620 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 621 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 622 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 623 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 624 Tok.isNot(tok::lesslessequal) && 625 // Colon is used in labels, base class lists, initializer lists, 626 // range-based for loops, ternary operator, but should never be the 627 // first token in an unwrapped line. 628 Tok.isNot(tok::colon) && 629 // 'noexcept' is a trailing annotation. 630 Tok.isNot(tok::kw_noexcept); 631 } 632 633 void UnwrappedLineParser::parseStructuralElement() { 634 assert(!FormatTok->Tok.is(tok::l_brace)); 635 switch (FormatTok->Tok.getKind()) { 636 case tok::at: 637 nextToken(); 638 if (FormatTok->Tok.is(tok::l_brace)) { 639 parseBracedList(); 640 break; 641 } 642 switch (FormatTok->Tok.getObjCKeywordID()) { 643 case tok::objc_public: 644 case tok::objc_protected: 645 case tok::objc_package: 646 case tok::objc_private: 647 return parseAccessSpecifier(); 648 case tok::objc_interface: 649 case tok::objc_implementation: 650 return parseObjCInterfaceOrImplementation(); 651 case tok::objc_protocol: 652 return parseObjCProtocol(); 653 case tok::objc_end: 654 return; // Handled by the caller. 655 case tok::objc_optional: 656 case tok::objc_required: 657 nextToken(); 658 addUnwrappedLine(); 659 return; 660 case tok::objc_try: 661 // This branch isn't strictly necessary (the kw_try case below would 662 // do this too after the tok::at is parsed above). But be explicit. 663 parseTryCatch(); 664 return; 665 default: 666 break; 667 } 668 break; 669 case tok::kw_asm: 670 nextToken(); 671 if (FormatTok->is(tok::l_brace)) { 672 nextToken(); 673 while (FormatTok && FormatTok->isNot(tok::eof)) { 674 if (FormatTok->is(tok::r_brace)) { 675 nextToken(); 676 break; 677 } 678 FormatTok->Finalized = true; 679 nextToken(); 680 } 681 } 682 break; 683 case tok::kw_namespace: 684 parseNamespace(); 685 return; 686 case tok::kw_inline: 687 nextToken(); 688 if (FormatTok->Tok.is(tok::kw_namespace)) { 689 parseNamespace(); 690 return; 691 } 692 break; 693 case tok::kw_public: 694 case tok::kw_protected: 695 case tok::kw_private: 696 if (Style.Language == FormatStyle::LK_Java) 697 nextToken(); 698 else 699 parseAccessSpecifier(); 700 return; 701 case tok::kw_if: 702 parseIfThenElse(); 703 return; 704 case tok::kw_for: 705 case tok::kw_while: 706 parseForOrWhileLoop(); 707 return; 708 case tok::kw_do: 709 parseDoWhile(); 710 return; 711 case tok::kw_switch: 712 parseSwitch(); 713 return; 714 case tok::kw_default: 715 nextToken(); 716 parseLabel(); 717 return; 718 case tok::kw_case: 719 parseCaseLabel(); 720 return; 721 case tok::kw_try: 722 case tok::kw___try: 723 parseTryCatch(); 724 return; 725 case tok::kw_extern: 726 nextToken(); 727 if (FormatTok->Tok.is(tok::string_literal)) { 728 nextToken(); 729 if (FormatTok->Tok.is(tok::l_brace)) { 730 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 731 addUnwrappedLine(); 732 return; 733 } 734 } 735 break; 736 case tok::identifier: 737 if (FormatTok->IsForEachMacro) { 738 parseForOrWhileLoop(); 739 return; 740 } 741 // In all other cases, parse the declaration. 742 break; 743 default: 744 break; 745 } 746 do { 747 switch (FormatTok->Tok.getKind()) { 748 case tok::at: 749 nextToken(); 750 if (FormatTok->Tok.is(tok::l_brace)) 751 parseBracedList(); 752 break; 753 case tok::kw_enum: 754 parseEnum(); 755 break; 756 case tok::kw_typedef: 757 nextToken(); 758 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 759 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 760 parseEnum(); 761 break; 762 case tok::kw_struct: 763 case tok::kw_union: 764 case tok::kw_class: 765 parseRecord(); 766 // A record declaration or definition is always the start of a structural 767 // element. 768 break; 769 case tok::period: 770 nextToken(); 771 // In Java, classes have an implicit static member "class". 772 if (Style.Language == FormatStyle::LK_Java && FormatTok && 773 FormatTok->is(tok::kw_class)) 774 nextToken(); 775 break; 776 case tok::semi: 777 nextToken(); 778 addUnwrappedLine(); 779 return; 780 case tok::r_brace: 781 addUnwrappedLine(); 782 return; 783 case tok::l_paren: 784 parseParens(); 785 break; 786 case tok::caret: 787 nextToken(); 788 if (FormatTok->Tok.isAnyIdentifier() || 789 FormatTok->isSimpleTypeSpecifier()) 790 nextToken(); 791 if (FormatTok->is(tok::l_paren)) 792 parseParens(); 793 if (FormatTok->is(tok::l_brace)) 794 parseChildBlock(); 795 break; 796 case tok::l_brace: 797 if (!tryToParseBracedList()) { 798 // A block outside of parentheses must be the last part of a 799 // structural element. 800 // FIXME: Figure out cases where this is not true, and add projections 801 // for them (the one we know is missing are lambdas). 802 if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) 803 addUnwrappedLine(); 804 FormatTok->Type = TT_FunctionLBrace; 805 parseBlock(/*MustBeDeclaration=*/false); 806 addUnwrappedLine(); 807 return; 808 } 809 // Otherwise this was a braced init list, and the structural 810 // element continues. 811 break; 812 case tok::kw_try: 813 // We arrive here when parsing function-try blocks. 814 parseTryCatch(); 815 return; 816 case tok::identifier: { 817 StringRef Text = FormatTok->TokenText; 818 // Parse function literal unless 'function' is the first token in a line 819 // in which case this should be treated as a free-standing function. 820 if (Style.Language == FormatStyle::LK_JavaScript && Text == "function" && 821 Line->Tokens.size() > 0) { 822 tryToParseJSFunction(); 823 break; 824 } 825 nextToken(); 826 if (Line->Tokens.size() == 1) { 827 if (FormatTok->Tok.is(tok::colon)) { 828 parseLabel(); 829 return; 830 } 831 // Recognize function-like macro usages without trailing semicolon as 832 // well as free-standing macrose like Q_OBJECT. 833 bool FunctionLike = FormatTok->is(tok::l_paren); 834 if (FunctionLike) 835 parseParens(); 836 if (FormatTok->NewlinesBefore > 0 && 837 (Text.size() >= 5 || FunctionLike) && 838 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 839 addUnwrappedLine(); 840 return; 841 } 842 } 843 break; 844 } 845 case tok::equal: 846 nextToken(); 847 if (FormatTok->Tok.is(tok::l_brace)) { 848 parseBracedList(); 849 } 850 break; 851 case tok::l_square: 852 parseSquare(); 853 break; 854 default: 855 nextToken(); 856 break; 857 } 858 } while (!eof()); 859 } 860 861 bool UnwrappedLineParser::tryToParseLambda() { 862 // FIXME: This is a dirty way to access the previous token. Find a better 863 // solution. 864 if (!Line->Tokens.empty() && 865 (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator, 866 tok::kw_new, tok::kw_delete) || 867 Line->Tokens.back().Tok->closesScope() || 868 Line->Tokens.back().Tok->isSimpleTypeSpecifier())) { 869 nextToken(); 870 return false; 871 } 872 assert(FormatTok->is(tok::l_square)); 873 FormatToken &LSquare = *FormatTok; 874 if (!tryToParseLambdaIntroducer()) 875 return false; 876 877 while (FormatTok->isNot(tok::l_brace)) { 878 if (FormatTok->isSimpleTypeSpecifier()) { 879 nextToken(); 880 continue; 881 } 882 switch (FormatTok->Tok.getKind()) { 883 case tok::l_brace: 884 break; 885 case tok::l_paren: 886 parseParens(); 887 break; 888 case tok::amp: 889 case tok::star: 890 case tok::kw_const: 891 case tok::comma: 892 case tok::less: 893 case tok::greater: 894 case tok::identifier: 895 case tok::coloncolon: 896 case tok::kw_mutable: 897 nextToken(); 898 break; 899 case tok::arrow: 900 FormatTok->Type = TT_TrailingReturnArrow; 901 nextToken(); 902 break; 903 default: 904 return true; 905 } 906 } 907 LSquare.Type = TT_LambdaLSquare; 908 parseChildBlock(); 909 return true; 910 } 911 912 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 913 nextToken(); 914 if (FormatTok->is(tok::equal)) { 915 nextToken(); 916 if (FormatTok->is(tok::r_square)) { 917 nextToken(); 918 return true; 919 } 920 if (FormatTok->isNot(tok::comma)) 921 return false; 922 nextToken(); 923 } else if (FormatTok->is(tok::amp)) { 924 nextToken(); 925 if (FormatTok->is(tok::r_square)) { 926 nextToken(); 927 return true; 928 } 929 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { 930 return false; 931 } 932 if (FormatTok->is(tok::comma)) 933 nextToken(); 934 } else if (FormatTok->is(tok::r_square)) { 935 nextToken(); 936 return true; 937 } 938 do { 939 if (FormatTok->is(tok::amp)) 940 nextToken(); 941 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) 942 return false; 943 nextToken(); 944 if (FormatTok->is(tok::ellipsis)) 945 nextToken(); 946 if (FormatTok->is(tok::comma)) { 947 nextToken(); 948 } else if (FormatTok->is(tok::r_square)) { 949 nextToken(); 950 return true; 951 } else { 952 return false; 953 } 954 } while (!eof()); 955 return false; 956 } 957 958 void UnwrappedLineParser::tryToParseJSFunction() { 959 nextToken(); 960 961 // Consume function name. 962 if (FormatTok->is(tok::identifier)) 963 nextToken(); 964 965 if (FormatTok->isNot(tok::l_paren)) 966 return; 967 nextToken(); 968 while (FormatTok->isNot(tok::l_brace)) { 969 // Err on the side of caution in order to avoid consuming the full file in 970 // case of incomplete code. 971 if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren, 972 tok::comment)) 973 return; 974 nextToken(); 975 } 976 parseChildBlock(); 977 } 978 979 bool UnwrappedLineParser::tryToParseBracedList() { 980 if (FormatTok->BlockKind == BK_Unknown) 981 calculateBraceTypes(); 982 assert(FormatTok->BlockKind != BK_Unknown); 983 if (FormatTok->BlockKind == BK_Block) 984 return false; 985 parseBracedList(); 986 return true; 987 } 988 989 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { 990 bool HasError = false; 991 nextToken(); 992 993 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 994 // replace this by using parseAssigmentExpression() inside. 995 do { 996 if (Style.Language == FormatStyle::LK_JavaScript && 997 FormatTok->is(Keywords.kw_function)) { 998 tryToParseJSFunction(); 999 continue; 1000 } 1001 switch (FormatTok->Tok.getKind()) { 1002 case tok::caret: 1003 nextToken(); 1004 if (FormatTok->is(tok::l_brace)) { 1005 parseChildBlock(); 1006 } 1007 break; 1008 case tok::l_square: 1009 tryToParseLambda(); 1010 break; 1011 case tok::l_brace: 1012 // Assume there are no blocks inside a braced init list apart 1013 // from the ones we explicitly parse out (like lambdas). 1014 FormatTok->BlockKind = BK_BracedInit; 1015 parseBracedList(); 1016 break; 1017 case tok::r_brace: 1018 nextToken(); 1019 return !HasError; 1020 case tok::semi: 1021 HasError = true; 1022 if (!ContinueOnSemicolons) 1023 return !HasError; 1024 nextToken(); 1025 break; 1026 case tok::comma: 1027 nextToken(); 1028 break; 1029 default: 1030 nextToken(); 1031 break; 1032 } 1033 } while (!eof()); 1034 return false; 1035 } 1036 1037 void UnwrappedLineParser::parseParens() { 1038 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1039 nextToken(); 1040 do { 1041 switch (FormatTok->Tok.getKind()) { 1042 case tok::l_paren: 1043 parseParens(); 1044 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1045 parseChildBlock(); 1046 break; 1047 case tok::r_paren: 1048 nextToken(); 1049 return; 1050 case tok::r_brace: 1051 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1052 return; 1053 case tok::l_square: 1054 tryToParseLambda(); 1055 break; 1056 case tok::l_brace: 1057 if (!tryToParseBracedList()) { 1058 parseChildBlock(); 1059 } 1060 break; 1061 case tok::at: 1062 nextToken(); 1063 if (FormatTok->Tok.is(tok::l_brace)) 1064 parseBracedList(); 1065 break; 1066 case tok::identifier: 1067 if (Style.Language == FormatStyle::LK_JavaScript && 1068 FormatTok->is(Keywords.kw_function)) 1069 tryToParseJSFunction(); 1070 else 1071 nextToken(); 1072 break; 1073 default: 1074 nextToken(); 1075 break; 1076 } 1077 } while (!eof()); 1078 } 1079 1080 void UnwrappedLineParser::parseSquare() { 1081 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1082 if (tryToParseLambda()) 1083 return; 1084 do { 1085 switch (FormatTok->Tok.getKind()) { 1086 case tok::l_paren: 1087 parseParens(); 1088 break; 1089 case tok::r_square: 1090 nextToken(); 1091 return; 1092 case tok::r_brace: 1093 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1094 return; 1095 case tok::l_square: 1096 parseSquare(); 1097 break; 1098 case tok::l_brace: { 1099 if (!tryToParseBracedList()) { 1100 parseChildBlock(); 1101 } 1102 break; 1103 } 1104 case tok::at: 1105 nextToken(); 1106 if (FormatTok->Tok.is(tok::l_brace)) 1107 parseBracedList(); 1108 break; 1109 default: 1110 nextToken(); 1111 break; 1112 } 1113 } while (!eof()); 1114 } 1115 1116 void UnwrappedLineParser::parseIfThenElse() { 1117 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1118 nextToken(); 1119 if (FormatTok->Tok.is(tok::l_paren)) 1120 parseParens(); 1121 bool NeedsUnwrappedLine = false; 1122 if (FormatTok->Tok.is(tok::l_brace)) { 1123 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1124 parseBlock(/*MustBeDeclaration=*/false); 1125 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1126 Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 1127 addUnwrappedLine(); 1128 } else { 1129 NeedsUnwrappedLine = true; 1130 } 1131 } else { 1132 addUnwrappedLine(); 1133 ++Line->Level; 1134 parseStructuralElement(); 1135 --Line->Level; 1136 } 1137 if (FormatTok->Tok.is(tok::kw_else)) { 1138 if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) 1139 addUnwrappedLine(); 1140 nextToken(); 1141 if (FormatTok->Tok.is(tok::l_brace)) { 1142 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1143 parseBlock(/*MustBeDeclaration=*/false); 1144 addUnwrappedLine(); 1145 } else if (FormatTok->Tok.is(tok::kw_if)) { 1146 parseIfThenElse(); 1147 } else { 1148 addUnwrappedLine(); 1149 ++Line->Level; 1150 parseStructuralElement(); 1151 --Line->Level; 1152 } 1153 } else if (NeedsUnwrappedLine) { 1154 addUnwrappedLine(); 1155 } 1156 } 1157 1158 void UnwrappedLineParser::parseTryCatch() { 1159 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1160 nextToken(); 1161 bool NeedsUnwrappedLine = false; 1162 if (FormatTok->is(tok::colon)) { 1163 // We are in a function try block, what comes is an initializer list. 1164 nextToken(); 1165 while (FormatTok->is(tok::identifier)) { 1166 nextToken(); 1167 if (FormatTok->is(tok::l_paren)) 1168 parseParens(); 1169 else 1170 StructuralError = true; 1171 if (FormatTok->is(tok::comma)) 1172 nextToken(); 1173 } 1174 } 1175 // Parse try with resource. 1176 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1177 parseParens(); 1178 } 1179 if (FormatTok->is(tok::l_brace)) { 1180 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1181 parseBlock(/*MustBeDeclaration=*/false); 1182 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1183 Style.BreakBeforeBraces == FormatStyle::BS_GNU || 1184 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { 1185 addUnwrappedLine(); 1186 } else { 1187 NeedsUnwrappedLine = true; 1188 } 1189 } else if (!FormatTok->is(tok::kw_catch)) { 1190 // The C++ standard requires a compound-statement after a try. 1191 // If there's none, we try to assume there's a structuralElement 1192 // and try to continue. 1193 StructuralError = true; 1194 addUnwrappedLine(); 1195 ++Line->Level; 1196 parseStructuralElement(); 1197 --Line->Level; 1198 } 1199 while (1) { 1200 if (FormatTok->is(tok::at)) 1201 nextToken(); 1202 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1203 tok::kw___finally) || 1204 ((Style.Language == FormatStyle::LK_Java || 1205 Style.Language == FormatStyle::LK_JavaScript) && 1206 FormatTok->is(Keywords.kw_finally)) || 1207 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1208 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1209 break; 1210 nextToken(); 1211 while (FormatTok->isNot(tok::l_brace)) { 1212 if (FormatTok->is(tok::l_paren)) { 1213 parseParens(); 1214 continue; 1215 } 1216 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1217 return; 1218 nextToken(); 1219 } 1220 NeedsUnwrappedLine = false; 1221 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1222 parseBlock(/*MustBeDeclaration=*/false); 1223 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1224 Style.BreakBeforeBraces == FormatStyle::BS_GNU || 1225 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { 1226 addUnwrappedLine(); 1227 } else { 1228 NeedsUnwrappedLine = true; 1229 } 1230 } 1231 if (NeedsUnwrappedLine) { 1232 addUnwrappedLine(); 1233 } 1234 } 1235 1236 void UnwrappedLineParser::parseNamespace() { 1237 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1238 1239 const FormatToken &InitialToken = *FormatTok; 1240 nextToken(); 1241 if (FormatTok->Tok.is(tok::identifier)) 1242 nextToken(); 1243 if (FormatTok->Tok.is(tok::l_brace)) { 1244 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1245 addUnwrappedLine(); 1246 1247 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1248 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1249 DeclarationScopeStack.size() > 1); 1250 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1251 // Munch the semicolon after a namespace. This is more common than one would 1252 // think. Puttin the semicolon into its own line is very ugly. 1253 if (FormatTok->Tok.is(tok::semi)) 1254 nextToken(); 1255 addUnwrappedLine(); 1256 } 1257 // FIXME: Add error handling. 1258 } 1259 1260 void UnwrappedLineParser::parseForOrWhileLoop() { 1261 assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while) || 1262 FormatTok->IsForEachMacro) && 1263 "'for', 'while' or foreach macro expected"); 1264 nextToken(); 1265 if (FormatTok->Tok.is(tok::l_paren)) 1266 parseParens(); 1267 if (FormatTok->Tok.is(tok::l_brace)) { 1268 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1269 parseBlock(/*MustBeDeclaration=*/false); 1270 addUnwrappedLine(); 1271 } else { 1272 addUnwrappedLine(); 1273 ++Line->Level; 1274 parseStructuralElement(); 1275 --Line->Level; 1276 } 1277 } 1278 1279 void UnwrappedLineParser::parseDoWhile() { 1280 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1281 nextToken(); 1282 if (FormatTok->Tok.is(tok::l_brace)) { 1283 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1284 parseBlock(/*MustBeDeclaration=*/false); 1285 if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) 1286 addUnwrappedLine(); 1287 } else { 1288 addUnwrappedLine(); 1289 ++Line->Level; 1290 parseStructuralElement(); 1291 --Line->Level; 1292 } 1293 1294 // FIXME: Add error handling. 1295 if (!FormatTok->Tok.is(tok::kw_while)) { 1296 addUnwrappedLine(); 1297 return; 1298 } 1299 1300 nextToken(); 1301 parseStructuralElement(); 1302 } 1303 1304 void UnwrappedLineParser::parseLabel() { 1305 nextToken(); 1306 unsigned OldLineLevel = Line->Level; 1307 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1308 --Line->Level; 1309 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1310 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1311 parseBlock(/*MustBeDeclaration=*/false); 1312 if (FormatTok->Tok.is(tok::kw_break)) { 1313 // "break;" after "}" on its own line only for BS_Allman and BS_GNU 1314 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1315 Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 1316 addUnwrappedLine(); 1317 } 1318 parseStructuralElement(); 1319 } 1320 addUnwrappedLine(); 1321 } else { 1322 addUnwrappedLine(); 1323 } 1324 Line->Level = OldLineLevel; 1325 } 1326 1327 void UnwrappedLineParser::parseCaseLabel() { 1328 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1329 // FIXME: fix handling of complex expressions here. 1330 do { 1331 nextToken(); 1332 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1333 parseLabel(); 1334 } 1335 1336 void UnwrappedLineParser::parseSwitch() { 1337 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1338 nextToken(); 1339 if (FormatTok->Tok.is(tok::l_paren)) 1340 parseParens(); 1341 if (FormatTok->Tok.is(tok::l_brace)) { 1342 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1343 parseBlock(/*MustBeDeclaration=*/false); 1344 addUnwrappedLine(); 1345 } else { 1346 addUnwrappedLine(); 1347 ++Line->Level; 1348 parseStructuralElement(); 1349 --Line->Level; 1350 } 1351 } 1352 1353 void UnwrappedLineParser::parseAccessSpecifier() { 1354 nextToken(); 1355 // Understand Qt's slots. 1356 if (FormatTok->is(tok::identifier) && 1357 (FormatTok->TokenText == "slots" || FormatTok->TokenText == "Q_SLOTS")) 1358 nextToken(); 1359 // Otherwise, we don't know what it is, and we'd better keep the next token. 1360 if (FormatTok->Tok.is(tok::colon)) 1361 nextToken(); 1362 addUnwrappedLine(); 1363 } 1364 1365 void UnwrappedLineParser::parseEnum() { 1366 // Won't be 'enum' for NS_ENUMs. 1367 if (FormatTok->Tok.is(tok::kw_enum)) 1368 nextToken(); 1369 1370 // Eat up enum class ... 1371 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1372 nextToken(); 1373 while (FormatTok->Tok.getIdentifierInfo() || 1374 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1375 tok::greater, tok::comma, tok::question)) { 1376 nextToken(); 1377 // We can have macros or attributes in between 'enum' and the enum name. 1378 if (FormatTok->is(tok::l_paren)) 1379 parseParens(); 1380 if (FormatTok->is(tok::identifier)) 1381 nextToken(); 1382 } 1383 1384 // Just a declaration or something is wrong. 1385 if (FormatTok->isNot(tok::l_brace)) 1386 return; 1387 FormatTok->BlockKind = BK_Block; 1388 1389 if (Style.Language == FormatStyle::LK_Java) { 1390 // Java enums are different. 1391 parseJavaEnumBody(); 1392 return; 1393 } 1394 1395 // Parse enum body. 1396 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1397 if (HasError) { 1398 if (FormatTok->is(tok::semi)) 1399 nextToken(); 1400 addUnwrappedLine(); 1401 } 1402 1403 // We fall through to parsing a structural element afterwards, so that in 1404 // enum A {} n, m; 1405 // "} n, m;" will end up in one unwrapped line. 1406 } 1407 1408 void UnwrappedLineParser::parseJavaEnumBody() { 1409 // Determine whether the enum is simple, i.e. does not have a semicolon or 1410 // constants with class bodies. Simple enums can be formatted like braced 1411 // lists, contracted to a single line, etc. 1412 unsigned StoredPosition = Tokens->getPosition(); 1413 bool IsSimple = true; 1414 FormatToken *Tok = Tokens->getNextToken(); 1415 while (Tok) { 1416 if (Tok->is(tok::r_brace)) 1417 break; 1418 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1419 IsSimple = false; 1420 break; 1421 } 1422 // FIXME: This will also mark enums with braces in the arguments to enum 1423 // constants as "not simple". This is probably fine in practice, though. 1424 Tok = Tokens->getNextToken(); 1425 } 1426 FormatTok = Tokens->setPosition(StoredPosition); 1427 1428 if (IsSimple) { 1429 parseBracedList(); 1430 addUnwrappedLine(); 1431 return; 1432 } 1433 1434 // Parse the body of a more complex enum. 1435 // First add a line for everything up to the "{". 1436 nextToken(); 1437 addUnwrappedLine(); 1438 ++Line->Level; 1439 1440 // Parse the enum constants. 1441 while (FormatTok) { 1442 if (FormatTok->is(tok::l_brace)) { 1443 // Parse the constant's class body. 1444 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1445 /*MunchSemi=*/false); 1446 } else if (FormatTok->is(tok::l_paren)) { 1447 parseParens(); 1448 } else if (FormatTok->is(tok::comma)) { 1449 nextToken(); 1450 addUnwrappedLine(); 1451 } else if (FormatTok->is(tok::semi)) { 1452 nextToken(); 1453 addUnwrappedLine(); 1454 break; 1455 } else if (FormatTok->is(tok::r_brace)) { 1456 addUnwrappedLine(); 1457 break; 1458 } else { 1459 nextToken(); 1460 } 1461 } 1462 1463 // Parse the class body after the enum's ";" if any. 1464 parseLevel(/*HasOpeningBrace=*/true); 1465 nextToken(); 1466 --Line->Level; 1467 addUnwrappedLine(); 1468 } 1469 1470 void UnwrappedLineParser::parseRecord() { 1471 const FormatToken &InitialToken = *FormatTok; 1472 nextToken(); 1473 if (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw___attribute, 1474 tok::kw___declspec, tok::kw_alignas)) { 1475 nextToken(); 1476 // We can have macros or attributes in between 'class' and the class name. 1477 if (FormatTok->Tok.is(tok::l_paren)) { 1478 parseParens(); 1479 } 1480 // The actual identifier can be a nested name specifier, and in macros 1481 // it is often token-pasted. 1482 while (FormatTok->is(tok::identifier) || FormatTok->is(tok::coloncolon) || 1483 FormatTok->is(tok::hashhash) || 1484 (Style.Language == FormatStyle::LK_Java && 1485 FormatTok->isOneOf(tok::period, tok::comma))) 1486 nextToken(); 1487 1488 // Note that parsing away template declarations here leads to incorrectly 1489 // accepting function declarations as record declarations. 1490 // In general, we cannot solve this problem. Consider: 1491 // class A<int> B() {} 1492 // which can be a function definition or a class definition when B() is a 1493 // macro. If we find enough real-world cases where this is a problem, we 1494 // can parse for the 'template' keyword in the beginning of the statement, 1495 // and thus rule out the record production in case there is no template 1496 // (this would still leave us with an ambiguity between template function 1497 // and class declarations). 1498 if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) { 1499 while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) { 1500 if (FormatTok->Tok.is(tok::semi)) 1501 return; 1502 nextToken(); 1503 } 1504 } 1505 } 1506 if (FormatTok->Tok.is(tok::l_brace)) { 1507 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1508 addUnwrappedLine(); 1509 1510 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1511 /*MunchSemi=*/false); 1512 } 1513 // We fall through to parsing a structural element afterwards, so 1514 // class A {} n, m; 1515 // will end up in one unwrapped line. 1516 // This does not apply for Java. 1517 if (Style.Language == FormatStyle::LK_Java) 1518 addUnwrappedLine(); 1519 } 1520 1521 void UnwrappedLineParser::parseObjCProtocolList() { 1522 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 1523 do 1524 nextToken(); 1525 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 1526 nextToken(); // Skip '>'. 1527 } 1528 1529 void UnwrappedLineParser::parseObjCUntilAtEnd() { 1530 do { 1531 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 1532 nextToken(); 1533 addUnwrappedLine(); 1534 break; 1535 } 1536 if (FormatTok->is(tok::l_brace)) { 1537 parseBlock(/*MustBeDeclaration=*/false); 1538 // In ObjC interfaces, nothing should be following the "}". 1539 addUnwrappedLine(); 1540 } else if (FormatTok->is(tok::r_brace)) { 1541 // Ignore stray "}". parseStructuralElement doesn't consume them. 1542 nextToken(); 1543 addUnwrappedLine(); 1544 } else { 1545 parseStructuralElement(); 1546 } 1547 } while (!eof()); 1548 } 1549 1550 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 1551 nextToken(); 1552 nextToken(); // interface name 1553 1554 // @interface can be followed by either a base class, or a category. 1555 if (FormatTok->Tok.is(tok::colon)) { 1556 nextToken(); 1557 nextToken(); // base class name 1558 } else if (FormatTok->Tok.is(tok::l_paren)) 1559 // Skip category, if present. 1560 parseParens(); 1561 1562 if (FormatTok->Tok.is(tok::less)) 1563 parseObjCProtocolList(); 1564 1565 if (FormatTok->Tok.is(tok::l_brace)) { 1566 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1567 Style.BreakBeforeBraces == FormatStyle::BS_GNU) 1568 addUnwrappedLine(); 1569 parseBlock(/*MustBeDeclaration=*/true); 1570 } 1571 1572 // With instance variables, this puts '}' on its own line. Without instance 1573 // variables, this ends the @interface line. 1574 addUnwrappedLine(); 1575 1576 parseObjCUntilAtEnd(); 1577 } 1578 1579 void UnwrappedLineParser::parseObjCProtocol() { 1580 nextToken(); 1581 nextToken(); // protocol name 1582 1583 if (FormatTok->Tok.is(tok::less)) 1584 parseObjCProtocolList(); 1585 1586 // Check for protocol declaration. 1587 if (FormatTok->Tok.is(tok::semi)) { 1588 nextToken(); 1589 return addUnwrappedLine(); 1590 } 1591 1592 addUnwrappedLine(); 1593 parseObjCUntilAtEnd(); 1594 } 1595 1596 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 1597 StringRef Prefix = "") { 1598 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" 1599 << (Line.InPPDirective ? " MACRO" : "") << ": "; 1600 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 1601 E = Line.Tokens.end(); 1602 I != E; ++I) { 1603 llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] "; 1604 } 1605 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 1606 E = Line.Tokens.end(); 1607 I != E; ++I) { 1608 const UnwrappedLineNode &Node = *I; 1609 for (SmallVectorImpl<UnwrappedLine>::const_iterator 1610 I = Node.Children.begin(), 1611 E = Node.Children.end(); 1612 I != E; ++I) { 1613 printDebugInfo(*I, "\nChild: "); 1614 } 1615 } 1616 llvm::dbgs() << "\n"; 1617 } 1618 1619 void UnwrappedLineParser::addUnwrappedLine() { 1620 if (Line->Tokens.empty()) 1621 return; 1622 DEBUG({ 1623 if (CurrentLines == &Lines) 1624 printDebugInfo(*Line); 1625 }); 1626 CurrentLines->push_back(*Line); 1627 Line->Tokens.clear(); 1628 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 1629 for (SmallVectorImpl<UnwrappedLine>::iterator 1630 I = PreprocessorDirectives.begin(), 1631 E = PreprocessorDirectives.end(); 1632 I != E; ++I) { 1633 CurrentLines->push_back(*I); 1634 } 1635 PreprocessorDirectives.clear(); 1636 } 1637 } 1638 1639 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 1640 1641 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 1642 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 1643 FormatTok.NewlinesBefore > 0; 1644 } 1645 1646 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 1647 bool JustComments = Line->Tokens.empty(); 1648 for (SmallVectorImpl<FormatToken *>::const_iterator 1649 I = CommentsBeforeNextToken.begin(), 1650 E = CommentsBeforeNextToken.end(); 1651 I != E; ++I) { 1652 if (isOnNewLine(**I) && JustComments) { 1653 addUnwrappedLine(); 1654 } 1655 pushToken(*I); 1656 } 1657 if (NewlineBeforeNext && JustComments) { 1658 addUnwrappedLine(); 1659 } 1660 CommentsBeforeNextToken.clear(); 1661 } 1662 1663 void UnwrappedLineParser::nextToken() { 1664 if (eof()) 1665 return; 1666 flushComments(isOnNewLine(*FormatTok)); 1667 pushToken(FormatTok); 1668 readToken(); 1669 } 1670 1671 void UnwrappedLineParser::readToken() { 1672 bool CommentsInCurrentLine = true; 1673 do { 1674 FormatTok = Tokens->getNextToken(); 1675 assert(FormatTok); 1676 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 1677 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 1678 // If there is an unfinished unwrapped line, we flush the preprocessor 1679 // directives only after that unwrapped line was finished later. 1680 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 1681 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 1682 // Comments stored before the preprocessor directive need to be output 1683 // before the preprocessor directive, at the same level as the 1684 // preprocessor directive, as we consider them to apply to the directive. 1685 flushComments(isOnNewLine(*FormatTok)); 1686 parsePPDirective(); 1687 } 1688 while (FormatTok->Type == TT_ConflictStart || 1689 FormatTok->Type == TT_ConflictEnd || 1690 FormatTok->Type == TT_ConflictAlternative) { 1691 if (FormatTok->Type == TT_ConflictStart) { 1692 conditionalCompilationStart(/*Unreachable=*/false); 1693 } else if (FormatTok->Type == TT_ConflictAlternative) { 1694 conditionalCompilationAlternative(); 1695 } else if (FormatTok->Type == TT_ConflictEnd) { 1696 conditionalCompilationEnd(); 1697 } 1698 FormatTok = Tokens->getNextToken(); 1699 FormatTok->MustBreakBefore = true; 1700 } 1701 1702 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 1703 !Line->InPPDirective) { 1704 continue; 1705 } 1706 1707 if (!FormatTok->Tok.is(tok::comment)) 1708 return; 1709 if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) { 1710 CommentsInCurrentLine = false; 1711 } 1712 if (CommentsInCurrentLine) { 1713 pushToken(FormatTok); 1714 } else { 1715 CommentsBeforeNextToken.push_back(FormatTok); 1716 } 1717 } while (!eof()); 1718 } 1719 1720 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 1721 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 1722 if (MustBreakBeforeNextToken) { 1723 Line->Tokens.back().Tok->MustBreakBefore = true; 1724 MustBreakBeforeNextToken = false; 1725 } 1726 } 1727 1728 } // end namespace format 1729 } // end namespace clang 1730