1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the implementation of the UnwrappedLineParser, 12 /// which turns a stream of tokens into UnwrappedLines. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "UnwrappedLineParser.h" 17 #include "llvm/Support/Debug.h" 18 19 #define DEBUG_TYPE "format-parser" 20 21 namespace clang { 22 namespace format { 23 24 class FormatTokenSource { 25 public: 26 virtual ~FormatTokenSource() {} 27 virtual FormatToken *getNextToken() = 0; 28 29 virtual unsigned getPosition() = 0; 30 virtual FormatToken *setPosition(unsigned Position) = 0; 31 }; 32 33 namespace { 34 35 class ScopedDeclarationState { 36 public: 37 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack, 38 bool MustBeDeclaration) 39 : Line(Line), Stack(Stack) { 40 Line.MustBeDeclaration = MustBeDeclaration; 41 Stack.push_back(MustBeDeclaration); 42 } 43 ~ScopedDeclarationState() { 44 Stack.pop_back(); 45 if (!Stack.empty()) 46 Line.MustBeDeclaration = Stack.back(); 47 else 48 Line.MustBeDeclaration = true; 49 } 50 51 private: 52 UnwrappedLine &Line; 53 std::vector<bool> &Stack; 54 }; 55 56 class ScopedMacroState : public FormatTokenSource { 57 public: 58 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource, 59 FormatToken *&ResetToken, bool &StructuralError) 60 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken), 61 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource), 62 StructuralError(StructuralError), 63 PreviousStructuralError(StructuralError), Token(nullptr) { 64 TokenSource = this; 65 Line.Level = 0; 66 Line.InPPDirective = true; 67 } 68 69 ~ScopedMacroState() { 70 TokenSource = PreviousTokenSource; 71 ResetToken = Token; 72 Line.InPPDirective = false; 73 Line.Level = PreviousLineLevel; 74 StructuralError = PreviousStructuralError; 75 } 76 77 FormatToken *getNextToken() override { 78 // The \c UnwrappedLineParser guards against this by never calling 79 // \c getNextToken() after it has encountered the first eof token. 80 assert(!eof()); 81 Token = PreviousTokenSource->getNextToken(); 82 if (eof()) 83 return getFakeEOF(); 84 return Token; 85 } 86 87 unsigned getPosition() override { return PreviousTokenSource->getPosition(); } 88 89 FormatToken *setPosition(unsigned Position) override { 90 Token = PreviousTokenSource->setPosition(Position); 91 return Token; 92 } 93 94 private: 95 bool eof() { return Token && Token->HasUnescapedNewline; } 96 97 FormatToken *getFakeEOF() { 98 static bool EOFInitialized = false; 99 static FormatToken FormatTok; 100 if (!EOFInitialized) { 101 FormatTok.Tok.startToken(); 102 FormatTok.Tok.setKind(tok::eof); 103 EOFInitialized = true; 104 } 105 return &FormatTok; 106 } 107 108 UnwrappedLine &Line; 109 FormatTokenSource *&TokenSource; 110 FormatToken *&ResetToken; 111 unsigned PreviousLineLevel; 112 FormatTokenSource *PreviousTokenSource; 113 bool &StructuralError; 114 bool PreviousStructuralError; 115 116 FormatToken *Token; 117 }; 118 119 } // end anonymous namespace 120 121 class ScopedLineState { 122 public: 123 ScopedLineState(UnwrappedLineParser &Parser, 124 bool SwitchToPreprocessorLines = false) 125 : Parser(Parser), OriginalLines(Parser.CurrentLines) { 126 if (SwitchToPreprocessorLines) 127 Parser.CurrentLines = &Parser.PreprocessorDirectives; 128 else if (!Parser.Line->Tokens.empty()) 129 Parser.CurrentLines = &Parser.Line->Tokens.back().Children; 130 PreBlockLine = std::move(Parser.Line); 131 Parser.Line = llvm::make_unique<UnwrappedLine>(); 132 Parser.Line->Level = PreBlockLine->Level; 133 Parser.Line->InPPDirective = PreBlockLine->InPPDirective; 134 } 135 136 ~ScopedLineState() { 137 if (!Parser.Line->Tokens.empty()) { 138 Parser.addUnwrappedLine(); 139 } 140 assert(Parser.Line->Tokens.empty()); 141 Parser.Line = std::move(PreBlockLine); 142 if (Parser.CurrentLines == &Parser.PreprocessorDirectives) 143 Parser.MustBreakBeforeNextToken = true; 144 Parser.CurrentLines = OriginalLines; 145 } 146 147 private: 148 UnwrappedLineParser &Parser; 149 150 std::unique_ptr<UnwrappedLine> PreBlockLine; 151 SmallVectorImpl<UnwrappedLine> *OriginalLines; 152 }; 153 154 class CompoundStatementIndenter { 155 public: 156 CompoundStatementIndenter(UnwrappedLineParser *Parser, 157 const FormatStyle &Style, unsigned &LineLevel) 158 : LineLevel(LineLevel), OldLineLevel(LineLevel) { 159 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) { 160 Parser->addUnwrappedLine(); 161 } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 162 Parser->addUnwrappedLine(); 163 ++LineLevel; 164 } 165 } 166 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; } 167 168 private: 169 unsigned &LineLevel; 170 unsigned OldLineLevel; 171 }; 172 173 namespace { 174 175 class IndexedTokenSource : public FormatTokenSource { 176 public: 177 IndexedTokenSource(ArrayRef<FormatToken *> Tokens) 178 : Tokens(Tokens), Position(-1) {} 179 180 FormatToken *getNextToken() override { 181 ++Position; 182 return Tokens[Position]; 183 } 184 185 unsigned getPosition() override { 186 assert(Position >= 0); 187 return Position; 188 } 189 190 FormatToken *setPosition(unsigned P) override { 191 Position = P; 192 return Tokens[Position]; 193 } 194 195 void reset() { Position = -1; } 196 197 private: 198 ArrayRef<FormatToken *> Tokens; 199 int Position; 200 }; 201 202 } // end anonymous namespace 203 204 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, 205 const AdditionalKeywords &Keywords, 206 ArrayRef<FormatToken *> Tokens, 207 UnwrappedLineConsumer &Callback) 208 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), 209 CurrentLines(&Lines), StructuralError(false), Style(Style), 210 Keywords(Keywords), Tokens(nullptr), Callback(Callback), 211 AllTokens(Tokens), PPBranchLevel(-1) {} 212 213 void UnwrappedLineParser::reset() { 214 PPBranchLevel = -1; 215 Line.reset(new UnwrappedLine); 216 CommentsBeforeNextToken.clear(); 217 FormatTok = nullptr; 218 MustBreakBeforeNextToken = false; 219 PreprocessorDirectives.clear(); 220 CurrentLines = &Lines; 221 DeclarationScopeStack.clear(); 222 StructuralError = false; 223 PPStack.clear(); 224 } 225 226 bool UnwrappedLineParser::parse() { 227 IndexedTokenSource TokenSource(AllTokens); 228 do { 229 DEBUG(llvm::dbgs() << "----\n"); 230 reset(); 231 Tokens = &TokenSource; 232 TokenSource.reset(); 233 234 readToken(); 235 parseFile(); 236 // Create line with eof token. 237 pushToken(FormatTok); 238 addUnwrappedLine(); 239 240 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(), 241 E = Lines.end(); 242 I != E; ++I) { 243 Callback.consumeUnwrappedLine(*I); 244 } 245 Callback.finishRun(); 246 Lines.clear(); 247 while (!PPLevelBranchIndex.empty() && 248 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) { 249 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1); 250 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1); 251 } 252 if (!PPLevelBranchIndex.empty()) { 253 ++PPLevelBranchIndex.back(); 254 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size()); 255 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back()); 256 } 257 } while (!PPLevelBranchIndex.empty()); 258 259 return StructuralError; 260 } 261 262 void UnwrappedLineParser::parseFile() { 263 ScopedDeclarationState DeclarationState( 264 *Line, DeclarationScopeStack, 265 /*MustBeDeclaration=*/!Line->InPPDirective); 266 parseLevel(/*HasOpeningBrace=*/false); 267 // Make sure to format the remaining tokens. 268 flushComments(true); 269 addUnwrappedLine(); 270 } 271 272 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) { 273 bool SwitchLabelEncountered = false; 274 do { 275 switch (FormatTok->Tok.getKind()) { 276 case tok::comment: 277 nextToken(); 278 addUnwrappedLine(); 279 break; 280 case tok::l_brace: 281 // FIXME: Add parameter whether this can happen - if this happens, we must 282 // be in a non-declaration context. 283 parseBlock(/*MustBeDeclaration=*/false); 284 addUnwrappedLine(); 285 break; 286 case tok::r_brace: 287 if (HasOpeningBrace) 288 return; 289 StructuralError = true; 290 nextToken(); 291 addUnwrappedLine(); 292 break; 293 case tok::kw_default: 294 case tok::kw_case: 295 if (!SwitchLabelEncountered && 296 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1))) 297 ++Line->Level; 298 SwitchLabelEncountered = true; 299 parseStructuralElement(); 300 break; 301 default: 302 parseStructuralElement(); 303 break; 304 } 305 } while (!eof()); 306 } 307 308 void UnwrappedLineParser::calculateBraceTypes() { 309 // We'll parse forward through the tokens until we hit 310 // a closing brace or eof - note that getNextToken() will 311 // parse macros, so this will magically work inside macro 312 // definitions, too. 313 unsigned StoredPosition = Tokens->getPosition(); 314 FormatToken *Tok = FormatTok; 315 // Keep a stack of positions of lbrace tokens. We will 316 // update information about whether an lbrace starts a 317 // braced init list or a different block during the loop. 318 SmallVector<FormatToken *, 8> LBraceStack; 319 assert(Tok->Tok.is(tok::l_brace)); 320 do { 321 // Get next none-comment token. 322 FormatToken *NextTok; 323 unsigned ReadTokens = 0; 324 do { 325 NextTok = Tokens->getNextToken(); 326 ++ReadTokens; 327 } while (NextTok->is(tok::comment)); 328 329 switch (Tok->Tok.getKind()) { 330 case tok::l_brace: 331 LBraceStack.push_back(Tok); 332 break; 333 case tok::r_brace: 334 if (!LBraceStack.empty()) { 335 if (LBraceStack.back()->BlockKind == BK_Unknown) { 336 bool ProbablyBracedList = false; 337 if (Style.Language == FormatStyle::LK_Proto) { 338 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square); 339 } else { 340 // Using OriginalColumn to distinguish between ObjC methods and 341 // binary operators is a bit hacky. 342 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) && 343 NextTok->OriginalColumn == 0; 344 345 // If there is a comma, semicolon or right paren after the closing 346 // brace, we assume this is a braced initializer list. Note that 347 // regardless how we mark inner braces here, we will overwrite the 348 // BlockKind later if we parse a braced list (where all blocks 349 // inside are by default braced lists), or when we explicitly detect 350 // blocks (for example while parsing lambdas). 351 // 352 // We exclude + and - as they can be ObjC visibility modifiers. 353 ProbablyBracedList = 354 NextTok->isOneOf(tok::comma, tok::semi, tok::period, tok::colon, 355 tok::r_paren, tok::r_square, tok::l_brace, 356 tok::l_paren, tok::ellipsis) || 357 (NextTok->isBinaryOperator() && !NextIsObjCMethod); 358 } 359 if (ProbablyBracedList) { 360 Tok->BlockKind = BK_BracedInit; 361 LBraceStack.back()->BlockKind = BK_BracedInit; 362 } else { 363 Tok->BlockKind = BK_Block; 364 LBraceStack.back()->BlockKind = BK_Block; 365 } 366 } 367 LBraceStack.pop_back(); 368 } 369 break; 370 case tok::at: 371 case tok::semi: 372 case tok::kw_if: 373 case tok::kw_while: 374 case tok::kw_for: 375 case tok::kw_switch: 376 case tok::kw_try: 377 case tok::kw___try: 378 if (!LBraceStack.empty()) 379 LBraceStack.back()->BlockKind = BK_Block; 380 break; 381 default: 382 break; 383 } 384 Tok = NextTok; 385 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty()); 386 // Assume other blocks for all unclosed opening braces. 387 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) { 388 if (LBraceStack[i]->BlockKind == BK_Unknown) 389 LBraceStack[i]->BlockKind = BK_Block; 390 } 391 392 FormatTok = Tokens->setPosition(StoredPosition); 393 } 394 395 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel, 396 bool MunchSemi) { 397 assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected"); 398 unsigned InitialLevel = Line->Level; 399 nextToken(); 400 401 addUnwrappedLine(); 402 403 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 404 MustBeDeclaration); 405 if (AddLevel) 406 ++Line->Level; 407 parseLevel(/*HasOpeningBrace=*/true); 408 409 if (!FormatTok->Tok.is(tok::r_brace)) { 410 Line->Level = InitialLevel; 411 StructuralError = true; 412 return; 413 } 414 415 nextToken(); // Munch the closing brace. 416 if (MunchSemi && FormatTok->Tok.is(tok::semi)) 417 nextToken(); 418 Line->Level = InitialLevel; 419 } 420 421 static bool IsGoogScope(const UnwrappedLine &Line) { 422 // FIXME: Closure-library specific stuff should not be hard-coded but be 423 // configurable. 424 if (Line.Tokens.size() < 4) 425 return false; 426 auto I = Line.Tokens.begin(); 427 if (I->Tok->TokenText != "goog") 428 return false; 429 ++I; 430 if (I->Tok->isNot(tok::period)) 431 return false; 432 ++I; 433 if (I->Tok->TokenText != "scope") 434 return false; 435 ++I; 436 return I->Tok->is(tok::l_paren); 437 } 438 439 static bool ShouldBreakBeforeBrace(const FormatStyle &Style, 440 const FormatToken &InitialToken) { 441 switch (Style.BreakBeforeBraces) { 442 case FormatStyle::BS_Linux: 443 return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class); 444 case FormatStyle::BS_Allman: 445 case FormatStyle::BS_GNU: 446 return true; 447 default: 448 return false; 449 } 450 } 451 452 void UnwrappedLineParser::parseChildBlock() { 453 FormatTok->BlockKind = BK_Block; 454 nextToken(); 455 { 456 bool GoogScope = 457 Style.Language == FormatStyle::LK_JavaScript && IsGoogScope(*Line); 458 ScopedLineState LineState(*this); 459 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack, 460 /*MustBeDeclaration=*/false); 461 Line->Level += GoogScope ? 0 : 1; 462 parseLevel(/*HasOpeningBrace=*/true); 463 Line->Level -= GoogScope ? 0 : 1; 464 } 465 nextToken(); 466 } 467 468 void UnwrappedLineParser::parsePPDirective() { 469 assert(FormatTok->Tok.is(tok::hash) && "'#' expected"); 470 ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError); 471 nextToken(); 472 473 if (!FormatTok->Tok.getIdentifierInfo()) { 474 parsePPUnknown(); 475 return; 476 } 477 478 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) { 479 case tok::pp_define: 480 parsePPDefine(); 481 return; 482 case tok::pp_if: 483 parsePPIf(/*IfDef=*/false); 484 break; 485 case tok::pp_ifdef: 486 case tok::pp_ifndef: 487 parsePPIf(/*IfDef=*/true); 488 break; 489 case tok::pp_else: 490 parsePPElse(); 491 break; 492 case tok::pp_elif: 493 parsePPElIf(); 494 break; 495 case tok::pp_endif: 496 parsePPEndIf(); 497 break; 498 default: 499 parsePPUnknown(); 500 break; 501 } 502 } 503 504 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) { 505 if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable)) 506 PPStack.push_back(PP_Unreachable); 507 else 508 PPStack.push_back(PP_Conditional); 509 } 510 511 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) { 512 ++PPBranchLevel; 513 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size()); 514 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) { 515 PPLevelBranchIndex.push_back(0); 516 PPLevelBranchCount.push_back(0); 517 } 518 PPChainBranchIndex.push(0); 519 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0; 520 conditionalCompilationCondition(Unreachable || Skip); 521 } 522 523 void UnwrappedLineParser::conditionalCompilationAlternative() { 524 if (!PPStack.empty()) 525 PPStack.pop_back(); 526 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 527 if (!PPChainBranchIndex.empty()) 528 ++PPChainBranchIndex.top(); 529 conditionalCompilationCondition( 530 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() && 531 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()); 532 } 533 534 void UnwrappedLineParser::conditionalCompilationEnd() { 535 assert(PPBranchLevel < (int)PPLevelBranchIndex.size()); 536 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) { 537 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) { 538 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1; 539 } 540 } 541 // Guard against #endif's without #if. 542 if (PPBranchLevel > 0) 543 --PPBranchLevel; 544 if (!PPChainBranchIndex.empty()) 545 PPChainBranchIndex.pop(); 546 if (!PPStack.empty()) 547 PPStack.pop_back(); 548 } 549 550 void UnwrappedLineParser::parsePPIf(bool IfDef) { 551 nextToken(); 552 bool IsLiteralFalse = (FormatTok->Tok.isLiteral() && 553 FormatTok->Tok.getLiteralData() != nullptr && 554 StringRef(FormatTok->Tok.getLiteralData(), 555 FormatTok->Tok.getLength()) == "0") || 556 FormatTok->Tok.is(tok::kw_false); 557 conditionalCompilationStart(!IfDef && IsLiteralFalse); 558 parsePPUnknown(); 559 } 560 561 void UnwrappedLineParser::parsePPElse() { 562 conditionalCompilationAlternative(); 563 parsePPUnknown(); 564 } 565 566 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); } 567 568 void UnwrappedLineParser::parsePPEndIf() { 569 conditionalCompilationEnd(); 570 parsePPUnknown(); 571 } 572 573 void UnwrappedLineParser::parsePPDefine() { 574 nextToken(); 575 576 if (FormatTok->Tok.getKind() != tok::identifier) { 577 parsePPUnknown(); 578 return; 579 } 580 nextToken(); 581 if (FormatTok->Tok.getKind() == tok::l_paren && 582 FormatTok->WhitespaceRange.getBegin() == 583 FormatTok->WhitespaceRange.getEnd()) { 584 parseParens(); 585 } 586 addUnwrappedLine(); 587 Line->Level = 1; 588 589 // Errors during a preprocessor directive can only affect the layout of the 590 // preprocessor directive, and thus we ignore them. An alternative approach 591 // would be to use the same approach we use on the file level (no 592 // re-indentation if there was a structural error) within the macro 593 // definition. 594 parseFile(); 595 } 596 597 void UnwrappedLineParser::parsePPUnknown() { 598 do { 599 nextToken(); 600 } while (!eof()); 601 addUnwrappedLine(); 602 } 603 604 // Here we blacklist certain tokens that are not usually the first token in an 605 // unwrapped line. This is used in attempt to distinguish macro calls without 606 // trailing semicolons from other constructs split to several lines. 607 bool tokenCanStartNewLine(clang::Token Tok) { 608 // Semicolon can be a null-statement, l_square can be a start of a macro or 609 // a C++11 attribute, but this doesn't seem to be common. 610 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) && 611 Tok.isNot(tok::l_square) && 612 // Tokens that can only be used as binary operators and a part of 613 // overloaded operator names. 614 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) && 615 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) && 616 Tok.isNot(tok::less) && Tok.isNot(tok::greater) && 617 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) && 618 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) && 619 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) && 620 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) && 621 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) && 622 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) && 623 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) && 624 Tok.isNot(tok::lesslessequal) && 625 // Colon is used in labels, base class lists, initializer lists, 626 // range-based for loops, ternary operator, but should never be the 627 // first token in an unwrapped line. 628 Tok.isNot(tok::colon) && 629 // 'noexcept' is a trailing annotation. 630 Tok.isNot(tok::kw_noexcept); 631 } 632 633 void UnwrappedLineParser::parseStructuralElement() { 634 assert(!FormatTok->Tok.is(tok::l_brace)); 635 switch (FormatTok->Tok.getKind()) { 636 case tok::at: 637 nextToken(); 638 if (FormatTok->Tok.is(tok::l_brace)) { 639 parseBracedList(); 640 break; 641 } 642 switch (FormatTok->Tok.getObjCKeywordID()) { 643 case tok::objc_public: 644 case tok::objc_protected: 645 case tok::objc_package: 646 case tok::objc_private: 647 return parseAccessSpecifier(); 648 case tok::objc_interface: 649 case tok::objc_implementation: 650 return parseObjCInterfaceOrImplementation(); 651 case tok::objc_protocol: 652 return parseObjCProtocol(); 653 case tok::objc_end: 654 return; // Handled by the caller. 655 case tok::objc_optional: 656 case tok::objc_required: 657 nextToken(); 658 addUnwrappedLine(); 659 return; 660 case tok::objc_try: 661 // This branch isn't strictly necessary (the kw_try case below would 662 // do this too after the tok::at is parsed above). But be explicit. 663 parseTryCatch(); 664 return; 665 default: 666 break; 667 } 668 break; 669 case tok::kw_asm: 670 nextToken(); 671 if (FormatTok->is(tok::l_brace)) { 672 nextToken(); 673 while (FormatTok && FormatTok->isNot(tok::eof)) { 674 if (FormatTok->is(tok::r_brace)) { 675 nextToken(); 676 break; 677 } 678 FormatTok->Finalized = true; 679 nextToken(); 680 } 681 } 682 break; 683 case tok::kw_namespace: 684 parseNamespace(); 685 return; 686 case tok::kw_inline: 687 nextToken(); 688 if (FormatTok->Tok.is(tok::kw_namespace)) { 689 parseNamespace(); 690 return; 691 } 692 break; 693 case tok::kw_public: 694 case tok::kw_protected: 695 case tok::kw_private: 696 if (Style.Language == FormatStyle::LK_Java || 697 Style.Language == FormatStyle::LK_JavaScript) 698 nextToken(); 699 else 700 parseAccessSpecifier(); 701 return; 702 case tok::kw_if: 703 parseIfThenElse(); 704 return; 705 case tok::kw_for: 706 case tok::kw_while: 707 parseForOrWhileLoop(); 708 return; 709 case tok::kw_do: 710 parseDoWhile(); 711 return; 712 case tok::kw_switch: 713 parseSwitch(); 714 return; 715 case tok::kw_default: 716 nextToken(); 717 parseLabel(); 718 return; 719 case tok::kw_case: 720 parseCaseLabel(); 721 return; 722 case tok::kw_try: 723 case tok::kw___try: 724 parseTryCatch(); 725 return; 726 case tok::kw_extern: 727 nextToken(); 728 if (FormatTok->Tok.is(tok::string_literal)) { 729 nextToken(); 730 if (FormatTok->Tok.is(tok::l_brace)) { 731 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false); 732 addUnwrappedLine(); 733 return; 734 } 735 } 736 break; 737 case tok::kw_export: 738 if (Style.Language == FormatStyle::LK_JavaScript) { 739 parseJavaScriptEs6ImportExport(); 740 return; 741 } 742 break; 743 case tok::identifier: 744 if (FormatTok->IsForEachMacro) { 745 parseForOrWhileLoop(); 746 return; 747 } 748 if (Style.Language == FormatStyle::LK_JavaScript && 749 FormatTok->is(Keywords.kw_import)) { 750 parseJavaScriptEs6ImportExport(); 751 return; 752 } 753 // In all other cases, parse the declaration. 754 break; 755 default: 756 break; 757 } 758 do { 759 switch (FormatTok->Tok.getKind()) { 760 case tok::at: 761 nextToken(); 762 if (FormatTok->Tok.is(tok::l_brace)) 763 parseBracedList(); 764 break; 765 case tok::kw_enum: 766 parseEnum(); 767 break; 768 case tok::kw_typedef: 769 nextToken(); 770 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS, 771 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS)) 772 parseEnum(); 773 break; 774 case tok::kw_struct: 775 case tok::kw_union: 776 case tok::kw_class: 777 parseRecord(); 778 // A record declaration or definition is always the start of a structural 779 // element. 780 break; 781 case tok::period: 782 nextToken(); 783 // In Java, classes have an implicit static member "class". 784 if (Style.Language == FormatStyle::LK_Java && FormatTok && 785 FormatTok->is(tok::kw_class)) 786 nextToken(); 787 break; 788 case tok::semi: 789 nextToken(); 790 addUnwrappedLine(); 791 return; 792 case tok::r_brace: 793 addUnwrappedLine(); 794 return; 795 case tok::l_paren: 796 parseParens(); 797 break; 798 case tok::caret: 799 nextToken(); 800 if (FormatTok->Tok.isAnyIdentifier() || 801 FormatTok->isSimpleTypeSpecifier()) 802 nextToken(); 803 if (FormatTok->is(tok::l_paren)) 804 parseParens(); 805 if (FormatTok->is(tok::l_brace)) 806 parseChildBlock(); 807 break; 808 case tok::l_brace: 809 if (!tryToParseBracedList()) { 810 // A block outside of parentheses must be the last part of a 811 // structural element. 812 // FIXME: Figure out cases where this is not true, and add projections 813 // for them (the one we know is missing are lambdas). 814 if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) 815 addUnwrappedLine(); 816 FormatTok->Type = TT_FunctionLBrace; 817 parseBlock(/*MustBeDeclaration=*/false); 818 addUnwrappedLine(); 819 return; 820 } 821 // Otherwise this was a braced init list, and the structural 822 // element continues. 823 break; 824 case tok::kw_try: 825 // We arrive here when parsing function-try blocks. 826 parseTryCatch(); 827 return; 828 case tok::identifier: { 829 StringRef Text = FormatTok->TokenText; 830 // Parse function literal unless 'function' is the first token in a line 831 // in which case this should be treated as a free-standing function. 832 if (Style.Language == FormatStyle::LK_JavaScript && Text == "function" && 833 Line->Tokens.size() > 0) { 834 tryToParseJSFunction(); 835 break; 836 } 837 nextToken(); 838 if (Line->Tokens.size() == 1 && 839 // JS doesn't have macros, and within classes colons indicate fields, 840 // not labels. 841 (Style.Language != FormatStyle::LK_JavaScript || 842 !Line->MustBeDeclaration)) { 843 if (FormatTok->Tok.is(tok::colon)) { 844 parseLabel(); 845 return; 846 } 847 // Recognize function-like macro usages without trailing semicolon as 848 // well as free-standing macros like Q_OBJECT. 849 bool FunctionLike = FormatTok->is(tok::l_paren); 850 if (FunctionLike) 851 parseParens(); 852 if (FormatTok->NewlinesBefore > 0 && 853 (Text.size() >= 5 || FunctionLike) && 854 tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) { 855 addUnwrappedLine(); 856 return; 857 } 858 } 859 break; 860 } 861 case tok::equal: 862 nextToken(); 863 if (FormatTok->Tok.is(tok::l_brace)) { 864 parseBracedList(); 865 } 866 break; 867 case tok::l_square: 868 parseSquare(); 869 break; 870 default: 871 nextToken(); 872 break; 873 } 874 } while (!eof()); 875 } 876 877 bool UnwrappedLineParser::tryToParseLambda() { 878 // FIXME: This is a dirty way to access the previous token. Find a better 879 // solution. 880 if (!Line->Tokens.empty() && 881 (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator, 882 tok::kw_new, tok::kw_delete) || 883 Line->Tokens.back().Tok->closesScope() || 884 Line->Tokens.back().Tok->isSimpleTypeSpecifier())) { 885 nextToken(); 886 return false; 887 } 888 assert(FormatTok->is(tok::l_square)); 889 FormatToken &LSquare = *FormatTok; 890 if (!tryToParseLambdaIntroducer()) 891 return false; 892 893 while (FormatTok->isNot(tok::l_brace)) { 894 if (FormatTok->isSimpleTypeSpecifier()) { 895 nextToken(); 896 continue; 897 } 898 switch (FormatTok->Tok.getKind()) { 899 case tok::l_brace: 900 break; 901 case tok::l_paren: 902 parseParens(); 903 break; 904 case tok::amp: 905 case tok::star: 906 case tok::kw_const: 907 case tok::comma: 908 case tok::less: 909 case tok::greater: 910 case tok::identifier: 911 case tok::coloncolon: 912 case tok::kw_mutable: 913 nextToken(); 914 break; 915 case tok::arrow: 916 FormatTok->Type = TT_TrailingReturnArrow; 917 nextToken(); 918 break; 919 default: 920 return true; 921 } 922 } 923 LSquare.Type = TT_LambdaLSquare; 924 parseChildBlock(); 925 return true; 926 } 927 928 bool UnwrappedLineParser::tryToParseLambdaIntroducer() { 929 nextToken(); 930 if (FormatTok->is(tok::equal)) { 931 nextToken(); 932 if (FormatTok->is(tok::r_square)) { 933 nextToken(); 934 return true; 935 } 936 if (FormatTok->isNot(tok::comma)) 937 return false; 938 nextToken(); 939 } else if (FormatTok->is(tok::amp)) { 940 nextToken(); 941 if (FormatTok->is(tok::r_square)) { 942 nextToken(); 943 return true; 944 } 945 if (!FormatTok->isOneOf(tok::comma, tok::identifier)) { 946 return false; 947 } 948 if (FormatTok->is(tok::comma)) 949 nextToken(); 950 } else if (FormatTok->is(tok::r_square)) { 951 nextToken(); 952 return true; 953 } 954 do { 955 if (FormatTok->is(tok::amp)) 956 nextToken(); 957 if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) 958 return false; 959 nextToken(); 960 if (FormatTok->is(tok::ellipsis)) 961 nextToken(); 962 if (FormatTok->is(tok::comma)) { 963 nextToken(); 964 } else if (FormatTok->is(tok::r_square)) { 965 nextToken(); 966 return true; 967 } else { 968 return false; 969 } 970 } while (!eof()); 971 return false; 972 } 973 974 void UnwrappedLineParser::tryToParseJSFunction() { 975 nextToken(); 976 977 // Consume function name. 978 if (FormatTok->is(tok::identifier)) 979 nextToken(); 980 981 if (FormatTok->isNot(tok::l_paren)) 982 return; 983 nextToken(); 984 while (FormatTok->isNot(tok::l_brace)) { 985 // Err on the side of caution in order to avoid consuming the full file in 986 // case of incomplete code. 987 if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren, 988 tok::comment)) 989 return; 990 nextToken(); 991 } 992 parseChildBlock(); 993 } 994 995 bool UnwrappedLineParser::tryToParseBracedList() { 996 if (FormatTok->BlockKind == BK_Unknown) 997 calculateBraceTypes(); 998 assert(FormatTok->BlockKind != BK_Unknown); 999 if (FormatTok->BlockKind == BK_Block) 1000 return false; 1001 parseBracedList(); 1002 return true; 1003 } 1004 1005 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) { 1006 bool HasError = false; 1007 nextToken(); 1008 1009 // FIXME: Once we have an expression parser in the UnwrappedLineParser, 1010 // replace this by using parseAssigmentExpression() inside. 1011 do { 1012 if (Style.Language == FormatStyle::LK_JavaScript && 1013 FormatTok->is(Keywords.kw_function)) { 1014 tryToParseJSFunction(); 1015 continue; 1016 } 1017 switch (FormatTok->Tok.getKind()) { 1018 case tok::caret: 1019 nextToken(); 1020 if (FormatTok->is(tok::l_brace)) { 1021 parseChildBlock(); 1022 } 1023 break; 1024 case tok::l_square: 1025 tryToParseLambda(); 1026 break; 1027 case tok::l_brace: 1028 // Assume there are no blocks inside a braced init list apart 1029 // from the ones we explicitly parse out (like lambdas). 1030 FormatTok->BlockKind = BK_BracedInit; 1031 parseBracedList(); 1032 break; 1033 case tok::r_brace: 1034 nextToken(); 1035 return !HasError; 1036 case tok::semi: 1037 HasError = true; 1038 if (!ContinueOnSemicolons) 1039 return !HasError; 1040 nextToken(); 1041 break; 1042 case tok::comma: 1043 nextToken(); 1044 break; 1045 default: 1046 nextToken(); 1047 break; 1048 } 1049 } while (!eof()); 1050 return false; 1051 } 1052 1053 void UnwrappedLineParser::parseParens() { 1054 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected."); 1055 nextToken(); 1056 do { 1057 switch (FormatTok->Tok.getKind()) { 1058 case tok::l_paren: 1059 parseParens(); 1060 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace)) 1061 parseChildBlock(); 1062 break; 1063 case tok::r_paren: 1064 nextToken(); 1065 return; 1066 case tok::r_brace: 1067 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1068 return; 1069 case tok::l_square: 1070 tryToParseLambda(); 1071 break; 1072 case tok::l_brace: 1073 if (!tryToParseBracedList()) { 1074 parseChildBlock(); 1075 } 1076 break; 1077 case tok::at: 1078 nextToken(); 1079 if (FormatTok->Tok.is(tok::l_brace)) 1080 parseBracedList(); 1081 break; 1082 case tok::identifier: 1083 if (Style.Language == FormatStyle::LK_JavaScript && 1084 FormatTok->is(Keywords.kw_function)) 1085 tryToParseJSFunction(); 1086 else 1087 nextToken(); 1088 break; 1089 default: 1090 nextToken(); 1091 break; 1092 } 1093 } while (!eof()); 1094 } 1095 1096 void UnwrappedLineParser::parseSquare() { 1097 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected."); 1098 if (tryToParseLambda()) 1099 return; 1100 do { 1101 switch (FormatTok->Tok.getKind()) { 1102 case tok::l_paren: 1103 parseParens(); 1104 break; 1105 case tok::r_square: 1106 nextToken(); 1107 return; 1108 case tok::r_brace: 1109 // A "}" inside parenthesis is an error if there wasn't a matching "{". 1110 return; 1111 case tok::l_square: 1112 parseSquare(); 1113 break; 1114 case tok::l_brace: { 1115 if (!tryToParseBracedList()) { 1116 parseChildBlock(); 1117 } 1118 break; 1119 } 1120 case tok::at: 1121 nextToken(); 1122 if (FormatTok->Tok.is(tok::l_brace)) 1123 parseBracedList(); 1124 break; 1125 default: 1126 nextToken(); 1127 break; 1128 } 1129 } while (!eof()); 1130 } 1131 1132 void UnwrappedLineParser::parseIfThenElse() { 1133 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected"); 1134 nextToken(); 1135 if (FormatTok->Tok.is(tok::l_paren)) 1136 parseParens(); 1137 bool NeedsUnwrappedLine = false; 1138 if (FormatTok->Tok.is(tok::l_brace)) { 1139 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1140 parseBlock(/*MustBeDeclaration=*/false); 1141 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1142 Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 1143 addUnwrappedLine(); 1144 } else { 1145 NeedsUnwrappedLine = true; 1146 } 1147 } else { 1148 addUnwrappedLine(); 1149 ++Line->Level; 1150 parseStructuralElement(); 1151 --Line->Level; 1152 } 1153 if (FormatTok->Tok.is(tok::kw_else)) { 1154 if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) 1155 addUnwrappedLine(); 1156 nextToken(); 1157 if (FormatTok->Tok.is(tok::l_brace)) { 1158 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1159 parseBlock(/*MustBeDeclaration=*/false); 1160 addUnwrappedLine(); 1161 } else if (FormatTok->Tok.is(tok::kw_if)) { 1162 parseIfThenElse(); 1163 } else { 1164 addUnwrappedLine(); 1165 ++Line->Level; 1166 parseStructuralElement(); 1167 --Line->Level; 1168 } 1169 } else if (NeedsUnwrappedLine) { 1170 addUnwrappedLine(); 1171 } 1172 } 1173 1174 void UnwrappedLineParser::parseTryCatch() { 1175 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected"); 1176 nextToken(); 1177 bool NeedsUnwrappedLine = false; 1178 if (FormatTok->is(tok::colon)) { 1179 // We are in a function try block, what comes is an initializer list. 1180 nextToken(); 1181 while (FormatTok->is(tok::identifier)) { 1182 nextToken(); 1183 if (FormatTok->is(tok::l_paren)) 1184 parseParens(); 1185 else 1186 StructuralError = true; 1187 if (FormatTok->is(tok::comma)) 1188 nextToken(); 1189 } 1190 } 1191 // Parse try with resource. 1192 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) { 1193 parseParens(); 1194 } 1195 if (FormatTok->is(tok::l_brace)) { 1196 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1197 parseBlock(/*MustBeDeclaration=*/false); 1198 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1199 Style.BreakBeforeBraces == FormatStyle::BS_GNU || 1200 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { 1201 addUnwrappedLine(); 1202 } else { 1203 NeedsUnwrappedLine = true; 1204 } 1205 } else if (!FormatTok->is(tok::kw_catch)) { 1206 // The C++ standard requires a compound-statement after a try. 1207 // If there's none, we try to assume there's a structuralElement 1208 // and try to continue. 1209 StructuralError = true; 1210 addUnwrappedLine(); 1211 ++Line->Level; 1212 parseStructuralElement(); 1213 --Line->Level; 1214 } 1215 while (1) { 1216 if (FormatTok->is(tok::at)) 1217 nextToken(); 1218 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except, 1219 tok::kw___finally) || 1220 ((Style.Language == FormatStyle::LK_Java || 1221 Style.Language == FormatStyle::LK_JavaScript) && 1222 FormatTok->is(Keywords.kw_finally)) || 1223 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) || 1224 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally)))) 1225 break; 1226 nextToken(); 1227 while (FormatTok->isNot(tok::l_brace)) { 1228 if (FormatTok->is(tok::l_paren)) { 1229 parseParens(); 1230 continue; 1231 } 1232 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) 1233 return; 1234 nextToken(); 1235 } 1236 NeedsUnwrappedLine = false; 1237 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1238 parseBlock(/*MustBeDeclaration=*/false); 1239 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1240 Style.BreakBeforeBraces == FormatStyle::BS_GNU || 1241 Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) { 1242 addUnwrappedLine(); 1243 } else { 1244 NeedsUnwrappedLine = true; 1245 } 1246 } 1247 if (NeedsUnwrappedLine) { 1248 addUnwrappedLine(); 1249 } 1250 } 1251 1252 void UnwrappedLineParser::parseNamespace() { 1253 assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected"); 1254 1255 const FormatToken &InitialToken = *FormatTok; 1256 nextToken(); 1257 if (FormatTok->Tok.is(tok::identifier)) 1258 nextToken(); 1259 if (FormatTok->Tok.is(tok::l_brace)) { 1260 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1261 addUnwrappedLine(); 1262 1263 bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All || 1264 (Style.NamespaceIndentation == FormatStyle::NI_Inner && 1265 DeclarationScopeStack.size() > 1); 1266 parseBlock(/*MustBeDeclaration=*/true, AddLevel); 1267 // Munch the semicolon after a namespace. This is more common than one would 1268 // think. Puttin the semicolon into its own line is very ugly. 1269 if (FormatTok->Tok.is(tok::semi)) 1270 nextToken(); 1271 addUnwrappedLine(); 1272 } 1273 // FIXME: Add error handling. 1274 } 1275 1276 void UnwrappedLineParser::parseForOrWhileLoop() { 1277 assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while) || 1278 FormatTok->IsForEachMacro) && 1279 "'for', 'while' or foreach macro expected"); 1280 nextToken(); 1281 if (FormatTok->Tok.is(tok::l_paren)) 1282 parseParens(); 1283 if (FormatTok->Tok.is(tok::l_brace)) { 1284 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1285 parseBlock(/*MustBeDeclaration=*/false); 1286 addUnwrappedLine(); 1287 } else { 1288 addUnwrappedLine(); 1289 ++Line->Level; 1290 parseStructuralElement(); 1291 --Line->Level; 1292 } 1293 } 1294 1295 void UnwrappedLineParser::parseDoWhile() { 1296 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected"); 1297 nextToken(); 1298 if (FormatTok->Tok.is(tok::l_brace)) { 1299 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1300 parseBlock(/*MustBeDeclaration=*/false); 1301 if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) 1302 addUnwrappedLine(); 1303 } else { 1304 addUnwrappedLine(); 1305 ++Line->Level; 1306 parseStructuralElement(); 1307 --Line->Level; 1308 } 1309 1310 // FIXME: Add error handling. 1311 if (!FormatTok->Tok.is(tok::kw_while)) { 1312 addUnwrappedLine(); 1313 return; 1314 } 1315 1316 nextToken(); 1317 parseStructuralElement(); 1318 } 1319 1320 void UnwrappedLineParser::parseLabel() { 1321 nextToken(); 1322 unsigned OldLineLevel = Line->Level; 1323 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0)) 1324 --Line->Level; 1325 if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) { 1326 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1327 parseBlock(/*MustBeDeclaration=*/false); 1328 if (FormatTok->Tok.is(tok::kw_break)) { 1329 // "break;" after "}" on its own line only for BS_Allman and BS_GNU 1330 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1331 Style.BreakBeforeBraces == FormatStyle::BS_GNU) { 1332 addUnwrappedLine(); 1333 } 1334 parseStructuralElement(); 1335 } 1336 addUnwrappedLine(); 1337 } else { 1338 addUnwrappedLine(); 1339 } 1340 Line->Level = OldLineLevel; 1341 } 1342 1343 void UnwrappedLineParser::parseCaseLabel() { 1344 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected"); 1345 // FIXME: fix handling of complex expressions here. 1346 do { 1347 nextToken(); 1348 } while (!eof() && !FormatTok->Tok.is(tok::colon)); 1349 parseLabel(); 1350 } 1351 1352 void UnwrappedLineParser::parseSwitch() { 1353 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected"); 1354 nextToken(); 1355 if (FormatTok->Tok.is(tok::l_paren)) 1356 parseParens(); 1357 if (FormatTok->Tok.is(tok::l_brace)) { 1358 CompoundStatementIndenter Indenter(this, Style, Line->Level); 1359 parseBlock(/*MustBeDeclaration=*/false); 1360 addUnwrappedLine(); 1361 } else { 1362 addUnwrappedLine(); 1363 ++Line->Level; 1364 parseStructuralElement(); 1365 --Line->Level; 1366 } 1367 } 1368 1369 void UnwrappedLineParser::parseAccessSpecifier() { 1370 nextToken(); 1371 // Understand Qt's slots. 1372 if (FormatTok->is(tok::identifier) && 1373 (FormatTok->TokenText == "slots" || FormatTok->TokenText == "Q_SLOTS")) 1374 nextToken(); 1375 // Otherwise, we don't know what it is, and we'd better keep the next token. 1376 if (FormatTok->Tok.is(tok::colon)) 1377 nextToken(); 1378 addUnwrappedLine(); 1379 } 1380 1381 void UnwrappedLineParser::parseEnum() { 1382 // Won't be 'enum' for NS_ENUMs. 1383 if (FormatTok->Tok.is(tok::kw_enum)) 1384 nextToken(); 1385 1386 // Eat up enum class ... 1387 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct)) 1388 nextToken(); 1389 while (FormatTok->Tok.getIdentifierInfo() || 1390 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less, 1391 tok::greater, tok::comma, tok::question)) { 1392 nextToken(); 1393 // We can have macros or attributes in between 'enum' and the enum name. 1394 if (FormatTok->is(tok::l_paren)) 1395 parseParens(); 1396 if (FormatTok->is(tok::identifier)) 1397 nextToken(); 1398 } 1399 1400 // Just a declaration or something is wrong. 1401 if (FormatTok->isNot(tok::l_brace)) 1402 return; 1403 FormatTok->BlockKind = BK_Block; 1404 1405 if (Style.Language == FormatStyle::LK_Java) { 1406 // Java enums are different. 1407 parseJavaEnumBody(); 1408 return; 1409 } 1410 1411 // Parse enum body. 1412 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true); 1413 if (HasError) { 1414 if (FormatTok->is(tok::semi)) 1415 nextToken(); 1416 addUnwrappedLine(); 1417 } 1418 1419 // We fall through to parsing a structural element afterwards, so that in 1420 // enum A {} n, m; 1421 // "} n, m;" will end up in one unwrapped line. 1422 } 1423 1424 void UnwrappedLineParser::parseJavaEnumBody() { 1425 // Determine whether the enum is simple, i.e. does not have a semicolon or 1426 // constants with class bodies. Simple enums can be formatted like braced 1427 // lists, contracted to a single line, etc. 1428 unsigned StoredPosition = Tokens->getPosition(); 1429 bool IsSimple = true; 1430 FormatToken *Tok = Tokens->getNextToken(); 1431 while (Tok) { 1432 if (Tok->is(tok::r_brace)) 1433 break; 1434 if (Tok->isOneOf(tok::l_brace, tok::semi)) { 1435 IsSimple = false; 1436 break; 1437 } 1438 // FIXME: This will also mark enums with braces in the arguments to enum 1439 // constants as "not simple". This is probably fine in practice, though. 1440 Tok = Tokens->getNextToken(); 1441 } 1442 FormatTok = Tokens->setPosition(StoredPosition); 1443 1444 if (IsSimple) { 1445 parseBracedList(); 1446 addUnwrappedLine(); 1447 return; 1448 } 1449 1450 // Parse the body of a more complex enum. 1451 // First add a line for everything up to the "{". 1452 nextToken(); 1453 addUnwrappedLine(); 1454 ++Line->Level; 1455 1456 // Parse the enum constants. 1457 while (FormatTok) { 1458 if (FormatTok->is(tok::l_brace)) { 1459 // Parse the constant's class body. 1460 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1461 /*MunchSemi=*/false); 1462 } else if (FormatTok->is(tok::l_paren)) { 1463 parseParens(); 1464 } else if (FormatTok->is(tok::comma)) { 1465 nextToken(); 1466 addUnwrappedLine(); 1467 } else if (FormatTok->is(tok::semi)) { 1468 nextToken(); 1469 addUnwrappedLine(); 1470 break; 1471 } else if (FormatTok->is(tok::r_brace)) { 1472 addUnwrappedLine(); 1473 break; 1474 } else { 1475 nextToken(); 1476 } 1477 } 1478 1479 // Parse the class body after the enum's ";" if any. 1480 parseLevel(/*HasOpeningBrace=*/true); 1481 nextToken(); 1482 --Line->Level; 1483 addUnwrappedLine(); 1484 } 1485 1486 void UnwrappedLineParser::parseRecord() { 1487 const FormatToken &InitialToken = *FormatTok; 1488 nextToken(); 1489 if (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw___attribute, 1490 tok::kw___declspec, tok::kw_alignas)) { 1491 nextToken(); 1492 // We can have macros or attributes in between 'class' and the class name. 1493 if (FormatTok->Tok.is(tok::l_paren)) { 1494 parseParens(); 1495 } 1496 // The actual identifier can be a nested name specifier, and in macros 1497 // it is often token-pasted. 1498 while (FormatTok->is(tok::identifier) || FormatTok->is(tok::coloncolon) || 1499 FormatTok->is(tok::hashhash) || 1500 (Style.Language == FormatStyle::LK_Java && 1501 FormatTok->isOneOf(tok::period, tok::comma))) 1502 nextToken(); 1503 1504 // Note that parsing away template declarations here leads to incorrectly 1505 // accepting function declarations as record declarations. 1506 // In general, we cannot solve this problem. Consider: 1507 // class A<int> B() {} 1508 // which can be a function definition or a class definition when B() is a 1509 // macro. If we find enough real-world cases where this is a problem, we 1510 // can parse for the 'template' keyword in the beginning of the statement, 1511 // and thus rule out the record production in case there is no template 1512 // (this would still leave us with an ambiguity between template function 1513 // and class declarations). 1514 if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) { 1515 while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) { 1516 if (FormatTok->Tok.is(tok::semi)) 1517 return; 1518 nextToken(); 1519 } 1520 } 1521 } 1522 if (FormatTok->Tok.is(tok::l_brace)) { 1523 if (ShouldBreakBeforeBrace(Style, InitialToken)) 1524 addUnwrappedLine(); 1525 1526 parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true, 1527 /*MunchSemi=*/false); 1528 } 1529 // We fall through to parsing a structural element afterwards, so 1530 // class A {} n, m; 1531 // will end up in one unwrapped line. 1532 // This does not apply for Java. 1533 if (Style.Language == FormatStyle::LK_Java || 1534 Style.Language == FormatStyle::LK_JavaScript) 1535 addUnwrappedLine(); 1536 } 1537 1538 void UnwrappedLineParser::parseObjCProtocolList() { 1539 assert(FormatTok->Tok.is(tok::less) && "'<' expected."); 1540 do 1541 nextToken(); 1542 while (!eof() && FormatTok->Tok.isNot(tok::greater)); 1543 nextToken(); // Skip '>'. 1544 } 1545 1546 void UnwrappedLineParser::parseObjCUntilAtEnd() { 1547 do { 1548 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) { 1549 nextToken(); 1550 addUnwrappedLine(); 1551 break; 1552 } 1553 if (FormatTok->is(tok::l_brace)) { 1554 parseBlock(/*MustBeDeclaration=*/false); 1555 // In ObjC interfaces, nothing should be following the "}". 1556 addUnwrappedLine(); 1557 } else if (FormatTok->is(tok::r_brace)) { 1558 // Ignore stray "}". parseStructuralElement doesn't consume them. 1559 nextToken(); 1560 addUnwrappedLine(); 1561 } else { 1562 parseStructuralElement(); 1563 } 1564 } while (!eof()); 1565 } 1566 1567 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() { 1568 nextToken(); 1569 nextToken(); // interface name 1570 1571 // @interface can be followed by either a base class, or a category. 1572 if (FormatTok->Tok.is(tok::colon)) { 1573 nextToken(); 1574 nextToken(); // base class name 1575 } else if (FormatTok->Tok.is(tok::l_paren)) 1576 // Skip category, if present. 1577 parseParens(); 1578 1579 if (FormatTok->Tok.is(tok::less)) 1580 parseObjCProtocolList(); 1581 1582 if (FormatTok->Tok.is(tok::l_brace)) { 1583 if (Style.BreakBeforeBraces == FormatStyle::BS_Allman || 1584 Style.BreakBeforeBraces == FormatStyle::BS_GNU) 1585 addUnwrappedLine(); 1586 parseBlock(/*MustBeDeclaration=*/true); 1587 } 1588 1589 // With instance variables, this puts '}' on its own line. Without instance 1590 // variables, this ends the @interface line. 1591 addUnwrappedLine(); 1592 1593 parseObjCUntilAtEnd(); 1594 } 1595 1596 void UnwrappedLineParser::parseObjCProtocol() { 1597 nextToken(); 1598 nextToken(); // protocol name 1599 1600 if (FormatTok->Tok.is(tok::less)) 1601 parseObjCProtocolList(); 1602 1603 // Check for protocol declaration. 1604 if (FormatTok->Tok.is(tok::semi)) { 1605 nextToken(); 1606 return addUnwrappedLine(); 1607 } 1608 1609 addUnwrappedLine(); 1610 parseObjCUntilAtEnd(); 1611 } 1612 1613 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() { 1614 assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export)); 1615 nextToken(); 1616 1617 if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, Keywords.kw_function, 1618 Keywords.kw_var)) 1619 return; // Fall through to parsing the corresponding structure. 1620 1621 if (FormatTok->is(tok::kw_default)) { 1622 nextToken(); // export default ..., fall through after eating 'default'. 1623 return; 1624 } 1625 1626 if (FormatTok->is(tok::l_brace)) { 1627 FormatTok->BlockKind = BK_Block; 1628 parseBracedList(); 1629 } 1630 1631 while (!eof() && FormatTok->isNot(tok::semi) && 1632 FormatTok->isNot(tok::l_brace)) { 1633 nextToken(); 1634 } 1635 } 1636 1637 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line, 1638 StringRef Prefix = "") { 1639 llvm::dbgs() << Prefix << "Line(" << Line.Level << ")" 1640 << (Line.InPPDirective ? " MACRO" : "") << ": "; 1641 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 1642 E = Line.Tokens.end(); 1643 I != E; ++I) { 1644 llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] "; 1645 } 1646 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(), 1647 E = Line.Tokens.end(); 1648 I != E; ++I) { 1649 const UnwrappedLineNode &Node = *I; 1650 for (SmallVectorImpl<UnwrappedLine>::const_iterator 1651 I = Node.Children.begin(), 1652 E = Node.Children.end(); 1653 I != E; ++I) { 1654 printDebugInfo(*I, "\nChild: "); 1655 } 1656 } 1657 llvm::dbgs() << "\n"; 1658 } 1659 1660 void UnwrappedLineParser::addUnwrappedLine() { 1661 if (Line->Tokens.empty()) 1662 return; 1663 DEBUG({ 1664 if (CurrentLines == &Lines) 1665 printDebugInfo(*Line); 1666 }); 1667 CurrentLines->push_back(*Line); 1668 Line->Tokens.clear(); 1669 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { 1670 for (SmallVectorImpl<UnwrappedLine>::iterator 1671 I = PreprocessorDirectives.begin(), 1672 E = PreprocessorDirectives.end(); 1673 I != E; ++I) { 1674 CurrentLines->push_back(*I); 1675 } 1676 PreprocessorDirectives.clear(); 1677 } 1678 } 1679 1680 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); } 1681 1682 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) { 1683 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) && 1684 FormatTok.NewlinesBefore > 0; 1685 } 1686 1687 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) { 1688 bool JustComments = Line->Tokens.empty(); 1689 for (SmallVectorImpl<FormatToken *>::const_iterator 1690 I = CommentsBeforeNextToken.begin(), 1691 E = CommentsBeforeNextToken.end(); 1692 I != E; ++I) { 1693 if (isOnNewLine(**I) && JustComments) { 1694 addUnwrappedLine(); 1695 } 1696 pushToken(*I); 1697 } 1698 if (NewlineBeforeNext && JustComments) { 1699 addUnwrappedLine(); 1700 } 1701 CommentsBeforeNextToken.clear(); 1702 } 1703 1704 void UnwrappedLineParser::nextToken() { 1705 if (eof()) 1706 return; 1707 flushComments(isOnNewLine(*FormatTok)); 1708 pushToken(FormatTok); 1709 readToken(); 1710 } 1711 1712 void UnwrappedLineParser::readToken() { 1713 bool CommentsInCurrentLine = true; 1714 do { 1715 FormatTok = Tokens->getNextToken(); 1716 assert(FormatTok); 1717 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) && 1718 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) { 1719 // If there is an unfinished unwrapped line, we flush the preprocessor 1720 // directives only after that unwrapped line was finished later. 1721 bool SwitchToPreprocessorLines = !Line->Tokens.empty(); 1722 ScopedLineState BlockState(*this, SwitchToPreprocessorLines); 1723 // Comments stored before the preprocessor directive need to be output 1724 // before the preprocessor directive, at the same level as the 1725 // preprocessor directive, as we consider them to apply to the directive. 1726 flushComments(isOnNewLine(*FormatTok)); 1727 parsePPDirective(); 1728 } 1729 while (FormatTok->Type == TT_ConflictStart || 1730 FormatTok->Type == TT_ConflictEnd || 1731 FormatTok->Type == TT_ConflictAlternative) { 1732 if (FormatTok->Type == TT_ConflictStart) { 1733 conditionalCompilationStart(/*Unreachable=*/false); 1734 } else if (FormatTok->Type == TT_ConflictAlternative) { 1735 conditionalCompilationAlternative(); 1736 } else if (FormatTok->Type == TT_ConflictEnd) { 1737 conditionalCompilationEnd(); 1738 } 1739 FormatTok = Tokens->getNextToken(); 1740 FormatTok->MustBreakBefore = true; 1741 } 1742 1743 if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) && 1744 !Line->InPPDirective) { 1745 continue; 1746 } 1747 1748 if (!FormatTok->Tok.is(tok::comment)) 1749 return; 1750 if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) { 1751 CommentsInCurrentLine = false; 1752 } 1753 if (CommentsInCurrentLine) { 1754 pushToken(FormatTok); 1755 } else { 1756 CommentsBeforeNextToken.push_back(FormatTok); 1757 } 1758 } while (!eof()); 1759 } 1760 1761 void UnwrappedLineParser::pushToken(FormatToken *Tok) { 1762 Line->Tokens.push_back(UnwrappedLineNode(Tok)); 1763 if (MustBreakBeforeNextToken) { 1764 Line->Tokens.back().Tok->MustBreakBefore = true; 1765 MustBreakBeforeNextToken = false; 1766 } 1767 } 1768 1769 } // end namespace format 1770 } // end namespace clang 1771