1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         Token(nullptr) {
65     TokenSource = this;
66     Line.Level = 0;
67     Line.InPPDirective = true;
68   }
69 
70   ~ScopedMacroState() override {
71     TokenSource = PreviousTokenSource;
72     ResetToken = Token;
73     Line.InPPDirective = false;
74     Line.Level = PreviousLineLevel;
75   }
76 
77   FormatToken *getNextToken() override {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89   FormatToken *setPosition(unsigned Position) override {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113 
114   FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
119 class ScopedLineState {
120 public:
121   ScopedLineState(UnwrappedLineParser &Parser,
122                   bool SwitchToPreprocessorLines = false)
123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124     if (SwitchToPreprocessorLines)
125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
126     else if (!Parser.Line->Tokens.empty())
127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128     PreBlockLine = std::move(Parser.Line);
129     Parser.Line = llvm::make_unique<UnwrappedLine>();
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line = std::move(PreBlockLine);
140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141       Parser.MustBreakBeforeNextToken = true;
142     Parser.CurrentLines = OriginalLines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147 
148   std::unique_ptr<UnwrappedLine> PreBlockLine;
149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
152 class CompoundStatementIndenter {
153 public:
154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
155                             const FormatStyle &Style, unsigned &LineLevel)
156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
157     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) {
158       Parser->addUnwrappedLine();
159     } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
160       Parser->addUnwrappedLine();
161       ++LineLevel;
162     }
163   }
164   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
165 
166 private:
167   unsigned &LineLevel;
168   unsigned OldLineLevel;
169 };
170 
171 namespace {
172 
173 class IndexedTokenSource : public FormatTokenSource {
174 public:
175   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
176       : Tokens(Tokens), Position(-1) {}
177 
178   FormatToken *getNextToken() override {
179     ++Position;
180     return Tokens[Position];
181   }
182 
183   unsigned getPosition() override {
184     assert(Position >= 0);
185     return Position;
186   }
187 
188   FormatToken *setPosition(unsigned P) override {
189     Position = P;
190     return Tokens[Position];
191   }
192 
193   void reset() { Position = -1; }
194 
195 private:
196   ArrayRef<FormatToken *> Tokens;
197   int Position;
198 };
199 
200 } // end anonymous namespace
201 
/// \brief Sets up a parser over \p Tokens that reports to \p Callback.
///
/// The parser starts with a fresh, empty line, collects into \c Lines, has no
/// active token source yet (\c Tokens member is null until \c parse() runs)
/// and is outside of any preprocessor branch (\c PPBranchLevel == -1).
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
209 
210 void UnwrappedLineParser::reset() {
211   PPBranchLevel = -1;
212   Line.reset(new UnwrappedLine);
213   CommentsBeforeNextToken.clear();
214   FormatTok = nullptr;
215   MustBreakBeforeNextToken = false;
216   PreprocessorDirectives.clear();
217   CurrentLines = &Lines;
218   DeclarationScopeStack.clear();
219   PPStack.clear();
220 }
221 
222 void UnwrappedLineParser::parse() {
223   IndexedTokenSource TokenSource(AllTokens);
224   do {
225     DEBUG(llvm::dbgs() << "----\n");
226     reset();
227     Tokens = &TokenSource;
228     TokenSource.reset();
229 
230     readToken();
231     parseFile();
232     // Create line with eof token.
233     pushToken(FormatTok);
234     addUnwrappedLine();
235 
236     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
237                                                   E = Lines.end();
238          I != E; ++I) {
239       Callback.consumeUnwrappedLine(*I);
240     }
241     Callback.finishRun();
242     Lines.clear();
243     while (!PPLevelBranchIndex.empty() &&
244            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
245       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
246       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
247     }
248     if (!PPLevelBranchIndex.empty()) {
249       ++PPLevelBranchIndex.back();
250       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
251       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
252     }
253   } while (!PPLevelBranchIndex.empty());
254 
255 }
256 
257 void UnwrappedLineParser::parseFile() {
258   // The top-level context in a file always has declarations, except for pre-
259   // processor directives and JavaScript files.
260   bool MustBeDeclaration =
261       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
262   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
263                                           MustBeDeclaration);
264   parseLevel(/*HasOpeningBrace=*/false);
265   // Make sure to format the remaining tokens.
266   flushComments(true);
267   addUnwrappedLine();
268 }
269 
270 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
271   bool SwitchLabelEncountered = false;
272   do {
273     switch (FormatTok->Tok.getKind()) {
274     case tok::comment:
275       nextToken();
276       addUnwrappedLine();
277       break;
278     case tok::l_brace:
279       // FIXME: Add parameter whether this can happen - if this happens, we must
280       // be in a non-declaration context.
281       parseBlock(/*MustBeDeclaration=*/false);
282       addUnwrappedLine();
283       break;
284     case tok::r_brace:
285       if (HasOpeningBrace)
286         return;
287       nextToken();
288       addUnwrappedLine();
289       break;
290     case tok::kw_default:
291     case tok::kw_case:
292       if (!SwitchLabelEncountered &&
293           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
294         ++Line->Level;
295       SwitchLabelEncountered = true;
296       parseStructuralElement();
297       break;
298     default:
299       parseStructuralElement();
300       break;
301     }
302   } while (!eof());
303 }
304 
305 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
306   // We'll parse forward through the tokens until we hit
307   // a closing brace or eof - note that getNextToken() will
308   // parse macros, so this will magically work inside macro
309   // definitions, too.
310   unsigned StoredPosition = Tokens->getPosition();
311   FormatToken *Tok = FormatTok;
312   // Keep a stack of positions of lbrace tokens. We will
313   // update information about whether an lbrace starts a
314   // braced init list or a different block during the loop.
315   SmallVector<FormatToken *, 8> LBraceStack;
316   assert(Tok->Tok.is(tok::l_brace));
317   do {
318     // Get next none-comment token.
319     FormatToken *NextTok;
320     unsigned ReadTokens = 0;
321     do {
322       NextTok = Tokens->getNextToken();
323       ++ReadTokens;
324     } while (NextTok->is(tok::comment));
325 
326     switch (Tok->Tok.getKind()) {
327     case tok::l_brace:
328       Tok->BlockKind = BK_Unknown;
329       LBraceStack.push_back(Tok);
330       break;
331     case tok::r_brace:
332       if (!LBraceStack.empty()) {
333         if (LBraceStack.back()->BlockKind == BK_Unknown) {
334           bool ProbablyBracedList = false;
335           if (Style.Language == FormatStyle::LK_Proto) {
336             ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
337           } else {
338             // Using OriginalColumn to distinguish between ObjC methods and
339             // binary operators is a bit hacky.
340             bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
341                                     NextTok->OriginalColumn == 0;
342 
343             // If there is a comma, semicolon or right paren after the closing
344             // brace, we assume this is a braced initializer list.  Note that
345             // regardless how we mark inner braces here, we will overwrite the
346             // BlockKind later if we parse a braced list (where all blocks
347             // inside are by default braced lists), or when we explicitly detect
348             // blocks (for example while parsing lambdas).
349             //
350             // We exclude + and - as they can be ObjC visibility modifiers.
351             ProbablyBracedList =
352                 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
353                                  tok::r_paren, tok::r_square, tok::l_brace,
354                                  tok::l_paren, tok::ellipsis) ||
355                 (NextTok->is(tok::semi) &&
356                  (!ExpectClassBody || LBraceStack.size() != 1)) ||
357                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
358           }
359           if (ProbablyBracedList) {
360             Tok->BlockKind = BK_BracedInit;
361             LBraceStack.back()->BlockKind = BK_BracedInit;
362           } else {
363             Tok->BlockKind = BK_Block;
364             LBraceStack.back()->BlockKind = BK_Block;
365           }
366         }
367         LBraceStack.pop_back();
368       }
369       break;
370     case tok::at:
371     case tok::semi:
372     case tok::kw_if:
373     case tok::kw_while:
374     case tok::kw_for:
375     case tok::kw_switch:
376     case tok::kw_try:
377     case tok::kw___try:
378       if (!LBraceStack.empty())
379         LBraceStack.back()->BlockKind = BK_Block;
380       break;
381     default:
382       break;
383     }
384     Tok = NextTok;
385   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
386   // Assume other blocks for all unclosed opening braces.
387   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
388     if (LBraceStack[i]->BlockKind == BK_Unknown)
389       LBraceStack[i]->BlockKind = BK_Block;
390   }
391 
392   FormatTok = Tokens->setPosition(StoredPosition);
393 }
394 
395 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
396                                      bool MunchSemi) {
397   assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
398   unsigned InitialLevel = Line->Level;
399   nextToken();
400 
401   addUnwrappedLine();
402 
403   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
404                                           MustBeDeclaration);
405   if (AddLevel)
406     ++Line->Level;
407   parseLevel(/*HasOpeningBrace=*/true);
408 
409   if (!FormatTok->Tok.is(tok::r_brace)) {
410     Line->Level = InitialLevel;
411     return;
412   }
413 
414   nextToken(); // Munch the closing brace.
415   if (MunchSemi && FormatTok->Tok.is(tok::semi))
416     nextToken();
417   Line->Level = InitialLevel;
418 }
419 
420 static bool isGoogScope(const UnwrappedLine &Line) {
421   // FIXME: Closure-library specific stuff should not be hard-coded but be
422   // configurable.
423   if (Line.Tokens.size() < 4)
424     return false;
425   auto I = Line.Tokens.begin();
426   if (I->Tok->TokenText != "goog")
427     return false;
428   ++I;
429   if (I->Tok->isNot(tok::period))
430     return false;
431   ++I;
432   if (I->Tok->TokenText != "scope")
433     return false;
434   ++I;
435   return I->Tok->is(tok::l_paren);
436 }
437 
438 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
439                                    const FormatToken &InitialToken) {
440   switch (Style.BreakBeforeBraces) {
441   case FormatStyle::BS_Linux:
442     return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class);
443   case FormatStyle::BS_Allman:
444   case FormatStyle::BS_GNU:
445     return true;
446   default:
447     return false;
448   }
449 }
450 
451 void UnwrappedLineParser::parseChildBlock() {
452   FormatTok->BlockKind = BK_Block;
453   nextToken();
454   {
455     bool GoogScope =
456         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
457     ScopedLineState LineState(*this);
458     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
459                                             /*MustBeDeclaration=*/false);
460     Line->Level += GoogScope ? 0 : 1;
461     parseLevel(/*HasOpeningBrace=*/true);
462     flushComments(isOnNewLine(*FormatTok));
463     Line->Level -= GoogScope ? 0 : 1;
464   }
465   nextToken();
466 }
467 
468 void UnwrappedLineParser::parsePPDirective() {
469   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
470   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
471   nextToken();
472 
473   if (!FormatTok->Tok.getIdentifierInfo()) {
474     parsePPUnknown();
475     return;
476   }
477 
478   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
479   case tok::pp_define:
480     parsePPDefine();
481     return;
482   case tok::pp_if:
483     parsePPIf(/*IfDef=*/false);
484     break;
485   case tok::pp_ifdef:
486   case tok::pp_ifndef:
487     parsePPIf(/*IfDef=*/true);
488     break;
489   case tok::pp_else:
490     parsePPElse();
491     break;
492   case tok::pp_elif:
493     parsePPElIf();
494     break;
495   case tok::pp_endif:
496     parsePPEndIf();
497     break;
498   default:
499     parsePPUnknown();
500     break;
501   }
502 }
503 
504 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
505   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
506     PPStack.push_back(PP_Unreachable);
507   else
508     PPStack.push_back(PP_Conditional);
509 }
510 
511 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
512   ++PPBranchLevel;
513   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
514   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
515     PPLevelBranchIndex.push_back(0);
516     PPLevelBranchCount.push_back(0);
517   }
518   PPChainBranchIndex.push(0);
519   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
520   conditionalCompilationCondition(Unreachable || Skip);
521 }
522 
523 void UnwrappedLineParser::conditionalCompilationAlternative() {
524   if (!PPStack.empty())
525     PPStack.pop_back();
526   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
527   if (!PPChainBranchIndex.empty())
528     ++PPChainBranchIndex.top();
529   conditionalCompilationCondition(
530       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
531       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
532 }
533 
534 void UnwrappedLineParser::conditionalCompilationEnd() {
535   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
536   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
537     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
538       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
539     }
540   }
541   // Guard against #endif's without #if.
542   if (PPBranchLevel > 0)
543     --PPBranchLevel;
544   if (!PPChainBranchIndex.empty())
545     PPChainBranchIndex.pop();
546   if (!PPStack.empty())
547     PPStack.pop_back();
548 }
549 
550 void UnwrappedLineParser::parsePPIf(bool IfDef) {
551   nextToken();
552   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
553                          FormatTok->Tok.getLiteralData() != nullptr &&
554                          StringRef(FormatTok->Tok.getLiteralData(),
555                                    FormatTok->Tok.getLength()) == "0") ||
556                         FormatTok->Tok.is(tok::kw_false);
557   conditionalCompilationStart(!IfDef && IsLiteralFalse);
558   parsePPUnknown();
559 }
560 
561 void UnwrappedLineParser::parsePPElse() {
562   conditionalCompilationAlternative();
563   parsePPUnknown();
564 }
565 
566 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
567 
568 void UnwrappedLineParser::parsePPEndIf() {
569   conditionalCompilationEnd();
570   parsePPUnknown();
571 }
572 
573 void UnwrappedLineParser::parsePPDefine() {
574   nextToken();
575 
576   if (FormatTok->Tok.getKind() != tok::identifier) {
577     parsePPUnknown();
578     return;
579   }
580   nextToken();
581   if (FormatTok->Tok.getKind() == tok::l_paren &&
582       FormatTok->WhitespaceRange.getBegin() ==
583           FormatTok->WhitespaceRange.getEnd()) {
584     parseParens();
585   }
586   addUnwrappedLine();
587   Line->Level = 1;
588 
589   // Errors during a preprocessor directive can only affect the layout of the
590   // preprocessor directive, and thus we ignore them. An alternative approach
591   // would be to use the same approach we use on the file level (no
592   // re-indentation if there was a structural error) within the macro
593   // definition.
594   parseFile();
595 }
596 
597 void UnwrappedLineParser::parsePPUnknown() {
598   do {
599     nextToken();
600   } while (!eof());
601   addUnwrappedLine();
602 }
603 
604 // Here we blacklist certain tokens that are not usually the first token in an
605 // unwrapped line. This is used in attempt to distinguish macro calls without
606 // trailing semicolons from other constructs split to several lines.
607 static bool tokenCanStartNewLine(const clang::Token &Tok) {
608   // Semicolon can be a null-statement, l_square can be a start of a macro or
609   // a C++11 attribute, but this doesn't seem to be common.
610   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
611          Tok.isNot(tok::l_square) &&
612          // Tokens that can only be used as binary operators and a part of
613          // overloaded operator names.
614          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
615          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
616          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
617          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
618          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
619          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
620          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
621          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
622          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
623          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
624          Tok.isNot(tok::lesslessequal) &&
625          // Colon is used in labels, base class lists, initializer lists,
626          // range-based for loops, ternary operator, but should never be the
627          // first token in an unwrapped line.
628          Tok.isNot(tok::colon) &&
629          // 'noexcept' is a trailing annotation.
630          Tok.isNot(tok::kw_noexcept);
631 }
632 
/// \brief Parses one structural element (statement, declaration, label, ...)
/// starting at the current token, which must not be an '{'.
///
/// Works in two stages: the first switch dispatches on the leading token to
/// dedicated parsers for constructs recognizable by their first token. If
/// none of them takes over, the loop below consumes tokens until something
/// that ends the element is found (';', '}', a block, eof, ...).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->Tok.is(tok::l_brace));
  // Stage 1: constructs that are identified by their first token. Cases that
  // 'return' have handled the whole element; cases that 'break' continue with
  // the generic loop below.
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    // Objective-C: '@{...}' literals and '@keyword' constructs.
    nextToken();
    if (FormatTok->Tok.is(tok::l_brace)) {
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::objc_try:
      // This branch isn't strictly necessary (the kw_try case below would
      // do this too after the tok::at is parsed above).  But be explicit.
      parseTryCatch();
      return;
    default:
      break;
    }
    break;
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      // Brace-delimited inline assembly: tag both braces and mark every token
      // in between as finalized, up to the closing brace or eof.
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    // 'inline namespace' is handled like a namespace; any other use of
    // 'inline' falls through to the generic loop.
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript the keyword is merely consumed here; otherwise
    // it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    parseSwitch();
    return;
  case tok::kw_default:
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    // 'extern "..." {' opens a block that does not add an indent level.
    nextToken();
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (Style.Language == FormatStyle::LK_JavaScript &&
        FormatTok->is(Keywords.kw_import)) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (FormatTok->is(Keywords.kw_signals)) {
      // 'signals' followed by ':' ends the line; without a ':' the element
      // ends here as well, just without emitting a line.
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
      }
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Stage 2: generic token loop. Runs until a case returns or eof is reached.
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace))
        parseBracedList();
      break;
    case tok::kw_enum:
      parseEnum();
      break;
    case tok::kw_typedef:
      // 'typedef' followed by one of the NS_/CF_ enum macros is parsed as an
      // enum.
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      parseRecord();
      // A record declaration or definition is always the start of a structural
      // element.
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the element and is part of its line.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // A closing brace ends the element but is left for the caller.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::caret:
      // '^' optionally followed by a type/identifier, a parenthesized list
      // and a braced body; the body is parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      // Parse function literal unless 'function' is the first token in a line
      // in which case this should be treated as a free-standing function.
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        parseRecord();
        break;
      }

      // Remember the identifier's spelling; it is needed for the macro
      // heuristic below after the token has been consumed.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        // If comments are pending, the first of them decides whether a
        // newline follows; otherwise the current token does.
        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Treat as a macro only if the identifier is all upper case, is
        // either function-like or at least five characters long, is followed
        // by a newline, and the next token may start a line.
        if (FollowedByNewline &&
            (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBracedList();
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
902 
903 bool UnwrappedLineParser::tryToParseLambda() {
904   // FIXME: This is a dirty way to access the previous token. Find a better
905   // solution.
906   if (!Line->Tokens.empty() &&
907       (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
908                                         tok::kw_new, tok::kw_delete) ||
909        Line->Tokens.back().Tok->closesScope() ||
910        Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
911     nextToken();
912     return false;
913   }
914   assert(FormatTok->is(tok::l_square));
915   FormatToken &LSquare = *FormatTok;
916   if (!tryToParseLambdaIntroducer())
917     return false;
918 
919   while (FormatTok->isNot(tok::l_brace)) {
920     if (FormatTok->isSimpleTypeSpecifier()) {
921       nextToken();
922       continue;
923     }
924     switch (FormatTok->Tok.getKind()) {
925     case tok::l_brace:
926       break;
927     case tok::l_paren:
928       parseParens();
929       break;
930     case tok::amp:
931     case tok::star:
932     case tok::kw_const:
933     case tok::comma:
934     case tok::less:
935     case tok::greater:
936     case tok::identifier:
937     case tok::coloncolon:
938     case tok::kw_mutable:
939       nextToken();
940       break;
941     case tok::arrow:
942       FormatTok->Type = TT_TrailingReturnArrow;
943       nextToken();
944       break;
945     default:
946       return true;
947     }
948   }
949   LSquare.Type = TT_LambdaLSquare;
950   parseChildBlock();
951   return true;
952 }
953 
954 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
955   nextToken();
956   if (FormatTok->is(tok::equal)) {
957     nextToken();
958     if (FormatTok->is(tok::r_square)) {
959       nextToken();
960       return true;
961     }
962     if (FormatTok->isNot(tok::comma))
963       return false;
964     nextToken();
965   } else if (FormatTok->is(tok::amp)) {
966     nextToken();
967     if (FormatTok->is(tok::r_square)) {
968       nextToken();
969       return true;
970     }
971     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
972       return false;
973     }
974     if (FormatTok->is(tok::comma))
975       nextToken();
976   } else if (FormatTok->is(tok::r_square)) {
977     nextToken();
978     return true;
979   }
980   do {
981     if (FormatTok->is(tok::amp))
982       nextToken();
983     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
984       return false;
985     nextToken();
986     if (FormatTok->is(tok::ellipsis))
987       nextToken();
988     if (FormatTok->is(tok::comma)) {
989       nextToken();
990     } else if (FormatTok->is(tok::r_square)) {
991       nextToken();
992       return true;
993     } else {
994       return false;
995     }
996   } while (!eof());
997   return false;
998 }
999 
1000 void UnwrappedLineParser::tryToParseJSFunction() {
1001   nextToken();
1002 
1003   // Consume function name.
1004   if (FormatTok->is(tok::identifier))
1005     nextToken();
1006 
1007   if (FormatTok->isNot(tok::l_paren))
1008     return;
1009   nextToken();
1010   while (FormatTok->isNot(tok::l_brace)) {
1011     // Err on the side of caution in order to avoid consuming the full file in
1012     // case of incomplete code.
1013     if (!FormatTok->isOneOf(tok::identifier, tok::comma, tok::r_paren,
1014                             tok::comment))
1015       return;
1016     nextToken();
1017   }
1018   parseChildBlock();
1019 }
1020 
1021 bool UnwrappedLineParser::tryToParseBracedList() {
1022   if (FormatTok->BlockKind == BK_Unknown)
1023     calculateBraceTypes();
1024   assert(FormatTok->BlockKind != BK_Unknown);
1025   if (FormatTok->BlockKind == BK_Block)
1026     return false;
1027   parseBracedList();
1028   return true;
1029 }
1030 
1031 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1032   bool HasError = false;
1033   nextToken();
1034 
1035   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1036   // replace this by using parseAssigmentExpression() inside.
1037   do {
1038     if (Style.Language == FormatStyle::LK_JavaScript &&
1039         FormatTok->is(Keywords.kw_function)) {
1040       tryToParseJSFunction();
1041       continue;
1042     }
1043     switch (FormatTok->Tok.getKind()) {
1044     case tok::caret:
1045       nextToken();
1046       if (FormatTok->is(tok::l_brace)) {
1047         parseChildBlock();
1048       }
1049       break;
1050     case tok::l_square:
1051       tryToParseLambda();
1052       break;
1053     case tok::l_brace:
1054       // Assume there are no blocks inside a braced init list apart
1055       // from the ones we explicitly parse out (like lambdas).
1056       FormatTok->BlockKind = BK_BracedInit;
1057       parseBracedList();
1058       break;
1059     case tok::r_paren:
1060       // JavaScript can just have free standing methods and getters/setters in
1061       // object literals. Detect them by a "{" following ")".
1062       if (Style.Language == FormatStyle::LK_JavaScript) {
1063         nextToken();
1064         if (FormatTok->is(tok::l_brace))
1065           parseChildBlock();
1066         break;
1067       }
1068       nextToken();
1069       break;
1070     case tok::r_brace:
1071       nextToken();
1072       return !HasError;
1073     case tok::semi:
1074       HasError = true;
1075       if (!ContinueOnSemicolons)
1076         return !HasError;
1077       nextToken();
1078       break;
1079     case tok::comma:
1080       nextToken();
1081       break;
1082     default:
1083       nextToken();
1084       break;
1085     }
1086   } while (!eof());
1087   return false;
1088 }
1089 
1090 void UnwrappedLineParser::parseParens() {
1091   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1092   nextToken();
1093   do {
1094     switch (FormatTok->Tok.getKind()) {
1095     case tok::l_paren:
1096       parseParens();
1097       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1098         parseChildBlock();
1099       break;
1100     case tok::r_paren:
1101       nextToken();
1102       return;
1103     case tok::r_brace:
1104       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1105       return;
1106     case tok::l_square:
1107       tryToParseLambda();
1108       break;
1109     case tok::l_brace:
1110       if (!tryToParseBracedList())
1111         parseChildBlock();
1112       break;
1113     case tok::at:
1114       nextToken();
1115       if (FormatTok->Tok.is(tok::l_brace))
1116         parseBracedList();
1117       break;
1118     case tok::identifier:
1119       if (Style.Language == FormatStyle::LK_JavaScript &&
1120           FormatTok->is(Keywords.kw_function))
1121         tryToParseJSFunction();
1122       else
1123         nextToken();
1124       break;
1125     default:
1126       nextToken();
1127       break;
1128     }
1129   } while (!eof());
1130 }
1131 
1132 void UnwrappedLineParser::parseSquare() {
1133   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1134   if (tryToParseLambda())
1135     return;
1136   do {
1137     switch (FormatTok->Tok.getKind()) {
1138     case tok::l_paren:
1139       parseParens();
1140       break;
1141     case tok::r_square:
1142       nextToken();
1143       return;
1144     case tok::r_brace:
1145       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1146       return;
1147     case tok::l_square:
1148       parseSquare();
1149       break;
1150     case tok::l_brace: {
1151       if (!tryToParseBracedList())
1152         parseChildBlock();
1153       break;
1154     }
1155     case tok::at:
1156       nextToken();
1157       if (FormatTok->Tok.is(tok::l_brace))
1158         parseBracedList();
1159       break;
1160     default:
1161       nextToken();
1162       break;
1163     }
1164   } while (!eof());
1165 }
1166 
1167 void UnwrappedLineParser::parseIfThenElse() {
1168   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1169   nextToken();
1170   if (FormatTok->Tok.is(tok::l_paren))
1171     parseParens();
1172   bool NeedsUnwrappedLine = false;
1173   if (FormatTok->Tok.is(tok::l_brace)) {
1174     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1175     parseBlock(/*MustBeDeclaration=*/false);
1176     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1177         Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1178       addUnwrappedLine();
1179     } else {
1180       NeedsUnwrappedLine = true;
1181     }
1182   } else {
1183     addUnwrappedLine();
1184     ++Line->Level;
1185     parseStructuralElement();
1186     --Line->Level;
1187   }
1188   if (FormatTok->Tok.is(tok::kw_else)) {
1189     if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
1190       addUnwrappedLine();
1191     nextToken();
1192     if (FormatTok->Tok.is(tok::l_brace)) {
1193       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1194       parseBlock(/*MustBeDeclaration=*/false);
1195       addUnwrappedLine();
1196     } else if (FormatTok->Tok.is(tok::kw_if)) {
1197       parseIfThenElse();
1198     } else {
1199       addUnwrappedLine();
1200       ++Line->Level;
1201       parseStructuralElement();
1202       --Line->Level;
1203     }
1204   } else if (NeedsUnwrappedLine) {
1205     addUnwrappedLine();
1206   }
1207 }
1208 
1209 void UnwrappedLineParser::parseTryCatch() {
1210   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1211   nextToken();
1212   bool NeedsUnwrappedLine = false;
1213   if (FormatTok->is(tok::colon)) {
1214     // We are in a function try block, what comes is an initializer list.
1215     nextToken();
1216     while (FormatTok->is(tok::identifier)) {
1217       nextToken();
1218       if (FormatTok->is(tok::l_paren))
1219         parseParens();
1220       if (FormatTok->is(tok::comma))
1221         nextToken();
1222     }
1223   }
1224   // Parse try with resource.
1225   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1226     parseParens();
1227   }
1228   if (FormatTok->is(tok::l_brace)) {
1229     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1230     parseBlock(/*MustBeDeclaration=*/false);
1231     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1232         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1233         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1234       addUnwrappedLine();
1235     } else {
1236       NeedsUnwrappedLine = true;
1237     }
1238   } else if (!FormatTok->is(tok::kw_catch)) {
1239     // The C++ standard requires a compound-statement after a try.
1240     // If there's none, we try to assume there's a structuralElement
1241     // and try to continue.
1242     addUnwrappedLine();
1243     ++Line->Level;
1244     parseStructuralElement();
1245     --Line->Level;
1246   }
1247   while (1) {
1248     if (FormatTok->is(tok::at))
1249       nextToken();
1250     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1251                              tok::kw___finally) ||
1252           ((Style.Language == FormatStyle::LK_Java ||
1253             Style.Language == FormatStyle::LK_JavaScript) &&
1254            FormatTok->is(Keywords.kw_finally)) ||
1255           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1256            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1257       break;
1258     nextToken();
1259     while (FormatTok->isNot(tok::l_brace)) {
1260       if (FormatTok->is(tok::l_paren)) {
1261         parseParens();
1262         continue;
1263       }
1264       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1265         return;
1266       nextToken();
1267     }
1268     NeedsUnwrappedLine = false;
1269     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1270     parseBlock(/*MustBeDeclaration=*/false);
1271     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1272         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1273         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1274       addUnwrappedLine();
1275     } else {
1276       NeedsUnwrappedLine = true;
1277     }
1278   }
1279   if (NeedsUnwrappedLine) {
1280     addUnwrappedLine();
1281   }
1282 }
1283 
1284 void UnwrappedLineParser::parseNamespace() {
1285   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1286 
1287   const FormatToken &InitialToken = *FormatTok;
1288   nextToken();
1289   if (FormatTok->Tok.is(tok::identifier))
1290     nextToken();
1291   if (FormatTok->Tok.is(tok::l_brace)) {
1292     if (ShouldBreakBeforeBrace(Style, InitialToken))
1293       addUnwrappedLine();
1294 
1295     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1296                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1297                      DeclarationScopeStack.size() > 1);
1298     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1299     // Munch the semicolon after a namespace. This is more common than one would
1300     // think. Puttin the semicolon into its own line is very ugly.
1301     if (FormatTok->Tok.is(tok::semi))
1302       nextToken();
1303     addUnwrappedLine();
1304   }
1305   // FIXME: Add error handling.
1306 }
1307 
1308 void UnwrappedLineParser::parseNew() {
1309   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1310   nextToken();
1311   if (Style.Language != FormatStyle::LK_Java)
1312     return;
1313 
1314   // In Java, we can parse everything up to the parens, which aren't optional.
1315   do {
1316     // There should not be a ;, { or } before the new's open paren.
1317     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1318       return;
1319 
1320     // Consume the parens.
1321     if (FormatTok->is(tok::l_paren)) {
1322       parseParens();
1323 
1324       // If there is a class body of an anonymous class, consume that as child.
1325       if (FormatTok->is(tok::l_brace))
1326         parseChildBlock();
1327       return;
1328     }
1329     nextToken();
1330   } while (!eof());
1331 }
1332 
1333 void UnwrappedLineParser::parseForOrWhileLoop() {
1334   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1335          "'for', 'while' or foreach macro expected");
1336   nextToken();
1337   if (FormatTok->Tok.is(tok::l_paren))
1338     parseParens();
1339   if (FormatTok->Tok.is(tok::l_brace)) {
1340     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1341     parseBlock(/*MustBeDeclaration=*/false);
1342     addUnwrappedLine();
1343   } else {
1344     addUnwrappedLine();
1345     ++Line->Level;
1346     parseStructuralElement();
1347     --Line->Level;
1348   }
1349 }
1350 
1351 void UnwrappedLineParser::parseDoWhile() {
1352   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1353   nextToken();
1354   if (FormatTok->Tok.is(tok::l_brace)) {
1355     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1356     parseBlock(/*MustBeDeclaration=*/false);
1357     if (Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1358       addUnwrappedLine();
1359   } else {
1360     addUnwrappedLine();
1361     ++Line->Level;
1362     parseStructuralElement();
1363     --Line->Level;
1364   }
1365 
1366   // FIXME: Add error handling.
1367   if (!FormatTok->Tok.is(tok::kw_while)) {
1368     addUnwrappedLine();
1369     return;
1370   }
1371 
1372   nextToken();
1373   parseStructuralElement();
1374 }
1375 
1376 void UnwrappedLineParser::parseLabel() {
1377   nextToken();
1378   unsigned OldLineLevel = Line->Level;
1379   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1380     --Line->Level;
1381   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1382     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1383     parseBlock(/*MustBeDeclaration=*/false);
1384     if (FormatTok->Tok.is(tok::kw_break)) {
1385       // "break;" after "}" on its own line only for BS_Allman and BS_GNU
1386       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1387           Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1388         addUnwrappedLine();
1389       }
1390       parseStructuralElement();
1391     }
1392     addUnwrappedLine();
1393   } else {
1394     if (FormatTok->is(tok::semi))
1395       nextToken();
1396     addUnwrappedLine();
1397   }
1398   Line->Level = OldLineLevel;
1399 }
1400 
1401 void UnwrappedLineParser::parseCaseLabel() {
1402   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1403   // FIXME: fix handling of complex expressions here.
1404   do {
1405     nextToken();
1406   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1407   parseLabel();
1408 }
1409 
1410 void UnwrappedLineParser::parseSwitch() {
1411   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1412   nextToken();
1413   if (FormatTok->Tok.is(tok::l_paren))
1414     parseParens();
1415   if (FormatTok->Tok.is(tok::l_brace)) {
1416     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1417     parseBlock(/*MustBeDeclaration=*/false);
1418     addUnwrappedLine();
1419   } else {
1420     addUnwrappedLine();
1421     ++Line->Level;
1422     parseStructuralElement();
1423     --Line->Level;
1424   }
1425 }
1426 
1427 void UnwrappedLineParser::parseAccessSpecifier() {
1428   nextToken();
1429   // Understand Qt's slots.
1430   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1431     nextToken();
1432   // Otherwise, we don't know what it is, and we'd better keep the next token.
1433   if (FormatTok->Tok.is(tok::colon))
1434     nextToken();
1435   addUnwrappedLine();
1436 }
1437 
1438 void UnwrappedLineParser::parseEnum() {
1439   // Won't be 'enum' for NS_ENUMs.
1440   if (FormatTok->Tok.is(tok::kw_enum))
1441     nextToken();
1442 
1443   // Eat up enum class ...
1444   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1445     nextToken();
1446   while (FormatTok->Tok.getIdentifierInfo() ||
1447          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1448                             tok::greater, tok::comma, tok::question)) {
1449     nextToken();
1450     // We can have macros or attributes in between 'enum' and the enum name.
1451     if (FormatTok->is(tok::l_paren))
1452       parseParens();
1453     if (FormatTok->is(tok::identifier))
1454       nextToken();
1455   }
1456 
1457   // Just a declaration or something is wrong.
1458   if (FormatTok->isNot(tok::l_brace))
1459     return;
1460   FormatTok->BlockKind = BK_Block;
1461 
1462   if (Style.Language == FormatStyle::LK_Java) {
1463     // Java enums are different.
1464     parseJavaEnumBody();
1465     return;
1466   }
1467 
1468   // Parse enum body.
1469   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1470   if (HasError) {
1471     if (FormatTok->is(tok::semi))
1472       nextToken();
1473     addUnwrappedLine();
1474   }
1475 
1476   // We fall through to parsing a structural element afterwards, so that in
1477   // enum A {} n, m;
1478   // "} n, m;" will end up in one unwrapped line.
1479 }
1480 
1481 void UnwrappedLineParser::parseJavaEnumBody() {
1482   // Determine whether the enum is simple, i.e. does not have a semicolon or
1483   // constants with class bodies. Simple enums can be formatted like braced
1484   // lists, contracted to a single line, etc.
1485   unsigned StoredPosition = Tokens->getPosition();
1486   bool IsSimple = true;
1487   FormatToken *Tok = Tokens->getNextToken();
1488   while (Tok) {
1489     if (Tok->is(tok::r_brace))
1490       break;
1491     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1492       IsSimple = false;
1493       break;
1494     }
1495     // FIXME: This will also mark enums with braces in the arguments to enum
1496     // constants as "not simple". This is probably fine in practice, though.
1497     Tok = Tokens->getNextToken();
1498   }
1499   FormatTok = Tokens->setPosition(StoredPosition);
1500 
1501   if (IsSimple) {
1502     parseBracedList();
1503     addUnwrappedLine();
1504     return;
1505   }
1506 
1507   // Parse the body of a more complex enum.
1508   // First add a line for everything up to the "{".
1509   nextToken();
1510   addUnwrappedLine();
1511   ++Line->Level;
1512 
1513   // Parse the enum constants.
1514   while (FormatTok) {
1515     if (FormatTok->is(tok::l_brace)) {
1516       // Parse the constant's class body.
1517       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1518                  /*MunchSemi=*/false);
1519     } else if (FormatTok->is(tok::l_paren)) {
1520       parseParens();
1521     } else if (FormatTok->is(tok::comma)) {
1522       nextToken();
1523       addUnwrappedLine();
1524     } else if (FormatTok->is(tok::semi)) {
1525       nextToken();
1526       addUnwrappedLine();
1527       break;
1528     } else if (FormatTok->is(tok::r_brace)) {
1529       addUnwrappedLine();
1530       break;
1531     } else {
1532       nextToken();
1533     }
1534   }
1535 
1536   // Parse the class body after the enum's ";" if any.
1537   parseLevel(/*HasOpeningBrace=*/true);
1538   nextToken();
1539   --Line->Level;
1540   addUnwrappedLine();
1541 }
1542 
1543 void UnwrappedLineParser::parseRecord() {
1544   const FormatToken &InitialToken = *FormatTok;
1545   nextToken();
1546 
1547 
1548   // The actual identifier can be a nested name specifier, and in macros
1549   // it is often token-pasted.
1550   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1551                             tok::kw___attribute, tok::kw___declspec,
1552                             tok::kw_alignas) ||
1553          ((Style.Language == FormatStyle::LK_Java ||
1554            Style.Language == FormatStyle::LK_JavaScript) &&
1555           FormatTok->isOneOf(tok::period, tok::comma))) {
1556     bool IsNonMacroIdentifier =
1557         FormatTok->is(tok::identifier) &&
1558         FormatTok->TokenText != FormatTok->TokenText.upper();
1559     nextToken();
1560     // We can have macros or attributes in between 'class' and the class name.
1561     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1562       parseParens();
1563   }
1564 
1565   // Note that parsing away template declarations here leads to incorrectly
1566   // accepting function declarations as record declarations.
1567   // In general, we cannot solve this problem. Consider:
1568   // class A<int> B() {}
1569   // which can be a function definition or a class definition when B() is a
1570   // macro. If we find enough real-world cases where this is a problem, we
1571   // can parse for the 'template' keyword in the beginning of the statement,
1572   // and thus rule out the record production in case there is no template
1573   // (this would still leave us with an ambiguity between template function
1574   // and class declarations).
1575   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1576     while (!eof()) {
1577       if (FormatTok->is(tok::l_brace)) {
1578         calculateBraceTypes(/*ExpectClassBody=*/true);
1579         if (!tryToParseBracedList())
1580           break;
1581       }
1582       if (FormatTok->Tok.is(tok::semi))
1583         return;
1584       nextToken();
1585     }
1586   }
1587   if (FormatTok->Tok.is(tok::l_brace)) {
1588     if (ShouldBreakBeforeBrace(Style, InitialToken))
1589       addUnwrappedLine();
1590 
1591     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1592                /*MunchSemi=*/false);
1593   }
1594   // We fall through to parsing a structural element afterwards, so
1595   // class A {} n, m;
1596   // will end up in one unwrapped line.
1597   // This does not apply for Java and JavaScript.
1598   if (Style.Language == FormatStyle::LK_Java ||
1599       Style.Language == FormatStyle::LK_JavaScript)
1600     addUnwrappedLine();
1601 }
1602 
1603 void UnwrappedLineParser::parseObjCProtocolList() {
1604   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1605   do
1606     nextToken();
1607   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1608   nextToken(); // Skip '>'.
1609 }
1610 
1611 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1612   do {
1613     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1614       nextToken();
1615       addUnwrappedLine();
1616       break;
1617     }
1618     if (FormatTok->is(tok::l_brace)) {
1619       parseBlock(/*MustBeDeclaration=*/false);
1620       // In ObjC interfaces, nothing should be following the "}".
1621       addUnwrappedLine();
1622     } else if (FormatTok->is(tok::r_brace)) {
1623       // Ignore stray "}". parseStructuralElement doesn't consume them.
1624       nextToken();
1625       addUnwrappedLine();
1626     } else {
1627       parseStructuralElement();
1628     }
1629   } while (!eof());
1630 }
1631 
1632 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1633   nextToken();
1634   nextToken(); // interface name
1635 
1636   // @interface can be followed by either a base class, or a category.
1637   if (FormatTok->Tok.is(tok::colon)) {
1638     nextToken();
1639     nextToken(); // base class name
1640   } else if (FormatTok->Tok.is(tok::l_paren))
1641     // Skip category, if present.
1642     parseParens();
1643 
1644   if (FormatTok->Tok.is(tok::less))
1645     parseObjCProtocolList();
1646 
1647   if (FormatTok->Tok.is(tok::l_brace)) {
1648     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1649         Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1650       addUnwrappedLine();
1651     parseBlock(/*MustBeDeclaration=*/true);
1652   }
1653 
1654   // With instance variables, this puts '}' on its own line.  Without instance
1655   // variables, this ends the @interface line.
1656   addUnwrappedLine();
1657 
1658   parseObjCUntilAtEnd();
1659 }
1660 
1661 void UnwrappedLineParser::parseObjCProtocol() {
1662   nextToken();
1663   nextToken(); // protocol name
1664 
1665   if (FormatTok->Tok.is(tok::less))
1666     parseObjCProtocolList();
1667 
1668   // Check for protocol declaration.
1669   if (FormatTok->Tok.is(tok::semi)) {
1670     nextToken();
1671     return addUnwrappedLine();
1672   }
1673 
1674   addUnwrappedLine();
1675   parseObjCUntilAtEnd();
1676 }
1677 
1678 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1679   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1680   nextToken();
1681 
1682   // Consume the "default" in "export default class/function".
1683   if (FormatTok->is(tok::kw_default))
1684     nextToken();
1685 
1686   // Consume "function" and "default function", so that these get parsed as
1687   // free-standing JS functions, i.e. do not require a trailing semicolon.
1688   if (FormatTok->is(Keywords.kw_function)) {
1689     nextToken();
1690     return;
1691   }
1692 
1693   if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, Keywords.kw_var))
1694     return; // Fall through to parsing the corresponding structure.
1695 
1696   if (FormatTok->is(tok::l_brace)) {
1697     FormatTok->BlockKind = BK_Block;
1698     parseBracedList();
1699   }
1700 
1701   while (!eof() && FormatTok->isNot(tok::semi) &&
1702          FormatTok->isNot(tok::l_brace)) {
1703     nextToken();
1704   }
1705 }
1706 
1707 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1708                                                  StringRef Prefix = "") {
1709   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1710                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1711   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1712                                                     E = Line.Tokens.end();
1713        I != E; ++I) {
1714     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1715   }
1716   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1717                                                     E = Line.Tokens.end();
1718        I != E; ++I) {
1719     const UnwrappedLineNode &Node = *I;
1720     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1721              I = Node.Children.begin(),
1722              E = Node.Children.end();
1723          I != E; ++I) {
1724       printDebugInfo(*I, "\nChild: ");
1725     }
1726   }
1727   llvm::dbgs() << "\n";
1728 }
1729 
1730 void UnwrappedLineParser::addUnwrappedLine() {
1731   if (Line->Tokens.empty())
1732     return;
1733   DEBUG({
1734     if (CurrentLines == &Lines)
1735       printDebugInfo(*Line);
1736   });
1737   CurrentLines->push_back(*Line);
1738   Line->Tokens.clear();
1739   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1740     for (SmallVectorImpl<UnwrappedLine>::iterator
1741              I = PreprocessorDirectives.begin(),
1742              E = PreprocessorDirectives.end();
1743          I != E; ++I) {
1744       CurrentLines->push_back(*I);
1745     }
1746     PreprocessorDirectives.clear();
1747   }
1748 }
1749 
1750 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1751 
1752 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1753   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1754          FormatTok.NewlinesBefore > 0;
1755 }
1756 
1757 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1758   bool JustComments = Line->Tokens.empty();
1759   for (SmallVectorImpl<FormatToken *>::const_iterator
1760            I = CommentsBeforeNextToken.begin(),
1761            E = CommentsBeforeNextToken.end();
1762        I != E; ++I) {
1763     if (isOnNewLine(**I) && JustComments)
1764       addUnwrappedLine();
1765     pushToken(*I);
1766   }
1767   if (NewlineBeforeNext && JustComments)
1768     addUnwrappedLine();
1769   CommentsBeforeNextToken.clear();
1770 }
1771 
1772 void UnwrappedLineParser::nextToken() {
1773   if (eof())
1774     return;
1775   flushComments(isOnNewLine(*FormatTok));
1776   pushToken(FormatTok);
1777   readToken();
1778 }
1779 
/// \brief Fetches the next token from the token source into FormatTok.
///
/// Along the way this handles preprocessor directives, conflict-marker tokens,
/// tokens skipped while the top of PPStack is PP_Unreachable, and comments.
/// The function returns as soon as FormatTok is a non-comment token that is
/// not skipped, or when the end of input is reached.
void UnwrappedLineParser::readToken() {
  // Stays true as long as the comments seen so far continue the current line.
  bool CommentsInCurrentLine = true;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A "#" outside of a preprocessor directive that has an unescaped newline
    // before it, or is the first token, starts a directive; parse it right
    // here.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict-marker tokens are routed through the conditional-compilation
    // handlers; the token after each marker is forced onto a new line.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Skip this token while the innermost PPStack entry is PP_Unreachable,
    // unless we are inside a preprocessor directive. Note that "continue"
    // re-evaluates the loop condition, so this still stops at eof.
    if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // Any non-comment token ends the search.
    if (!FormatTok->Tok.is(tok::comment))
      return;
    // Once a comment starts on a new line (or is the first token), this and
    // all later comments are held back instead of joining the current line.
    if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
      CommentsInCurrentLine = false;
    }
    if (CommentsInCurrentLine) {
      pushToken(FormatTok);
    } else {
      CommentsBeforeNextToken.push_back(FormatTok);
    }
  } while (!eof());
}
1828 
1829 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1830   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1831   if (MustBreakBeforeNextToken) {
1832     Line->Tokens.back().Tok->MustBreakBefore = true;
1833     MustBreakBeforeNextToken = false;
1834   }
1835 }
1836 
1837 } // end namespace format
1838 } // end namespace clang
1839