1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         Token(nullptr) {
65     TokenSource = this;
66     Line.Level = 0;
67     Line.InPPDirective = true;
68   }
69 
70   ~ScopedMacroState() override {
71     TokenSource = PreviousTokenSource;
72     ResetToken = Token;
73     Line.InPPDirective = false;
74     Line.Level = PreviousLineLevel;
75   }
76 
77   FormatToken *getNextToken() override {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89   FormatToken *setPosition(unsigned Position) override {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113 
114   FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
119 class ScopedLineState {
120 public:
121   ScopedLineState(UnwrappedLineParser &Parser,
122                   bool SwitchToPreprocessorLines = false)
123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124     if (SwitchToPreprocessorLines)
125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
126     else if (!Parser.Line->Tokens.empty())
127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128     PreBlockLine = std::move(Parser.Line);
129     Parser.Line = llvm::make_unique<UnwrappedLine>();
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line = std::move(PreBlockLine);
140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141       Parser.MustBreakBeforeNextToken = true;
142     Parser.CurrentLines = OriginalLines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147 
148   std::unique_ptr<UnwrappedLine> PreBlockLine;
149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
152 class CompoundStatementIndenter {
153 public:
154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
155                             const FormatStyle &Style, unsigned &LineLevel)
156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
157     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) {
158       Parser->addUnwrappedLine();
159     } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
160       Parser->addUnwrappedLine();
161       ++LineLevel;
162     }
163   }
164   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
165 
166 private:
167   unsigned &LineLevel;
168   unsigned OldLineLevel;
169 };
170 
171 namespace {
172 
173 class IndexedTokenSource : public FormatTokenSource {
174 public:
175   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
176       : Tokens(Tokens), Position(-1) {}
177 
178   FormatToken *getNextToken() override {
179     ++Position;
180     return Tokens[Position];
181   }
182 
183   unsigned getPosition() override {
184     assert(Position >= 0);
185     return Position;
186   }
187 
188   FormatToken *setPosition(unsigned P) override {
189     Position = P;
190     return Tokens[Position];
191   }
192 
193   void reset() { Position = -1; }
194 
195 private:
196   ArrayRef<FormatToken *> Tokens;
197   int Position;
198 };
199 
200 } // end anonymous namespace
201 
202 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
203                                          const AdditionalKeywords &Keywords,
204                                          ArrayRef<FormatToken *> Tokens,
205                                          UnwrappedLineConsumer &Callback)
206     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
207       CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
208       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
209 
210 void UnwrappedLineParser::reset() {
211   PPBranchLevel = -1;
212   Line.reset(new UnwrappedLine);
213   CommentsBeforeNextToken.clear();
214   FormatTok = nullptr;
215   MustBreakBeforeNextToken = false;
216   PreprocessorDirectives.clear();
217   CurrentLines = &Lines;
218   DeclarationScopeStack.clear();
219   PPStack.clear();
220 }
221 
222 void UnwrappedLineParser::parse() {
223   IndexedTokenSource TokenSource(AllTokens);
224   do {
225     DEBUG(llvm::dbgs() << "----\n");
226     reset();
227     Tokens = &TokenSource;
228     TokenSource.reset();
229 
230     readToken();
231     parseFile();
232     // Create line with eof token.
233     pushToken(FormatTok);
234     addUnwrappedLine();
235 
236     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
237                                                   E = Lines.end();
238          I != E; ++I) {
239       Callback.consumeUnwrappedLine(*I);
240     }
241     Callback.finishRun();
242     Lines.clear();
243     while (!PPLevelBranchIndex.empty() &&
244            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
245       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
246       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
247     }
248     if (!PPLevelBranchIndex.empty()) {
249       ++PPLevelBranchIndex.back();
250       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
251       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
252     }
253   } while (!PPLevelBranchIndex.empty());
254 }
255 
256 void UnwrappedLineParser::parseFile() {
257   // The top-level context in a file always has declarations, except for pre-
258   // processor directives and JavaScript files.
259   bool MustBeDeclaration =
260       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
261   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
262                                           MustBeDeclaration);
263   parseLevel(/*HasOpeningBrace=*/false);
264   // Make sure to format the remaining tokens.
265   flushComments(true);
266   addUnwrappedLine();
267 }
268 
269 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
270   bool SwitchLabelEncountered = false;
271   do {
272     tok::TokenKind kind = FormatTok->Tok.getKind();
273     if (FormatTok->Type == TT_MacroBlockBegin) {
274       kind = tok::l_brace;
275     } else if (FormatTok->Type == TT_MacroBlockEnd) {
276       kind = tok::r_brace;
277     }
278 
279     switch (kind) {
280     case tok::comment:
281       nextToken();
282       addUnwrappedLine();
283       break;
284     case tok::l_brace:
285       // FIXME: Add parameter whether this can happen - if this happens, we must
286       // be in a non-declaration context.
287       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
288         continue;
289       parseBlock(/*MustBeDeclaration=*/false);
290       addUnwrappedLine();
291       break;
292     case tok::r_brace:
293       if (HasOpeningBrace)
294         return;
295       nextToken();
296       addUnwrappedLine();
297       break;
298     case tok::kw_default:
299     case tok::kw_case:
300       if (!SwitchLabelEncountered &&
301           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
302         ++Line->Level;
303       SwitchLabelEncountered = true;
304       parseStructuralElement();
305       break;
306     default:
307       parseStructuralElement();
308       break;
309     }
310   } while (!eof());
311 }
312 
313 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
314   // We'll parse forward through the tokens until we hit
315   // a closing brace or eof - note that getNextToken() will
316   // parse macros, so this will magically work inside macro
317   // definitions, too.
318   unsigned StoredPosition = Tokens->getPosition();
319   FormatToken *Tok = FormatTok;
320   // Keep a stack of positions of lbrace tokens. We will
321   // update information about whether an lbrace starts a
322   // braced init list or a different block during the loop.
323   SmallVector<FormatToken *, 8> LBraceStack;
324   assert(Tok->Tok.is(tok::l_brace));
325   do {
326     // Get next none-comment token.
327     FormatToken *NextTok;
328     unsigned ReadTokens = 0;
329     do {
330       NextTok = Tokens->getNextToken();
331       ++ReadTokens;
332     } while (NextTok->is(tok::comment));
333 
334     switch (Tok->Tok.getKind()) {
335     case tok::l_brace:
336       Tok->BlockKind = BK_Unknown;
337       LBraceStack.push_back(Tok);
338       break;
339     case tok::r_brace:
340       if (!LBraceStack.empty()) {
341         if (LBraceStack.back()->BlockKind == BK_Unknown) {
342           bool ProbablyBracedList = false;
343           if (Style.Language == FormatStyle::LK_Proto) {
344             ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
345           } else {
346             // Using OriginalColumn to distinguish between ObjC methods and
347             // binary operators is a bit hacky.
348             bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
349                                     NextTok->OriginalColumn == 0;
350 
351             // If there is a comma, semicolon or right paren after the closing
352             // brace, we assume this is a braced initializer list.  Note that
353             // regardless how we mark inner braces here, we will overwrite the
354             // BlockKind later if we parse a braced list (where all blocks
355             // inside are by default braced lists), or when we explicitly detect
356             // blocks (for example while parsing lambdas).
357             //
358             // We exclude + and - as they can be ObjC visibility modifiers.
359             ProbablyBracedList =
360                 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
361                                  tok::r_paren, tok::r_square, tok::l_brace,
362                                  tok::l_paren, tok::ellipsis) ||
363                 (NextTok->is(tok::semi) &&
364                  (!ExpectClassBody || LBraceStack.size() != 1)) ||
365                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
366           }
367           if (ProbablyBracedList) {
368             Tok->BlockKind = BK_BracedInit;
369             LBraceStack.back()->BlockKind = BK_BracedInit;
370           } else {
371             Tok->BlockKind = BK_Block;
372             LBraceStack.back()->BlockKind = BK_Block;
373           }
374         }
375         LBraceStack.pop_back();
376       }
377       break;
378     case tok::at:
379     case tok::semi:
380     case tok::kw_if:
381     case tok::kw_while:
382     case tok::kw_for:
383     case tok::kw_switch:
384     case tok::kw_try:
385     case tok::kw___try:
386       if (!LBraceStack.empty())
387         LBraceStack.back()->BlockKind = BK_Block;
388       break;
389     default:
390       break;
391     }
392     Tok = NextTok;
393   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
394   // Assume other blocks for all unclosed opening braces.
395   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
396     if (LBraceStack[i]->BlockKind == BK_Unknown)
397       LBraceStack[i]->BlockKind = BK_Block;
398   }
399 
400   FormatTok = Tokens->setPosition(StoredPosition);
401 }
402 
403 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
404                                      bool MunchSemi) {
405   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
406          "'{' or macro block token expected");
407   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
408 
409   unsigned InitialLevel = Line->Level;
410   nextToken();
411 
412   if (MacroBlock && FormatTok->is(tok::l_paren))
413     parseParens();
414 
415   addUnwrappedLine();
416 
417   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
418                                           MustBeDeclaration);
419   if (AddLevel)
420     ++Line->Level;
421   parseLevel(/*HasOpeningBrace=*/true);
422 
423   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
424                  : !FormatTok->is(tok::r_brace)) {
425     Line->Level = InitialLevel;
426     return;
427   }
428 
429   nextToken(); // Munch the closing brace.
430 
431   if (MacroBlock && FormatTok->is(tok::l_paren))
432     parseParens();
433 
434   if (MunchSemi && FormatTok->Tok.is(tok::semi))
435     nextToken();
436   Line->Level = InitialLevel;
437 }
438 
439 static bool isGoogScope(const UnwrappedLine &Line) {
440   // FIXME: Closure-library specific stuff should not be hard-coded but be
441   // configurable.
442   if (Line.Tokens.size() < 4)
443     return false;
444   auto I = Line.Tokens.begin();
445   if (I->Tok->TokenText != "goog")
446     return false;
447   ++I;
448   if (I->Tok->isNot(tok::period))
449     return false;
450   ++I;
451   if (I->Tok->TokenText != "scope")
452     return false;
453   ++I;
454   return I->Tok->is(tok::l_paren);
455 }
456 
457 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
458                                    const FormatToken &InitialToken) {
459   switch (Style.BreakBeforeBraces) {
460   case FormatStyle::BS_Linux:
461     return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class);
462   case FormatStyle::BS_Mozilla:
463     return InitialToken.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union);
464   case FormatStyle::BS_Allman:
465   case FormatStyle::BS_GNU:
466     return true;
467   default:
468     return false;
469   }
470 }
471 
472 void UnwrappedLineParser::parseChildBlock() {
473   FormatTok->BlockKind = BK_Block;
474   nextToken();
475   {
476     bool GoogScope =
477         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
478     ScopedLineState LineState(*this);
479     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
480                                             /*MustBeDeclaration=*/false);
481     Line->Level += GoogScope ? 0 : 1;
482     parseLevel(/*HasOpeningBrace=*/true);
483     flushComments(isOnNewLine(*FormatTok));
484     Line->Level -= GoogScope ? 0 : 1;
485   }
486   nextToken();
487 }
488 
489 void UnwrappedLineParser::parsePPDirective() {
490   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
491   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
492   nextToken();
493 
494   if (!FormatTok->Tok.getIdentifierInfo()) {
495     parsePPUnknown();
496     return;
497   }
498 
499   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
500   case tok::pp_define:
501     parsePPDefine();
502     return;
503   case tok::pp_if:
504     parsePPIf(/*IfDef=*/false);
505     break;
506   case tok::pp_ifdef:
507   case tok::pp_ifndef:
508     parsePPIf(/*IfDef=*/true);
509     break;
510   case tok::pp_else:
511     parsePPElse();
512     break;
513   case tok::pp_elif:
514     parsePPElIf();
515     break;
516   case tok::pp_endif:
517     parsePPEndIf();
518     break;
519   default:
520     parsePPUnknown();
521     break;
522   }
523 }
524 
525 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
526   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
527     PPStack.push_back(PP_Unreachable);
528   else
529     PPStack.push_back(PP_Conditional);
530 }
531 
532 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
533   ++PPBranchLevel;
534   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
535   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
536     PPLevelBranchIndex.push_back(0);
537     PPLevelBranchCount.push_back(0);
538   }
539   PPChainBranchIndex.push(0);
540   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
541   conditionalCompilationCondition(Unreachable || Skip);
542 }
543 
544 void UnwrappedLineParser::conditionalCompilationAlternative() {
545   if (!PPStack.empty())
546     PPStack.pop_back();
547   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
548   if (!PPChainBranchIndex.empty())
549     ++PPChainBranchIndex.top();
550   conditionalCompilationCondition(
551       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
552       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
553 }
554 
555 void UnwrappedLineParser::conditionalCompilationEnd() {
556   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
557   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
558     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
559       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
560     }
561   }
562   // Guard against #endif's without #if.
563   if (PPBranchLevel > 0)
564     --PPBranchLevel;
565   if (!PPChainBranchIndex.empty())
566     PPChainBranchIndex.pop();
567   if (!PPStack.empty())
568     PPStack.pop_back();
569 }
570 
571 void UnwrappedLineParser::parsePPIf(bool IfDef) {
572   nextToken();
573   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
574                          FormatTok->Tok.getLiteralData() != nullptr &&
575                          StringRef(FormatTok->Tok.getLiteralData(),
576                                    FormatTok->Tok.getLength()) == "0") ||
577                         FormatTok->Tok.is(tok::kw_false);
578   conditionalCompilationStart(!IfDef && IsLiteralFalse);
579   parsePPUnknown();
580 }
581 
/// \brief Handles #else: switches to the next branch of the current
/// conditional chain and consumes the rest of the directive.
void UnwrappedLineParser::parsePPElse() {
  conditionalCompilationAlternative();
  parsePPUnknown();
}
586 
587 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
588 
/// \brief Handles #endif: closes the current conditional chain and consumes
/// the rest of the directive.
void UnwrappedLineParser::parsePPEndIf() {
  conditionalCompilationEnd();
  parsePPUnknown();
}
593 
594 void UnwrappedLineParser::parsePPDefine() {
595   nextToken();
596 
597   if (FormatTok->Tok.getKind() != tok::identifier) {
598     parsePPUnknown();
599     return;
600   }
601   nextToken();
602   if (FormatTok->Tok.getKind() == tok::l_paren &&
603       FormatTok->WhitespaceRange.getBegin() ==
604           FormatTok->WhitespaceRange.getEnd()) {
605     parseParens();
606   }
607   addUnwrappedLine();
608   Line->Level = 1;
609 
610   // Errors during a preprocessor directive can only affect the layout of the
611   // preprocessor directive, and thus we ignore them. An alternative approach
612   // would be to use the same approach we use on the file level (no
613   // re-indentation if there was a structural error) within the macro
614   // definition.
615   parseFile();
616 }
617 
618 void UnwrappedLineParser::parsePPUnknown() {
619   do {
620     nextToken();
621   } while (!eof());
622   addUnwrappedLine();
623 }
624 
625 // Here we blacklist certain tokens that are not usually the first token in an
626 // unwrapped line. This is used in attempt to distinguish macro calls without
627 // trailing semicolons from other constructs split to several lines.
628 static bool tokenCanStartNewLine(const clang::Token &Tok) {
629   // Semicolon can be a null-statement, l_square can be a start of a macro or
630   // a C++11 attribute, but this doesn't seem to be common.
631   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
632          Tok.isNot(tok::l_square) &&
633          // Tokens that can only be used as binary operators and a part of
634          // overloaded operator names.
635          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
636          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
637          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
638          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
639          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
640          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
641          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
642          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
643          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
644          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
645          Tok.isNot(tok::lesslessequal) &&
646          // Colon is used in labels, base class lists, initializer lists,
647          // range-based for loops, ternary operator, but should never be the
648          // first token in an unwrapped line.
649          Tok.isNot(tok::colon) &&
650          // 'noexcept' is a trailing annotation.
651          Tok.isNot(tok::kw_noexcept);
652 }
653 
/// \brief Parses one structural element starting at the current token.
///
/// The function works in two phases. The first switch looks at the leading
/// token and hands constructs with a dedicated parser (namespaces, control
/// flow statements, access specifiers, Objective-C directives, ...) over to
/// it. Everything else falls through to the loop below, which consumes tokens
/// until something ends the element: a semicolon, a closing brace, a block, a
/// recognized macro invocation, or eof.
///
/// Must not be called with the current token being a '{' (see the assert).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->Tok.is(tok::l_brace));
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    // '@' directly followed by '{' is parsed as a braced list.
    if (FormatTok->Tok.is(tok::l_brace)) {
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::objc_autoreleasepool:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Allman and GNU styles put the opening brace on its own line.
        if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
            Style.BreakBeforeBraces == FormatStyle::BS_GNU)
          addUnwrappedLine();
        parseBlock(/*MustBeDeclaration=*/false);
      }
      addUnwrappedLine();
      return;
    case tok::objc_try:
      // This branch isn't strictly necessary (the kw_try case below would
      // do this too after the tok::at is parsed above).  But be explicit.
      parseTryCatch();
      return;
    default:
      break;
    }
    break;
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Every token up to the closing brace is marked as finalized; the
      // closing brace ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    // "inline namespace" is handled like a plain namespace.
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript the keyword is just consumed; in other languages
    // it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    parseSwitch();
    return;
  case tok::kw_default:
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // extern "..." { ... }: the block is parsed without adding a level.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (Style.Language == FormatStyle::LK_JavaScript &&
        FormatTok->is(Keywords.kw_import)) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    // "signals" followed by ':' ends the line right after the colon.
    if (FormatTok->is(Keywords.kw_signals)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
      }
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume tokens until the element is complete.
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace))
        parseBracedList();
      break;
    case tok::kw_enum:
      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      parseEnum();
      // This only applies for C++.
      if (Style.Language != FormatStyle::LK_Cpp) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // typedef followed by one of the NS_/CF_ ENUM/OPTIONS keywords is
      // parsed as an enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      break;
    case tok::semi:
      // A semicolon terminates the element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is left for the caller to consume.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::caret:
      // '^', optionally followed by an identifier or simple type, a
      // parenthesized list and a block that is parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      // The macro block end token is left for the caller to consume.
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Parse function literal unless 'function' is the first token in a line
      // in which case this should be treated as a free-standing function.
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // Remember the identifier's text; FormatTok moves on below.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      // The checks below only apply when the identifier is the first token of
      // the line.
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        // If comments are pending, the first of them decides whether a
        // newline follows; otherwise the next token does.
        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is function-like or at least
        // five characters long, followed by a newline and a token that can
        // start a line, ends the element here.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '=' followed by '{' introduces a braced list.
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBracedList();
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
966 
967 bool UnwrappedLineParser::tryToParseLambda() {
968   if (Style.Language != FormatStyle::LK_Cpp) {
969     nextToken();
970     return false;
971   }
972   // FIXME: This is a dirty way to access the previous token. Find a better
973   // solution.
974   if (!Line->Tokens.empty() &&
975       (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
976                                         tok::kw_new, tok::kw_delete) ||
977        Line->Tokens.back().Tok->closesScope() ||
978        Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
979     nextToken();
980     return false;
981   }
982   assert(FormatTok->is(tok::l_square));
983   FormatToken &LSquare = *FormatTok;
984   if (!tryToParseLambdaIntroducer())
985     return false;
986 
987   while (FormatTok->isNot(tok::l_brace)) {
988     if (FormatTok->isSimpleTypeSpecifier()) {
989       nextToken();
990       continue;
991     }
992     switch (FormatTok->Tok.getKind()) {
993     case tok::l_brace:
994       break;
995     case tok::l_paren:
996       parseParens();
997       break;
998     case tok::amp:
999     case tok::star:
1000     case tok::kw_const:
1001     case tok::comma:
1002     case tok::less:
1003     case tok::greater:
1004     case tok::identifier:
1005     case tok::numeric_constant:
1006     case tok::coloncolon:
1007     case tok::kw_mutable:
1008       nextToken();
1009       break;
1010     case tok::arrow:
1011       FormatTok->Type = TT_LambdaArrow;
1012       nextToken();
1013       break;
1014     default:
1015       return true;
1016     }
1017   }
1018   LSquare.Type = TT_LambdaLSquare;
1019   parseChildBlock();
1020   return true;
1021 }
1022 
1023 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1024   nextToken();
1025   if (FormatTok->is(tok::equal)) {
1026     nextToken();
1027     if (FormatTok->is(tok::r_square)) {
1028       nextToken();
1029       return true;
1030     }
1031     if (FormatTok->isNot(tok::comma))
1032       return false;
1033     nextToken();
1034   } else if (FormatTok->is(tok::amp)) {
1035     nextToken();
1036     if (FormatTok->is(tok::r_square)) {
1037       nextToken();
1038       return true;
1039     }
1040     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1041       return false;
1042     }
1043     if (FormatTok->is(tok::comma))
1044       nextToken();
1045   } else if (FormatTok->is(tok::r_square)) {
1046     nextToken();
1047     return true;
1048   }
1049   do {
1050     if (FormatTok->is(tok::amp))
1051       nextToken();
1052     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1053       return false;
1054     nextToken();
1055     if (FormatTok->is(tok::ellipsis))
1056       nextToken();
1057     if (FormatTok->is(tok::comma)) {
1058       nextToken();
1059     } else if (FormatTok->is(tok::r_square)) {
1060       nextToken();
1061       return true;
1062     } else {
1063       return false;
1064     }
1065   } while (!eof());
1066   return false;
1067 }
1068 
1069 void UnwrappedLineParser::tryToParseJSFunction() {
1070   nextToken();
1071 
1072   // Consume function name.
1073   if (FormatTok->is(tok::identifier))
1074     nextToken();
1075 
1076   if (FormatTok->isNot(tok::l_paren))
1077     return;
1078 
1079   // Parse formal parameter list.
1080   parseParens();
1081 
1082   if (FormatTok->is(tok::colon)) {
1083     // Parse a type definition.
1084     nextToken();
1085 
1086     // Eat the type declaration. For braced inline object types, balance braces,
1087     // otherwise just parse until finding an l_brace for the function body.
1088     if (FormatTok->is(tok::l_brace))
1089       tryToParseBracedList();
1090     else
1091       while (FormatTok->isNot(tok::l_brace) && !eof())
1092         nextToken();
1093   }
1094 
1095   parseChildBlock();
1096 }
1097 
1098 bool UnwrappedLineParser::tryToParseBracedList() {
1099   if (FormatTok->BlockKind == BK_Unknown)
1100     calculateBraceTypes();
1101   assert(FormatTok->BlockKind != BK_Unknown);
1102   if (FormatTok->BlockKind == BK_Block)
1103     return false;
1104   parseBracedList();
1105   return true;
1106 }
1107 
1108 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1109   bool HasError = false;
1110   nextToken();
1111 
1112   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1113   // replace this by using parseAssigmentExpression() inside.
1114   do {
1115     if (Style.Language == FormatStyle::LK_JavaScript) {
1116       if (FormatTok->is(Keywords.kw_function)) {
1117         tryToParseJSFunction();
1118         continue;
1119       }
1120       if (FormatTok->is(TT_JsFatArrow)) {
1121         nextToken();
1122         // Fat arrows can be followed by simple expressions or by child blocks
1123         // in curly braces.
1124         if (FormatTok->is(tok::l_brace)) {
1125           parseChildBlock();
1126           continue;
1127         }
1128       }
1129     }
1130     switch (FormatTok->Tok.getKind()) {
1131     case tok::caret:
1132       nextToken();
1133       if (FormatTok->is(tok::l_brace)) {
1134         parseChildBlock();
1135       }
1136       break;
1137     case tok::l_square:
1138       tryToParseLambda();
1139       break;
1140     case tok::l_brace:
1141       // Assume there are no blocks inside a braced init list apart
1142       // from the ones we explicitly parse out (like lambdas).
1143       FormatTok->BlockKind = BK_BracedInit;
1144       parseBracedList();
1145       break;
1146     case tok::l_paren:
1147       parseParens();
1148       // JavaScript can just have free standing methods and getters/setters in
1149       // object literals. Detect them by a "{" following ")".
1150       if (Style.Language == FormatStyle::LK_JavaScript) {
1151         if (FormatTok->is(tok::l_brace))
1152           parseChildBlock();
1153         break;
1154       }
1155       break;
1156     case tok::r_brace:
1157       nextToken();
1158       return !HasError;
1159     case tok::semi:
1160       HasError = true;
1161       if (!ContinueOnSemicolons)
1162         return !HasError;
1163       nextToken();
1164       break;
1165     case tok::comma:
1166       nextToken();
1167       break;
1168     default:
1169       nextToken();
1170       break;
1171     }
1172   } while (!eof());
1173   return false;
1174 }
1175 
1176 void UnwrappedLineParser::parseParens() {
1177   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1178   nextToken();
1179   do {
1180     switch (FormatTok->Tok.getKind()) {
1181     case tok::l_paren:
1182       parseParens();
1183       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1184         parseChildBlock();
1185       break;
1186     case tok::r_paren:
1187       nextToken();
1188       return;
1189     case tok::r_brace:
1190       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1191       return;
1192     case tok::l_square:
1193       tryToParseLambda();
1194       break;
1195     case tok::l_brace:
1196       if (!tryToParseBracedList())
1197         parseChildBlock();
1198       break;
1199     case tok::at:
1200       nextToken();
1201       if (FormatTok->Tok.is(tok::l_brace))
1202         parseBracedList();
1203       break;
1204     case tok::identifier:
1205       if (Style.Language == FormatStyle::LK_JavaScript &&
1206           FormatTok->is(Keywords.kw_function))
1207         tryToParseJSFunction();
1208       else
1209         nextToken();
1210       break;
1211     default:
1212       nextToken();
1213       break;
1214     }
1215   } while (!eof());
1216 }
1217 
1218 void UnwrappedLineParser::parseSquare() {
1219   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1220   if (tryToParseLambda())
1221     return;
1222   do {
1223     switch (FormatTok->Tok.getKind()) {
1224     case tok::l_paren:
1225       parseParens();
1226       break;
1227     case tok::r_square:
1228       nextToken();
1229       return;
1230     case tok::r_brace:
1231       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1232       return;
1233     case tok::l_square:
1234       parseSquare();
1235       break;
1236     case tok::l_brace: {
1237       if (!tryToParseBracedList())
1238         parseChildBlock();
1239       break;
1240     }
1241     case tok::at:
1242       nextToken();
1243       if (FormatTok->Tok.is(tok::l_brace))
1244         parseBracedList();
1245       break;
1246     default:
1247       nextToken();
1248       break;
1249     }
1250   } while (!eof());
1251 }
1252 
1253 void UnwrappedLineParser::parseIfThenElse() {
1254   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1255   nextToken();
1256   if (FormatTok->Tok.is(tok::l_paren))
1257     parseParens();
1258   bool NeedsUnwrappedLine = false;
1259   if (FormatTok->Tok.is(tok::l_brace)) {
1260     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1261     parseBlock(/*MustBeDeclaration=*/false);
1262     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1263         Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1264       addUnwrappedLine();
1265     } else {
1266       NeedsUnwrappedLine = true;
1267     }
1268   } else {
1269     addUnwrappedLine();
1270     ++Line->Level;
1271     parseStructuralElement();
1272     --Line->Level;
1273   }
1274   if (FormatTok->Tok.is(tok::kw_else)) {
1275     if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
1276       addUnwrappedLine();
1277     nextToken();
1278     if (FormatTok->Tok.is(tok::l_brace)) {
1279       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1280       parseBlock(/*MustBeDeclaration=*/false);
1281       addUnwrappedLine();
1282     } else if (FormatTok->Tok.is(tok::kw_if)) {
1283       parseIfThenElse();
1284     } else {
1285       addUnwrappedLine();
1286       ++Line->Level;
1287       parseStructuralElement();
1288       --Line->Level;
1289     }
1290   } else if (NeedsUnwrappedLine) {
1291     addUnwrappedLine();
1292   }
1293 }
1294 
1295 void UnwrappedLineParser::parseTryCatch() {
1296   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1297   nextToken();
1298   bool NeedsUnwrappedLine = false;
1299   if (FormatTok->is(tok::colon)) {
1300     // We are in a function try block, what comes is an initializer list.
1301     nextToken();
1302     while (FormatTok->is(tok::identifier)) {
1303       nextToken();
1304       if (FormatTok->is(tok::l_paren))
1305         parseParens();
1306       if (FormatTok->is(tok::comma))
1307         nextToken();
1308     }
1309   }
1310   // Parse try with resource.
1311   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1312     parseParens();
1313   }
1314   if (FormatTok->is(tok::l_brace)) {
1315     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1316     parseBlock(/*MustBeDeclaration=*/false);
1317     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1318         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1319         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1320       addUnwrappedLine();
1321     } else {
1322       NeedsUnwrappedLine = true;
1323     }
1324   } else if (!FormatTok->is(tok::kw_catch)) {
1325     // The C++ standard requires a compound-statement after a try.
1326     // If there's none, we try to assume there's a structuralElement
1327     // and try to continue.
1328     addUnwrappedLine();
1329     ++Line->Level;
1330     parseStructuralElement();
1331     --Line->Level;
1332   }
1333   while (1) {
1334     if (FormatTok->is(tok::at))
1335       nextToken();
1336     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1337                              tok::kw___finally) ||
1338           ((Style.Language == FormatStyle::LK_Java ||
1339             Style.Language == FormatStyle::LK_JavaScript) &&
1340            FormatTok->is(Keywords.kw_finally)) ||
1341           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1342            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1343       break;
1344     nextToken();
1345     while (FormatTok->isNot(tok::l_brace)) {
1346       if (FormatTok->is(tok::l_paren)) {
1347         parseParens();
1348         continue;
1349       }
1350       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1351         return;
1352       nextToken();
1353     }
1354     NeedsUnwrappedLine = false;
1355     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1356     parseBlock(/*MustBeDeclaration=*/false);
1357     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1358         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1359         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1360       addUnwrappedLine();
1361     } else {
1362       NeedsUnwrappedLine = true;
1363     }
1364   }
1365   if (NeedsUnwrappedLine) {
1366     addUnwrappedLine();
1367   }
1368 }
1369 
1370 void UnwrappedLineParser::parseNamespace() {
1371   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1372 
1373   const FormatToken &InitialToken = *FormatTok;
1374   nextToken();
1375   if (FormatTok->Tok.is(tok::identifier))
1376     nextToken();
1377   if (FormatTok->Tok.is(tok::l_brace)) {
1378     if (ShouldBreakBeforeBrace(Style, InitialToken))
1379       addUnwrappedLine();
1380 
1381     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1382                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1383                      DeclarationScopeStack.size() > 1);
1384     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1385     // Munch the semicolon after a namespace. This is more common than one would
1386     // think. Puttin the semicolon into its own line is very ugly.
1387     if (FormatTok->Tok.is(tok::semi))
1388       nextToken();
1389     addUnwrappedLine();
1390   }
1391   // FIXME: Add error handling.
1392 }
1393 
1394 void UnwrappedLineParser::parseNew() {
1395   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1396   nextToken();
1397   if (Style.Language != FormatStyle::LK_Java)
1398     return;
1399 
1400   // In Java, we can parse everything up to the parens, which aren't optional.
1401   do {
1402     // There should not be a ;, { or } before the new's open paren.
1403     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1404       return;
1405 
1406     // Consume the parens.
1407     if (FormatTok->is(tok::l_paren)) {
1408       parseParens();
1409 
1410       // If there is a class body of an anonymous class, consume that as child.
1411       if (FormatTok->is(tok::l_brace))
1412         parseChildBlock();
1413       return;
1414     }
1415     nextToken();
1416   } while (!eof());
1417 }
1418 
1419 void UnwrappedLineParser::parseForOrWhileLoop() {
1420   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1421          "'for', 'while' or foreach macro expected");
1422   nextToken();
1423   if (FormatTok->Tok.is(tok::l_paren))
1424     parseParens();
1425   if (FormatTok->Tok.is(tok::l_brace)) {
1426     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1427     parseBlock(/*MustBeDeclaration=*/false);
1428     addUnwrappedLine();
1429   } else {
1430     addUnwrappedLine();
1431     ++Line->Level;
1432     parseStructuralElement();
1433     --Line->Level;
1434   }
1435 }
1436 
1437 void UnwrappedLineParser::parseDoWhile() {
1438   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1439   nextToken();
1440   if (FormatTok->Tok.is(tok::l_brace)) {
1441     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1442     parseBlock(/*MustBeDeclaration=*/false);
1443     if (Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1444       addUnwrappedLine();
1445   } else {
1446     addUnwrappedLine();
1447     ++Line->Level;
1448     parseStructuralElement();
1449     --Line->Level;
1450   }
1451 
1452   // FIXME: Add error handling.
1453   if (!FormatTok->Tok.is(tok::kw_while)) {
1454     addUnwrappedLine();
1455     return;
1456   }
1457 
1458   nextToken();
1459   parseStructuralElement();
1460 }
1461 
1462 void UnwrappedLineParser::parseLabel() {
1463   nextToken();
1464   unsigned OldLineLevel = Line->Level;
1465   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1466     --Line->Level;
1467   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1468     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1469     parseBlock(/*MustBeDeclaration=*/false);
1470     if (FormatTok->Tok.is(tok::kw_break)) {
1471       // "break;" after "}" on its own line only for BS_Allman and BS_GNU
1472       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1473           Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1474         addUnwrappedLine();
1475       }
1476       parseStructuralElement();
1477     }
1478     addUnwrappedLine();
1479   } else {
1480     if (FormatTok->is(tok::semi))
1481       nextToken();
1482     addUnwrappedLine();
1483   }
1484   Line->Level = OldLineLevel;
1485 }
1486 
1487 void UnwrappedLineParser::parseCaseLabel() {
1488   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1489   // FIXME: fix handling of complex expressions here.
1490   do {
1491     nextToken();
1492   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1493   parseLabel();
1494 }
1495 
1496 void UnwrappedLineParser::parseSwitch() {
1497   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1498   nextToken();
1499   if (FormatTok->Tok.is(tok::l_paren))
1500     parseParens();
1501   if (FormatTok->Tok.is(tok::l_brace)) {
1502     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1503     parseBlock(/*MustBeDeclaration=*/false);
1504     addUnwrappedLine();
1505   } else {
1506     addUnwrappedLine();
1507     ++Line->Level;
1508     parseStructuralElement();
1509     --Line->Level;
1510   }
1511 }
1512 
1513 void UnwrappedLineParser::parseAccessSpecifier() {
1514   nextToken();
1515   // Understand Qt's slots.
1516   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1517     nextToken();
1518   // Otherwise, we don't know what it is, and we'd better keep the next token.
1519   if (FormatTok->Tok.is(tok::colon))
1520     nextToken();
1521   addUnwrappedLine();
1522 }
1523 
1524 void UnwrappedLineParser::parseEnum() {
1525   // Won't be 'enum' for NS_ENUMs.
1526   if (FormatTok->Tok.is(tok::kw_enum))
1527     nextToken();
1528 
1529   // Eat up enum class ...
1530   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1531     nextToken();
1532 
1533   while (FormatTok->Tok.getIdentifierInfo() ||
1534          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1535                             tok::greater, tok::comma, tok::question)) {
1536     nextToken();
1537     // We can have macros or attributes in between 'enum' and the enum name.
1538     if (FormatTok->is(tok::l_paren))
1539       parseParens();
1540     if (FormatTok->is(tok::identifier)) {
1541       nextToken();
1542       // If there are two identifiers in a row, this is likely an elaborate
1543       // return type. In Java, this can be "implements", etc.
1544       if (Style.Language == FormatStyle::LK_Cpp &&
1545           FormatTok->is(tok::identifier))
1546         return;
1547     }
1548   }
1549 
1550   // Just a declaration or something is wrong.
1551   if (FormatTok->isNot(tok::l_brace))
1552     return;
1553   FormatTok->BlockKind = BK_Block;
1554 
1555   if (Style.Language == FormatStyle::LK_Java) {
1556     // Java enums are different.
1557     parseJavaEnumBody();
1558     return;
1559   } else if (Style.Language == FormatStyle::LK_Proto) {
1560     parseBlock(/*MustBeDeclaration=*/true);
1561     return;
1562   }
1563 
1564   // Parse enum body.
1565   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1566   if (HasError) {
1567     if (FormatTok->is(tok::semi))
1568       nextToken();
1569     addUnwrappedLine();
1570   }
1571 
1572   // There is no addUnwrappedLine() here so that we fall through to parsing a
1573   // structural element afterwards. Thus, in "enum A {} n, m;",
1574   // "} n, m;" will end up in one unwrapped line.
1575 }
1576 
1577 void UnwrappedLineParser::parseJavaEnumBody() {
1578   // Determine whether the enum is simple, i.e. does not have a semicolon or
1579   // constants with class bodies. Simple enums can be formatted like braced
1580   // lists, contracted to a single line, etc.
1581   unsigned StoredPosition = Tokens->getPosition();
1582   bool IsSimple = true;
1583   FormatToken *Tok = Tokens->getNextToken();
1584   while (Tok) {
1585     if (Tok->is(tok::r_brace))
1586       break;
1587     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1588       IsSimple = false;
1589       break;
1590     }
1591     // FIXME: This will also mark enums with braces in the arguments to enum
1592     // constants as "not simple". This is probably fine in practice, though.
1593     Tok = Tokens->getNextToken();
1594   }
1595   FormatTok = Tokens->setPosition(StoredPosition);
1596 
1597   if (IsSimple) {
1598     parseBracedList();
1599     addUnwrappedLine();
1600     return;
1601   }
1602 
1603   // Parse the body of a more complex enum.
1604   // First add a line for everything up to the "{".
1605   nextToken();
1606   addUnwrappedLine();
1607   ++Line->Level;
1608 
1609   // Parse the enum constants.
1610   while (FormatTok) {
1611     if (FormatTok->is(tok::l_brace)) {
1612       // Parse the constant's class body.
1613       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1614                  /*MunchSemi=*/false);
1615     } else if (FormatTok->is(tok::l_paren)) {
1616       parseParens();
1617     } else if (FormatTok->is(tok::comma)) {
1618       nextToken();
1619       addUnwrappedLine();
1620     } else if (FormatTok->is(tok::semi)) {
1621       nextToken();
1622       addUnwrappedLine();
1623       break;
1624     } else if (FormatTok->is(tok::r_brace)) {
1625       addUnwrappedLine();
1626       break;
1627     } else {
1628       nextToken();
1629     }
1630   }
1631 
1632   // Parse the class body after the enum's ";" if any.
1633   parseLevel(/*HasOpeningBrace=*/true);
1634   nextToken();
1635   --Line->Level;
1636   addUnwrappedLine();
1637 }
1638 
1639 void UnwrappedLineParser::parseRecord() {
1640   const FormatToken &InitialToken = *FormatTok;
1641   nextToken();
1642 
1643   // The actual identifier can be a nested name specifier, and in macros
1644   // it is often token-pasted.
1645   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1646                             tok::kw___attribute, tok::kw___declspec,
1647                             tok::kw_alignas) ||
1648          ((Style.Language == FormatStyle::LK_Java ||
1649            Style.Language == FormatStyle::LK_JavaScript) &&
1650           FormatTok->isOneOf(tok::period, tok::comma))) {
1651     bool IsNonMacroIdentifier =
1652         FormatTok->is(tok::identifier) &&
1653         FormatTok->TokenText != FormatTok->TokenText.upper();
1654     nextToken();
1655     // We can have macros or attributes in between 'class' and the class name.
1656     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1657       parseParens();
1658   }
1659 
1660   // Note that parsing away template declarations here leads to incorrectly
1661   // accepting function declarations as record declarations.
1662   // In general, we cannot solve this problem. Consider:
1663   // class A<int> B() {}
1664   // which can be a function definition or a class definition when B() is a
1665   // macro. If we find enough real-world cases where this is a problem, we
1666   // can parse for the 'template' keyword in the beginning of the statement,
1667   // and thus rule out the record production in case there is no template
1668   // (this would still leave us with an ambiguity between template function
1669   // and class declarations).
1670   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1671     while (!eof()) {
1672       if (FormatTok->is(tok::l_brace)) {
1673         calculateBraceTypes(/*ExpectClassBody=*/true);
1674         if (!tryToParseBracedList())
1675           break;
1676       }
1677       if (FormatTok->Tok.is(tok::semi))
1678         return;
1679       nextToken();
1680     }
1681   }
1682   if (FormatTok->Tok.is(tok::l_brace)) {
1683     if (ShouldBreakBeforeBrace(Style, InitialToken))
1684       addUnwrappedLine();
1685 
1686     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1687                /*MunchSemi=*/false);
1688   }
1689   // There is no addUnwrappedLine() here so that we fall through to parsing a
1690   // structural element afterwards. Thus, in "class A {} n, m;",
1691   // "} n, m;" will end up in one unwrapped line.
1692 }
1693 
1694 void UnwrappedLineParser::parseObjCProtocolList() {
1695   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1696   do
1697     nextToken();
1698   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1699   nextToken(); // Skip '>'.
1700 }
1701 
1702 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1703   do {
1704     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1705       nextToken();
1706       addUnwrappedLine();
1707       break;
1708     }
1709     if (FormatTok->is(tok::l_brace)) {
1710       parseBlock(/*MustBeDeclaration=*/false);
1711       // In ObjC interfaces, nothing should be following the "}".
1712       addUnwrappedLine();
1713     } else if (FormatTok->is(tok::r_brace)) {
1714       // Ignore stray "}". parseStructuralElement doesn't consume them.
1715       nextToken();
1716       addUnwrappedLine();
1717     } else {
1718       parseStructuralElement();
1719     }
1720   } while (!eof());
1721 }
1722 
1723 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1724   nextToken();
1725   nextToken(); // interface name
1726 
1727   // @interface can be followed by either a base class, or a category.
1728   if (FormatTok->Tok.is(tok::colon)) {
1729     nextToken();
1730     nextToken(); // base class name
1731   } else if (FormatTok->Tok.is(tok::l_paren))
1732     // Skip category, if present.
1733     parseParens();
1734 
1735   if (FormatTok->Tok.is(tok::less))
1736     parseObjCProtocolList();
1737 
1738   if (FormatTok->Tok.is(tok::l_brace)) {
1739     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1740         Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1741       addUnwrappedLine();
1742     parseBlock(/*MustBeDeclaration=*/true);
1743   }
1744 
1745   // With instance variables, this puts '}' on its own line.  Without instance
1746   // variables, this ends the @interface line.
1747   addUnwrappedLine();
1748 
1749   parseObjCUntilAtEnd();
1750 }
1751 
1752 void UnwrappedLineParser::parseObjCProtocol() {
1753   nextToken();
1754   nextToken(); // protocol name
1755 
1756   if (FormatTok->Tok.is(tok::less))
1757     parseObjCProtocolList();
1758 
1759   // Check for protocol declaration.
1760   if (FormatTok->Tok.is(tok::semi)) {
1761     nextToken();
1762     return addUnwrappedLine();
1763   }
1764 
1765   addUnwrappedLine();
1766   parseObjCUntilAtEnd();
1767 }
1768 
1769 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1770   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1771   nextToken();
1772 
1773   // Consume the "default" in "export default class/function".
1774   if (FormatTok->is(tok::kw_default))
1775     nextToken();
1776 
1777   // Consume "function" and "default function", so that these get parsed as
1778   // free-standing JS functions, i.e. do not require a trailing semicolon.
1779   if (FormatTok->is(Keywords.kw_function)) {
1780     nextToken();
1781     return;
1782   }
1783 
1784   if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
1785                          Keywords.kw_var))
1786     return; // Fall through to parsing the corresponding structure.
1787 
1788   if (FormatTok->is(tok::l_brace)) {
1789     FormatTok->BlockKind = BK_Block;
1790     parseBracedList();
1791   }
1792 
1793   while (!eof() && FormatTok->isNot(tok::semi) &&
1794          FormatTok->isNot(tok::l_brace)) {
1795     nextToken();
1796   }
1797 }
1798 
1799 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1800                                                  StringRef Prefix = "") {
1801   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1802                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1803   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1804                                                     E = Line.Tokens.end();
1805        I != E; ++I) {
1806     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1807   }
1808   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1809                                                     E = Line.Tokens.end();
1810        I != E; ++I) {
1811     const UnwrappedLineNode &Node = *I;
1812     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1813              I = Node.Children.begin(),
1814              E = Node.Children.end();
1815          I != E; ++I) {
1816       printDebugInfo(*I, "\nChild: ");
1817     }
1818   }
1819   llvm::dbgs() << "\n";
1820 }
1821 
1822 void UnwrappedLineParser::addUnwrappedLine() {
1823   if (Line->Tokens.empty())
1824     return;
1825   DEBUG({
1826     if (CurrentLines == &Lines)
1827       printDebugInfo(*Line);
1828   });
1829   CurrentLines->push_back(std::move(*Line));
1830   Line->Tokens.clear();
1831   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1832     CurrentLines->append(
1833         std::make_move_iterator(PreprocessorDirectives.begin()),
1834         std::make_move_iterator(PreprocessorDirectives.end()));
1835     PreprocessorDirectives.clear();
1836   }
1837 }
1838 
1839 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1840 
1841 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1842   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1843          FormatTok.NewlinesBefore > 0;
1844 }
1845 
1846 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1847   bool JustComments = Line->Tokens.empty();
1848   for (SmallVectorImpl<FormatToken *>::const_iterator
1849            I = CommentsBeforeNextToken.begin(),
1850            E = CommentsBeforeNextToken.end();
1851        I != E; ++I) {
1852     if (isOnNewLine(**I) && JustComments)
1853       addUnwrappedLine();
1854     pushToken(*I);
1855   }
1856   if (NewlineBeforeNext && JustComments)
1857     addUnwrappedLine();
1858   CommentsBeforeNextToken.clear();
1859 }
1860 
1861 void UnwrappedLineParser::nextToken() {
1862   if (eof())
1863     return;
1864   flushComments(isOnNewLine(*FormatTok));
1865   pushToken(FormatTok);
1866   readToken();
1867 }
1868 
1869 void UnwrappedLineParser::readToken() {
1870   bool CommentsInCurrentLine = true;
1871   do {
1872     FormatTok = Tokens->getNextToken();
1873     assert(FormatTok);
1874     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1875            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1876       // If there is an unfinished unwrapped line, we flush the preprocessor
1877       // directives only after that unwrapped line was finished later.
1878       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
1879       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1880       // Comments stored before the preprocessor directive need to be output
1881       // before the preprocessor directive, at the same level as the
1882       // preprocessor directive, as we consider them to apply to the directive.
1883       flushComments(isOnNewLine(*FormatTok));
1884       parsePPDirective();
1885     }
1886     while (FormatTok->Type == TT_ConflictStart ||
1887            FormatTok->Type == TT_ConflictEnd ||
1888            FormatTok->Type == TT_ConflictAlternative) {
1889       if (FormatTok->Type == TT_ConflictStart) {
1890         conditionalCompilationStart(/*Unreachable=*/false);
1891       } else if (FormatTok->Type == TT_ConflictAlternative) {
1892         conditionalCompilationAlternative();
1893       } else if (FormatTok->Type == TT_ConflictEnd) {
1894         conditionalCompilationEnd();
1895       }
1896       FormatTok = Tokens->getNextToken();
1897       FormatTok->MustBreakBefore = true;
1898     }
1899 
1900     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1901         !Line->InPPDirective) {
1902       continue;
1903     }
1904 
1905     if (!FormatTok->Tok.is(tok::comment))
1906       return;
1907     if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
1908       CommentsInCurrentLine = false;
1909     }
1910     if (CommentsInCurrentLine) {
1911       pushToken(FormatTok);
1912     } else {
1913       CommentsBeforeNextToken.push_back(FormatTok);
1914     }
1915   } while (!eof());
1916 }
1917 
1918 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1919   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1920   if (MustBreakBeforeNextToken) {
1921     Line->Tokens.back().Tok->MustBreakBefore = true;
1922     MustBreakBeforeNextToken = false;
1923   }
1924 }
1925 
1926 } // end namespace format
1927 } // end namespace clang
1928