1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 static bool isLineComment(const FormatToken &FormatTok) {
59   return FormatTok.is(tok::comment) &&
60          FormatTok.TokenText.startswith("//");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     TokenSource = this;
86     Line.Level = 0;
87     Line.InPPDirective = true;
88   }
89 
90   ~ScopedMacroState() override {
91     TokenSource = PreviousTokenSource;
92     ResetToken = Token;
93     Line.InPPDirective = false;
94     Line.Level = PreviousLineLevel;
95   }
96 
97   FormatToken *getNextToken() override {
98     // The \c UnwrappedLineParser guards against this by never calling
99     // \c getNextToken() after it has encountered the first eof token.
100     assert(!eof());
101     PreviousToken = Token;
102     Token = PreviousTokenSource->getNextToken();
103     if (eof())
104       return getFakeEOF();
105     return Token;
106   }
107 
108   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
109 
110   FormatToken *setPosition(unsigned Position) override {
111     PreviousToken = nullptr;
112     Token = PreviousTokenSource->setPosition(Position);
113     return Token;
114   }
115 
116 private:
117   bool eof() {
118     return Token && Token->HasUnescapedNewline &&
119            !continuesLineComment(*Token, PreviousToken,
120                                  /*MinColumnToken=*/PreviousToken);
121   }
122 
123   FormatToken *getFakeEOF() {
124     static bool EOFInitialized = false;
125     static FormatToken FormatTok;
126     if (!EOFInitialized) {
127       FormatTok.Tok.startToken();
128       FormatTok.Tok.setKind(tok::eof);
129       EOFInitialized = true;
130     }
131     return &FormatTok;
132   }
133 
134   UnwrappedLine &Line;
135   FormatTokenSource *&TokenSource;
136   FormatToken *&ResetToken;
137   unsigned PreviousLineLevel;
138   FormatTokenSource *PreviousTokenSource;
139 
140   FormatToken *Token;
141   FormatToken *PreviousToken;
142 };
143 
144 } // end anonymous namespace
145 
146 class ScopedLineState {
147 public:
148   ScopedLineState(UnwrappedLineParser &Parser,
149                   bool SwitchToPreprocessorLines = false)
150       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
151     if (SwitchToPreprocessorLines)
152       Parser.CurrentLines = &Parser.PreprocessorDirectives;
153     else if (!Parser.Line->Tokens.empty())
154       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
155     PreBlockLine = std::move(Parser.Line);
156     Parser.Line = llvm::make_unique<UnwrappedLine>();
157     Parser.Line->Level = PreBlockLine->Level;
158     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
159   }
160 
161   ~ScopedLineState() {
162     if (!Parser.Line->Tokens.empty()) {
163       Parser.addUnwrappedLine();
164     }
165     assert(Parser.Line->Tokens.empty());
166     Parser.Line = std::move(PreBlockLine);
167     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
168       Parser.MustBreakBeforeNextToken = true;
169     Parser.CurrentLines = OriginalLines;
170   }
171 
172 private:
173   UnwrappedLineParser &Parser;
174 
175   std::unique_ptr<UnwrappedLine> PreBlockLine;
176   SmallVectorImpl<UnwrappedLine> *OriginalLines;
177 };
178 
179 class CompoundStatementIndenter {
180 public:
181   CompoundStatementIndenter(UnwrappedLineParser *Parser,
182                             const FormatStyle &Style, unsigned &LineLevel)
183       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
184     if (Style.BraceWrapping.AfterControlStatement)
185       Parser->addUnwrappedLine();
186     if (Style.BraceWrapping.IndentBraces)
187       ++LineLevel;
188   }
189   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
190 
191 private:
192   unsigned &LineLevel;
193   unsigned OldLineLevel;
194 };
195 
196 namespace {
197 
198 class IndexedTokenSource : public FormatTokenSource {
199 public:
200   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
201       : Tokens(Tokens), Position(-1) {}
202 
203   FormatToken *getNextToken() override {
204     ++Position;
205     return Tokens[Position];
206   }
207 
208   unsigned getPosition() override {
209     assert(Position >= 0);
210     return Position;
211   }
212 
213   FormatToken *setPosition(unsigned P) override {
214     Position = P;
215     return Tokens[Position];
216   }
217 
218   void reset() { Position = -1; }
219 
220 private:
221   ArrayRef<FormatToken *> Tokens;
222   int Position;
223 };
224 
225 } // end anonymous namespace
226 
227 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
228                                          const AdditionalKeywords &Keywords,
229                                          ArrayRef<FormatToken *> Tokens,
230                                          UnwrappedLineConsumer &Callback)
231     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
232       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
233       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
234       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
235 
236 void UnwrappedLineParser::reset() {
237   PPBranchLevel = -1;
238   Line.reset(new UnwrappedLine);
239   CommentsBeforeNextToken.clear();
240   FormatTok = nullptr;
241   MustBreakBeforeNextToken = false;
242   PreprocessorDirectives.clear();
243   CurrentLines = &Lines;
244   DeclarationScopeStack.clear();
245   PPStack.clear();
246 }
247 
248 void UnwrappedLineParser::parse() {
249   IndexedTokenSource TokenSource(AllTokens);
250   do {
251     DEBUG(llvm::dbgs() << "----\n");
252     reset();
253     Tokens = &TokenSource;
254     TokenSource.reset();
255 
256     readToken();
257     parseFile();
258     // Create line with eof token.
259     pushToken(FormatTok);
260     addUnwrappedLine();
261 
262     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
263                                                   E = Lines.end();
264          I != E; ++I) {
265       Callback.consumeUnwrappedLine(*I);
266     }
267     Callback.finishRun();
268     Lines.clear();
269     while (!PPLevelBranchIndex.empty() &&
270            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
271       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
272       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
273     }
274     if (!PPLevelBranchIndex.empty()) {
275       ++PPLevelBranchIndex.back();
276       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
277       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
278     }
279   } while (!PPLevelBranchIndex.empty());
280 }
281 
282 void UnwrappedLineParser::parseFile() {
283   // The top-level context in a file always has declarations, except for pre-
284   // processor directives and JavaScript files.
285   bool MustBeDeclaration =
286       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
287   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
288                                           MustBeDeclaration);
289   parseLevel(/*HasOpeningBrace=*/false);
290   // Make sure to format the remaining tokens.
291   flushComments(true);
292   addUnwrappedLine();
293 }
294 
295 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
296   bool SwitchLabelEncountered = false;
297   do {
298     tok::TokenKind kind = FormatTok->Tok.getKind();
299     if (FormatTok->Type == TT_MacroBlockBegin) {
300       kind = tok::l_brace;
301     } else if (FormatTok->Type == TT_MacroBlockEnd) {
302       kind = tok::r_brace;
303     }
304 
305     switch (kind) {
306     case tok::comment:
307       nextToken();
308       addUnwrappedLine();
309       break;
310     case tok::l_brace:
311       // FIXME: Add parameter whether this can happen - if this happens, we must
312       // be in a non-declaration context.
313       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
314         continue;
315       parseBlock(/*MustBeDeclaration=*/false);
316       addUnwrappedLine();
317       break;
318     case tok::r_brace:
319       if (HasOpeningBrace)
320         return;
321       nextToken();
322       addUnwrappedLine();
323       break;
324     case tok::kw_default:
325     case tok::kw_case:
326       if (!SwitchLabelEncountered &&
327           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
328         ++Line->Level;
329       SwitchLabelEncountered = true;
330       parseStructuralElement();
331       break;
332     default:
333       parseStructuralElement();
334       break;
335     }
336   } while (!eof());
337 }
338 
339 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
340   // We'll parse forward through the tokens until we hit
341   // a closing brace or eof - note that getNextToken() will
342   // parse macros, so this will magically work inside macro
343   // definitions, too.
344   unsigned StoredPosition = Tokens->getPosition();
345   FormatToken *Tok = FormatTok;
346   const FormatToken *PrevTok = getPreviousToken();
347   // Keep a stack of positions of lbrace tokens. We will
348   // update information about whether an lbrace starts a
349   // braced init list or a different block during the loop.
350   SmallVector<FormatToken *, 8> LBraceStack;
351   assert(Tok->Tok.is(tok::l_brace));
352   do {
353     // Get next non-comment token.
354     FormatToken *NextTok;
355     unsigned ReadTokens = 0;
356     do {
357       NextTok = Tokens->getNextToken();
358       ++ReadTokens;
359     } while (NextTok->is(tok::comment));
360 
361     switch (Tok->Tok.getKind()) {
362     case tok::l_brace:
363       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
364         if (PrevTok->is(tok::colon))
365           // A colon indicates this code is in a type, or a braced list
366           // following a label in an object literal ({a: {b: 1}}). The code
367           // below could be confused by semicolons between the individual
368           // members in a type member list, which would normally trigger
369           // BK_Block. In both cases, this must be parsed as an inline braced
370           // init.
371           Tok->BlockKind = BK_BracedInit;
372         else if (PrevTok->is(tok::r_paren))
373           // `) { }` can only occur in function or method declarations in JS.
374           Tok->BlockKind = BK_Block;
375       } else {
376         Tok->BlockKind = BK_Unknown;
377       }
378       LBraceStack.push_back(Tok);
379       break;
380     case tok::r_brace:
381       if (LBraceStack.empty())
382         break;
383       if (LBraceStack.back()->BlockKind == BK_Unknown) {
384         bool ProbablyBracedList = false;
385         if (Style.Language == FormatStyle::LK_Proto) {
386           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
387         } else {
388           // Using OriginalColumn to distinguish between ObjC methods and
389           // binary operators is a bit hacky.
390           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
391                                   NextTok->OriginalColumn == 0;
392 
393           // If there is a comma, semicolon or right paren after the closing
394           // brace, we assume this is a braced initializer list.  Note that
395           // regardless how we mark inner braces here, we will overwrite the
396           // BlockKind later if we parse a braced list (where all blocks
397           // inside are by default braced lists), or when we explicitly detect
398           // blocks (for example while parsing lambdas).
399           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
400           // braced list in JS.
401           ProbablyBracedList =
402               (Style.Language == FormatStyle::LK_JavaScript &&
403                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
404                                 Keywords.kw_as)) ||
405               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
406               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
407                                tok::r_paren, tok::r_square, tok::l_brace,
408                                tok::l_square, tok::ellipsis) ||
409               (NextTok->is(tok::identifier) &&
410                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
411               (NextTok->is(tok::semi) &&
412                (!ExpectClassBody || LBraceStack.size() != 1)) ||
413               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
414         }
415         if (ProbablyBracedList) {
416           Tok->BlockKind = BK_BracedInit;
417           LBraceStack.back()->BlockKind = BK_BracedInit;
418         } else {
419           Tok->BlockKind = BK_Block;
420           LBraceStack.back()->BlockKind = BK_Block;
421         }
422       }
423       LBraceStack.pop_back();
424       break;
425     case tok::at:
426     case tok::semi:
427     case tok::kw_if:
428     case tok::kw_while:
429     case tok::kw_for:
430     case tok::kw_switch:
431     case tok::kw_try:
432     case tok::kw___try:
433       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
434         LBraceStack.back()->BlockKind = BK_Block;
435       break;
436     default:
437       break;
438     }
439     PrevTok = Tok;
440     Tok = NextTok;
441   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
442 
443   // Assume other blocks for all unclosed opening braces.
444   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
445     if (LBraceStack[i]->BlockKind == BK_Unknown)
446       LBraceStack[i]->BlockKind = BK_Block;
447   }
448 
449   FormatTok = Tokens->setPosition(StoredPosition);
450 }
451 
452 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
453                                      bool MunchSemi) {
454   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
455          "'{' or macro block token expected");
456   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
457   FormatTok->BlockKind = BK_Block;
458 
459   unsigned InitialLevel = Line->Level;
460   nextToken();
461 
462   if (MacroBlock && FormatTok->is(tok::l_paren))
463     parseParens();
464 
465   addUnwrappedLine();
466   size_t OpeningLineIndex = CurrentLines->empty()
467                                 ? (UnwrappedLine::kInvalidIndex)
468                                 : (CurrentLines->size() - 1);
469 
470   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
471                                           MustBeDeclaration);
472   if (AddLevel)
473     ++Line->Level;
474   parseLevel(/*HasOpeningBrace=*/true);
475 
476   if (eof())
477     return;
478 
479   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
480                  : !FormatTok->is(tok::r_brace)) {
481     Line->Level = InitialLevel;
482     FormatTok->BlockKind = BK_Block;
483     return;
484   }
485 
486   nextToken(); // Munch the closing brace.
487 
488   if (MacroBlock && FormatTok->is(tok::l_paren))
489     parseParens();
490 
491   if (MunchSemi && FormatTok->Tok.is(tok::semi))
492     nextToken();
493   Line->Level = InitialLevel;
494   Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
495 }
496 
497 static bool isGoogScope(const UnwrappedLine &Line) {
498   // FIXME: Closure-library specific stuff should not be hard-coded but be
499   // configurable.
500   if (Line.Tokens.size() < 4)
501     return false;
502   auto I = Line.Tokens.begin();
503   if (I->Tok->TokenText != "goog")
504     return false;
505   ++I;
506   if (I->Tok->isNot(tok::period))
507     return false;
508   ++I;
509   if (I->Tok->TokenText != "scope")
510     return false;
511   ++I;
512   return I->Tok->is(tok::l_paren);
513 }
514 
515 static bool isIIFE(const UnwrappedLine &Line,
516                    const AdditionalKeywords &Keywords) {
517   // Look for the start of an immediately invoked anonymous function.
518   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
519   // This is commonly done in JavaScript to create a new, anonymous scope.
520   // Example: (function() { ... })()
521   if (Line.Tokens.size() < 3)
522     return false;
523   auto I = Line.Tokens.begin();
524   if (I->Tok->isNot(tok::l_paren))
525     return false;
526   ++I;
527   if (I->Tok->isNot(Keywords.kw_function))
528     return false;
529   ++I;
530   return I->Tok->is(tok::l_paren);
531 }
532 
533 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
534                                    const FormatToken &InitialToken) {
535   if (InitialToken.is(tok::kw_namespace))
536     return Style.BraceWrapping.AfterNamespace;
537   if (InitialToken.is(tok::kw_class))
538     return Style.BraceWrapping.AfterClass;
539   if (InitialToken.is(tok::kw_union))
540     return Style.BraceWrapping.AfterUnion;
541   if (InitialToken.is(tok::kw_struct))
542     return Style.BraceWrapping.AfterStruct;
543   return false;
544 }
545 
546 void UnwrappedLineParser::parseChildBlock() {
547   FormatTok->BlockKind = BK_Block;
548   nextToken();
549   {
550     bool SkipIndent =
551         (Style.Language == FormatStyle::LK_JavaScript &&
552          (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
553     ScopedLineState LineState(*this);
554     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
555                                             /*MustBeDeclaration=*/false);
556     Line->Level += SkipIndent ? 0 : 1;
557     parseLevel(/*HasOpeningBrace=*/true);
558     flushComments(isOnNewLine(*FormatTok));
559     Line->Level -= SkipIndent ? 0 : 1;
560   }
561   nextToken();
562 }
563 
564 void UnwrappedLineParser::parsePPDirective() {
565   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
566   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
567   nextToken();
568 
569   if (!FormatTok->Tok.getIdentifierInfo()) {
570     parsePPUnknown();
571     return;
572   }
573 
574   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
575   case tok::pp_define:
576     parsePPDefine();
577     return;
578   case tok::pp_if:
579     parsePPIf(/*IfDef=*/false);
580     break;
581   case tok::pp_ifdef:
582   case tok::pp_ifndef:
583     parsePPIf(/*IfDef=*/true);
584     break;
585   case tok::pp_else:
586     parsePPElse();
587     break;
588   case tok::pp_elif:
589     parsePPElIf();
590     break;
591   case tok::pp_endif:
592     parsePPEndIf();
593     break;
594   default:
595     parsePPUnknown();
596     break;
597   }
598 }
599 
600 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
601   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
602     PPStack.push_back(PP_Unreachable);
603   else
604     PPStack.push_back(PP_Conditional);
605 }
606 
607 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
608   ++PPBranchLevel;
609   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
610   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
611     PPLevelBranchIndex.push_back(0);
612     PPLevelBranchCount.push_back(0);
613   }
614   PPChainBranchIndex.push(0);
615   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
616   conditionalCompilationCondition(Unreachable || Skip);
617 }
618 
619 void UnwrappedLineParser::conditionalCompilationAlternative() {
620   if (!PPStack.empty())
621     PPStack.pop_back();
622   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
623   if (!PPChainBranchIndex.empty())
624     ++PPChainBranchIndex.top();
625   conditionalCompilationCondition(
626       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
627       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
628 }
629 
630 void UnwrappedLineParser::conditionalCompilationEnd() {
631   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
632   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
633     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
634       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
635     }
636   }
637   // Guard against #endif's without #if.
638   if (PPBranchLevel > 0)
639     --PPBranchLevel;
640   if (!PPChainBranchIndex.empty())
641     PPChainBranchIndex.pop();
642   if (!PPStack.empty())
643     PPStack.pop_back();
644 }
645 
646 void UnwrappedLineParser::parsePPIf(bool IfDef) {
647   bool IfNDef = FormatTok->is(tok::pp_ifndef);
648   nextToken();
649   bool Unreachable = false;
650   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
651     Unreachable = true;
652   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
653     Unreachable = true;
654   conditionalCompilationStart(Unreachable);
655   parsePPUnknown();
656 }
657 
658 void UnwrappedLineParser::parsePPElse() {
659   conditionalCompilationAlternative();
660   parsePPUnknown();
661 }
662 
663 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
664 
665 void UnwrappedLineParser::parsePPEndIf() {
666   conditionalCompilationEnd();
667   parsePPUnknown();
668 }
669 
670 void UnwrappedLineParser::parsePPDefine() {
671   nextToken();
672 
673   if (FormatTok->Tok.getKind() != tok::identifier) {
674     parsePPUnknown();
675     return;
676   }
677   nextToken();
678   if (FormatTok->Tok.getKind() == tok::l_paren &&
679       FormatTok->WhitespaceRange.getBegin() ==
680           FormatTok->WhitespaceRange.getEnd()) {
681     parseParens();
682   }
683   addUnwrappedLine();
684   Line->Level = 1;
685 
686   // Errors during a preprocessor directive can only affect the layout of the
687   // preprocessor directive, and thus we ignore them. An alternative approach
688   // would be to use the same approach we use on the file level (no
689   // re-indentation if there was a structural error) within the macro
690   // definition.
691   parseFile();
692 }
693 
694 void UnwrappedLineParser::parsePPUnknown() {
695   do {
696     nextToken();
697   } while (!eof());
698   addUnwrappedLine();
699 }
700 
701 // Here we blacklist certain tokens that are not usually the first token in an
702 // unwrapped line. This is used in attempt to distinguish macro calls without
703 // trailing semicolons from other constructs split to several lines.
704 static bool tokenCanStartNewLine(const clang::Token &Tok) {
705   // Semicolon can be a null-statement, l_square can be a start of a macro or
706   // a C++11 attribute, but this doesn't seem to be common.
707   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
708          Tok.isNot(tok::l_square) &&
709          // Tokens that can only be used as binary operators and a part of
710          // overloaded operator names.
711          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
712          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
713          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
714          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
715          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
716          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
717          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
718          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
719          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
720          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
721          Tok.isNot(tok::lesslessequal) &&
722          // Colon is used in labels, base class lists, initializer lists,
723          // range-based for loops, ternary operator, but should never be the
724          // first token in an unwrapped line.
725          Tok.isNot(tok::colon) &&
726          // 'noexcept' is a trailing annotation.
727          Tok.isNot(tok::kw_noexcept);
728 }
729 
730 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
731                           const FormatToken *FormatTok) {
732   // FIXME: This returns true for C/C++ keywords like 'struct'.
733   return FormatTok->is(tok::identifier) &&
734          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
735           !FormatTok->isOneOf(
736               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
737               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
738               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
739               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
740               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
741               Keywords.kw_instanceof, Keywords.kw_interface,
742               Keywords.kw_throws));
743 }
744 
745 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
746                                  const FormatToken *FormatTok) {
747   return FormatTok->Tok.isLiteral() ||
748          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
749          mustBeJSIdent(Keywords, FormatTok);
750 }
751 
752 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
753 // when encountered after a value (see mustBeJSIdentOrValue).
754 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
755                            const FormatToken *FormatTok) {
756   return FormatTok->isOneOf(
757       tok::kw_return, Keywords.kw_yield,
758       // conditionals
759       tok::kw_if, tok::kw_else,
760       // loops
761       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
762       // switch/case
763       tok::kw_switch, tok::kw_case,
764       // exceptions
765       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
766       // declaration
767       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
768       Keywords.kw_async, Keywords.kw_function,
769       // import/export
770       Keywords.kw_import, tok::kw_export);
771 }
772 
773 // readTokenWithJavaScriptASI reads the next token and terminates the current
774 // line if JavaScript Automatic Semicolon Insertion must
775 // happen between the current token and the next token.
776 //
777 // This method is conservative - it cannot cover all edge cases of JavaScript,
778 // but only aims to correctly handle certain well known cases. It *must not*
779 // return true in speculative cases.
780 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
781   FormatToken *Previous = FormatTok;
782   readToken();
783   FormatToken *Next = FormatTok;
784 
785   bool IsOnSameLine =
786       CommentsBeforeNextToken.empty()
787           ? Next->NewlinesBefore == 0
788           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
789   if (IsOnSameLine)
790     return;
791 
792   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
793   bool PreviousStartsTemplateExpr =
794       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
795   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
796     // If the token before the previous one is an '@', the previous token is an
797     // annotation and can precede another identifier/value.
798     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
799     if (PrePrevious->is(tok::at))
800       return;
801   }
802   if (Next->is(tok::exclaim) && PreviousMustBeValue)
803     return addUnwrappedLine();
804   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
805   bool NextEndsTemplateExpr =
806       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
807   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
808       (PreviousMustBeValue ||
809        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
810                          tok::minusminus)))
811     return addUnwrappedLine();
812   if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
813     return addUnwrappedLine();
814 }
815 
816 void UnwrappedLineParser::parseStructuralElement() {
817   assert(!FormatTok->is(tok::l_brace));
818   if (Style.Language == FormatStyle::LK_TableGen &&
819       FormatTok->is(tok::pp_include)) {
820     nextToken();
821     if (FormatTok->is(tok::string_literal))
822       nextToken();
823     addUnwrappedLine();
824     return;
825   }
826   switch (FormatTok->Tok.getKind()) {
827   case tok::at:
828     nextToken();
829     if (FormatTok->Tok.is(tok::l_brace)) {
830       parseBracedList();
831       break;
832     }
833     switch (FormatTok->Tok.getObjCKeywordID()) {
834     case tok::objc_public:
835     case tok::objc_protected:
836     case tok::objc_package:
837     case tok::objc_private:
838       return parseAccessSpecifier();
839     case tok::objc_interface:
840     case tok::objc_implementation:
841       return parseObjCInterfaceOrImplementation();
842     case tok::objc_protocol:
843       return parseObjCProtocol();
844     case tok::objc_end:
845       return; // Handled by the caller.
846     case tok::objc_optional:
847     case tok::objc_required:
848       nextToken();
849       addUnwrappedLine();
850       return;
851     case tok::objc_autoreleasepool:
852       nextToken();
853       if (FormatTok->Tok.is(tok::l_brace)) {
854         if (Style.BraceWrapping.AfterObjCDeclaration)
855           addUnwrappedLine();
856         parseBlock(/*MustBeDeclaration=*/false);
857       }
858       addUnwrappedLine();
859       return;
860     case tok::objc_try:
861       // This branch isn't strictly necessary (the kw_try case below would
862       // do this too after the tok::at is parsed above).  But be explicit.
863       parseTryCatch();
864       return;
865     default:
866       break;
867     }
868     break;
869   case tok::kw_asm:
870     nextToken();
871     if (FormatTok->is(tok::l_brace)) {
872       FormatTok->Type = TT_InlineASMBrace;
873       nextToken();
874       while (FormatTok && FormatTok->isNot(tok::eof)) {
875         if (FormatTok->is(tok::r_brace)) {
876           FormatTok->Type = TT_InlineASMBrace;
877           nextToken();
878           addUnwrappedLine();
879           break;
880         }
881         FormatTok->Finalized = true;
882         nextToken();
883       }
884     }
885     break;
886   case tok::kw_namespace:
887     parseNamespace();
888     return;
889   case tok::kw_inline:
890     nextToken();
891     if (FormatTok->Tok.is(tok::kw_namespace)) {
892       parseNamespace();
893       return;
894     }
895     break;
896   case tok::kw_public:
897   case tok::kw_protected:
898   case tok::kw_private:
899     if (Style.Language == FormatStyle::LK_Java ||
900         Style.Language == FormatStyle::LK_JavaScript)
901       nextToken();
902     else
903       parseAccessSpecifier();
904     return;
905   case tok::kw_if:
906     parseIfThenElse();
907     return;
908   case tok::kw_for:
909   case tok::kw_while:
910     parseForOrWhileLoop();
911     return;
912   case tok::kw_do:
913     parseDoWhile();
914     return;
915   case tok::kw_switch:
916     parseSwitch();
917     return;
918   case tok::kw_default:
919     nextToken();
920     parseLabel();
921     return;
922   case tok::kw_case:
923     parseCaseLabel();
924     return;
925   case tok::kw_try:
926   case tok::kw___try:
927     parseTryCatch();
928     return;
929   case tok::kw_extern:
930     nextToken();
931     if (FormatTok->Tok.is(tok::string_literal)) {
932       nextToken();
933       if (FormatTok->Tok.is(tok::l_brace)) {
934         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
935         addUnwrappedLine();
936         return;
937       }
938     }
939     break;
940   case tok::kw_export:
941     if (Style.Language == FormatStyle::LK_JavaScript) {
942       parseJavaScriptEs6ImportExport();
943       return;
944     }
945     break;
946   case tok::identifier:
947     if (FormatTok->is(TT_ForEachMacro)) {
948       parseForOrWhileLoop();
949       return;
950     }
951     if (FormatTok->is(TT_MacroBlockBegin)) {
952       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
953                  /*MunchSemi=*/false);
954       return;
955     }
956     if (FormatTok->is(Keywords.kw_import)) {
957       if (Style.Language == FormatStyle::LK_JavaScript) {
958         parseJavaScriptEs6ImportExport();
959         return;
960       }
961       if (Style.Language == FormatStyle::LK_Proto) {
962         nextToken();
963         if (FormatTok->is(tok::kw_public))
964           nextToken();
965         if (!FormatTok->is(tok::string_literal))
966           return;
967         nextToken();
968         if (FormatTok->is(tok::semi))
969           nextToken();
970         addUnwrappedLine();
971         return;
972       }
973     }
974     if (Style.isCpp() &&
975         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
976                            Keywords.kw_slots, Keywords.kw_qslots)) {
977       nextToken();
978       if (FormatTok->is(tok::colon)) {
979         nextToken();
980         addUnwrappedLine();
981         return;
982       }
983     }
984     // In all other cases, parse the declaration.
985     break;
986   default:
987     break;
988   }
989   do {
990     const FormatToken *Previous = getPreviousToken();
991     switch (FormatTok->Tok.getKind()) {
992     case tok::at:
993       nextToken();
994       if (FormatTok->Tok.is(tok::l_brace))
995         parseBracedList();
996       break;
997     case tok::kw_enum:
998       // Ignore if this is part of "template <enum ...".
999       if (Previous && Previous->is(tok::less)) {
1000         nextToken();
1001         break;
1002       }
1003 
1004       // parseEnum falls through and does not yet add an unwrapped line as an
1005       // enum definition can start a structural element.
1006       if (!parseEnum())
1007         break;
1008       // This only applies for C++.
1009       if (!Style.isCpp()) {
1010         addUnwrappedLine();
1011         return;
1012       }
1013       break;
1014     case tok::kw_typedef:
1015       nextToken();
1016       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1017                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1018         parseEnum();
1019       break;
1020     case tok::kw_struct:
1021     case tok::kw_union:
1022     case tok::kw_class:
1023       // parseRecord falls through and does not yet add an unwrapped line as a
1024       // record declaration or definition can start a structural element.
1025       parseRecord();
1026       // This does not apply for Java and JavaScript.
1027       if (Style.Language == FormatStyle::LK_Java ||
1028           Style.Language == FormatStyle::LK_JavaScript) {
1029         if (FormatTok->is(tok::semi))
1030           nextToken();
1031         addUnwrappedLine();
1032         return;
1033       }
1034       break;
1035     case tok::period:
1036       nextToken();
1037       // In Java, classes have an implicit static member "class".
1038       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1039           FormatTok->is(tok::kw_class))
1040         nextToken();
1041       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1042           FormatTok->Tok.getIdentifierInfo())
1043         // JavaScript only has pseudo keywords, all keywords are allowed to
1044         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1045         nextToken();
1046       break;
1047     case tok::semi:
1048       nextToken();
1049       addUnwrappedLine();
1050       return;
1051     case tok::r_brace:
1052       addUnwrappedLine();
1053       return;
1054     case tok::l_paren:
1055       parseParens();
1056       break;
1057     case tok::kw_operator:
1058       nextToken();
1059       if (FormatTok->isBinaryOperator())
1060         nextToken();
1061       break;
1062     case tok::caret:
1063       nextToken();
1064       if (FormatTok->Tok.isAnyIdentifier() ||
1065           FormatTok->isSimpleTypeSpecifier())
1066         nextToken();
1067       if (FormatTok->is(tok::l_paren))
1068         parseParens();
1069       if (FormatTok->is(tok::l_brace))
1070         parseChildBlock();
1071       break;
1072     case tok::l_brace:
1073       if (!tryToParseBracedList()) {
1074         // A block outside of parentheses must be the last part of a
1075         // structural element.
1076         // FIXME: Figure out cases where this is not true, and add projections
1077         // for them (the one we know is missing are lambdas).
1078         if (Style.BraceWrapping.AfterFunction)
1079           addUnwrappedLine();
1080         FormatTok->Type = TT_FunctionLBrace;
1081         parseBlock(/*MustBeDeclaration=*/false);
1082         addUnwrappedLine();
1083         return;
1084       }
1085       // Otherwise this was a braced init list, and the structural
1086       // element continues.
1087       break;
1088     case tok::kw_try:
1089       // We arrive here when parsing function-try blocks.
1090       parseTryCatch();
1091       return;
1092     case tok::identifier: {
1093       if (FormatTok->is(TT_MacroBlockEnd)) {
1094         addUnwrappedLine();
1095         return;
1096       }
1097 
1098       // Function declarations (as opposed to function expressions) are parsed
1099       // on their own unwrapped line by continuing this loop. Function
1100       // expressions (functions that are not on their own line) must not create
1101       // a new unwrapped line, so they are special cased below.
1102       size_t TokenCount = Line->Tokens.size();
1103       if (Style.Language == FormatStyle::LK_JavaScript &&
1104           FormatTok->is(Keywords.kw_function) &&
1105           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1106                                                      Keywords.kw_async)))) {
1107         tryToParseJSFunction();
1108         break;
1109       }
1110       if ((Style.Language == FormatStyle::LK_JavaScript ||
1111            Style.Language == FormatStyle::LK_Java) &&
1112           FormatTok->is(Keywords.kw_interface)) {
1113         if (Style.Language == FormatStyle::LK_JavaScript) {
1114           // In JavaScript/TypeScript, "interface" can be used as a standalone
1115           // identifier, e.g. in `var interface = 1;`. If "interface" is
1116           // followed by another identifier, it is very like to be an actual
1117           // interface declaration.
1118           unsigned StoredPosition = Tokens->getPosition();
1119           FormatToken *Next = Tokens->getNextToken();
1120           FormatTok = Tokens->setPosition(StoredPosition);
1121           if (Next && !mustBeJSIdent(Keywords, Next)) {
1122             nextToken();
1123             break;
1124           }
1125         }
1126         parseRecord();
1127         addUnwrappedLine();
1128         return;
1129       }
1130 
1131       // See if the following token should start a new unwrapped line.
1132       StringRef Text = FormatTok->TokenText;
1133       nextToken();
1134       if (Line->Tokens.size() == 1 &&
1135           // JS doesn't have macros, and within classes colons indicate fields,
1136           // not labels.
1137           Style.Language != FormatStyle::LK_JavaScript) {
1138         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1139           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1140           parseLabel();
1141           return;
1142         }
1143         // Recognize function-like macro usages without trailing semicolon as
1144         // well as free-standing macros like Q_OBJECT.
1145         bool FunctionLike = FormatTok->is(tok::l_paren);
1146         if (FunctionLike)
1147           parseParens();
1148 
1149         bool FollowedByNewline =
1150             CommentsBeforeNextToken.empty()
1151                 ? FormatTok->NewlinesBefore > 0
1152                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1153 
1154         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1155             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1156           addUnwrappedLine();
1157           return;
1158         }
1159       }
1160       break;
1161     }
1162     case tok::equal:
1163       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1164       // TT_JsFatArrow. The always start an expression or a child block if
1165       // followed by a curly.
1166       if (FormatTok->is(TT_JsFatArrow)) {
1167         nextToken();
1168         if (FormatTok->is(tok::l_brace))
1169           parseChildBlock();
1170         break;
1171       }
1172 
1173       nextToken();
1174       if (FormatTok->Tok.is(tok::l_brace)) {
1175         parseBracedList();
1176       }
1177       break;
1178     case tok::l_square:
1179       parseSquare();
1180       break;
1181     case tok::kw_new:
1182       parseNew();
1183       break;
1184     default:
1185       nextToken();
1186       break;
1187     }
1188   } while (!eof());
1189 }
1190 
1191 bool UnwrappedLineParser::tryToParseLambda() {
1192   if (!Style.isCpp()) {
1193     nextToken();
1194     return false;
1195   }
1196   const FormatToken* Previous = getPreviousToken();
1197   if (Previous &&
1198       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1199                          tok::kw_delete) ||
1200        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1201     nextToken();
1202     return false;
1203   }
1204   assert(FormatTok->is(tok::l_square));
1205   FormatToken &LSquare = *FormatTok;
1206   if (!tryToParseLambdaIntroducer())
1207     return false;
1208 
1209   while (FormatTok->isNot(tok::l_brace)) {
1210     if (FormatTok->isSimpleTypeSpecifier()) {
1211       nextToken();
1212       continue;
1213     }
1214     switch (FormatTok->Tok.getKind()) {
1215     case tok::l_brace:
1216       break;
1217     case tok::l_paren:
1218       parseParens();
1219       break;
1220     case tok::amp:
1221     case tok::star:
1222     case tok::kw_const:
1223     case tok::comma:
1224     case tok::less:
1225     case tok::greater:
1226     case tok::identifier:
1227     case tok::numeric_constant:
1228     case tok::coloncolon:
1229     case tok::kw_mutable:
1230       nextToken();
1231       break;
1232     case tok::arrow:
1233       FormatTok->Type = TT_LambdaArrow;
1234       nextToken();
1235       break;
1236     default:
1237       return true;
1238     }
1239   }
1240   LSquare.Type = TT_LambdaLSquare;
1241   parseChildBlock();
1242   return true;
1243 }
1244 
1245 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1246   nextToken();
1247   if (FormatTok->is(tok::equal)) {
1248     nextToken();
1249     if (FormatTok->is(tok::r_square)) {
1250       nextToken();
1251       return true;
1252     }
1253     if (FormatTok->isNot(tok::comma))
1254       return false;
1255     nextToken();
1256   } else if (FormatTok->is(tok::amp)) {
1257     nextToken();
1258     if (FormatTok->is(tok::r_square)) {
1259       nextToken();
1260       return true;
1261     }
1262     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1263       return false;
1264     }
1265     if (FormatTok->is(tok::comma))
1266       nextToken();
1267   } else if (FormatTok->is(tok::r_square)) {
1268     nextToken();
1269     return true;
1270   }
1271   do {
1272     if (FormatTok->is(tok::amp))
1273       nextToken();
1274     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1275       return false;
1276     nextToken();
1277     if (FormatTok->is(tok::ellipsis))
1278       nextToken();
1279     if (FormatTok->is(tok::comma)) {
1280       nextToken();
1281     } else if (FormatTok->is(tok::r_square)) {
1282       nextToken();
1283       return true;
1284     } else {
1285       return false;
1286     }
1287   } while (!eof());
1288   return false;
1289 }
1290 
1291 void UnwrappedLineParser::tryToParseJSFunction() {
1292   assert(FormatTok->is(Keywords.kw_function) ||
1293          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1294   if (FormatTok->is(Keywords.kw_async))
1295     nextToken();
1296   // Consume "function".
1297   nextToken();
1298 
1299   // Consume * (generator function). Treat it like C++'s overloaded operators.
1300   if (FormatTok->is(tok::star)) {
1301     FormatTok->Type = TT_OverloadedOperator;
1302     nextToken();
1303   }
1304 
1305   // Consume function name.
1306   if (FormatTok->is(tok::identifier))
1307     nextToken();
1308 
1309   if (FormatTok->isNot(tok::l_paren))
1310     return;
1311 
1312   // Parse formal parameter list.
1313   parseParens();
1314 
1315   if (FormatTok->is(tok::colon)) {
1316     // Parse a type definition.
1317     nextToken();
1318 
1319     // Eat the type declaration. For braced inline object types, balance braces,
1320     // otherwise just parse until finding an l_brace for the function body.
1321     if (FormatTok->is(tok::l_brace))
1322       tryToParseBracedList();
1323     else
1324       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1325         nextToken();
1326   }
1327 
1328   if (FormatTok->is(tok::semi))
1329     return;
1330 
1331   parseChildBlock();
1332 }
1333 
1334 bool UnwrappedLineParser::tryToParseBracedList() {
1335   if (FormatTok->BlockKind == BK_Unknown)
1336     calculateBraceTypes();
1337   assert(FormatTok->BlockKind != BK_Unknown);
1338   if (FormatTok->BlockKind == BK_Block)
1339     return false;
1340   parseBracedList();
1341   return true;
1342 }
1343 
1344 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1345   bool HasError = false;
1346   nextToken();
1347 
1348   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1349   // replace this by using parseAssigmentExpression() inside.
1350   do {
1351     if (Style.Language == FormatStyle::LK_JavaScript) {
1352       if (FormatTok->is(Keywords.kw_function) ||
1353           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1354         tryToParseJSFunction();
1355         continue;
1356       }
1357       if (FormatTok->is(TT_JsFatArrow)) {
1358         nextToken();
1359         // Fat arrows can be followed by simple expressions or by child blocks
1360         // in curly braces.
1361         if (FormatTok->is(tok::l_brace)) {
1362           parseChildBlock();
1363           continue;
1364         }
1365       }
1366       if (FormatTok->is(tok::l_brace)) {
1367         // Could be a method inside of a braced list `{a() { return 1; }}`.
1368         if (tryToParseBracedList())
1369           continue;
1370         parseChildBlock();
1371       }
1372     }
1373     switch (FormatTok->Tok.getKind()) {
1374     case tok::caret:
1375       nextToken();
1376       if (FormatTok->is(tok::l_brace)) {
1377         parseChildBlock();
1378       }
1379       break;
1380     case tok::l_square:
1381       tryToParseLambda();
1382       break;
1383     case tok::l_paren:
1384       parseParens();
1385       // JavaScript can just have free standing methods and getters/setters in
1386       // object literals. Detect them by a "{" following ")".
1387       if (Style.Language == FormatStyle::LK_JavaScript) {
1388         if (FormatTok->is(tok::l_brace))
1389           parseChildBlock();
1390         break;
1391       }
1392       break;
1393     case tok::l_brace:
1394       // Assume there are no blocks inside a braced init list apart
1395       // from the ones we explicitly parse out (like lambdas).
1396       FormatTok->BlockKind = BK_BracedInit;
1397       parseBracedList();
1398       break;
1399     case tok::r_brace:
1400       nextToken();
1401       return !HasError;
1402     case tok::semi:
1403       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1404       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1405       // used for error recovery if we have otherwise determined that this is
1406       // a braced list.
1407       if (Style.Language == FormatStyle::LK_JavaScript) {
1408         nextToken();
1409         break;
1410       }
1411       HasError = true;
1412       if (!ContinueOnSemicolons)
1413         return !HasError;
1414       nextToken();
1415       break;
1416     case tok::comma:
1417       nextToken();
1418       break;
1419     default:
1420       nextToken();
1421       break;
1422     }
1423   } while (!eof());
1424   return false;
1425 }
1426 
1427 void UnwrappedLineParser::parseParens() {
1428   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1429   nextToken();
1430   do {
1431     switch (FormatTok->Tok.getKind()) {
1432     case tok::l_paren:
1433       parseParens();
1434       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1435         parseChildBlock();
1436       break;
1437     case tok::r_paren:
1438       nextToken();
1439       return;
1440     case tok::r_brace:
1441       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1442       return;
1443     case tok::l_square:
1444       tryToParseLambda();
1445       break;
1446     case tok::l_brace:
1447       if (!tryToParseBracedList())
1448         parseChildBlock();
1449       break;
1450     case tok::at:
1451       nextToken();
1452       if (FormatTok->Tok.is(tok::l_brace))
1453         parseBracedList();
1454       break;
1455     case tok::kw_class:
1456       if (Style.Language == FormatStyle::LK_JavaScript)
1457         parseRecord(/*ParseAsExpr=*/true);
1458       else
1459         nextToken();
1460       break;
1461     case tok::identifier:
1462       if (Style.Language == FormatStyle::LK_JavaScript &&
1463           (FormatTok->is(Keywords.kw_function) ||
1464            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1465         tryToParseJSFunction();
1466       else
1467         nextToken();
1468       break;
1469     default:
1470       nextToken();
1471       break;
1472     }
1473   } while (!eof());
1474 }
1475 
1476 void UnwrappedLineParser::parseSquare() {
1477   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1478   if (tryToParseLambda())
1479     return;
1480   do {
1481     switch (FormatTok->Tok.getKind()) {
1482     case tok::l_paren:
1483       parseParens();
1484       break;
1485     case tok::r_square:
1486       nextToken();
1487       return;
1488     case tok::r_brace:
1489       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1490       return;
1491     case tok::l_square:
1492       parseSquare();
1493       break;
1494     case tok::l_brace: {
1495       if (!tryToParseBracedList())
1496         parseChildBlock();
1497       break;
1498     }
1499     case tok::at:
1500       nextToken();
1501       if (FormatTok->Tok.is(tok::l_brace))
1502         parseBracedList();
1503       break;
1504     default:
1505       nextToken();
1506       break;
1507     }
1508   } while (!eof());
1509 }
1510 
1511 void UnwrappedLineParser::parseIfThenElse() {
1512   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1513   nextToken();
1514   if (FormatTok->Tok.is(tok::l_paren))
1515     parseParens();
1516   bool NeedsUnwrappedLine = false;
1517   if (FormatTok->Tok.is(tok::l_brace)) {
1518     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1519     parseBlock(/*MustBeDeclaration=*/false);
1520     if (Style.BraceWrapping.BeforeElse)
1521       addUnwrappedLine();
1522     else
1523       NeedsUnwrappedLine = true;
1524   } else {
1525     addUnwrappedLine();
1526     ++Line->Level;
1527     parseStructuralElement();
1528     --Line->Level;
1529   }
1530   if (FormatTok->Tok.is(tok::kw_else)) {
1531     nextToken();
1532     if (FormatTok->Tok.is(tok::l_brace)) {
1533       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1534       parseBlock(/*MustBeDeclaration=*/false);
1535       addUnwrappedLine();
1536     } else if (FormatTok->Tok.is(tok::kw_if)) {
1537       parseIfThenElse();
1538     } else {
1539       addUnwrappedLine();
1540       ++Line->Level;
1541       parseStructuralElement();
1542       if (FormatTok->is(tok::eof))
1543         addUnwrappedLine();
1544       --Line->Level;
1545     }
1546   } else if (NeedsUnwrappedLine) {
1547     addUnwrappedLine();
1548   }
1549 }
1550 
1551 void UnwrappedLineParser::parseTryCatch() {
1552   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1553   nextToken();
1554   bool NeedsUnwrappedLine = false;
1555   if (FormatTok->is(tok::colon)) {
1556     // We are in a function try block, what comes is an initializer list.
1557     nextToken();
1558     while (FormatTok->is(tok::identifier)) {
1559       nextToken();
1560       if (FormatTok->is(tok::l_paren))
1561         parseParens();
1562       if (FormatTok->is(tok::comma))
1563         nextToken();
1564     }
1565   }
1566   // Parse try with resource.
1567   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1568     parseParens();
1569   }
1570   if (FormatTok->is(tok::l_brace)) {
1571     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1572     parseBlock(/*MustBeDeclaration=*/false);
1573     if (Style.BraceWrapping.BeforeCatch) {
1574       addUnwrappedLine();
1575     } else {
1576       NeedsUnwrappedLine = true;
1577     }
1578   } else if (!FormatTok->is(tok::kw_catch)) {
1579     // The C++ standard requires a compound-statement after a try.
1580     // If there's none, we try to assume there's a structuralElement
1581     // and try to continue.
1582     addUnwrappedLine();
1583     ++Line->Level;
1584     parseStructuralElement();
1585     --Line->Level;
1586   }
1587   while (1) {
1588     if (FormatTok->is(tok::at))
1589       nextToken();
1590     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1591                              tok::kw___finally) ||
1592           ((Style.Language == FormatStyle::LK_Java ||
1593             Style.Language == FormatStyle::LK_JavaScript) &&
1594            FormatTok->is(Keywords.kw_finally)) ||
1595           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1596            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1597       break;
1598     nextToken();
1599     while (FormatTok->isNot(tok::l_brace)) {
1600       if (FormatTok->is(tok::l_paren)) {
1601         parseParens();
1602         continue;
1603       }
1604       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1605         return;
1606       nextToken();
1607     }
1608     NeedsUnwrappedLine = false;
1609     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1610     parseBlock(/*MustBeDeclaration=*/false);
1611     if (Style.BraceWrapping.BeforeCatch)
1612       addUnwrappedLine();
1613     else
1614       NeedsUnwrappedLine = true;
1615   }
1616   if (NeedsUnwrappedLine)
1617     addUnwrappedLine();
1618 }
1619 
1620 void UnwrappedLineParser::parseNamespace() {
1621   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1622 
1623   const FormatToken &InitialToken = *FormatTok;
1624   nextToken();
1625   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1626     nextToken();
1627   if (FormatTok->Tok.is(tok::l_brace)) {
1628     if (ShouldBreakBeforeBrace(Style, InitialToken))
1629       addUnwrappedLine();
1630 
1631     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1632                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1633                      DeclarationScopeStack.size() > 1);
1634     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1635     // Munch the semicolon after a namespace. This is more common than one would
1636     // think. Puttin the semicolon into its own line is very ugly.
1637     if (FormatTok->Tok.is(tok::semi))
1638       nextToken();
1639     addUnwrappedLine();
1640   }
1641   // FIXME: Add error handling.
1642 }
1643 
1644 void UnwrappedLineParser::parseNew() {
1645   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1646   nextToken();
1647   if (Style.Language != FormatStyle::LK_Java)
1648     return;
1649 
1650   // In Java, we can parse everything up to the parens, which aren't optional.
1651   do {
1652     // There should not be a ;, { or } before the new's open paren.
1653     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1654       return;
1655 
1656     // Consume the parens.
1657     if (FormatTok->is(tok::l_paren)) {
1658       parseParens();
1659 
1660       // If there is a class body of an anonymous class, consume that as child.
1661       if (FormatTok->is(tok::l_brace))
1662         parseChildBlock();
1663       return;
1664     }
1665     nextToken();
1666   } while (!eof());
1667 }
1668 
1669 void UnwrappedLineParser::parseForOrWhileLoop() {
1670   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1671          "'for', 'while' or foreach macro expected");
1672   nextToken();
1673   // JS' for await ( ...
1674   if (Style.Language == FormatStyle::LK_JavaScript &&
1675       FormatTok->is(Keywords.kw_await))
1676     nextToken();
1677   if (FormatTok->Tok.is(tok::l_paren))
1678     parseParens();
1679   if (FormatTok->Tok.is(tok::l_brace)) {
1680     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1681     parseBlock(/*MustBeDeclaration=*/false);
1682     addUnwrappedLine();
1683   } else {
1684     addUnwrappedLine();
1685     ++Line->Level;
1686     parseStructuralElement();
1687     --Line->Level;
1688   }
1689 }
1690 
1691 void UnwrappedLineParser::parseDoWhile() {
1692   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1693   nextToken();
1694   if (FormatTok->Tok.is(tok::l_brace)) {
1695     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1696     parseBlock(/*MustBeDeclaration=*/false);
1697     if (Style.BraceWrapping.IndentBraces)
1698       addUnwrappedLine();
1699   } else {
1700     addUnwrappedLine();
1701     ++Line->Level;
1702     parseStructuralElement();
1703     --Line->Level;
1704   }
1705 
1706   // FIXME: Add error handling.
1707   if (!FormatTok->Tok.is(tok::kw_while)) {
1708     addUnwrappedLine();
1709     return;
1710   }
1711 
1712   nextToken();
1713   parseStructuralElement();
1714 }
1715 
1716 void UnwrappedLineParser::parseLabel() {
1717   nextToken();
1718   unsigned OldLineLevel = Line->Level;
1719   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1720     --Line->Level;
1721   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1722     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1723     parseBlock(/*MustBeDeclaration=*/false);
1724     if (FormatTok->Tok.is(tok::kw_break)) {
1725       if (Style.BraceWrapping.AfterControlStatement)
1726         addUnwrappedLine();
1727       parseStructuralElement();
1728     }
1729     addUnwrappedLine();
1730   } else {
1731     if (FormatTok->is(tok::semi))
1732       nextToken();
1733     addUnwrappedLine();
1734   }
1735   Line->Level = OldLineLevel;
1736   if (FormatTok->isNot(tok::l_brace)) {
1737     parseStructuralElement();
1738     addUnwrappedLine();
1739   }
1740 }
1741 
1742 void UnwrappedLineParser::parseCaseLabel() {
1743   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1744   // FIXME: fix handling of complex expressions here.
1745   do {
1746     nextToken();
1747   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1748   parseLabel();
1749 }
1750 
1751 void UnwrappedLineParser::parseSwitch() {
1752   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1753   nextToken();
1754   if (FormatTok->Tok.is(tok::l_paren))
1755     parseParens();
1756   if (FormatTok->Tok.is(tok::l_brace)) {
1757     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1758     parseBlock(/*MustBeDeclaration=*/false);
1759     addUnwrappedLine();
1760   } else {
1761     addUnwrappedLine();
1762     ++Line->Level;
1763     parseStructuralElement();
1764     --Line->Level;
1765   }
1766 }
1767 
1768 void UnwrappedLineParser::parseAccessSpecifier() {
1769   nextToken();
1770   // Understand Qt's slots.
1771   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1772     nextToken();
1773   // Otherwise, we don't know what it is, and we'd better keep the next token.
1774   if (FormatTok->Tok.is(tok::colon))
1775     nextToken();
1776   addUnwrappedLine();
1777 }
1778 
1779 bool UnwrappedLineParser::parseEnum() {
1780   // Won't be 'enum' for NS_ENUMs.
1781   if (FormatTok->Tok.is(tok::kw_enum))
1782     nextToken();
1783 
1784   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1785   // declarations. An "enum" keyword followed by a colon would be a syntax
1786   // error and thus assume it is just an identifier.
1787   if (Style.Language == FormatStyle::LK_JavaScript &&
1788       FormatTok->isOneOf(tok::colon, tok::question))
1789     return false;
1790 
1791   // Eat up enum class ...
1792   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1793     nextToken();
1794 
1795   while (FormatTok->Tok.getIdentifierInfo() ||
1796          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1797                             tok::greater, tok::comma, tok::question)) {
1798     nextToken();
1799     // We can have macros or attributes in between 'enum' and the enum name.
1800     if (FormatTok->is(tok::l_paren))
1801       parseParens();
1802     if (FormatTok->is(tok::identifier)) {
1803       nextToken();
1804       // If there are two identifiers in a row, this is likely an elaborate
1805       // return type. In Java, this can be "implements", etc.
1806       if (Style.isCpp() && FormatTok->is(tok::identifier))
1807         return false;
1808     }
1809   }
1810 
1811   // Just a declaration or something is wrong.
1812   if (FormatTok->isNot(tok::l_brace))
1813     return true;
1814   FormatTok->BlockKind = BK_Block;
1815 
1816   if (Style.Language == FormatStyle::LK_Java) {
1817     // Java enums are different.
1818     parseJavaEnumBody();
1819     return true;
1820   }
1821   if (Style.Language == FormatStyle::LK_Proto) {
1822     parseBlock(/*MustBeDeclaration=*/true);
1823     return true;
1824   }
1825 
1826   // Parse enum body.
1827   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1828   if (HasError) {
1829     if (FormatTok->is(tok::semi))
1830       nextToken();
1831     addUnwrappedLine();
1832   }
1833   return true;
1834 
1835   // There is no addUnwrappedLine() here so that we fall through to parsing a
1836   // structural element afterwards. Thus, in "enum A {} n, m;",
1837   // "} n, m;" will end up in one unwrapped line.
1838 }
1839 
1840 void UnwrappedLineParser::parseJavaEnumBody() {
1841   // Determine whether the enum is simple, i.e. does not have a semicolon or
1842   // constants with class bodies. Simple enums can be formatted like braced
1843   // lists, contracted to a single line, etc.
1844   unsigned StoredPosition = Tokens->getPosition();
1845   bool IsSimple = true;
1846   FormatToken *Tok = Tokens->getNextToken();
1847   while (Tok) {
1848     if (Tok->is(tok::r_brace))
1849       break;
1850     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1851       IsSimple = false;
1852       break;
1853     }
1854     // FIXME: This will also mark enums with braces in the arguments to enum
1855     // constants as "not simple". This is probably fine in practice, though.
1856     Tok = Tokens->getNextToken();
1857   }
1858   FormatTok = Tokens->setPosition(StoredPosition);
1859 
1860   if (IsSimple) {
1861     parseBracedList();
1862     addUnwrappedLine();
1863     return;
1864   }
1865 
1866   // Parse the body of a more complex enum.
1867   // First add a line for everything up to the "{".
1868   nextToken();
1869   addUnwrappedLine();
1870   ++Line->Level;
1871 
1872   // Parse the enum constants.
1873   while (FormatTok) {
1874     if (FormatTok->is(tok::l_brace)) {
1875       // Parse the constant's class body.
1876       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1877                  /*MunchSemi=*/false);
1878     } else if (FormatTok->is(tok::l_paren)) {
1879       parseParens();
1880     } else if (FormatTok->is(tok::comma)) {
1881       nextToken();
1882       addUnwrappedLine();
1883     } else if (FormatTok->is(tok::semi)) {
1884       nextToken();
1885       addUnwrappedLine();
1886       break;
1887     } else if (FormatTok->is(tok::r_brace)) {
1888       addUnwrappedLine();
1889       break;
1890     } else {
1891       nextToken();
1892     }
1893   }
1894 
1895   // Parse the class body after the enum's ";" if any.
1896   parseLevel(/*HasOpeningBrace=*/true);
1897   nextToken();
1898   --Line->Level;
1899   addUnwrappedLine();
1900 }
1901 
1902 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
1903   const FormatToken &InitialToken = *FormatTok;
1904   nextToken();
1905 
1906   // The actual identifier can be a nested name specifier, and in macros
1907   // it is often token-pasted.
1908   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1909                             tok::kw___attribute, tok::kw___declspec,
1910                             tok::kw_alignas) ||
1911          ((Style.Language == FormatStyle::LK_Java ||
1912            Style.Language == FormatStyle::LK_JavaScript) &&
1913           FormatTok->isOneOf(tok::period, tok::comma))) {
1914     bool IsNonMacroIdentifier =
1915         FormatTok->is(tok::identifier) &&
1916         FormatTok->TokenText != FormatTok->TokenText.upper();
1917     nextToken();
1918     // We can have macros or attributes in between 'class' and the class name.
1919     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1920       parseParens();
1921   }
1922 
1923   // Note that parsing away template declarations here leads to incorrectly
1924   // accepting function declarations as record declarations.
1925   // In general, we cannot solve this problem. Consider:
1926   // class A<int> B() {}
1927   // which can be a function definition or a class definition when B() is a
1928   // macro. If we find enough real-world cases where this is a problem, we
1929   // can parse for the 'template' keyword in the beginning of the statement,
1930   // and thus rule out the record production in case there is no template
1931   // (this would still leave us with an ambiguity between template function
1932   // and class declarations).
1933   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1934     while (!eof()) {
1935       if (FormatTok->is(tok::l_brace)) {
1936         calculateBraceTypes(/*ExpectClassBody=*/true);
1937         if (!tryToParseBracedList())
1938           break;
1939       }
1940       if (FormatTok->Tok.is(tok::semi))
1941         return;
1942       nextToken();
1943     }
1944   }
1945   if (FormatTok->Tok.is(tok::l_brace)) {
1946     if (ParseAsExpr) {
1947       parseChildBlock();
1948     } else {
1949       if (ShouldBreakBeforeBrace(Style, InitialToken))
1950         addUnwrappedLine();
1951 
1952       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1953                  /*MunchSemi=*/false);
1954     }
1955   }
1956   // There is no addUnwrappedLine() here so that we fall through to parsing a
1957   // structural element afterwards. Thus, in "class A {} n, m;",
1958   // "} n, m;" will end up in one unwrapped line.
1959 }
1960 
1961 void UnwrappedLineParser::parseObjCProtocolList() {
1962   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1963   do
1964     nextToken();
1965   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1966   nextToken(); // Skip '>'.
1967 }
1968 
1969 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1970   do {
1971     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1972       nextToken();
1973       addUnwrappedLine();
1974       break;
1975     }
1976     if (FormatTok->is(tok::l_brace)) {
1977       parseBlock(/*MustBeDeclaration=*/false);
1978       // In ObjC interfaces, nothing should be following the "}".
1979       addUnwrappedLine();
1980     } else if (FormatTok->is(tok::r_brace)) {
1981       // Ignore stray "}". parseStructuralElement doesn't consume them.
1982       nextToken();
1983       addUnwrappedLine();
1984     } else {
1985       parseStructuralElement();
1986     }
1987   } while (!eof());
1988 }
1989 
1990 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1991   nextToken();
1992   nextToken(); // interface name
1993 
1994   // @interface can be followed by either a base class, or a category.
1995   if (FormatTok->Tok.is(tok::colon)) {
1996     nextToken();
1997     nextToken(); // base class name
1998   } else if (FormatTok->Tok.is(tok::l_paren))
1999     // Skip category, if present.
2000     parseParens();
2001 
2002   if (FormatTok->Tok.is(tok::less))
2003     parseObjCProtocolList();
2004 
2005   if (FormatTok->Tok.is(tok::l_brace)) {
2006     if (Style.BraceWrapping.AfterObjCDeclaration)
2007       addUnwrappedLine();
2008     parseBlock(/*MustBeDeclaration=*/true);
2009   }
2010 
2011   // With instance variables, this puts '}' on its own line.  Without instance
2012   // variables, this ends the @interface line.
2013   addUnwrappedLine();
2014 
2015   parseObjCUntilAtEnd();
2016 }
2017 
2018 void UnwrappedLineParser::parseObjCProtocol() {
2019   nextToken();
2020   nextToken(); // protocol name
2021 
2022   if (FormatTok->Tok.is(tok::less))
2023     parseObjCProtocolList();
2024 
2025   // Check for protocol declaration.
2026   if (FormatTok->Tok.is(tok::semi)) {
2027     nextToken();
2028     return addUnwrappedLine();
2029   }
2030 
2031   addUnwrappedLine();
2032   parseObjCUntilAtEnd();
2033 }
2034 
2035 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2036   bool IsImport = FormatTok->is(Keywords.kw_import);
2037   assert(IsImport || FormatTok->is(tok::kw_export));
2038   nextToken();
2039 
2040   // Consume the "default" in "export default class/function".
2041   if (FormatTok->is(tok::kw_default))
2042     nextToken();
2043 
2044   // Consume "async function", "function" and "default function", so that these
2045   // get parsed as free-standing JS functions, i.e. do not require a trailing
2046   // semicolon.
2047   if (FormatTok->is(Keywords.kw_async))
2048     nextToken();
2049   if (FormatTok->is(Keywords.kw_function)) {
2050     nextToken();
2051     return;
2052   }
2053 
2054   // For imports, `export *`, `export {...}`, consume the rest of the line up
2055   // to the terminating `;`. For everything else, just return and continue
2056   // parsing the structural element, i.e. the declaration or expression for
2057   // `export default`.
2058   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2059       !FormatTok->isStringLiteral())
2060     return;
2061 
2062   while (!eof()) {
2063     if (FormatTok->is(tok::semi))
2064       return;
2065     if (Line->Tokens.size() == 0) {
2066       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2067       // import statement should terminate.
2068       return;
2069     }
2070     if (FormatTok->is(tok::l_brace)) {
2071       FormatTok->BlockKind = BK_Block;
2072       parseBracedList();
2073     } else {
2074       nextToken();
2075     }
2076   }
2077 }
2078 
2079 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2080                                                  StringRef Prefix = "") {
2081   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2082                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2083   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2084                                                     E = Line.Tokens.end();
2085        I != E; ++I) {
2086     llvm::dbgs() << I->Tok->Tok.getName() << "["
2087                  << "T=" << I->Tok->Type
2088                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2089   }
2090   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2091                                                     E = Line.Tokens.end();
2092        I != E; ++I) {
2093     const UnwrappedLineNode &Node = *I;
2094     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2095              I = Node.Children.begin(),
2096              E = Node.Children.end();
2097          I != E; ++I) {
2098       printDebugInfo(*I, "\nChild: ");
2099     }
2100   }
2101   llvm::dbgs() << "\n";
2102 }
2103 
2104 void UnwrappedLineParser::addUnwrappedLine() {
2105   if (Line->Tokens.empty())
2106     return;
2107   DEBUG({
2108     if (CurrentLines == &Lines)
2109       printDebugInfo(*Line);
2110   });
2111   CurrentLines->push_back(std::move(*Line));
2112   Line->Tokens.clear();
2113   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2114   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2115     CurrentLines->append(
2116         std::make_move_iterator(PreprocessorDirectives.begin()),
2117         std::make_move_iterator(PreprocessorDirectives.end()));
2118     PreprocessorDirectives.clear();
2119   }
2120 }
2121 
2122 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2123 
2124 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2125   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2126          FormatTok.NewlinesBefore > 0;
2127 }
2128 
2129 // Checks if \p FormatTok is a line comment that continues the line comment
2130 // section on \p Line.
2131 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2132                                         const UnwrappedLine &Line,
2133                                         llvm::Regex &CommentPragmasRegex) {
2134   if (Line.Tokens.empty())
2135     return false;
2136 
2137   StringRef IndentContent = FormatTok.TokenText;
2138   if (FormatTok.TokenText.startswith("//") ||
2139       FormatTok.TokenText.startswith("/*"))
2140     IndentContent = FormatTok.TokenText.substr(2);
2141   if (CommentPragmasRegex.match(IndentContent))
2142     return false;
2143 
2144   // If Line starts with a line comment, then FormatTok continues the comment
2145   // section if its original column is greater or equal to the original start
2146   // column of the line.
2147   //
2148   // Define the min column token of a line as follows: if a line ends in '{' or
2149   // contains a '{' followed by a line comment, then the min column token is
2150   // that '{'. Otherwise, the min column token of the line is the first token of
2151   // the line.
2152   //
2153   // If Line starts with a token other than a line comment, then FormatTok
2154   // continues the comment section if its original column is greater than the
2155   // original start column of the min column token of the line.
2156   //
2157   // For example, the second line comment continues the first in these cases:
2158   //
2159   // // first line
2160   // // second line
2161   //
2162   // and:
2163   //
2164   // // first line
2165   //  // second line
2166   //
2167   // and:
2168   //
2169   // int i; // first line
2170   //  // second line
2171   //
2172   // and:
2173   //
2174   // do { // first line
2175   //      // second line
2176   //   int i;
2177   // } while (true);
2178   //
2179   // and:
2180   //
2181   // enum {
2182   //   a, // first line
2183   //    // second line
2184   //   b
2185   // };
2186   //
2187   // The second line comment doesn't continue the first in these cases:
2188   //
2189   //   // first line
2190   //  // second line
2191   //
2192   // and:
2193   //
2194   // int i; // first line
2195   // // second line
2196   //
2197   // and:
2198   //
2199   // do { // first line
2200   //   // second line
2201   //   int i;
2202   // } while (true);
2203   //
2204   // and:
2205   //
2206   // enum {
2207   //   a, // first line
2208   //   // second line
2209   // };
2210   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2211 
2212   // Scan for '{//'. If found, use the column of '{' as a min column for line
2213   // comment section continuation.
2214   const FormatToken *PreviousToken = nullptr;
2215   for (const UnwrappedLineNode &Node : Line.Tokens) {
2216     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2217         isLineComment(*Node.Tok)) {
2218       MinColumnToken = PreviousToken;
2219       break;
2220     }
2221     PreviousToken = Node.Tok;
2222 
2223     // Grab the last newline preceding a token in this unwrapped line.
2224     if (Node.Tok->NewlinesBefore > 0) {
2225       MinColumnToken = Node.Tok;
2226     }
2227   }
2228   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2229     MinColumnToken = PreviousToken;
2230   }
2231 
2232   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2233                               MinColumnToken);
2234 }
2235 
2236 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2237   bool JustComments = Line->Tokens.empty();
2238   for (SmallVectorImpl<FormatToken *>::const_iterator
2239            I = CommentsBeforeNextToken.begin(),
2240            E = CommentsBeforeNextToken.end();
2241        I != E; ++I) {
2242     // Line comments that belong to the same line comment section are put on the
2243     // same line since later we might want to reflow content between them.
2244     // Additional fine-grained breaking of line comment sections is controlled
2245     // by the class BreakableLineCommentSection in case it is desirable to keep
2246     // several line comment sections in the same unwrapped line.
2247     //
2248     // FIXME: Consider putting separate line comment sections as children to the
2249     // unwrapped line instead.
2250     (*I)->ContinuesLineCommentSection =
2251         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2252     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2253       addUnwrappedLine();
2254     pushToken(*I);
2255   }
2256   if (NewlineBeforeNext && JustComments)
2257     addUnwrappedLine();
2258   CommentsBeforeNextToken.clear();
2259 }
2260 
2261 void UnwrappedLineParser::nextToken() {
2262   if (eof())
2263     return;
2264   flushComments(isOnNewLine(*FormatTok));
2265   pushToken(FormatTok);
2266   if (Style.Language != FormatStyle::LK_JavaScript)
2267     readToken();
2268   else
2269     readTokenWithJavaScriptASI();
2270 }
2271 
2272 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2273   // FIXME: This is a dirty way to access the previous token. Find a better
2274   // solution.
2275   if (!Line || Line->Tokens.empty())
2276     return nullptr;
2277   return Line->Tokens.back().Tok;
2278 }
2279 
2280 void UnwrappedLineParser::distributeComments(
2281     const SmallVectorImpl<FormatToken *> &Comments,
2282     const FormatToken *NextTok) {
2283   // Whether or not a line comment token continues a line is controlled by
2284   // the method continuesLineCommentSection, with the following caveat:
2285   //
2286   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2287   // that each comment line from the trail is aligned with the next token, if
2288   // the next token exists. If a trail exists, the beginning of the maximal
2289   // trail is marked as a start of a new comment section.
2290   //
2291   // For example in this code:
2292   //
2293   // int a; // line about a
2294   //   // line 1 about b
2295   //   // line 2 about b
2296   //   int b;
2297   //
2298   // the two lines about b form a maximal trail, so there are two sections, the
2299   // first one consisting of the single comment "// line about a" and the
2300   // second one consisting of the next two comments.
2301   if (Comments.empty())
2302     return;
2303   bool ShouldPushCommentsInCurrentLine = true;
2304   bool HasTrailAlignedWithNextToken = false;
2305   unsigned StartOfTrailAlignedWithNextToken = 0;
2306   if (NextTok) {
2307     // We are skipping the first element intentionally.
2308     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2309       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2310         HasTrailAlignedWithNextToken = true;
2311         StartOfTrailAlignedWithNextToken = i;
2312       }
2313     }
2314   }
2315   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2316     FormatToken *FormatTok = Comments[i];
2317     if (HasTrailAlignedWithNextToken &&
2318         i == StartOfTrailAlignedWithNextToken) {
2319       FormatTok->ContinuesLineCommentSection = false;
2320     } else {
2321       FormatTok->ContinuesLineCommentSection =
2322           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2323     }
2324     if (!FormatTok->ContinuesLineCommentSection &&
2325         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2326       ShouldPushCommentsInCurrentLine = false;
2327     }
2328     if (ShouldPushCommentsInCurrentLine) {
2329       pushToken(FormatTok);
2330     } else {
2331       CommentsBeforeNextToken.push_back(FormatTok);
2332     }
2333   }
2334 }
2335 
// Reads tokens from the token source until FormatTok is a non-comment token
// that is not being skipped, or until the end of input.
//
// Along the way:
//  - comment tokens are collected and handed to distributeComments();
//  - a '#' that starts a line (outside of a preprocessor directive) triggers
//    parsePPDirective();
//  - conflict marker tokens are routed to the conditionalCompilation*()
//    handlers;
//  - tokens are skipped while the top of PPStack is PP_Unreachable and we are
//    not inside a preprocessor directive.
void UnwrappedLineParser::readToken() {
  // Comments seen since the last non-comment token.
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // Handle any number of consecutive preprocessor directives. The loop
    // re-tests FormatTok, which parsePPDirective() is expected to advance.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      // Scoped object; it is destroyed at the end of each loop iteration,
      // i.e. after parsePPDirective() has returned.
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict markers are not pushed onto a line here; each one is dispatched
    // to the matching conditional-compilation handler and then skipped.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      // The token following a conflict marker must start a new line.
      FormatTok->MustBreakBefore = true;
    }

    // Skip this token while in an unreachable preprocessor branch. Note that
    // 'continue' in a do/while jumps to the '!eof()' check below.
    if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // A non-comment token ends the search; the comments collected so far are
    // distributed relative to it.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // Only reached when the loop condition failed, i.e. at end of input; the
  // remaining comments are distributed without a following token.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
2386 
2387 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2388   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2389   if (MustBreakBeforeNextToken) {
2390     Line->Tokens.back().Tok->MustBreakBefore = true;
2391     MustBreakBeforeNextToken = false;
2392   }
2393 }
2394 
2395 } // end namespace format
2396 } // end namespace clang
2397