1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 static bool isLineComment(const FormatToken &FormatTok) {
59   return FormatTok.is(tok::comment) &&
60          FormatTok.TokenText.startswith("//");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     TokenSource = this;
86     Line.Level = 0;
87     Line.InPPDirective = true;
88   }
89 
90   ~ScopedMacroState() override {
91     TokenSource = PreviousTokenSource;
92     ResetToken = Token;
93     Line.InPPDirective = false;
94     Line.Level = PreviousLineLevel;
95   }
96 
97   FormatToken *getNextToken() override {
98     // The \c UnwrappedLineParser guards against this by never calling
99     // \c getNextToken() after it has encountered the first eof token.
100     assert(!eof());
101     PreviousToken = Token;
102     Token = PreviousTokenSource->getNextToken();
103     if (eof())
104       return getFakeEOF();
105     return Token;
106   }
107 
108   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
109 
110   FormatToken *setPosition(unsigned Position) override {
111     PreviousToken = nullptr;
112     Token = PreviousTokenSource->setPosition(Position);
113     return Token;
114   }
115 
116 private:
117   bool eof() {
118     return Token && Token->HasUnescapedNewline &&
119            !continuesLineComment(*Token, PreviousToken,
120                                  /*MinColumnToken=*/PreviousToken);
121   }
122 
123   FormatToken *getFakeEOF() {
124     static bool EOFInitialized = false;
125     static FormatToken FormatTok;
126     if (!EOFInitialized) {
127       FormatTok.Tok.startToken();
128       FormatTok.Tok.setKind(tok::eof);
129       EOFInitialized = true;
130     }
131     return &FormatTok;
132   }
133 
134   UnwrappedLine &Line;
135   FormatTokenSource *&TokenSource;
136   FormatToken *&ResetToken;
137   unsigned PreviousLineLevel;
138   FormatTokenSource *PreviousTokenSource;
139 
140   FormatToken *Token;
141   FormatToken *PreviousToken;
142 };
143 
144 } // end anonymous namespace
145 
146 class ScopedLineState {
147 public:
148   ScopedLineState(UnwrappedLineParser &Parser,
149                   bool SwitchToPreprocessorLines = false)
150       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
151     if (SwitchToPreprocessorLines)
152       Parser.CurrentLines = &Parser.PreprocessorDirectives;
153     else if (!Parser.Line->Tokens.empty())
154       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
155     PreBlockLine = std::move(Parser.Line);
156     Parser.Line = llvm::make_unique<UnwrappedLine>();
157     Parser.Line->Level = PreBlockLine->Level;
158     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
159   }
160 
161   ~ScopedLineState() {
162     if (!Parser.Line->Tokens.empty()) {
163       Parser.addUnwrappedLine();
164     }
165     assert(Parser.Line->Tokens.empty());
166     Parser.Line = std::move(PreBlockLine);
167     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
168       Parser.MustBreakBeforeNextToken = true;
169     Parser.CurrentLines = OriginalLines;
170   }
171 
172 private:
173   UnwrappedLineParser &Parser;
174 
175   std::unique_ptr<UnwrappedLine> PreBlockLine;
176   SmallVectorImpl<UnwrappedLine> *OriginalLines;
177 };
178 
179 class CompoundStatementIndenter {
180 public:
181   CompoundStatementIndenter(UnwrappedLineParser *Parser,
182                             const FormatStyle &Style, unsigned &LineLevel)
183       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
184     if (Style.BraceWrapping.AfterControlStatement)
185       Parser->addUnwrappedLine();
186     if (Style.BraceWrapping.IndentBraces)
187       ++LineLevel;
188   }
189   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
190 
191 private:
192   unsigned &LineLevel;
193   unsigned OldLineLevel;
194 };
195 
196 namespace {
197 
198 class IndexedTokenSource : public FormatTokenSource {
199 public:
200   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
201       : Tokens(Tokens), Position(-1) {}
202 
203   FormatToken *getNextToken() override {
204     ++Position;
205     return Tokens[Position];
206   }
207 
208   unsigned getPosition() override {
209     assert(Position >= 0);
210     return Position;
211   }
212 
213   FormatToken *setPosition(unsigned P) override {
214     Position = P;
215     return Tokens[Position];
216   }
217 
218   void reset() { Position = -1; }
219 
220 private:
221   ArrayRef<FormatToken *> Tokens;
222   int Position;
223 };
224 
225 } // end anonymous namespace
226 
/// \brief Creates a parser over \p Tokens that reports to \p Callback.
///
/// The parser starts with a fresh empty line, collects into \c Lines, has no
/// token source installed yet (\c Tokens member is null; \c parse() installs
/// one) and starts at preprocessor branch level -1.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
235 
236 void UnwrappedLineParser::reset() {
237   PPBranchLevel = -1;
238   Line.reset(new UnwrappedLine);
239   CommentsBeforeNextToken.clear();
240   FormatTok = nullptr;
241   MustBreakBeforeNextToken = false;
242   PreprocessorDirectives.clear();
243   CurrentLines = &Lines;
244   DeclarationScopeStack.clear();
245   PPStack.clear();
246 }
247 
248 void UnwrappedLineParser::parse() {
249   IndexedTokenSource TokenSource(AllTokens);
250   do {
251     DEBUG(llvm::dbgs() << "----\n");
252     reset();
253     Tokens = &TokenSource;
254     TokenSource.reset();
255 
256     readToken();
257     parseFile();
258     // Create line with eof token.
259     pushToken(FormatTok);
260     addUnwrappedLine();
261 
262     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
263                                                   E = Lines.end();
264          I != E; ++I) {
265       Callback.consumeUnwrappedLine(*I);
266     }
267     Callback.finishRun();
268     Lines.clear();
269     while (!PPLevelBranchIndex.empty() &&
270            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
271       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
272       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
273     }
274     if (!PPLevelBranchIndex.empty()) {
275       ++PPLevelBranchIndex.back();
276       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
277       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
278     }
279   } while (!PPLevelBranchIndex.empty());
280 }
281 
282 void UnwrappedLineParser::parseFile() {
283   // The top-level context in a file always has declarations, except for pre-
284   // processor directives and JavaScript files.
285   bool MustBeDeclaration =
286       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
287   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
288                                           MustBeDeclaration);
289   if (Style.Language == FormatStyle::LK_TextProto)
290     parseBracedList();
291   else
292     parseLevel(/*HasOpeningBrace=*/false);
293   // Make sure to format the remaining tokens.
294   flushComments(true);
295   addUnwrappedLine();
296 }
297 
298 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
299   bool SwitchLabelEncountered = false;
300   do {
301     tok::TokenKind kind = FormatTok->Tok.getKind();
302     if (FormatTok->Type == TT_MacroBlockBegin) {
303       kind = tok::l_brace;
304     } else if (FormatTok->Type == TT_MacroBlockEnd) {
305       kind = tok::r_brace;
306     }
307 
308     switch (kind) {
309     case tok::comment:
310       nextToken();
311       addUnwrappedLine();
312       break;
313     case tok::l_brace:
314       // FIXME: Add parameter whether this can happen - if this happens, we must
315       // be in a non-declaration context.
316       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
317         continue;
318       parseBlock(/*MustBeDeclaration=*/false);
319       addUnwrappedLine();
320       break;
321     case tok::r_brace:
322       if (HasOpeningBrace)
323         return;
324       nextToken();
325       addUnwrappedLine();
326       break;
327     case tok::kw_default:
328     case tok::kw_case:
329       if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) {
330         // A 'case: string' style field declaration.
331         parseStructuralElement();
332         break;
333       }
334       if (!SwitchLabelEncountered &&
335           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
336         ++Line->Level;
337       SwitchLabelEncountered = true;
338       parseStructuralElement();
339       break;
340     default:
341       parseStructuralElement();
342       break;
343     }
344   } while (!eof());
345 }
346 
347 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
348   // We'll parse forward through the tokens until we hit
349   // a closing brace or eof - note that getNextToken() will
350   // parse macros, so this will magically work inside macro
351   // definitions, too.
352   unsigned StoredPosition = Tokens->getPosition();
353   FormatToken *Tok = FormatTok;
354   const FormatToken *PrevTok = getPreviousToken();
355   // Keep a stack of positions of lbrace tokens. We will
356   // update information about whether an lbrace starts a
357   // braced init list or a different block during the loop.
358   SmallVector<FormatToken *, 8> LBraceStack;
359   assert(Tok->Tok.is(tok::l_brace));
360   do {
361     // Get next non-comment token.
362     FormatToken *NextTok;
363     unsigned ReadTokens = 0;
364     do {
365       NextTok = Tokens->getNextToken();
366       ++ReadTokens;
367     } while (NextTok->is(tok::comment));
368 
369     switch (Tok->Tok.getKind()) {
370     case tok::l_brace:
371       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
372         if (PrevTok->is(tok::colon))
373           // A colon indicates this code is in a type, or a braced list
374           // following a label in an object literal ({a: {b: 1}}). The code
375           // below could be confused by semicolons between the individual
376           // members in a type member list, which would normally trigger
377           // BK_Block. In both cases, this must be parsed as an inline braced
378           // init.
379           Tok->BlockKind = BK_BracedInit;
380         else if (PrevTok->is(tok::r_paren))
381           // `) { }` can only occur in function or method declarations in JS.
382           Tok->BlockKind = BK_Block;
383       } else {
384         Tok->BlockKind = BK_Unknown;
385       }
386       LBraceStack.push_back(Tok);
387       break;
388     case tok::r_brace:
389       if (LBraceStack.empty())
390         break;
391       if (LBraceStack.back()->BlockKind == BK_Unknown) {
392         bool ProbablyBracedList = false;
393         if (Style.Language == FormatStyle::LK_Proto) {
394           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
395         } else {
396           // Using OriginalColumn to distinguish between ObjC methods and
397           // binary operators is a bit hacky.
398           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
399                                   NextTok->OriginalColumn == 0;
400 
401           // If there is a comma, semicolon or right paren after the closing
402           // brace, we assume this is a braced initializer list.  Note that
403           // regardless how we mark inner braces here, we will overwrite the
404           // BlockKind later if we parse a braced list (where all blocks
405           // inside are by default braced lists), or when we explicitly detect
406           // blocks (for example while parsing lambdas).
407           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
408           // braced list in JS.
409           ProbablyBracedList =
410               (Style.Language == FormatStyle::LK_JavaScript &&
411                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
412                                 Keywords.kw_as)) ||
413               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
414               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
415                                tok::r_paren, tok::r_square, tok::l_brace,
416                                tok::l_square, tok::ellipsis) ||
417               (NextTok->is(tok::identifier) &&
418                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
419               (NextTok->is(tok::semi) &&
420                (!ExpectClassBody || LBraceStack.size() != 1)) ||
421               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
422         }
423         if (ProbablyBracedList) {
424           Tok->BlockKind = BK_BracedInit;
425           LBraceStack.back()->BlockKind = BK_BracedInit;
426         } else {
427           Tok->BlockKind = BK_Block;
428           LBraceStack.back()->BlockKind = BK_Block;
429         }
430       }
431       LBraceStack.pop_back();
432       break;
433     case tok::at:
434     case tok::semi:
435     case tok::kw_if:
436     case tok::kw_while:
437     case tok::kw_for:
438     case tok::kw_switch:
439     case tok::kw_try:
440     case tok::kw___try:
441       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
442         LBraceStack.back()->BlockKind = BK_Block;
443       break;
444     default:
445       break;
446     }
447     PrevTok = Tok;
448     Tok = NextTok;
449   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
450 
451   // Assume other blocks for all unclosed opening braces.
452   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
453     if (LBraceStack[i]->BlockKind == BK_Unknown)
454       LBraceStack[i]->BlockKind = BK_Block;
455   }
456 
457   FormatTok = Tokens->setPosition(StoredPosition);
458 }
459 
/// \brief Mixes the \c std::hash value of \p v into \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>()(v);
  seed ^= ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
465 
466 size_t UnwrappedLineParser::computePPHash() const {
467   size_t h = 0;
468   for (const auto &i : PPStack) {
469     hash_combine(h, size_t(i.Kind));
470     hash_combine(h, i.Line);
471   }
472   return h;
473 }
474 
475 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
476                                      bool MunchSemi) {
477   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
478          "'{' or macro block token expected");
479   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
480   FormatTok->BlockKind = BK_Block;
481 
482   size_t PPStartHash = computePPHash();
483 
484   unsigned InitialLevel = Line->Level;
485   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
486 
487   if (MacroBlock && FormatTok->is(tok::l_paren))
488     parseParens();
489 
490   size_t NbPreprocessorDirectives =
491       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
492   addUnwrappedLine();
493   size_t OpeningLineIndex =
494       CurrentLines->empty()
495           ? (UnwrappedLine::kInvalidIndex)
496           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
497 
498   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
499                                           MustBeDeclaration);
500   if (AddLevel)
501     ++Line->Level;
502   parseLevel(/*HasOpeningBrace=*/true);
503 
504   if (eof())
505     return;
506 
507   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
508                  : !FormatTok->is(tok::r_brace)) {
509     Line->Level = InitialLevel;
510     FormatTok->BlockKind = BK_Block;
511     return;
512   }
513 
514   size_t PPEndHash = computePPHash();
515 
516   // Munch the closing brace.
517   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
518 
519   if (MacroBlock && FormatTok->is(tok::l_paren))
520     parseParens();
521 
522   if (MunchSemi && FormatTok->Tok.is(tok::semi))
523     nextToken();
524   Line->Level = InitialLevel;
525 
526   if (PPStartHash == PPEndHash) {
527     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
528     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
529       // Update the opening line to add the forward reference as well
530       (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
531           CurrentLines->size() - 1;
532     }
533   }
534 }
535 
536 static bool isGoogScope(const UnwrappedLine &Line) {
537   // FIXME: Closure-library specific stuff should not be hard-coded but be
538   // configurable.
539   if (Line.Tokens.size() < 4)
540     return false;
541   auto I = Line.Tokens.begin();
542   if (I->Tok->TokenText != "goog")
543     return false;
544   ++I;
545   if (I->Tok->isNot(tok::period))
546     return false;
547   ++I;
548   if (I->Tok->TokenText != "scope")
549     return false;
550   ++I;
551   return I->Tok->is(tok::l_paren);
552 }
553 
554 static bool isIIFE(const UnwrappedLine &Line,
555                    const AdditionalKeywords &Keywords) {
556   // Look for the start of an immediately invoked anonymous function.
557   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
558   // This is commonly done in JavaScript to create a new, anonymous scope.
559   // Example: (function() { ... })()
560   if (Line.Tokens.size() < 3)
561     return false;
562   auto I = Line.Tokens.begin();
563   if (I->Tok->isNot(tok::l_paren))
564     return false;
565   ++I;
566   if (I->Tok->isNot(Keywords.kw_function))
567     return false;
568   ++I;
569   return I->Tok->is(tok::l_paren);
570 }
571 
572 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
573                                    const FormatToken &InitialToken) {
574   if (InitialToken.is(tok::kw_namespace))
575     return Style.BraceWrapping.AfterNamespace;
576   if (InitialToken.is(tok::kw_class))
577     return Style.BraceWrapping.AfterClass;
578   if (InitialToken.is(tok::kw_union))
579     return Style.BraceWrapping.AfterUnion;
580   if (InitialToken.is(tok::kw_struct))
581     return Style.BraceWrapping.AfterStruct;
582   return false;
583 }
584 
585 void UnwrappedLineParser::parseChildBlock() {
586   FormatTok->BlockKind = BK_Block;
587   nextToken();
588   {
589     bool SkipIndent =
590         (Style.Language == FormatStyle::LK_JavaScript &&
591          (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
592     ScopedLineState LineState(*this);
593     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
594                                             /*MustBeDeclaration=*/false);
595     Line->Level += SkipIndent ? 0 : 1;
596     parseLevel(/*HasOpeningBrace=*/true);
597     flushComments(isOnNewLine(*FormatTok));
598     Line->Level -= SkipIndent ? 0 : 1;
599   }
600   nextToken();
601 }
602 
603 void UnwrappedLineParser::parsePPDirective() {
604   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
605   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
606   nextToken();
607 
608   if (!FormatTok->Tok.getIdentifierInfo()) {
609     parsePPUnknown();
610     return;
611   }
612 
613   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
614   case tok::pp_define:
615     parsePPDefine();
616     return;
617   case tok::pp_if:
618     parsePPIf(/*IfDef=*/false);
619     break;
620   case tok::pp_ifdef:
621   case tok::pp_ifndef:
622     parsePPIf(/*IfDef=*/true);
623     break;
624   case tok::pp_else:
625     parsePPElse();
626     break;
627   case tok::pp_elif:
628     parsePPElIf();
629     break;
630   case tok::pp_endif:
631     parsePPEndIf();
632     break;
633   default:
634     parsePPUnknown();
635     break;
636   }
637 }
638 
639 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
640   size_t Line = CurrentLines->size();
641   if (CurrentLines == &PreprocessorDirectives)
642     Line += Lines.size();
643 
644   if (Unreachable ||
645       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
646     PPStack.push_back({PP_Unreachable, Line});
647   else
648     PPStack.push_back({PP_Conditional, Line});
649 }
650 
651 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
652   ++PPBranchLevel;
653   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
654   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
655     PPLevelBranchIndex.push_back(0);
656     PPLevelBranchCount.push_back(0);
657   }
658   PPChainBranchIndex.push(0);
659   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
660   conditionalCompilationCondition(Unreachable || Skip);
661 }
662 
663 void UnwrappedLineParser::conditionalCompilationAlternative() {
664   if (!PPStack.empty())
665     PPStack.pop_back();
666   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
667   if (!PPChainBranchIndex.empty())
668     ++PPChainBranchIndex.top();
669   conditionalCompilationCondition(
670       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
671       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
672 }
673 
674 void UnwrappedLineParser::conditionalCompilationEnd() {
675   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
676   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
677     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
678       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
679     }
680   }
681   // Guard against #endif's without #if.
682   if (PPBranchLevel > 0)
683     --PPBranchLevel;
684   if (!PPChainBranchIndex.empty())
685     PPChainBranchIndex.pop();
686   if (!PPStack.empty())
687     PPStack.pop_back();
688 }
689 
690 void UnwrappedLineParser::parsePPIf(bool IfDef) {
691   bool IfNDef = FormatTok->is(tok::pp_ifndef);
692   nextToken();
693   bool Unreachable = false;
694   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
695     Unreachable = true;
696   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
697     Unreachable = true;
698   conditionalCompilationStart(Unreachable);
699   parsePPUnknown();
700 }
701 
/// \brief Parses an \c #else directive: switches to the next branch of the
/// current conditional chain, then consumes the rest of the directive.
void UnwrappedLineParser::parsePPElse() {
  conditionalCompilationAlternative();
  parsePPUnknown();
}
706 
707 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
708 
/// \brief Parses an \c #endif directive: closes the current conditional
/// chain, then consumes the rest of the directive.
void UnwrappedLineParser::parsePPEndIf() {
  conditionalCompilationEnd();
  parsePPUnknown();
}
713 
714 void UnwrappedLineParser::parsePPDefine() {
715   nextToken();
716 
717   if (FormatTok->Tok.getKind() != tok::identifier) {
718     parsePPUnknown();
719     return;
720   }
721   nextToken();
722   if (FormatTok->Tok.getKind() == tok::l_paren &&
723       FormatTok->WhitespaceRange.getBegin() ==
724           FormatTok->WhitespaceRange.getEnd()) {
725     parseParens();
726   }
727   addUnwrappedLine();
728   Line->Level = 1;
729 
730   // Errors during a preprocessor directive can only affect the layout of the
731   // preprocessor directive, and thus we ignore them. An alternative approach
732   // would be to use the same approach we use on the file level (no
733   // re-indentation if there was a structural error) within the macro
734   // definition.
735   parseFile();
736 }
737 
738 void UnwrappedLineParser::parsePPUnknown() {
739   do {
740     nextToken();
741   } while (!eof());
742   addUnwrappedLine();
743 }
744 
745 // Here we blacklist certain tokens that are not usually the first token in an
746 // unwrapped line. This is used in attempt to distinguish macro calls without
747 // trailing semicolons from other constructs split to several lines.
748 static bool tokenCanStartNewLine(const clang::Token &Tok) {
749   // Semicolon can be a null-statement, l_square can be a start of a macro or
750   // a C++11 attribute, but this doesn't seem to be common.
751   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
752          Tok.isNot(tok::l_square) &&
753          // Tokens that can only be used as binary operators and a part of
754          // overloaded operator names.
755          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
756          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
757          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
758          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
759          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
760          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
761          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
762          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
763          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
764          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
765          Tok.isNot(tok::lesslessequal) &&
766          // Colon is used in labels, base class lists, initializer lists,
767          // range-based for loops, ternary operator, but should never be the
768          // first token in an unwrapped line.
769          Tok.isNot(tok::colon) &&
770          // 'noexcept' is a trailing annotation.
771          Tok.isNot(tok::kw_noexcept);
772 }
773 
774 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
775                           const FormatToken *FormatTok) {
776   // FIXME: This returns true for C/C++ keywords like 'struct'.
777   return FormatTok->is(tok::identifier) &&
778          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
779           !FormatTok->isOneOf(
780               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
781               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
782               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
783               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
784               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
785               Keywords.kw_instanceof, Keywords.kw_interface,
786               Keywords.kw_throws, Keywords.kw_from));
787 }
788 
789 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
790                                  const FormatToken *FormatTok) {
791   return FormatTok->Tok.isLiteral() ||
792          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
793          mustBeJSIdent(Keywords, FormatTok);
794 }
795 
796 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
797 // when encountered after a value (see mustBeJSIdentOrValue).
798 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
799                            const FormatToken *FormatTok) {
800   return FormatTok->isOneOf(
801       tok::kw_return, Keywords.kw_yield,
802       // conditionals
803       tok::kw_if, tok::kw_else,
804       // loops
805       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
806       // switch/case
807       tok::kw_switch, tok::kw_case,
808       // exceptions
809       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
810       // declaration
811       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
812       Keywords.kw_async, Keywords.kw_function,
813       // import/export
814       Keywords.kw_import, tok::kw_export);
815 }
816 
817 // readTokenWithJavaScriptASI reads the next token and terminates the current
818 // line if JavaScript Automatic Semicolon Insertion must
819 // happen between the current token and the next token.
820 //
821 // This method is conservative - it cannot cover all edge cases of JavaScript,
822 // but only aims to correctly handle certain well known cases. It *must not*
823 // return true in speculative cases.
824 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
825   FormatToken *Previous = FormatTok;
826   readToken();
827   FormatToken *Next = FormatTok;
828 
829   bool IsOnSameLine =
830       CommentsBeforeNextToken.empty()
831           ? Next->NewlinesBefore == 0
832           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
833   if (IsOnSameLine)
834     return;
835 
836   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
837   bool PreviousStartsTemplateExpr =
838       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
839   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
840     // If the token before the previous one is an '@', the previous token is an
841     // annotation and can precede another identifier/value.
842     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
843     if (PrePrevious->is(tok::at))
844       return;
845   }
846   if (Next->is(tok::exclaim) && PreviousMustBeValue)
847     return addUnwrappedLine();
848   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
849   bool NextEndsTemplateExpr =
850       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
851   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
852       (PreviousMustBeValue ||
853        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
854                          tok::minusminus)))
855     return addUnwrappedLine();
856   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
857       isJSDeclOrStmt(Keywords, Next))
858     return addUnwrappedLine();
859 }
860 
861 void UnwrappedLineParser::parseStructuralElement() {
862   assert(!FormatTok->is(tok::l_brace));
863   if (Style.Language == FormatStyle::LK_TableGen &&
864       FormatTok->is(tok::pp_include)) {
865     nextToken();
866     if (FormatTok->is(tok::string_literal))
867       nextToken();
868     addUnwrappedLine();
869     return;
870   }
871   switch (FormatTok->Tok.getKind()) {
872   case tok::at:
873     nextToken();
874     if (FormatTok->Tok.is(tok::l_brace)) {
875       nextToken();
876       parseBracedList();
877       break;
878     }
879     switch (FormatTok->Tok.getObjCKeywordID()) {
880     case tok::objc_public:
881     case tok::objc_protected:
882     case tok::objc_package:
883     case tok::objc_private:
884       return parseAccessSpecifier();
885     case tok::objc_interface:
886     case tok::objc_implementation:
887       return parseObjCInterfaceOrImplementation();
888     case tok::objc_protocol:
889       return parseObjCProtocol();
890     case tok::objc_end:
891       return; // Handled by the caller.
892     case tok::objc_optional:
893     case tok::objc_required:
894       nextToken();
895       addUnwrappedLine();
896       return;
897     case tok::objc_autoreleasepool:
898       nextToken();
899       if (FormatTok->Tok.is(tok::l_brace)) {
900         if (Style.BraceWrapping.AfterObjCDeclaration)
901           addUnwrappedLine();
902         parseBlock(/*MustBeDeclaration=*/false);
903       }
904       addUnwrappedLine();
905       return;
906     case tok::objc_try:
907       // This branch isn't strictly necessary (the kw_try case below would
908       // do this too after the tok::at is parsed above).  But be explicit.
909       parseTryCatch();
910       return;
911     default:
912       break;
913     }
914     break;
915   case tok::kw_asm:
916     nextToken();
917     if (FormatTok->is(tok::l_brace)) {
918       FormatTok->Type = TT_InlineASMBrace;
919       nextToken();
920       while (FormatTok && FormatTok->isNot(tok::eof)) {
921         if (FormatTok->is(tok::r_brace)) {
922           FormatTok->Type = TT_InlineASMBrace;
923           nextToken();
924           addUnwrappedLine();
925           break;
926         }
927         FormatTok->Finalized = true;
928         nextToken();
929       }
930     }
931     break;
932   case tok::kw_namespace:
933     parseNamespace();
934     return;
935   case tok::kw_inline:
936     nextToken();
937     if (FormatTok->Tok.is(tok::kw_namespace)) {
938       parseNamespace();
939       return;
940     }
941     break;
942   case tok::kw_public:
943   case tok::kw_protected:
944   case tok::kw_private:
945     if (Style.Language == FormatStyle::LK_Java ||
946         Style.Language == FormatStyle::LK_JavaScript)
947       nextToken();
948     else
949       parseAccessSpecifier();
950     return;
951   case tok::kw_if:
952     parseIfThenElse();
953     return;
954   case tok::kw_for:
955   case tok::kw_while:
956     parseForOrWhileLoop();
957     return;
958   case tok::kw_do:
959     parseDoWhile();
960     return;
961   case tok::kw_switch:
962     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
963       // 'switch: string' field declaration.
964       break;
965     parseSwitch();
966     return;
967   case tok::kw_default:
968     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
969       // 'default: string' field declaration.
970       break;
971     nextToken();
972     parseLabel();
973     return;
974   case tok::kw_case:
975     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
976       // 'case: string' field declaration.
977       break;
978     parseCaseLabel();
979     return;
980   case tok::kw_try:
981   case tok::kw___try:
982     parseTryCatch();
983     return;
984   case tok::kw_extern:
985     nextToken();
986     if (FormatTok->Tok.is(tok::string_literal)) {
987       nextToken();
988       if (FormatTok->Tok.is(tok::l_brace)) {
989         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
990         addUnwrappedLine();
991         return;
992       }
993     }
994     break;
995   case tok::kw_export:
996     if (Style.Language == FormatStyle::LK_JavaScript) {
997       parseJavaScriptEs6ImportExport();
998       return;
999     }
1000     break;
1001   case tok::identifier:
1002     if (FormatTok->is(TT_ForEachMacro)) {
1003       parseForOrWhileLoop();
1004       return;
1005     }
1006     if (FormatTok->is(TT_MacroBlockBegin)) {
1007       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1008                  /*MunchSemi=*/false);
1009       return;
1010     }
1011     if (FormatTok->is(Keywords.kw_import)) {
1012       if (Style.Language == FormatStyle::LK_JavaScript) {
1013         parseJavaScriptEs6ImportExport();
1014         return;
1015       }
1016       if (Style.Language == FormatStyle::LK_Proto) {
1017         nextToken();
1018         if (FormatTok->is(tok::kw_public))
1019           nextToken();
1020         if (!FormatTok->is(tok::string_literal))
1021           return;
1022         nextToken();
1023         if (FormatTok->is(tok::semi))
1024           nextToken();
1025         addUnwrappedLine();
1026         return;
1027       }
1028     }
1029     if (Style.isCpp() &&
1030         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1031                            Keywords.kw_slots, Keywords.kw_qslots)) {
1032       nextToken();
1033       if (FormatTok->is(tok::colon)) {
1034         nextToken();
1035         addUnwrappedLine();
1036         return;
1037       }
1038     }
1039     // In all other cases, parse the declaration.
1040     break;
1041   default:
1042     break;
1043   }
1044   do {
1045     const FormatToken *Previous = getPreviousToken();
1046     switch (FormatTok->Tok.getKind()) {
1047     case tok::at:
1048       nextToken();
1049       if (FormatTok->Tok.is(tok::l_brace)) {
1050         nextToken();
1051         parseBracedList();
1052       }
1053       break;
1054     case tok::kw_enum:
1055       // Ignore if this is part of "template <enum ...".
1056       if (Previous && Previous->is(tok::less)) {
1057         nextToken();
1058         break;
1059       }
1060 
1061       // parseEnum falls through and does not yet add an unwrapped line as an
1062       // enum definition can start a structural element.
1063       if (!parseEnum())
1064         break;
1065       // This only applies for C++.
1066       if (!Style.isCpp()) {
1067         addUnwrappedLine();
1068         return;
1069       }
1070       break;
1071     case tok::kw_typedef:
1072       nextToken();
1073       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1074                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
1075         parseEnum();
1076       break;
1077     case tok::kw_struct:
1078     case tok::kw_union:
1079     case tok::kw_class:
1080       // parseRecord falls through and does not yet add an unwrapped line as a
1081       // record declaration or definition can start a structural element.
1082       parseRecord();
1083       // This does not apply for Java and JavaScript.
1084       if (Style.Language == FormatStyle::LK_Java ||
1085           Style.Language == FormatStyle::LK_JavaScript) {
1086         if (FormatTok->is(tok::semi))
1087           nextToken();
1088         addUnwrappedLine();
1089         return;
1090       }
1091       break;
1092     case tok::period:
1093       nextToken();
1094       // In Java, classes have an implicit static member "class".
1095       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1096           FormatTok->is(tok::kw_class))
1097         nextToken();
1098       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1099           FormatTok->Tok.getIdentifierInfo())
1100         // JavaScript only has pseudo keywords, all keywords are allowed to
1101         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1102         nextToken();
1103       break;
1104     case tok::semi:
1105       nextToken();
1106       addUnwrappedLine();
1107       return;
1108     case tok::r_brace:
1109       addUnwrappedLine();
1110       return;
1111     case tok::l_paren:
1112       parseParens();
1113       break;
1114     case tok::kw_operator:
1115       nextToken();
1116       if (FormatTok->isBinaryOperator())
1117         nextToken();
1118       break;
1119     case tok::caret:
1120       nextToken();
1121       if (FormatTok->Tok.isAnyIdentifier() ||
1122           FormatTok->isSimpleTypeSpecifier())
1123         nextToken();
1124       if (FormatTok->is(tok::l_paren))
1125         parseParens();
1126       if (FormatTok->is(tok::l_brace))
1127         parseChildBlock();
1128       break;
1129     case tok::l_brace:
1130       if (!tryToParseBracedList()) {
1131         // A block outside of parentheses must be the last part of a
1132         // structural element.
1133         // FIXME: Figure out cases where this is not true, and add projections
1134         // for them (the one we know is missing are lambdas).
1135         if (Style.BraceWrapping.AfterFunction)
1136           addUnwrappedLine();
1137         FormatTok->Type = TT_FunctionLBrace;
1138         parseBlock(/*MustBeDeclaration=*/false);
1139         addUnwrappedLine();
1140         return;
1141       }
1142       // Otherwise this was a braced init list, and the structural
1143       // element continues.
1144       break;
1145     case tok::kw_try:
1146       // We arrive here when parsing function-try blocks.
1147       parseTryCatch();
1148       return;
1149     case tok::identifier: {
1150       if (FormatTok->is(TT_MacroBlockEnd)) {
1151         addUnwrappedLine();
1152         return;
1153       }
1154 
1155       // Function declarations (as opposed to function expressions) are parsed
1156       // on their own unwrapped line by continuing this loop. Function
1157       // expressions (functions that are not on their own line) must not create
1158       // a new unwrapped line, so they are special cased below.
1159       size_t TokenCount = Line->Tokens.size();
1160       if (Style.Language == FormatStyle::LK_JavaScript &&
1161           FormatTok->is(Keywords.kw_function) &&
1162           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1163                                                      Keywords.kw_async)))) {
1164         tryToParseJSFunction();
1165         break;
1166       }
1167       if ((Style.Language == FormatStyle::LK_JavaScript ||
1168            Style.Language == FormatStyle::LK_Java) &&
1169           FormatTok->is(Keywords.kw_interface)) {
1170         if (Style.Language == FormatStyle::LK_JavaScript) {
1171           // In JavaScript/TypeScript, "interface" can be used as a standalone
1172           // identifier, e.g. in `var interface = 1;`. If "interface" is
1173           // followed by another identifier, it is very like to be an actual
1174           // interface declaration.
1175           unsigned StoredPosition = Tokens->getPosition();
1176           FormatToken *Next = Tokens->getNextToken();
1177           FormatTok = Tokens->setPosition(StoredPosition);
1178           if (Next && !mustBeJSIdent(Keywords, Next)) {
1179             nextToken();
1180             break;
1181           }
1182         }
1183         parseRecord();
1184         addUnwrappedLine();
1185         return;
1186       }
1187 
1188       // See if the following token should start a new unwrapped line.
1189       StringRef Text = FormatTok->TokenText;
1190       nextToken();
1191       if (Line->Tokens.size() == 1 &&
1192           // JS doesn't have macros, and within classes colons indicate fields,
1193           // not labels.
1194           Style.Language != FormatStyle::LK_JavaScript) {
1195         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1196           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1197           parseLabel();
1198           return;
1199         }
1200         // Recognize function-like macro usages without trailing semicolon as
1201         // well as free-standing macros like Q_OBJECT.
1202         bool FunctionLike = FormatTok->is(tok::l_paren);
1203         if (FunctionLike)
1204           parseParens();
1205 
1206         bool FollowedByNewline =
1207             CommentsBeforeNextToken.empty()
1208                 ? FormatTok->NewlinesBefore > 0
1209                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1210 
1211         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1212             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1213           addUnwrappedLine();
1214           return;
1215         }
1216       }
1217       break;
1218     }
1219     case tok::equal:
1220       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1221       // TT_JsFatArrow. The always start an expression or a child block if
1222       // followed by a curly.
1223       if (FormatTok->is(TT_JsFatArrow)) {
1224         nextToken();
1225         if (FormatTok->is(tok::l_brace))
1226           parseChildBlock();
1227         break;
1228       }
1229 
1230       nextToken();
1231       if (FormatTok->Tok.is(tok::l_brace)) {
1232         nextToken();
1233         parseBracedList();
1234       } else if (Style.Language == FormatStyle::LK_Proto &&
1235                FormatTok->Tok.is(tok::less)) {
1236         nextToken();
1237         parseBracedList(/*ContinueOnSemicolons=*/false,
1238                         /*ClosingBraceKind=*/tok::greater);
1239       }
1240       break;
1241     case tok::l_square:
1242       parseSquare();
1243       break;
1244     case tok::kw_new:
1245       parseNew();
1246       break;
1247     default:
1248       nextToken();
1249       break;
1250     }
1251   } while (!eof());
1252 }
1253 
1254 bool UnwrappedLineParser::tryToParseLambda() {
1255   if (!Style.isCpp()) {
1256     nextToken();
1257     return false;
1258   }
1259   const FormatToken* Previous = getPreviousToken();
1260   if (Previous &&
1261       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1262                          tok::kw_delete) ||
1263        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1264     nextToken();
1265     return false;
1266   }
1267   assert(FormatTok->is(tok::l_square));
1268   FormatToken &LSquare = *FormatTok;
1269   if (!tryToParseLambdaIntroducer())
1270     return false;
1271 
1272   while (FormatTok->isNot(tok::l_brace)) {
1273     if (FormatTok->isSimpleTypeSpecifier()) {
1274       nextToken();
1275       continue;
1276     }
1277     switch (FormatTok->Tok.getKind()) {
1278     case tok::l_brace:
1279       break;
1280     case tok::l_paren:
1281       parseParens();
1282       break;
1283     case tok::amp:
1284     case tok::star:
1285     case tok::kw_const:
1286     case tok::comma:
1287     case tok::less:
1288     case tok::greater:
1289     case tok::identifier:
1290     case tok::numeric_constant:
1291     case tok::coloncolon:
1292     case tok::kw_mutable:
1293       nextToken();
1294       break;
1295     case tok::arrow:
1296       FormatTok->Type = TT_LambdaArrow;
1297       nextToken();
1298       break;
1299     default:
1300       return true;
1301     }
1302   }
1303   LSquare.Type = TT_LambdaLSquare;
1304   parseChildBlock();
1305   return true;
1306 }
1307 
1308 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1309   nextToken();
1310   if (FormatTok->is(tok::equal)) {
1311     nextToken();
1312     if (FormatTok->is(tok::r_square)) {
1313       nextToken();
1314       return true;
1315     }
1316     if (FormatTok->isNot(tok::comma))
1317       return false;
1318     nextToken();
1319   } else if (FormatTok->is(tok::amp)) {
1320     nextToken();
1321     if (FormatTok->is(tok::r_square)) {
1322       nextToken();
1323       return true;
1324     }
1325     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1326       return false;
1327     }
1328     if (FormatTok->is(tok::comma))
1329       nextToken();
1330   } else if (FormatTok->is(tok::r_square)) {
1331     nextToken();
1332     return true;
1333   }
1334   do {
1335     if (FormatTok->is(tok::amp))
1336       nextToken();
1337     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1338       return false;
1339     nextToken();
1340     if (FormatTok->is(tok::ellipsis))
1341       nextToken();
1342     if (FormatTok->is(tok::comma)) {
1343       nextToken();
1344     } else if (FormatTok->is(tok::r_square)) {
1345       nextToken();
1346       return true;
1347     } else {
1348       return false;
1349     }
1350   } while (!eof());
1351   return false;
1352 }
1353 
1354 void UnwrappedLineParser::tryToParseJSFunction() {
1355   assert(FormatTok->is(Keywords.kw_function) ||
1356          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1357   if (FormatTok->is(Keywords.kw_async))
1358     nextToken();
1359   // Consume "function".
1360   nextToken();
1361 
1362   // Consume * (generator function). Treat it like C++'s overloaded operators.
1363   if (FormatTok->is(tok::star)) {
1364     FormatTok->Type = TT_OverloadedOperator;
1365     nextToken();
1366   }
1367 
1368   // Consume function name.
1369   if (FormatTok->is(tok::identifier))
1370     nextToken();
1371 
1372   if (FormatTok->isNot(tok::l_paren))
1373     return;
1374 
1375   // Parse formal parameter list.
1376   parseParens();
1377 
1378   if (FormatTok->is(tok::colon)) {
1379     // Parse a type definition.
1380     nextToken();
1381 
1382     // Eat the type declaration. For braced inline object types, balance braces,
1383     // otherwise just parse until finding an l_brace for the function body.
1384     if (FormatTok->is(tok::l_brace))
1385       tryToParseBracedList();
1386     else
1387       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1388         nextToken();
1389   }
1390 
1391   if (FormatTok->is(tok::semi))
1392     return;
1393 
1394   parseChildBlock();
1395 }
1396 
1397 bool UnwrappedLineParser::tryToParseBracedList() {
1398   if (FormatTok->BlockKind == BK_Unknown)
1399     calculateBraceTypes();
1400   assert(FormatTok->BlockKind != BK_Unknown);
1401   if (FormatTok->BlockKind == BK_Block)
1402     return false;
1403   nextToken();
1404   parseBracedList();
1405   return true;
1406 }
1407 
1408 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1409                                           tok::TokenKind ClosingBraceKind) {
1410   bool HasError = false;
1411 
1412   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1413   // replace this by using parseAssigmentExpression() inside.
1414   do {
1415     if (Style.Language == FormatStyle::LK_JavaScript) {
1416       if (FormatTok->is(Keywords.kw_function) ||
1417           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1418         tryToParseJSFunction();
1419         continue;
1420       }
1421       if (FormatTok->is(TT_JsFatArrow)) {
1422         nextToken();
1423         // Fat arrows can be followed by simple expressions or by child blocks
1424         // in curly braces.
1425         if (FormatTok->is(tok::l_brace)) {
1426           parseChildBlock();
1427           continue;
1428         }
1429       }
1430       if (FormatTok->is(tok::l_brace)) {
1431         // Could be a method inside of a braced list `{a() { return 1; }}`.
1432         if (tryToParseBracedList())
1433           continue;
1434         parseChildBlock();
1435       }
1436     }
1437     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1438       nextToken();
1439       return !HasError;
1440     }
1441     switch (FormatTok->Tok.getKind()) {
1442     case tok::caret:
1443       nextToken();
1444       if (FormatTok->is(tok::l_brace)) {
1445         parseChildBlock();
1446       }
1447       break;
1448     case tok::l_square:
1449       tryToParseLambda();
1450       break;
1451     case tok::l_paren:
1452       parseParens();
1453       // JavaScript can just have free standing methods and getters/setters in
1454       // object literals. Detect them by a "{" following ")".
1455       if (Style.Language == FormatStyle::LK_JavaScript) {
1456         if (FormatTok->is(tok::l_brace))
1457           parseChildBlock();
1458         break;
1459       }
1460       break;
1461     case tok::l_brace:
1462       // Assume there are no blocks inside a braced init list apart
1463       // from the ones we explicitly parse out (like lambdas).
1464       FormatTok->BlockKind = BK_BracedInit;
1465       nextToken();
1466       parseBracedList();
1467       break;
1468     case tok::less:
1469       if (Style.Language == FormatStyle::LK_Proto) {
1470         nextToken();
1471         parseBracedList(/*ContinueOnSemicolons=*/false,
1472                         /*ClosingBraceKind=*/tok::greater);
1473       } else {
1474         nextToken();
1475       }
1476       break;
1477     case tok::semi:
1478       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1479       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1480       // used for error recovery if we have otherwise determined that this is
1481       // a braced list.
1482       if (Style.Language == FormatStyle::LK_JavaScript) {
1483         nextToken();
1484         break;
1485       }
1486       HasError = true;
1487       if (!ContinueOnSemicolons)
1488         return !HasError;
1489       nextToken();
1490       break;
1491     case tok::comma:
1492       nextToken();
1493       break;
1494     default:
1495       nextToken();
1496       break;
1497     }
1498   } while (!eof());
1499   return false;
1500 }
1501 
1502 void UnwrappedLineParser::parseParens() {
1503   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1504   nextToken();
1505   do {
1506     switch (FormatTok->Tok.getKind()) {
1507     case tok::l_paren:
1508       parseParens();
1509       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1510         parseChildBlock();
1511       break;
1512     case tok::r_paren:
1513       nextToken();
1514       return;
1515     case tok::r_brace:
1516       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1517       return;
1518     case tok::l_square:
1519       tryToParseLambda();
1520       break;
1521     case tok::l_brace:
1522       if (!tryToParseBracedList())
1523         parseChildBlock();
1524       break;
1525     case tok::at:
1526       nextToken();
1527       if (FormatTok->Tok.is(tok::l_brace)) {
1528         nextToken();
1529         parseBracedList();
1530       }
1531       break;
1532     case tok::kw_class:
1533       if (Style.Language == FormatStyle::LK_JavaScript)
1534         parseRecord(/*ParseAsExpr=*/true);
1535       else
1536         nextToken();
1537       break;
1538     case tok::identifier:
1539       if (Style.Language == FormatStyle::LK_JavaScript &&
1540           (FormatTok->is(Keywords.kw_function) ||
1541            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1542         tryToParseJSFunction();
1543       else
1544         nextToken();
1545       break;
1546     default:
1547       nextToken();
1548       break;
1549     }
1550   } while (!eof());
1551 }
1552 
1553 void UnwrappedLineParser::parseSquare() {
1554   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1555   if (tryToParseLambda())
1556     return;
1557   do {
1558     switch (FormatTok->Tok.getKind()) {
1559     case tok::l_paren:
1560       parseParens();
1561       break;
1562     case tok::r_square:
1563       nextToken();
1564       return;
1565     case tok::r_brace:
1566       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1567       return;
1568     case tok::l_square:
1569       parseSquare();
1570       break;
1571     case tok::l_brace: {
1572       if (!tryToParseBracedList())
1573         parseChildBlock();
1574       break;
1575     }
1576     case tok::at:
1577       nextToken();
1578       if (FormatTok->Tok.is(tok::l_brace)) {
1579         nextToken();
1580         parseBracedList();
1581       }
1582       break;
1583     default:
1584       nextToken();
1585       break;
1586     }
1587   } while (!eof());
1588 }
1589 
1590 void UnwrappedLineParser::parseIfThenElse() {
1591   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1592   nextToken();
1593   if (FormatTok->Tok.is(tok::kw_constexpr))
1594     nextToken();
1595   if (FormatTok->Tok.is(tok::l_paren))
1596     parseParens();
1597   bool NeedsUnwrappedLine = false;
1598   if (FormatTok->Tok.is(tok::l_brace)) {
1599     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1600     parseBlock(/*MustBeDeclaration=*/false);
1601     if (Style.BraceWrapping.BeforeElse)
1602       addUnwrappedLine();
1603     else
1604       NeedsUnwrappedLine = true;
1605   } else {
1606     addUnwrappedLine();
1607     ++Line->Level;
1608     parseStructuralElement();
1609     --Line->Level;
1610   }
1611   if (FormatTok->Tok.is(tok::kw_else)) {
1612     nextToken();
1613     if (FormatTok->Tok.is(tok::l_brace)) {
1614       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1615       parseBlock(/*MustBeDeclaration=*/false);
1616       addUnwrappedLine();
1617     } else if (FormatTok->Tok.is(tok::kw_if)) {
1618       parseIfThenElse();
1619     } else {
1620       addUnwrappedLine();
1621       ++Line->Level;
1622       parseStructuralElement();
1623       if (FormatTok->is(tok::eof))
1624         addUnwrappedLine();
1625       --Line->Level;
1626     }
1627   } else if (NeedsUnwrappedLine) {
1628     addUnwrappedLine();
1629   }
1630 }
1631 
1632 void UnwrappedLineParser::parseTryCatch() {
1633   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1634   nextToken();
1635   bool NeedsUnwrappedLine = false;
1636   if (FormatTok->is(tok::colon)) {
1637     // We are in a function try block, what comes is an initializer list.
1638     nextToken();
1639     while (FormatTok->is(tok::identifier)) {
1640       nextToken();
1641       if (FormatTok->is(tok::l_paren))
1642         parseParens();
1643       if (FormatTok->is(tok::comma))
1644         nextToken();
1645     }
1646   }
1647   // Parse try with resource.
1648   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1649     parseParens();
1650   }
1651   if (FormatTok->is(tok::l_brace)) {
1652     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1653     parseBlock(/*MustBeDeclaration=*/false);
1654     if (Style.BraceWrapping.BeforeCatch) {
1655       addUnwrappedLine();
1656     } else {
1657       NeedsUnwrappedLine = true;
1658     }
1659   } else if (!FormatTok->is(tok::kw_catch)) {
1660     // The C++ standard requires a compound-statement after a try.
1661     // If there's none, we try to assume there's a structuralElement
1662     // and try to continue.
1663     addUnwrappedLine();
1664     ++Line->Level;
1665     parseStructuralElement();
1666     --Line->Level;
1667   }
1668   while (1) {
1669     if (FormatTok->is(tok::at))
1670       nextToken();
1671     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1672                              tok::kw___finally) ||
1673           ((Style.Language == FormatStyle::LK_Java ||
1674             Style.Language == FormatStyle::LK_JavaScript) &&
1675            FormatTok->is(Keywords.kw_finally)) ||
1676           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1677            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1678       break;
1679     nextToken();
1680     while (FormatTok->isNot(tok::l_brace)) {
1681       if (FormatTok->is(tok::l_paren)) {
1682         parseParens();
1683         continue;
1684       }
1685       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1686         return;
1687       nextToken();
1688     }
1689     NeedsUnwrappedLine = false;
1690     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1691     parseBlock(/*MustBeDeclaration=*/false);
1692     if (Style.BraceWrapping.BeforeCatch)
1693       addUnwrappedLine();
1694     else
1695       NeedsUnwrappedLine = true;
1696   }
1697   if (NeedsUnwrappedLine)
1698     addUnwrappedLine();
1699 }
1700 
1701 void UnwrappedLineParser::parseNamespace() {
1702   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1703 
1704   const FormatToken &InitialToken = *FormatTok;
1705   nextToken();
1706   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1707     nextToken();
1708   if (FormatTok->Tok.is(tok::l_brace)) {
1709     if (ShouldBreakBeforeBrace(Style, InitialToken))
1710       addUnwrappedLine();
1711 
1712     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1713                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1714                      DeclarationScopeStack.size() > 1);
1715     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1716     // Munch the semicolon after a namespace. This is more common than one would
1717     // think. Puttin the semicolon into its own line is very ugly.
1718     if (FormatTok->Tok.is(tok::semi))
1719       nextToken();
1720     addUnwrappedLine();
1721   }
1722   // FIXME: Add error handling.
1723 }
1724 
1725 void UnwrappedLineParser::parseNew() {
1726   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1727   nextToken();
1728   if (Style.Language != FormatStyle::LK_Java)
1729     return;
1730 
1731   // In Java, we can parse everything up to the parens, which aren't optional.
1732   do {
1733     // There should not be a ;, { or } before the new's open paren.
1734     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1735       return;
1736 
1737     // Consume the parens.
1738     if (FormatTok->is(tok::l_paren)) {
1739       parseParens();
1740 
1741       // If there is a class body of an anonymous class, consume that as child.
1742       if (FormatTok->is(tok::l_brace))
1743         parseChildBlock();
1744       return;
1745     }
1746     nextToken();
1747   } while (!eof());
1748 }
1749 
1750 void UnwrappedLineParser::parseForOrWhileLoop() {
1751   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1752          "'for', 'while' or foreach macro expected");
1753   nextToken();
1754   // JS' for await ( ...
1755   if (Style.Language == FormatStyle::LK_JavaScript &&
1756       FormatTok->is(Keywords.kw_await))
1757     nextToken();
1758   if (FormatTok->Tok.is(tok::l_paren))
1759     parseParens();
1760   if (FormatTok->Tok.is(tok::l_brace)) {
1761     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1762     parseBlock(/*MustBeDeclaration=*/false);
1763     addUnwrappedLine();
1764   } else {
1765     addUnwrappedLine();
1766     ++Line->Level;
1767     parseStructuralElement();
1768     --Line->Level;
1769   }
1770 }
1771 
1772 void UnwrappedLineParser::parseDoWhile() {
1773   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1774   nextToken();
1775   if (FormatTok->Tok.is(tok::l_brace)) {
1776     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1777     parseBlock(/*MustBeDeclaration=*/false);
1778     if (Style.BraceWrapping.IndentBraces)
1779       addUnwrappedLine();
1780   } else {
1781     addUnwrappedLine();
1782     ++Line->Level;
1783     parseStructuralElement();
1784     --Line->Level;
1785   }
1786 
1787   // FIXME: Add error handling.
1788   if (!FormatTok->Tok.is(tok::kw_while)) {
1789     addUnwrappedLine();
1790     return;
1791   }
1792 
1793   nextToken();
1794   parseStructuralElement();
1795 }
1796 
1797 void UnwrappedLineParser::parseLabel() {
1798   nextToken();
1799   unsigned OldLineLevel = Line->Level;
1800   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1801     --Line->Level;
1802   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1803     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1804     parseBlock(/*MustBeDeclaration=*/false);
1805     if (FormatTok->Tok.is(tok::kw_break)) {
1806       if (Style.BraceWrapping.AfterControlStatement)
1807         addUnwrappedLine();
1808       parseStructuralElement();
1809     }
1810     addUnwrappedLine();
1811   } else {
1812     if (FormatTok->is(tok::semi))
1813       nextToken();
1814     addUnwrappedLine();
1815   }
1816   Line->Level = OldLineLevel;
1817   if (FormatTok->isNot(tok::l_brace)) {
1818     parseStructuralElement();
1819     addUnwrappedLine();
1820   }
1821 }
1822 
1823 void UnwrappedLineParser::parseCaseLabel() {
1824   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1825   // FIXME: fix handling of complex expressions here.
1826   do {
1827     nextToken();
1828   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1829   parseLabel();
1830 }
1831 
1832 void UnwrappedLineParser::parseSwitch() {
1833   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1834   nextToken();
1835   if (FormatTok->Tok.is(tok::l_paren))
1836     parseParens();
1837   if (FormatTok->Tok.is(tok::l_brace)) {
1838     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1839     parseBlock(/*MustBeDeclaration=*/false);
1840     addUnwrappedLine();
1841   } else {
1842     addUnwrappedLine();
1843     ++Line->Level;
1844     parseStructuralElement();
1845     --Line->Level;
1846   }
1847 }
1848 
1849 void UnwrappedLineParser::parseAccessSpecifier() {
1850   nextToken();
1851   // Understand Qt's slots.
1852   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1853     nextToken();
1854   // Otherwise, we don't know what it is, and we'd better keep the next token.
1855   if (FormatTok->Tok.is(tok::colon))
1856     nextToken();
1857   addUnwrappedLine();
1858 }
1859 
1860 bool UnwrappedLineParser::parseEnum() {
1861   // Won't be 'enum' for NS_ENUMs.
1862   if (FormatTok->Tok.is(tok::kw_enum))
1863     nextToken();
1864 
1865   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1866   // declarations. An "enum" keyword followed by a colon would be a syntax
1867   // error and thus assume it is just an identifier.
1868   if (Style.Language == FormatStyle::LK_JavaScript &&
1869       FormatTok->isOneOf(tok::colon, tok::question))
1870     return false;
1871 
1872   // Eat up enum class ...
1873   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1874     nextToken();
1875 
1876   while (FormatTok->Tok.getIdentifierInfo() ||
1877          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1878                             tok::greater, tok::comma, tok::question)) {
1879     nextToken();
1880     // We can have macros or attributes in between 'enum' and the enum name.
1881     if (FormatTok->is(tok::l_paren))
1882       parseParens();
1883     if (FormatTok->is(tok::identifier)) {
1884       nextToken();
1885       // If there are two identifiers in a row, this is likely an elaborate
1886       // return type. In Java, this can be "implements", etc.
1887       if (Style.isCpp() && FormatTok->is(tok::identifier))
1888         return false;
1889     }
1890   }
1891 
1892   // Just a declaration or something is wrong.
1893   if (FormatTok->isNot(tok::l_brace))
1894     return true;
1895   FormatTok->BlockKind = BK_Block;
1896 
1897   if (Style.Language == FormatStyle::LK_Java) {
1898     // Java enums are different.
1899     parseJavaEnumBody();
1900     return true;
1901   }
1902   if (Style.Language == FormatStyle::LK_Proto) {
1903     parseBlock(/*MustBeDeclaration=*/true);
1904     return true;
1905   }
1906 
1907   // Parse enum body.
1908   nextToken();
1909   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1910   if (HasError) {
1911     if (FormatTok->is(tok::semi))
1912       nextToken();
1913     addUnwrappedLine();
1914   }
1915   return true;
1916 
1917   // There is no addUnwrappedLine() here so that we fall through to parsing a
1918   // structural element afterwards. Thus, in "enum A {} n, m;",
1919   // "} n, m;" will end up in one unwrapped line.
1920 }
1921 
1922 void UnwrappedLineParser::parseJavaEnumBody() {
1923   // Determine whether the enum is simple, i.e. does not have a semicolon or
1924   // constants with class bodies. Simple enums can be formatted like braced
1925   // lists, contracted to a single line, etc.
1926   unsigned StoredPosition = Tokens->getPosition();
1927   bool IsSimple = true;
1928   FormatToken *Tok = Tokens->getNextToken();
1929   while (Tok) {
1930     if (Tok->is(tok::r_brace))
1931       break;
1932     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1933       IsSimple = false;
1934       break;
1935     }
1936     // FIXME: This will also mark enums with braces in the arguments to enum
1937     // constants as "not simple". This is probably fine in practice, though.
1938     Tok = Tokens->getNextToken();
1939   }
1940   FormatTok = Tokens->setPosition(StoredPosition);
1941 
1942   if (IsSimple) {
1943     nextToken();
1944     parseBracedList();
1945     addUnwrappedLine();
1946     return;
1947   }
1948 
1949   // Parse the body of a more complex enum.
1950   // First add a line for everything up to the "{".
1951   nextToken();
1952   addUnwrappedLine();
1953   ++Line->Level;
1954 
1955   // Parse the enum constants.
1956   while (FormatTok) {
1957     if (FormatTok->is(tok::l_brace)) {
1958       // Parse the constant's class body.
1959       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1960                  /*MunchSemi=*/false);
1961     } else if (FormatTok->is(tok::l_paren)) {
1962       parseParens();
1963     } else if (FormatTok->is(tok::comma)) {
1964       nextToken();
1965       addUnwrappedLine();
1966     } else if (FormatTok->is(tok::semi)) {
1967       nextToken();
1968       addUnwrappedLine();
1969       break;
1970     } else if (FormatTok->is(tok::r_brace)) {
1971       addUnwrappedLine();
1972       break;
1973     } else {
1974       nextToken();
1975     }
1976   }
1977 
1978   // Parse the class body after the enum's ";" if any.
1979   parseLevel(/*HasOpeningBrace=*/true);
1980   nextToken();
1981   --Line->Level;
1982   addUnwrappedLine();
1983 }
1984 
1985 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
1986   const FormatToken &InitialToken = *FormatTok;
1987   nextToken();
1988 
1989   // The actual identifier can be a nested name specifier, and in macros
1990   // it is often token-pasted.
1991   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1992                             tok::kw___attribute, tok::kw___declspec,
1993                             tok::kw_alignas) ||
1994          ((Style.Language == FormatStyle::LK_Java ||
1995            Style.Language == FormatStyle::LK_JavaScript) &&
1996           FormatTok->isOneOf(tok::period, tok::comma))) {
1997     if (Style.Language == FormatStyle::LK_JavaScript &&
1998         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
1999       // JavaScript/TypeScript supports inline object types in
2000       // extends/implements positions:
2001       //     class Foo implements {bar: number} { }
2002       nextToken();
2003       if (FormatTok->is(tok::l_brace)) {
2004         tryToParseBracedList();
2005         continue;
2006       }
2007     }
2008     bool IsNonMacroIdentifier =
2009         FormatTok->is(tok::identifier) &&
2010         FormatTok->TokenText != FormatTok->TokenText.upper();
2011     nextToken();
2012     // We can have macros or attributes in between 'class' and the class name.
2013     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2014       parseParens();
2015   }
2016 
2017   // Note that parsing away template declarations here leads to incorrectly
2018   // accepting function declarations as record declarations.
2019   // In general, we cannot solve this problem. Consider:
2020   // class A<int> B() {}
2021   // which can be a function definition or a class definition when B() is a
2022   // macro. If we find enough real-world cases where this is a problem, we
2023   // can parse for the 'template' keyword in the beginning of the statement,
2024   // and thus rule out the record production in case there is no template
2025   // (this would still leave us with an ambiguity between template function
2026   // and class declarations).
2027   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2028     while (!eof()) {
2029       if (FormatTok->is(tok::l_brace)) {
2030         calculateBraceTypes(/*ExpectClassBody=*/true);
2031         if (!tryToParseBracedList())
2032           break;
2033       }
2034       if (FormatTok->Tok.is(tok::semi))
2035         return;
2036       nextToken();
2037     }
2038   }
2039   if (FormatTok->Tok.is(tok::l_brace)) {
2040     if (ParseAsExpr) {
2041       parseChildBlock();
2042     } else {
2043       if (ShouldBreakBeforeBrace(Style, InitialToken))
2044         addUnwrappedLine();
2045 
2046       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2047                  /*MunchSemi=*/false);
2048     }
2049   }
2050   // There is no addUnwrappedLine() here so that we fall through to parsing a
2051   // structural element afterwards. Thus, in "class A {} n, m;",
2052   // "} n, m;" will end up in one unwrapped line.
2053 }
2054 
2055 void UnwrappedLineParser::parseObjCProtocolList() {
2056   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2057   do
2058     nextToken();
2059   while (!eof() && FormatTok->Tok.isNot(tok::greater));
2060   nextToken(); // Skip '>'.
2061 }
2062 
2063 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2064   do {
2065     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2066       nextToken();
2067       addUnwrappedLine();
2068       break;
2069     }
2070     if (FormatTok->is(tok::l_brace)) {
2071       parseBlock(/*MustBeDeclaration=*/false);
2072       // In ObjC interfaces, nothing should be following the "}".
2073       addUnwrappedLine();
2074     } else if (FormatTok->is(tok::r_brace)) {
2075       // Ignore stray "}". parseStructuralElement doesn't consume them.
2076       nextToken();
2077       addUnwrappedLine();
2078     } else {
2079       parseStructuralElement();
2080     }
2081   } while (!eof());
2082 }
2083 
2084 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2085   nextToken();
2086   nextToken(); // interface name
2087 
2088   // @interface can be followed by either a base class, or a category.
2089   if (FormatTok->Tok.is(tok::colon)) {
2090     nextToken();
2091     nextToken(); // base class name
2092   } else if (FormatTok->Tok.is(tok::l_paren))
2093     // Skip category, if present.
2094     parseParens();
2095 
2096   if (FormatTok->Tok.is(tok::less))
2097     parseObjCProtocolList();
2098 
2099   if (FormatTok->Tok.is(tok::l_brace)) {
2100     if (Style.BraceWrapping.AfterObjCDeclaration)
2101       addUnwrappedLine();
2102     parseBlock(/*MustBeDeclaration=*/true);
2103   }
2104 
2105   // With instance variables, this puts '}' on its own line.  Without instance
2106   // variables, this ends the @interface line.
2107   addUnwrappedLine();
2108 
2109   parseObjCUntilAtEnd();
2110 }
2111 
2112 void UnwrappedLineParser::parseObjCProtocol() {
2113   nextToken();
2114   nextToken(); // protocol name
2115 
2116   if (FormatTok->Tok.is(tok::less))
2117     parseObjCProtocolList();
2118 
2119   // Check for protocol declaration.
2120   if (FormatTok->Tok.is(tok::semi)) {
2121     nextToken();
2122     return addUnwrappedLine();
2123   }
2124 
2125   addUnwrappedLine();
2126   parseObjCUntilAtEnd();
2127 }
2128 
2129 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2130   bool IsImport = FormatTok->is(Keywords.kw_import);
2131   assert(IsImport || FormatTok->is(tok::kw_export));
2132   nextToken();
2133 
2134   // Consume the "default" in "export default class/function".
2135   if (FormatTok->is(tok::kw_default))
2136     nextToken();
2137 
2138   // Consume "async function", "function" and "default function", so that these
2139   // get parsed as free-standing JS functions, i.e. do not require a trailing
2140   // semicolon.
2141   if (FormatTok->is(Keywords.kw_async))
2142     nextToken();
2143   if (FormatTok->is(Keywords.kw_function)) {
2144     nextToken();
2145     return;
2146   }
2147 
2148   // For imports, `export *`, `export {...}`, consume the rest of the line up
2149   // to the terminating `;`. For everything else, just return and continue
2150   // parsing the structural element, i.e. the declaration or expression for
2151   // `export default`.
2152   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2153       !FormatTok->isStringLiteral())
2154     return;
2155 
2156   while (!eof()) {
2157     if (FormatTok->is(tok::semi))
2158       return;
2159     if (Line->Tokens.size() == 0) {
2160       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2161       // import statement should terminate.
2162       return;
2163     }
2164     if (FormatTok->is(tok::l_brace)) {
2165       FormatTok->BlockKind = BK_Block;
2166       nextToken();
2167       parseBracedList();
2168     } else {
2169       nextToken();
2170     }
2171   }
2172 }
2173 
2174 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2175                                                  StringRef Prefix = "") {
2176   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2177                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2178   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2179                                                     E = Line.Tokens.end();
2180        I != E; ++I) {
2181     llvm::dbgs() << I->Tok->Tok.getName() << "["
2182                  << "T=" << I->Tok->Type
2183                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2184   }
2185   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2186                                                     E = Line.Tokens.end();
2187        I != E; ++I) {
2188     const UnwrappedLineNode &Node = *I;
2189     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2190              I = Node.Children.begin(),
2191              E = Node.Children.end();
2192          I != E; ++I) {
2193       printDebugInfo(*I, "\nChild: ");
2194     }
2195   }
2196   llvm::dbgs() << "\n";
2197 }
2198 
2199 void UnwrappedLineParser::addUnwrappedLine() {
2200   if (Line->Tokens.empty())
2201     return;
2202   DEBUG({
2203     if (CurrentLines == &Lines)
2204       printDebugInfo(*Line);
2205   });
2206   CurrentLines->push_back(std::move(*Line));
2207   Line->Tokens.clear();
2208   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2209   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2210     CurrentLines->append(
2211         std::make_move_iterator(PreprocessorDirectives.begin()),
2212         std::make_move_iterator(PreprocessorDirectives.end()));
2213     PreprocessorDirectives.clear();
2214   }
2215 }
2216 
2217 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2218 
2219 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2220   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2221          FormatTok.NewlinesBefore > 0;
2222 }
2223 
2224 // Checks if \p FormatTok is a line comment that continues the line comment
2225 // section on \p Line.
2226 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2227                                         const UnwrappedLine &Line,
2228                                         llvm::Regex &CommentPragmasRegex) {
2229   if (Line.Tokens.empty())
2230     return false;
2231 
2232   StringRef IndentContent = FormatTok.TokenText;
2233   if (FormatTok.TokenText.startswith("//") ||
2234       FormatTok.TokenText.startswith("/*"))
2235     IndentContent = FormatTok.TokenText.substr(2);
2236   if (CommentPragmasRegex.match(IndentContent))
2237     return false;
2238 
2239   // If Line starts with a line comment, then FormatTok continues the comment
2240   // section if its original column is greater or equal to the original start
2241   // column of the line.
2242   //
2243   // Define the min column token of a line as follows: if a line ends in '{' or
2244   // contains a '{' followed by a line comment, then the min column token is
2245   // that '{'. Otherwise, the min column token of the line is the first token of
2246   // the line.
2247   //
2248   // If Line starts with a token other than a line comment, then FormatTok
2249   // continues the comment section if its original column is greater than the
2250   // original start column of the min column token of the line.
2251   //
2252   // For example, the second line comment continues the first in these cases:
2253   //
2254   // // first line
2255   // // second line
2256   //
2257   // and:
2258   //
2259   // // first line
2260   //  // second line
2261   //
2262   // and:
2263   //
2264   // int i; // first line
2265   //  // second line
2266   //
2267   // and:
2268   //
2269   // do { // first line
2270   //      // second line
2271   //   int i;
2272   // } while (true);
2273   //
2274   // and:
2275   //
2276   // enum {
2277   //   a, // first line
2278   //    // second line
2279   //   b
2280   // };
2281   //
2282   // The second line comment doesn't continue the first in these cases:
2283   //
2284   //   // first line
2285   //  // second line
2286   //
2287   // and:
2288   //
2289   // int i; // first line
2290   // // second line
2291   //
2292   // and:
2293   //
2294   // do { // first line
2295   //   // second line
2296   //   int i;
2297   // } while (true);
2298   //
2299   // and:
2300   //
2301   // enum {
2302   //   a, // first line
2303   //   // second line
2304   // };
2305   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2306 
2307   // Scan for '{//'. If found, use the column of '{' as a min column for line
2308   // comment section continuation.
2309   const FormatToken *PreviousToken = nullptr;
2310   for (const UnwrappedLineNode &Node : Line.Tokens) {
2311     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2312         isLineComment(*Node.Tok)) {
2313       MinColumnToken = PreviousToken;
2314       break;
2315     }
2316     PreviousToken = Node.Tok;
2317 
2318     // Grab the last newline preceding a token in this unwrapped line.
2319     if (Node.Tok->NewlinesBefore > 0) {
2320       MinColumnToken = Node.Tok;
2321     }
2322   }
2323   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2324     MinColumnToken = PreviousToken;
2325   }
2326 
2327   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2328                               MinColumnToken);
2329 }
2330 
2331 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2332   bool JustComments = Line->Tokens.empty();
2333   for (SmallVectorImpl<FormatToken *>::const_iterator
2334            I = CommentsBeforeNextToken.begin(),
2335            E = CommentsBeforeNextToken.end();
2336        I != E; ++I) {
2337     // Line comments that belong to the same line comment section are put on the
2338     // same line since later we might want to reflow content between them.
2339     // Additional fine-grained breaking of line comment sections is controlled
2340     // by the class BreakableLineCommentSection in case it is desirable to keep
2341     // several line comment sections in the same unwrapped line.
2342     //
2343     // FIXME: Consider putting separate line comment sections as children to the
2344     // unwrapped line instead.
2345     (*I)->ContinuesLineCommentSection =
2346         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2347     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2348       addUnwrappedLine();
2349     pushToken(*I);
2350   }
2351   if (NewlineBeforeNext && JustComments)
2352     addUnwrappedLine();
2353   CommentsBeforeNextToken.clear();
2354 }
2355 
2356 void UnwrappedLineParser::nextToken(int LevelDifference) {
2357   if (eof())
2358     return;
2359   flushComments(isOnNewLine(*FormatTok));
2360   pushToken(FormatTok);
2361   if (Style.Language != FormatStyle::LK_JavaScript)
2362     readToken(LevelDifference);
2363   else
2364     readTokenWithJavaScriptASI();
2365 }
2366 
2367 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2368   // FIXME: This is a dirty way to access the previous token. Find a better
2369   // solution.
2370   if (!Line || Line->Tokens.empty())
2371     return nullptr;
2372   return Line->Tokens.back().Tok;
2373 }
2374 
2375 void UnwrappedLineParser::distributeComments(
2376     const SmallVectorImpl<FormatToken *> &Comments,
2377     const FormatToken *NextTok) {
2378   // Whether or not a line comment token continues a line is controlled by
2379   // the method continuesLineCommentSection, with the following caveat:
2380   //
2381   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2382   // that each comment line from the trail is aligned with the next token, if
2383   // the next token exists. If a trail exists, the beginning of the maximal
2384   // trail is marked as a start of a new comment section.
2385   //
2386   // For example in this code:
2387   //
2388   // int a; // line about a
2389   //   // line 1 about b
2390   //   // line 2 about b
2391   //   int b;
2392   //
2393   // the two lines about b form a maximal trail, so there are two sections, the
2394   // first one consisting of the single comment "// line about a" and the
2395   // second one consisting of the next two comments.
2396   if (Comments.empty())
2397     return;
2398   bool ShouldPushCommentsInCurrentLine = true;
2399   bool HasTrailAlignedWithNextToken = false;
2400   unsigned StartOfTrailAlignedWithNextToken = 0;
2401   if (NextTok) {
2402     // We are skipping the first element intentionally.
2403     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2404       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2405         HasTrailAlignedWithNextToken = true;
2406         StartOfTrailAlignedWithNextToken = i;
2407       }
2408     }
2409   }
2410   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2411     FormatToken *FormatTok = Comments[i];
2412     if (HasTrailAlignedWithNextToken &&
2413         i == StartOfTrailAlignedWithNextToken) {
2414       FormatTok->ContinuesLineCommentSection = false;
2415     } else {
2416       FormatTok->ContinuesLineCommentSection =
2417           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2418     }
2419     if (!FormatTok->ContinuesLineCommentSection &&
2420         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2421       ShouldPushCommentsInCurrentLine = false;
2422     }
2423     if (ShouldPushCommentsInCurrentLine) {
2424       pushToken(FormatTok);
2425     } else {
2426       CommentsBeforeNextToken.push_back(FormatTok);
2427     }
2428   }
2429 }
2430 
2431 void UnwrappedLineParser::readToken(int LevelDifference) {
2432   SmallVector<FormatToken *, 1> Comments;
2433   do {
2434     FormatTok = Tokens->getNextToken();
2435     assert(FormatTok);
2436     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2437            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2438       distributeComments(Comments, FormatTok);
2439       Comments.clear();
2440       // If there is an unfinished unwrapped line, we flush the preprocessor
2441       // directives only after that unwrapped line was finished later.
2442       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2443       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2444       assert((LevelDifference >= 0 ||
2445               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2446              "LevelDifference makes Line->Level negative");
2447       Line->Level += LevelDifference;
2448       // Comments stored before the preprocessor directive need to be output
2449       // before the preprocessor directive, at the same level as the
2450       // preprocessor directive, as we consider them to apply to the directive.
2451       flushComments(isOnNewLine(*FormatTok));
2452       parsePPDirective();
2453     }
2454     while (FormatTok->Type == TT_ConflictStart ||
2455            FormatTok->Type == TT_ConflictEnd ||
2456            FormatTok->Type == TT_ConflictAlternative) {
2457       if (FormatTok->Type == TT_ConflictStart) {
2458         conditionalCompilationStart(/*Unreachable=*/false);
2459       } else if (FormatTok->Type == TT_ConflictAlternative) {
2460         conditionalCompilationAlternative();
2461       } else if (FormatTok->Type == TT_ConflictEnd) {
2462         conditionalCompilationEnd();
2463       }
2464       FormatTok = Tokens->getNextToken();
2465       FormatTok->MustBreakBefore = true;
2466     }
2467 
2468     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2469         !Line->InPPDirective) {
2470       continue;
2471     }
2472 
2473     if (!FormatTok->Tok.is(tok::comment)) {
2474       distributeComments(Comments, FormatTok);
2475       Comments.clear();
2476       return;
2477     }
2478 
2479     Comments.push_back(FormatTok);
2480   } while (!eof());
2481 
2482   distributeComments(Comments, nullptr);
2483   Comments.clear();
2484 }
2485 
2486 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2487   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2488   if (MustBreakBeforeNextToken) {
2489     Line->Tokens.back().Tok->MustBreakBefore = true;
2490     MustBreakBeforeNextToken = false;
2491   }
2492 }
2493 
2494 } // end namespace format
2495 } // end namespace clang
2496