1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <algorithm>
21 
22 #define DEBUG_TYPE "format-parser"
23 
24 namespace clang {
25 namespace format {
26 
27 class FormatTokenSource {
28 public:
29   virtual ~FormatTokenSource() {}
30   virtual FormatToken *getNextToken() = 0;
31 
32   virtual unsigned getPosition() = 0;
33   virtual FormatToken *setPosition(unsigned Position) = 0;
34 };
35 
36 namespace {
37 
38 class ScopedDeclarationState {
39 public:
40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
41                          bool MustBeDeclaration)
42       : Line(Line), Stack(Stack) {
43     Line.MustBeDeclaration = MustBeDeclaration;
44     Stack.push_back(MustBeDeclaration);
45   }
46   ~ScopedDeclarationState() {
47     Stack.pop_back();
48     if (!Stack.empty())
49       Line.MustBeDeclaration = Stack.back();
50     else
51       Line.MustBeDeclaration = true;
52   }
53 
54 private:
55   UnwrappedLine &Line;
56   std::vector<bool> &Stack;
57 };
58 
59 static bool isLineComment(const FormatToken &FormatTok) {
60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     FakeEOF.Tok.startToken();
86     FakeEOF.Tok.setKind(tok::eof);
87     TokenSource = this;
88     Line.Level = 0;
89     Line.InPPDirective = true;
90   }
91 
92   ~ScopedMacroState() override {
93     TokenSource = PreviousTokenSource;
94     ResetToken = Token;
95     Line.InPPDirective = false;
96     Line.Level = PreviousLineLevel;
97   }
98 
99   FormatToken *getNextToken() override {
100     // The \c UnwrappedLineParser guards against this by never calling
101     // \c getNextToken() after it has encountered the first eof token.
102     assert(!eof());
103     PreviousToken = Token;
104     Token = PreviousTokenSource->getNextToken();
105     if (eof())
106       return &FakeEOF;
107     return Token;
108   }
109 
110   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
111 
112   FormatToken *setPosition(unsigned Position) override {
113     PreviousToken = nullptr;
114     Token = PreviousTokenSource->setPosition(Position);
115     return Token;
116   }
117 
118 private:
119   bool eof() {
120     return Token && Token->HasUnescapedNewline &&
121            !continuesLineComment(*Token, PreviousToken,
122                                  /*MinColumnToken=*/PreviousToken);
123   }
124 
125   FormatToken FakeEOF;
126   UnwrappedLine &Line;
127   FormatTokenSource *&TokenSource;
128   FormatToken *&ResetToken;
129   unsigned PreviousLineLevel;
130   FormatTokenSource *PreviousTokenSource;
131 
132   FormatToken *Token;
133   FormatToken *PreviousToken;
134 };
135 
136 } // end anonymous namespace
137 
138 class ScopedLineState {
139 public:
140   ScopedLineState(UnwrappedLineParser &Parser,
141                   bool SwitchToPreprocessorLines = false)
142       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
143     if (SwitchToPreprocessorLines)
144       Parser.CurrentLines = &Parser.PreprocessorDirectives;
145     else if (!Parser.Line->Tokens.empty())
146       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
147     PreBlockLine = std::move(Parser.Line);
148     Parser.Line = std::make_unique<UnwrappedLine>();
149     Parser.Line->Level = PreBlockLine->Level;
150     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
151   }
152 
153   ~ScopedLineState() {
154     if (!Parser.Line->Tokens.empty()) {
155       Parser.addUnwrappedLine();
156     }
157     assert(Parser.Line->Tokens.empty());
158     Parser.Line = std::move(PreBlockLine);
159     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
160       Parser.MustBreakBeforeNextToken = true;
161     Parser.CurrentLines = OriginalLines;
162   }
163 
164 private:
165   UnwrappedLineParser &Parser;
166 
167   std::unique_ptr<UnwrappedLine> PreBlockLine;
168   SmallVectorImpl<UnwrappedLine> *OriginalLines;
169 };
170 
171 class CompoundStatementIndenter {
172 public:
173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
174                             const FormatStyle &Style, unsigned &LineLevel)
175       : CompoundStatementIndenter(Parser, LineLevel,
176                                   Style.BraceWrapping.AfterControlStatement,
177                                   Style.BraceWrapping.IndentBraces) {}
178   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
179                             bool WrapBrace, bool IndentBrace)
180       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
181     if (WrapBrace)
182       Parser->addUnwrappedLine();
183     if (IndentBrace)
184       ++LineLevel;
185   }
186   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
187 
188 private:
189   unsigned &LineLevel;
190   unsigned OldLineLevel;
191 };
192 
193 namespace {
194 
195 class IndexedTokenSource : public FormatTokenSource {
196 public:
197   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
198       : Tokens(Tokens), Position(-1) {}
199 
200   FormatToken *getNextToken() override {
201     ++Position;
202     return Tokens[Position];
203   }
204 
205   unsigned getPosition() override {
206     assert(Position >= 0);
207     return Position;
208   }
209 
210   FormatToken *setPosition(unsigned P) override {
211     Position = P;
212     return Tokens[Position];
213   }
214 
215   void reset() { Position = -1; }
216 
217 private:
218   ArrayRef<FormatToken *> Tokens;
219   int Position;
220 };
221 
222 } // end anonymous namespace
223 
224 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
225                                          const AdditionalKeywords &Keywords,
226                                          unsigned FirstStartColumn,
227                                          ArrayRef<FormatToken *> Tokens,
228                                          UnwrappedLineConsumer &Callback)
229     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
230       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
231       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
232       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
233       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
234                        ? IG_Rejected
235                        : IG_Inited),
236       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
241                      ? IG_Rejected
242                      : IG_Inited;
243   IncludeGuardToken = nullptr;
244   Line.reset(new UnwrappedLine);
245   CommentsBeforeNextToken.clear();
246   FormatTok = nullptr;
247   MustBreakBeforeNextToken = false;
248   PreprocessorDirectives.clear();
249   CurrentLines = &Lines;
250   DeclarationScopeStack.clear();
251   PPStack.clear();
252   Line->FirstStartColumn = FirstStartColumn;
253 }
254 
255 void UnwrappedLineParser::parse() {
256   IndexedTokenSource TokenSource(AllTokens);
257   Line->FirstStartColumn = FirstStartColumn;
258   do {
259     LLVM_DEBUG(llvm::dbgs() << "----\n");
260     reset();
261     Tokens = &TokenSource;
262     TokenSource.reset();
263 
264     readToken();
265     parseFile();
266 
267     // If we found an include guard then all preprocessor directives (other than
268     // the guard) are over-indented by one.
269     if (IncludeGuard == IG_Found)
270       for (auto &Line : Lines)
271         if (Line.InPPDirective && Line.Level > 0)
272           --Line.Level;
273 
274     // Create line with eof token.
275     pushToken(FormatTok);
276     addUnwrappedLine();
277 
278     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
279                                                   E = Lines.end();
280          I != E; ++I) {
281       Callback.consumeUnwrappedLine(*I);
282     }
283     Callback.finishRun();
284     Lines.clear();
285     while (!PPLevelBranchIndex.empty() &&
286            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
287       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
288       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
289     }
290     if (!PPLevelBranchIndex.empty()) {
291       ++PPLevelBranchIndex.back();
292       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
293       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
294     }
295   } while (!PPLevelBranchIndex.empty());
296 }
297 
298 void UnwrappedLineParser::parseFile() {
299   // The top-level context in a file always has declarations, except for pre-
300   // processor directives and JavaScript files.
301   bool MustBeDeclaration =
302       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
303   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
304                                           MustBeDeclaration);
305   if (Style.Language == FormatStyle::LK_TextProto)
306     parseBracedList();
307   else
308     parseLevel(/*HasOpeningBrace=*/false);
309   // Make sure to format the remaining tokens.
310   //
311   // LK_TextProto is special since its top-level is parsed as the body of a
312   // braced list, which does not necessarily have natural line separators such
313   // as a semicolon. Comments after the last entry that have been determined to
314   // not belong to that line, as in:
315   //   key: value
316   //   // endfile comment
317   // do not have a chance to be put on a line of their own until this point.
318   // Here we add this newline before end-of-file comments.
319   if (Style.Language == FormatStyle::LK_TextProto &&
320       !CommentsBeforeNextToken.empty())
321     addUnwrappedLine();
322   flushComments(true);
323   addUnwrappedLine();
324 }
325 
326 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
327   do {
328     switch (FormatTok->Tok.getKind()) {
329     case tok::l_brace:
330       return;
331     default:
332       if (FormatTok->is(Keywords.kw_where)) {
333         addUnwrappedLine();
334         nextToken();
335         parseCSharpGenericTypeConstraint();
336         break;
337       }
338       nextToken();
339       break;
340     }
341   } while (!eof());
342 }
343 
344 void UnwrappedLineParser::parseCSharpAttribute() {
345   int UnpairedSquareBrackets = 1;
346   do {
347     switch (FormatTok->Tok.getKind()) {
348     case tok::r_square:
349       nextToken();
350       --UnpairedSquareBrackets;
351       if (UnpairedSquareBrackets == 0) {
352         addUnwrappedLine();
353         return;
354       }
355       break;
356     case tok::l_square:
357       ++UnpairedSquareBrackets;
358       nextToken();
359       break;
360     default:
361       nextToken();
362       break;
363     }
364   } while (!eof());
365 }
366 
367 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
368   bool SwitchLabelEncountered = false;
369   do {
370     tok::TokenKind kind = FormatTok->Tok.getKind();
371     if (FormatTok->Type == TT_MacroBlockBegin) {
372       kind = tok::l_brace;
373     } else if (FormatTok->Type == TT_MacroBlockEnd) {
374       kind = tok::r_brace;
375     }
376 
377     switch (kind) {
378     case tok::comment:
379       nextToken();
380       addUnwrappedLine();
381       break;
382     case tok::l_brace:
383       // FIXME: Add parameter whether this can happen - if this happens, we must
384       // be in a non-declaration context.
385       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
386         continue;
387       parseBlock(/*MustBeDeclaration=*/false);
388       addUnwrappedLine();
389       break;
390     case tok::r_brace:
391       if (HasOpeningBrace)
392         return;
393       nextToken();
394       addUnwrappedLine();
395       break;
396     case tok::kw_default: {
397       unsigned StoredPosition = Tokens->getPosition();
398       FormatToken *Next;
399       do {
400         Next = Tokens->getNextToken();
401       } while (Next && Next->is(tok::comment));
402       FormatTok = Tokens->setPosition(StoredPosition);
403       if (Next && Next->isNot(tok::colon)) {
404         // default not followed by ':' is not a case label; treat it like
405         // an identifier.
406         parseStructuralElement();
407         break;
408       }
409       // Else, if it is 'default:', fall through to the case handling.
410       LLVM_FALLTHROUGH;
411     }
412     case tok::kw_case:
413       if (Style.Language == FormatStyle::LK_JavaScript &&
414           Line->MustBeDeclaration) {
415         // A 'case: string' style field declaration.
416         parseStructuralElement();
417         break;
418       }
419       if (!SwitchLabelEncountered &&
420           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
421         ++Line->Level;
422       SwitchLabelEncountered = true;
423       parseStructuralElement();
424       break;
425     case tok::l_square:
426       if (Style.isCSharp()) {
427         nextToken();
428         parseCSharpAttribute();
429         break;
430       }
431       LLVM_FALLTHROUGH;
432     default:
433       parseStructuralElement();
434       break;
435     }
436   } while (!eof());
437 }
438 
439 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
440   // We'll parse forward through the tokens until we hit
441   // a closing brace or eof - note that getNextToken() will
442   // parse macros, so this will magically work inside macro
443   // definitions, too.
444   unsigned StoredPosition = Tokens->getPosition();
445   FormatToken *Tok = FormatTok;
446   const FormatToken *PrevTok = Tok->Previous;
447   // Keep a stack of positions of lbrace tokens. We will
448   // update information about whether an lbrace starts a
449   // braced init list or a different block during the loop.
450   SmallVector<FormatToken *, 8> LBraceStack;
451   assert(Tok->Tok.is(tok::l_brace));
452   do {
453     // Get next non-comment token.
454     FormatToken *NextTok;
455     unsigned ReadTokens = 0;
456     do {
457       NextTok = Tokens->getNextToken();
458       ++ReadTokens;
459     } while (NextTok->is(tok::comment));
460 
461     switch (Tok->Tok.getKind()) {
462     case tok::l_brace:
463       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
464         if (PrevTok->isOneOf(tok::colon, tok::less))
465           // A ':' indicates this code is in a type, or a braced list
466           // following a label in an object literal ({a: {b: 1}}).
467           // A '<' could be an object used in a comparison, but that is nonsense
468           // code (can never return true), so more likely it is a generic type
469           // argument (`X<{a: string; b: number}>`).
470           // The code below could be confused by semicolons between the
471           // individual members in a type member list, which would normally
472           // trigger BK_Block. In both cases, this must be parsed as an inline
473           // braced init.
474           Tok->BlockKind = BK_BracedInit;
475         else if (PrevTok->is(tok::r_paren))
476           // `) { }` can only occur in function or method declarations in JS.
477           Tok->BlockKind = BK_Block;
478       } else {
479         Tok->BlockKind = BK_Unknown;
480       }
481       LBraceStack.push_back(Tok);
482       break;
483     case tok::r_brace:
484       if (LBraceStack.empty())
485         break;
486       if (LBraceStack.back()->BlockKind == BK_Unknown) {
487         bool ProbablyBracedList = false;
488         if (Style.Language == FormatStyle::LK_Proto) {
489           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
490         } else {
491           // Using OriginalColumn to distinguish between ObjC methods and
492           // binary operators is a bit hacky.
493           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
494                                   NextTok->OriginalColumn == 0;
495 
496           // If there is a comma, semicolon or right paren after the closing
497           // brace, we assume this is a braced initializer list.  Note that
498           // regardless how we mark inner braces here, we will overwrite the
499           // BlockKind later if we parse a braced list (where all blocks
500           // inside are by default braced lists), or when we explicitly detect
501           // blocks (for example while parsing lambdas).
502           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
503           // braced list in JS.
504           ProbablyBracedList =
505               (Style.Language == FormatStyle::LK_JavaScript &&
506                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
507                                 Keywords.kw_as)) ||
508               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
509               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
510                                tok::r_paren, tok::r_square, tok::l_brace,
511                                tok::ellipsis) ||
512               (NextTok->is(tok::identifier) &&
513                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
514               (NextTok->is(tok::semi) &&
515                (!ExpectClassBody || LBraceStack.size() != 1)) ||
516               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
517           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
518             // We can have an array subscript after a braced init
519             // list, but C++11 attributes are expected after blocks.
520             NextTok = Tokens->getNextToken();
521             ++ReadTokens;
522             ProbablyBracedList = NextTok->isNot(tok::l_square);
523           }
524         }
525         if (ProbablyBracedList) {
526           Tok->BlockKind = BK_BracedInit;
527           LBraceStack.back()->BlockKind = BK_BracedInit;
528         } else {
529           Tok->BlockKind = BK_Block;
530           LBraceStack.back()->BlockKind = BK_Block;
531         }
532       }
533       LBraceStack.pop_back();
534       break;
535     case tok::identifier:
536       if (!Tok->is(TT_StatementMacro))
537         break;
538       LLVM_FALLTHROUGH;
539     case tok::at:
540     case tok::semi:
541     case tok::kw_if:
542     case tok::kw_while:
543     case tok::kw_for:
544     case tok::kw_switch:
545     case tok::kw_try:
546     case tok::kw___try:
547       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
548         LBraceStack.back()->BlockKind = BK_Block;
549       break;
550     default:
551       break;
552     }
553     PrevTok = Tok;
554     Tok = NextTok;
555   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
556 
557   // Assume other blocks for all unclosed opening braces.
558   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
559     if (LBraceStack[i]->BlockKind == BK_Unknown)
560       LBraceStack[i]->BlockKind = BK_Block;
561   }
562 
563   FormatTok = Tokens->setPosition(StoredPosition);
564 }
565 
/// Mixes the \c std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  const std::size_t Mixed =
      ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
571 
572 size_t UnwrappedLineParser::computePPHash() const {
573   size_t h = 0;
574   for (const auto &i : PPStack) {
575     hash_combine(h, size_t(i.Kind));
576     hash_combine(h, i.Line);
577   }
578   return h;
579 }
580 
581 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
582                                      bool MunchSemi) {
583   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
584          "'{' or macro block token expected");
585   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
586   FormatTok->BlockKind = BK_Block;
587 
588   size_t PPStartHash = computePPHash();
589 
590   unsigned InitialLevel = Line->Level;
591   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
592 
593   if (MacroBlock && FormatTok->is(tok::l_paren))
594     parseParens();
595 
596   size_t NbPreprocessorDirectives =
597       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
598   addUnwrappedLine();
599   size_t OpeningLineIndex =
600       CurrentLines->empty()
601           ? (UnwrappedLine::kInvalidIndex)
602           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
603 
604   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
605                                           MustBeDeclaration);
606   if (AddLevel)
607     ++Line->Level;
608   parseLevel(/*HasOpeningBrace=*/true);
609 
610   if (eof())
611     return;
612 
613   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
614                  : !FormatTok->is(tok::r_brace)) {
615     Line->Level = InitialLevel;
616     FormatTok->BlockKind = BK_Block;
617     return;
618   }
619 
620   size_t PPEndHash = computePPHash();
621 
622   // Munch the closing brace.
623   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
624 
625   if (MacroBlock && FormatTok->is(tok::l_paren))
626     parseParens();
627 
628   if (MunchSemi && FormatTok->Tok.is(tok::semi))
629     nextToken();
630   Line->Level = InitialLevel;
631 
632   if (PPStartHash == PPEndHash) {
633     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
634     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
635       // Update the opening line to add the forward reference as well
636       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
637           CurrentLines->size() - 1;
638     }
639   }
640 }
641 
642 static bool isGoogScope(const UnwrappedLine &Line) {
643   // FIXME: Closure-library specific stuff should not be hard-coded but be
644   // configurable.
645   if (Line.Tokens.size() < 4)
646     return false;
647   auto I = Line.Tokens.begin();
648   if (I->Tok->TokenText != "goog")
649     return false;
650   ++I;
651   if (I->Tok->isNot(tok::period))
652     return false;
653   ++I;
654   if (I->Tok->TokenText != "scope")
655     return false;
656   ++I;
657   return I->Tok->is(tok::l_paren);
658 }
659 
660 static bool isIIFE(const UnwrappedLine &Line,
661                    const AdditionalKeywords &Keywords) {
662   // Look for the start of an immediately invoked anonymous function.
663   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
664   // This is commonly done in JavaScript to create a new, anonymous scope.
665   // Example: (function() { ... })()
666   if (Line.Tokens.size() < 3)
667     return false;
668   auto I = Line.Tokens.begin();
669   if (I->Tok->isNot(tok::l_paren))
670     return false;
671   ++I;
672   if (I->Tok->isNot(Keywords.kw_function))
673     return false;
674   ++I;
675   return I->Tok->is(tok::l_paren);
676 }
677 
678 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
679                                    const FormatToken &InitialToken) {
680   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
681     return Style.BraceWrapping.AfterNamespace;
682   if (InitialToken.is(tok::kw_class))
683     return Style.BraceWrapping.AfterClass;
684   if (InitialToken.is(tok::kw_union))
685     return Style.BraceWrapping.AfterUnion;
686   if (InitialToken.is(tok::kw_struct))
687     return Style.BraceWrapping.AfterStruct;
688   return false;
689 }
690 
691 void UnwrappedLineParser::parseChildBlock() {
692   FormatTok->BlockKind = BK_Block;
693   nextToken();
694   {
695     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
696                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
697     ScopedLineState LineState(*this);
698     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
699                                             /*MustBeDeclaration=*/false);
700     Line->Level += SkipIndent ? 0 : 1;
701     parseLevel(/*HasOpeningBrace=*/true);
702     flushComments(isOnNewLine(*FormatTok));
703     Line->Level -= SkipIndent ? 0 : 1;
704   }
705   nextToken();
706 }
707 
708 void UnwrappedLineParser::parsePPDirective() {
709   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
710   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
711 
712   nextToken();
713 
714   if (!FormatTok->Tok.getIdentifierInfo()) {
715     parsePPUnknown();
716     return;
717   }
718 
719   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
720   case tok::pp_define:
721     parsePPDefine();
722     return;
723   case tok::pp_if:
724     parsePPIf(/*IfDef=*/false);
725     break;
726   case tok::pp_ifdef:
727   case tok::pp_ifndef:
728     parsePPIf(/*IfDef=*/true);
729     break;
730   case tok::pp_else:
731     parsePPElse();
732     break;
733   case tok::pp_elif:
734     parsePPElIf();
735     break;
736   case tok::pp_endif:
737     parsePPEndIf();
738     break;
739   default:
740     parsePPUnknown();
741     break;
742   }
743 }
744 
745 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
746   size_t Line = CurrentLines->size();
747   if (CurrentLines == &PreprocessorDirectives)
748     Line += Lines.size();
749 
750   if (Unreachable ||
751       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
752     PPStack.push_back({PP_Unreachable, Line});
753   else
754     PPStack.push_back({PP_Conditional, Line});
755 }
756 
757 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
758   ++PPBranchLevel;
759   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
760   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
761     PPLevelBranchIndex.push_back(0);
762     PPLevelBranchCount.push_back(0);
763   }
764   PPChainBranchIndex.push(0);
765   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
766   conditionalCompilationCondition(Unreachable || Skip);
767 }
768 
769 void UnwrappedLineParser::conditionalCompilationAlternative() {
770   if (!PPStack.empty())
771     PPStack.pop_back();
772   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
773   if (!PPChainBranchIndex.empty())
774     ++PPChainBranchIndex.top();
775   conditionalCompilationCondition(
776       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
777       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
778 }
779 
780 void UnwrappedLineParser::conditionalCompilationEnd() {
781   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
782   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
783     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
784       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
785     }
786   }
787   // Guard against #endif's without #if.
788   if (PPBranchLevel > -1)
789     --PPBranchLevel;
790   if (!PPChainBranchIndex.empty())
791     PPChainBranchIndex.pop();
792   if (!PPStack.empty())
793     PPStack.pop_back();
794 }
795 
796 void UnwrappedLineParser::parsePPIf(bool IfDef) {
797   bool IfNDef = FormatTok->is(tok::pp_ifndef);
798   nextToken();
799   bool Unreachable = false;
800   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
801     Unreachable = true;
802   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
803     Unreachable = true;
804   conditionalCompilationStart(Unreachable);
805   FormatToken *IfCondition = FormatTok;
806   // If there's a #ifndef on the first line, and the only lines before it are
807   // comments, it could be an include guard.
808   bool MaybeIncludeGuard = IfNDef;
809   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
810     for (auto &Line : Lines) {
811       if (!Line.Tokens.front().Tok->is(tok::comment)) {
812         MaybeIncludeGuard = false;
813         IncludeGuard = IG_Rejected;
814         break;
815       }
816     }
817   --PPBranchLevel;
818   parsePPUnknown();
819   ++PPBranchLevel;
820   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
821     IncludeGuard = IG_IfNdefed;
822     IncludeGuardToken = IfCondition;
823   }
824 }
825 
826 void UnwrappedLineParser::parsePPElse() {
827   // If a potential include guard has an #else, it's not an include guard.
828   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
829     IncludeGuard = IG_Rejected;
830   conditionalCompilationAlternative();
831   if (PPBranchLevel > -1)
832     --PPBranchLevel;
833   parsePPUnknown();
834   ++PPBranchLevel;
835 }
836 
837 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
838 
839 void UnwrappedLineParser::parsePPEndIf() {
840   conditionalCompilationEnd();
841   parsePPUnknown();
842   // If the #endif of a potential include guard is the last thing in the file,
843   // then we found an include guard.
844   unsigned TokenPosition = Tokens->getPosition();
845   FormatToken *PeekNext = AllTokens[TokenPosition];
846   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
847       PeekNext->is(tok::eof) &&
848       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
849     IncludeGuard = IG_Found;
850 }
851 
852 void UnwrappedLineParser::parsePPDefine() {
853   nextToken();
854 
855   if (!FormatTok->Tok.getIdentifierInfo()) {
856     IncludeGuard = IG_Rejected;
857     IncludeGuardToken = nullptr;
858     parsePPUnknown();
859     return;
860   }
861 
862   if (IncludeGuard == IG_IfNdefed &&
863       IncludeGuardToken->TokenText == FormatTok->TokenText) {
864     IncludeGuard = IG_Defined;
865     IncludeGuardToken = nullptr;
866     for (auto &Line : Lines) {
867       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
868         IncludeGuard = IG_Rejected;
869         break;
870       }
871     }
872   }
873 
874   nextToken();
875   if (FormatTok->Tok.getKind() == tok::l_paren &&
876       FormatTok->WhitespaceRange.getBegin() ==
877           FormatTok->WhitespaceRange.getEnd()) {
878     parseParens();
879   }
880   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
881     Line->Level += PPBranchLevel + 1;
882   addUnwrappedLine();
883   ++Line->Level;
884 
885   // Errors during a preprocessor directive can only affect the layout of the
886   // preprocessor directive, and thus we ignore them. An alternative approach
887   // would be to use the same approach we use on the file level (no
888   // re-indentation if there was a structural error) within the macro
889   // definition.
890   parseFile();
891 }
892 
893 void UnwrappedLineParser::parsePPUnknown() {
894   do {
895     nextToken();
896   } while (!eof());
897   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
898     Line->Level += PPBranchLevel + 1;
899   addUnwrappedLine();
900 }
901 
902 // Here we blacklist certain tokens that are not usually the first token in an
903 // unwrapped line. This is used in attempt to distinguish macro calls without
904 // trailing semicolons from other constructs split to several lines.
905 static bool tokenCanStartNewLine(const clang::Token &Tok) {
906   // Semicolon can be a null-statement, l_square can be a start of a macro or
907   // a C++11 attribute, but this doesn't seem to be common.
908   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
909          Tok.isNot(tok::l_square) &&
910          // Tokens that can only be used as binary operators and a part of
911          // overloaded operator names.
912          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
913          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
914          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
915          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
916          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
917          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
918          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
919          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
920          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
921          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
922          Tok.isNot(tok::lesslessequal) &&
923          // Colon is used in labels, base class lists, initializer lists,
924          // range-based for loops, ternary operator, but should never be the
925          // first token in an unwrapped line.
926          Tok.isNot(tok::colon) &&
927          // 'noexcept' is a trailing annotation.
928          Tok.isNot(tok::kw_noexcept);
929 }
930 
931 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
932                           const FormatToken *FormatTok) {
933   // FIXME: This returns true for C/C++ keywords like 'struct'.
934   return FormatTok->is(tok::identifier) &&
935          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
936           !FormatTok->isOneOf(
937               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
938               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
939               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
940               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
941               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
942               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
943               Keywords.kw_from));
944 }
945 
946 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
947                                  const FormatToken *FormatTok) {
948   return FormatTok->Tok.isLiteral() ||
949          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
950          mustBeJSIdent(Keywords, FormatTok);
951 }
952 
953 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
954 // when encountered after a value (see mustBeJSIdentOrValue).
955 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
956                            const FormatToken *FormatTok) {
957   return FormatTok->isOneOf(
958       tok::kw_return, Keywords.kw_yield,
959       // conditionals
960       tok::kw_if, tok::kw_else,
961       // loops
962       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
963       // switch/case
964       tok::kw_switch, tok::kw_case,
965       // exceptions
966       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
967       // declaration
968       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
969       Keywords.kw_async, Keywords.kw_function,
970       // import/export
971       Keywords.kw_import, tok::kw_export);
972 }
973 
974 // readTokenWithJavaScriptASI reads the next token and terminates the current
975 // line if JavaScript Automatic Semicolon Insertion must
976 // happen between the current token and the next token.
977 //
978 // This method is conservative - it cannot cover all edge cases of JavaScript,
979 // but only aims to correctly handle certain well known cases. It *must not*
980 // return true in speculative cases.
981 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
982   FormatToken *Previous = FormatTok;
983   readToken();
984   FormatToken *Next = FormatTok;
985 
986   bool IsOnSameLine =
987       CommentsBeforeNextToken.empty()
988           ? Next->NewlinesBefore == 0
989           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
990   if (IsOnSameLine)
991     return;
992 
993   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
994   bool PreviousStartsTemplateExpr =
995       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
996   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
997     // If the line contains an '@' sign, the previous token might be an
998     // annotation, which can precede another identifier/value.
999     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1000                               [](UnwrappedLineNode &LineNode) {
1001                                 return LineNode.Tok->is(tok::at);
1002                               }) != Line->Tokens.end();
1003     if (HasAt)
1004       return;
1005   }
1006   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1007     return addUnwrappedLine();
1008   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1009   bool NextEndsTemplateExpr =
1010       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1011   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1012       (PreviousMustBeValue ||
1013        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1014                          tok::minusminus)))
1015     return addUnwrappedLine();
1016   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1017       isJSDeclOrStmt(Keywords, Next))
1018     return addUnwrappedLine();
1019 }
1020 
// Parses one structural element starting at the current token.
//
// The first switch dispatches on the leading token: constructs with a
// dedicated parser are handed off and the function returns; cases that end in
// `break` continue into the loop below. The loop consumes tokens one
// construct at a time until a case finishes the element (usually by calling
// addUnwrappedLine() and returning) or eof() is reached.
//
// Must not be called with an l_brace as the current token (see the assert).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: consume "include" plus an optional string literal as one line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Dispatch on the token that starts the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      // Inline asm block: tag both braces as TT_InlineASMBrace and mark every
      // token in between as Finalized.
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the keyword is simply consumed; otherwise it
    // is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only `extern "<string>" {` is handled here; any other use of extern
    // continues as a regular declaration.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: `import [public] "<string>" [;]`. If no string literal
        // follows, return without adding a line.
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // signals/Q_SIGNALS/slots/Q_SLOTS followed by ':' form a line of their
    // own; without the colon parsing continues below.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic path: consume the rest of the element token by token.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // `typedef NS_ENUM(...)` and the related macros listed here are parsed
      // as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon terminates the element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here; only the line is ended.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by an identifier or simple type specifier, a
      // parenthesized list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then restore the stream position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // Only consider labels and macros if the identifier was the first token
      // on the line (ignoring one leading comment).
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Treat the identifier as a macro only if it is followed by a line
        // break, is at least 5 characters long or function-like, is spelled
        // entirely in upper case, and the next token can start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->BlockKind = BK_BracedInit;
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: "= <" starts a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1489 
1490 bool UnwrappedLineParser::tryToParseLambda() {
1491   if (!Style.isCpp()) {
1492     nextToken();
1493     return false;
1494   }
1495   assert(FormatTok->is(tok::l_square));
1496   FormatToken &LSquare = *FormatTok;
1497   if (!tryToParseLambdaIntroducer())
1498     return false;
1499 
1500   bool SeenArrow = false;
1501 
1502   while (FormatTok->isNot(tok::l_brace)) {
1503     if (FormatTok->isSimpleTypeSpecifier()) {
1504       nextToken();
1505       continue;
1506     }
1507     switch (FormatTok->Tok.getKind()) {
1508     case tok::l_brace:
1509       break;
1510     case tok::l_paren:
1511       parseParens();
1512       break;
1513     case tok::amp:
1514     case tok::star:
1515     case tok::kw_const:
1516     case tok::comma:
1517     case tok::less:
1518     case tok::greater:
1519     case tok::identifier:
1520     case tok::numeric_constant:
1521     case tok::coloncolon:
1522     case tok::kw_class:
1523     case tok::kw_mutable:
1524     case tok::kw_noexcept:
1525     case tok::kw_template:
1526     case tok::kw_typename:
1527       nextToken();
1528       break;
1529     // Specialization of a template with an integer parameter can contain
1530     // arithmetic, logical, comparison and ternary operators.
1531     //
1532     // FIXME: This also accepts sequences of operators that are not in the scope
1533     // of a template argument list.
1534     //
1535     // In a C++ lambda a template type can only occur after an arrow. We use
1536     // this as an heuristic to distinguish between Objective-C expressions
1537     // followed by an `a->b` expression, such as:
1538     // ([obj func:arg] + a->b)
1539     // Otherwise the code below would parse as a lambda.
1540     //
1541     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1542     // explicit template lists: []<bool b = true && false>(U &&u){}
1543     case tok::plus:
1544     case tok::minus:
1545     case tok::exclaim:
1546     case tok::tilde:
1547     case tok::slash:
1548     case tok::percent:
1549     case tok::lessless:
1550     case tok::pipe:
1551     case tok::pipepipe:
1552     case tok::ampamp:
1553     case tok::caret:
1554     case tok::equalequal:
1555     case tok::exclaimequal:
1556     case tok::greaterequal:
1557     case tok::lessequal:
1558     case tok::question:
1559     case tok::colon:
1560     case tok::kw_true:
1561     case tok::kw_false:
1562       if (SeenArrow) {
1563         nextToken();
1564         break;
1565       }
1566       return true;
1567     case tok::arrow:
1568       // This might or might not actually be a lambda arrow (this could be an
1569       // ObjC method invocation followed by a dereferencing arrow). We might
1570       // reset this back to TT_Unknown in TokenAnnotator.
1571       FormatTok->Type = TT_LambdaArrow;
1572       SeenArrow = true;
1573       nextToken();
1574       break;
1575     default:
1576       return true;
1577     }
1578   }
1579   FormatTok->Type = TT_LambdaLBrace;
1580   LSquare.Type = TT_LambdaLSquare;
1581   parseChildBlock();
1582   return true;
1583 }
1584 
1585 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1586   const FormatToken *Previous = FormatTok->Previous;
1587   if (Previous &&
1588       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1589                          tok::kw_delete, tok::l_square) ||
1590        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1591        Previous->isSimpleTypeSpecifier())) {
1592     nextToken();
1593     return false;
1594   }
1595   nextToken();
1596   if (FormatTok->is(tok::l_square)) {
1597     return false;
1598   }
1599   parseSquare(/*LambdaIntroducer=*/true);
1600   return true;
1601 }
1602 
1603 void UnwrappedLineParser::tryToParseJSFunction() {
1604   assert(FormatTok->is(Keywords.kw_function) ||
1605          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1606   if (FormatTok->is(Keywords.kw_async))
1607     nextToken();
1608   // Consume "function".
1609   nextToken();
1610 
1611   // Consume * (generator function). Treat it like C++'s overloaded operators.
1612   if (FormatTok->is(tok::star)) {
1613     FormatTok->Type = TT_OverloadedOperator;
1614     nextToken();
1615   }
1616 
1617   // Consume function name.
1618   if (FormatTok->is(tok::identifier))
1619     nextToken();
1620 
1621   if (FormatTok->isNot(tok::l_paren))
1622     return;
1623 
1624   // Parse formal parameter list.
1625   parseParens();
1626 
1627   if (FormatTok->is(tok::colon)) {
1628     // Parse a type definition.
1629     nextToken();
1630 
1631     // Eat the type declaration. For braced inline object types, balance braces,
1632     // otherwise just parse until finding an l_brace for the function body.
1633     if (FormatTok->is(tok::l_brace))
1634       tryToParseBracedList();
1635     else
1636       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1637         nextToken();
1638   }
1639 
1640   if (FormatTok->is(tok::semi))
1641     return;
1642 
1643   parseChildBlock();
1644 }
1645 
1646 bool UnwrappedLineParser::tryToParseBracedList() {
1647   if (FormatTok->BlockKind == BK_Unknown)
1648     calculateBraceTypes();
1649   assert(FormatTok->BlockKind != BK_Unknown);
1650   if (FormatTok->BlockKind == BK_Block)
1651     return false;
1652   nextToken();
1653   parseBracedList();
1654   return true;
1655 }
1656 
1657 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1658                                           tok::TokenKind ClosingBraceKind) {
1659   bool HasError = false;
1660 
1661   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1662   // replace this by using parseAssigmentExpression() inside.
1663   do {
1664     if (Style.isCSharp()) {
1665       if (FormatTok->is(TT_JsFatArrow)) {
1666         nextToken();
1667         // Fat arrows can be followed by simple expressions or by child blocks
1668         // in curly braces.
1669         if (FormatTok->is(tok::l_brace)) {
1670           parseChildBlock();
1671           continue;
1672         }
1673       }
1674     }
1675     if (Style.Language == FormatStyle::LK_JavaScript) {
1676       if (FormatTok->is(Keywords.kw_function) ||
1677           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1678         tryToParseJSFunction();
1679         continue;
1680       }
1681       if (FormatTok->is(TT_JsFatArrow)) {
1682         nextToken();
1683         // Fat arrows can be followed by simple expressions or by child blocks
1684         // in curly braces.
1685         if (FormatTok->is(tok::l_brace)) {
1686           parseChildBlock();
1687           continue;
1688         }
1689       }
1690       if (FormatTok->is(tok::l_brace)) {
1691         // Could be a method inside of a braced list `{a() { return 1; }}`.
1692         if (tryToParseBracedList())
1693           continue;
1694         parseChildBlock();
1695       }
1696     }
1697     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1698       nextToken();
1699       return !HasError;
1700     }
1701     switch (FormatTok->Tok.getKind()) {
1702     case tok::caret:
1703       nextToken();
1704       if (FormatTok->is(tok::l_brace)) {
1705         parseChildBlock();
1706       }
1707       break;
1708     case tok::l_square:
1709       if (Style.isCSharp())
1710         parseSquare();
1711       else
1712         tryToParseLambda();
1713       break;
1714     case tok::l_paren:
1715       parseParens();
1716       // JavaScript can just have free standing methods and getters/setters in
1717       // object literals. Detect them by a "{" following ")".
1718       if (Style.Language == FormatStyle::LK_JavaScript) {
1719         if (FormatTok->is(tok::l_brace))
1720           parseChildBlock();
1721         break;
1722       }
1723       break;
1724     case tok::l_brace:
1725       // Assume there are no blocks inside a braced init list apart
1726       // from the ones we explicitly parse out (like lambdas).
1727       FormatTok->BlockKind = BK_BracedInit;
1728       nextToken();
1729       parseBracedList();
1730       break;
1731     case tok::less:
1732       if (Style.Language == FormatStyle::LK_Proto) {
1733         nextToken();
1734         parseBracedList(/*ContinueOnSemicolons=*/false,
1735                         /*ClosingBraceKind=*/tok::greater);
1736       } else {
1737         nextToken();
1738       }
1739       break;
1740     case tok::semi:
1741       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1742       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1743       // used for error recovery if we have otherwise determined that this is
1744       // a braced list.
1745       if (Style.Language == FormatStyle::LK_JavaScript) {
1746         nextToken();
1747         break;
1748       }
1749       HasError = true;
1750       if (!ContinueOnSemicolons)
1751         return !HasError;
1752       nextToken();
1753       break;
1754     case tok::comma:
1755       nextToken();
1756       break;
1757     default:
1758       nextToken();
1759       break;
1760     }
1761   } while (!eof());
1762   return false;
1763 }
1764 
1765 void UnwrappedLineParser::parseParens() {
1766   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1767   nextToken();
1768   do {
1769     switch (FormatTok->Tok.getKind()) {
1770     case tok::l_paren:
1771       parseParens();
1772       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1773         parseChildBlock();
1774       break;
1775     case tok::r_paren:
1776       nextToken();
1777       return;
1778     case tok::r_brace:
1779       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1780       return;
1781     case tok::l_square:
1782       tryToParseLambda();
1783       break;
1784     case tok::l_brace:
1785       if (!tryToParseBracedList())
1786         parseChildBlock();
1787       break;
1788     case tok::at:
1789       nextToken();
1790       if (FormatTok->Tok.is(tok::l_brace)) {
1791         nextToken();
1792         parseBracedList();
1793       }
1794       break;
1795     case tok::kw_class:
1796       if (Style.Language == FormatStyle::LK_JavaScript)
1797         parseRecord(/*ParseAsExpr=*/true);
1798       else
1799         nextToken();
1800       break;
1801     case tok::identifier:
1802       if (Style.Language == FormatStyle::LK_JavaScript &&
1803           (FormatTok->is(Keywords.kw_function) ||
1804            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1805         tryToParseJSFunction();
1806       else
1807         nextToken();
1808       break;
1809     default:
1810       nextToken();
1811       break;
1812     }
1813   } while (!eof());
1814 }
1815 
1816 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1817   if (!LambdaIntroducer) {
1818     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1819     if (tryToParseLambda())
1820       return;
1821   }
1822   do {
1823     switch (FormatTok->Tok.getKind()) {
1824     case tok::l_paren:
1825       parseParens();
1826       break;
1827     case tok::r_square:
1828       nextToken();
1829       return;
1830     case tok::r_brace:
1831       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1832       return;
1833     case tok::l_square:
1834       parseSquare();
1835       break;
1836     case tok::l_brace: {
1837       if (!tryToParseBracedList())
1838         parseChildBlock();
1839       break;
1840     }
1841     case tok::at:
1842       nextToken();
1843       if (FormatTok->Tok.is(tok::l_brace)) {
1844         nextToken();
1845         parseBracedList();
1846       }
1847       break;
1848     default:
1849       nextToken();
1850       break;
1851     }
1852   } while (!eof());
1853 }
1854 
1855 void UnwrappedLineParser::parseIfThenElse() {
1856   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1857   nextToken();
1858   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1859     nextToken();
1860   if (FormatTok->Tok.is(tok::l_paren))
1861     parseParens();
1862   bool NeedsUnwrappedLine = false;
1863   if (FormatTok->Tok.is(tok::l_brace)) {
1864     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1865     parseBlock(/*MustBeDeclaration=*/false);
1866     if (Style.BraceWrapping.BeforeElse)
1867       addUnwrappedLine();
1868     else
1869       NeedsUnwrappedLine = true;
1870   } else {
1871     addUnwrappedLine();
1872     ++Line->Level;
1873     parseStructuralElement();
1874     --Line->Level;
1875   }
1876   if (FormatTok->Tok.is(tok::kw_else)) {
1877     nextToken();
1878     if (FormatTok->Tok.is(tok::l_brace)) {
1879       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1880       parseBlock(/*MustBeDeclaration=*/false);
1881       addUnwrappedLine();
1882     } else if (FormatTok->Tok.is(tok::kw_if)) {
1883       parseIfThenElse();
1884     } else {
1885       addUnwrappedLine();
1886       ++Line->Level;
1887       parseStructuralElement();
1888       if (FormatTok->is(tok::eof))
1889         addUnwrappedLine();
1890       --Line->Level;
1891     }
1892   } else if (NeedsUnwrappedLine) {
1893     addUnwrappedLine();
1894   }
1895 }
1896 
1897 void UnwrappedLineParser::parseTryCatch() {
1898   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1899   nextToken();
1900   bool NeedsUnwrappedLine = false;
1901   if (FormatTok->is(tok::colon)) {
1902     // We are in a function try block, what comes is an initializer list.
1903     nextToken();
1904 
1905     // In case identifiers were removed by clang-tidy, what might follow is
1906     // multiple commas in sequence - before the first identifier.
1907     while (FormatTok->is(tok::comma))
1908       nextToken();
1909 
1910     while (FormatTok->is(tok::identifier)) {
1911       nextToken();
1912       if (FormatTok->is(tok::l_paren))
1913         parseParens();
1914 
1915       // In case identifiers were removed by clang-tidy, what might follow is
1916       // multiple commas in sequence - after the first identifier.
1917       while (FormatTok->is(tok::comma))
1918         nextToken();
1919     }
1920   }
1921   // Parse try with resource.
1922   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1923     parseParens();
1924   }
1925   if (FormatTok->is(tok::l_brace)) {
1926     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1927     parseBlock(/*MustBeDeclaration=*/false);
1928     if (Style.BraceWrapping.BeforeCatch) {
1929       addUnwrappedLine();
1930     } else {
1931       NeedsUnwrappedLine = true;
1932     }
1933   } else if (!FormatTok->is(tok::kw_catch)) {
1934     // The C++ standard requires a compound-statement after a try.
1935     // If there's none, we try to assume there's a structuralElement
1936     // and try to continue.
1937     addUnwrappedLine();
1938     ++Line->Level;
1939     parseStructuralElement();
1940     --Line->Level;
1941   }
1942   while (1) {
1943     if (FormatTok->is(tok::at))
1944       nextToken();
1945     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1946                              tok::kw___finally) ||
1947           ((Style.Language == FormatStyle::LK_Java ||
1948             Style.Language == FormatStyle::LK_JavaScript) &&
1949            FormatTok->is(Keywords.kw_finally)) ||
1950           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1951            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1952       break;
1953     nextToken();
1954     while (FormatTok->isNot(tok::l_brace)) {
1955       if (FormatTok->is(tok::l_paren)) {
1956         parseParens();
1957         continue;
1958       }
1959       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1960         return;
1961       nextToken();
1962     }
1963     NeedsUnwrappedLine = false;
1964     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1965     parseBlock(/*MustBeDeclaration=*/false);
1966     if (Style.BraceWrapping.BeforeCatch)
1967       addUnwrappedLine();
1968     else
1969       NeedsUnwrappedLine = true;
1970   }
1971   if (NeedsUnwrappedLine)
1972     addUnwrappedLine();
1973 }
1974 
1975 void UnwrappedLineParser::parseNamespace() {
1976   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
1977          "'namespace' expected");
1978 
1979   const FormatToken &InitialToken = *FormatTok;
1980   nextToken();
1981   if (InitialToken.is(TT_NamespaceMacro)) {
1982     parseParens();
1983   } else {
1984     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
1985                               tok::l_square)) {
1986       if (FormatTok->is(tok::l_square))
1987         parseSquare();
1988       else
1989         nextToken();
1990     }
1991   }
1992   if (FormatTok->Tok.is(tok::l_brace)) {
1993     if (ShouldBreakBeforeBrace(Style, InitialToken))
1994       addUnwrappedLine();
1995 
1996     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1997                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1998                      DeclarationScopeStack.size() > 1);
1999     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
2000     // Munch the semicolon after a namespace. This is more common than one would
2001     // think. Putting the semicolon into its own line is very ugly.
2002     if (FormatTok->Tok.is(tok::semi))
2003       nextToken();
2004     addUnwrappedLine();
2005   }
2006   // FIXME: Add error handling.
2007 }
2008 
2009 void UnwrappedLineParser::parseNew() {
2010   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2011   nextToken();
2012 
2013   if (Style.isCSharp()) {
2014     do {
2015       if (FormatTok->is(tok::l_brace))
2016         parseBracedList();
2017 
2018       if (FormatTok->isOneOf(tok::semi, tok::comma))
2019         return;
2020 
2021       nextToken();
2022     } while (!eof());
2023   }
2024 
2025   if (Style.Language != FormatStyle::LK_Java)
2026     return;
2027 
2028   // In Java, we can parse everything up to the parens, which aren't optional.
2029   do {
2030     // There should not be a ;, { or } before the new's open paren.
2031     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2032       return;
2033 
2034     // Consume the parens.
2035     if (FormatTok->is(tok::l_paren)) {
2036       parseParens();
2037 
2038       // If there is a class body of an anonymous class, consume that as child.
2039       if (FormatTok->is(tok::l_brace))
2040         parseChildBlock();
2041       return;
2042     }
2043     nextToken();
2044   } while (!eof());
2045 }
2046 
2047 void UnwrappedLineParser::parseForOrWhileLoop() {
2048   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2049          "'for', 'while' or foreach macro expected");
2050   nextToken();
2051   // JS' for await ( ...
2052   if (Style.Language == FormatStyle::LK_JavaScript &&
2053       FormatTok->is(Keywords.kw_await))
2054     nextToken();
2055   if (FormatTok->Tok.is(tok::l_paren))
2056     parseParens();
2057   if (FormatTok->Tok.is(tok::l_brace)) {
2058     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2059     parseBlock(/*MustBeDeclaration=*/false);
2060     addUnwrappedLine();
2061   } else {
2062     addUnwrappedLine();
2063     ++Line->Level;
2064     parseStructuralElement();
2065     --Line->Level;
2066   }
2067 }
2068 
2069 void UnwrappedLineParser::parseDoWhile() {
2070   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2071   nextToken();
2072   if (FormatTok->Tok.is(tok::l_brace)) {
2073     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2074     parseBlock(/*MustBeDeclaration=*/false);
2075     if (Style.BraceWrapping.IndentBraces)
2076       addUnwrappedLine();
2077   } else {
2078     addUnwrappedLine();
2079     ++Line->Level;
2080     parseStructuralElement();
2081     --Line->Level;
2082   }
2083 
2084   // FIXME: Add error handling.
2085   if (!FormatTok->Tok.is(tok::kw_while)) {
2086     addUnwrappedLine();
2087     return;
2088   }
2089 
2090   nextToken();
2091   parseStructuralElement();
2092 }
2093 
2094 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2095   nextToken();
2096   unsigned OldLineLevel = Line->Level;
2097   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2098     --Line->Level;
2099   if (LeftAlignLabel)
2100     Line->Level = 0;
2101   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2102       FormatTok->Tok.is(tok::l_brace)) {
2103     CompoundStatementIndenter Indenter(this, Line->Level,
2104                                        Style.BraceWrapping.AfterCaseLabel,
2105                                        Style.BraceWrapping.IndentBraces);
2106     parseBlock(/*MustBeDeclaration=*/false);
2107     if (FormatTok->Tok.is(tok::kw_break)) {
2108       if (Style.BraceWrapping.AfterControlStatement ==
2109           FormatStyle::BWACS_Always)
2110         addUnwrappedLine();
2111       parseStructuralElement();
2112     }
2113     addUnwrappedLine();
2114   } else {
2115     if (FormatTok->is(tok::semi))
2116       nextToken();
2117     addUnwrappedLine();
2118   }
2119   Line->Level = OldLineLevel;
2120   if (FormatTok->isNot(tok::l_brace)) {
2121     parseStructuralElement();
2122     addUnwrappedLine();
2123   }
2124 }
2125 
2126 void UnwrappedLineParser::parseCaseLabel() {
2127   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2128   // FIXME: fix handling of complex expressions here.
2129   do {
2130     nextToken();
2131   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2132   parseLabel();
2133 }
2134 
2135 void UnwrappedLineParser::parseSwitch() {
2136   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2137   nextToken();
2138   if (FormatTok->Tok.is(tok::l_paren))
2139     parseParens();
2140   if (FormatTok->Tok.is(tok::l_brace)) {
2141     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2142     parseBlock(/*MustBeDeclaration=*/false);
2143     addUnwrappedLine();
2144   } else {
2145     addUnwrappedLine();
2146     ++Line->Level;
2147     parseStructuralElement();
2148     --Line->Level;
2149   }
2150 }
2151 
2152 void UnwrappedLineParser::parseAccessSpecifier() {
2153   nextToken();
2154   // Understand Qt's slots.
2155   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2156     nextToken();
2157   // Otherwise, we don't know what it is, and we'd better keep the next token.
2158   if (FormatTok->Tok.is(tok::colon))
2159     nextToken();
2160   addUnwrappedLine();
2161 }
2162 
2163 bool UnwrappedLineParser::parseEnum() {
2164   // Won't be 'enum' for NS_ENUMs.
2165   if (FormatTok->Tok.is(tok::kw_enum))
2166     nextToken();
2167 
2168   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2169   // declarations. An "enum" keyword followed by a colon would be a syntax
2170   // error and thus assume it is just an identifier.
2171   if (Style.Language == FormatStyle::LK_JavaScript &&
2172       FormatTok->isOneOf(tok::colon, tok::question))
2173     return false;
2174 
2175   // In protobuf, "enum" can be used as a field name.
2176   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2177     return false;
2178 
2179   // Eat up enum class ...
2180   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2181     nextToken();
2182 
2183   while (FormatTok->Tok.getIdentifierInfo() ||
2184          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2185                             tok::greater, tok::comma, tok::question)) {
2186     nextToken();
2187     // We can have macros or attributes in between 'enum' and the enum name.
2188     if (FormatTok->is(tok::l_paren))
2189       parseParens();
2190     if (FormatTok->is(tok::identifier)) {
2191       nextToken();
2192       // If there are two identifiers in a row, this is likely an elaborate
2193       // return type. In Java, this can be "implements", etc.
2194       if (Style.isCpp() && FormatTok->is(tok::identifier))
2195         return false;
2196     }
2197   }
2198 
2199   // Just a declaration or something is wrong.
2200   if (FormatTok->isNot(tok::l_brace))
2201     return true;
2202   FormatTok->BlockKind = BK_Block;
2203 
2204   if (Style.Language == FormatStyle::LK_Java) {
2205     // Java enums are different.
2206     parseJavaEnumBody();
2207     return true;
2208   }
2209   if (Style.Language == FormatStyle::LK_Proto) {
2210     parseBlock(/*MustBeDeclaration=*/true);
2211     return true;
2212   }
2213 
2214   // Parse enum body.
2215   nextToken();
2216   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2217   if (HasError) {
2218     if (FormatTok->is(tok::semi))
2219       nextToken();
2220     addUnwrappedLine();
2221   }
2222   return true;
2223 
2224   // There is no addUnwrappedLine() here so that we fall through to parsing a
2225   // structural element afterwards. Thus, in "enum A {} n, m;",
2226   // "} n, m;" will end up in one unwrapped line.
2227 }
2228 
2229 void UnwrappedLineParser::parseJavaEnumBody() {
2230   // Determine whether the enum is simple, i.e. does not have a semicolon or
2231   // constants with class bodies. Simple enums can be formatted like braced
2232   // lists, contracted to a single line, etc.
2233   unsigned StoredPosition = Tokens->getPosition();
2234   bool IsSimple = true;
2235   FormatToken *Tok = Tokens->getNextToken();
2236   while (Tok) {
2237     if (Tok->is(tok::r_brace))
2238       break;
2239     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2240       IsSimple = false;
2241       break;
2242     }
2243     // FIXME: This will also mark enums with braces in the arguments to enum
2244     // constants as "not simple". This is probably fine in practice, though.
2245     Tok = Tokens->getNextToken();
2246   }
2247   FormatTok = Tokens->setPosition(StoredPosition);
2248 
2249   if (IsSimple) {
2250     nextToken();
2251     parseBracedList();
2252     addUnwrappedLine();
2253     return;
2254   }
2255 
2256   // Parse the body of a more complex enum.
2257   // First add a line for everything up to the "{".
2258   nextToken();
2259   addUnwrappedLine();
2260   ++Line->Level;
2261 
2262   // Parse the enum constants.
2263   while (FormatTok) {
2264     if (FormatTok->is(tok::l_brace)) {
2265       // Parse the constant's class body.
2266       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2267                  /*MunchSemi=*/false);
2268     } else if (FormatTok->is(tok::l_paren)) {
2269       parseParens();
2270     } else if (FormatTok->is(tok::comma)) {
2271       nextToken();
2272       addUnwrappedLine();
2273     } else if (FormatTok->is(tok::semi)) {
2274       nextToken();
2275       addUnwrappedLine();
2276       break;
2277     } else if (FormatTok->is(tok::r_brace)) {
2278       addUnwrappedLine();
2279       break;
2280     } else {
2281       nextToken();
2282     }
2283   }
2284 
2285   // Parse the class body after the enum's ";" if any.
2286   parseLevel(/*HasOpeningBrace=*/true);
2287   nextToken();
2288   --Line->Level;
2289   addUnwrappedLine();
2290 }
2291 
// Parses a record introduced by the current token (which is consumed first):
// the record name with any attributes or macros around it, an optional
// base/template clause and, if present, the braced body.
//
// \p ParseAsExpr selects how the body is parsed: as a child block via
// parseChildBlock() when true, otherwise as a declaration block via
// parseBlock(). The function returns early, without parsing a body, when a
// ";" is reached inside the base/template clause.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Remember the introducing token; it is needed for the brace-breaking
  // decision further down.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // For Java and JavaScript, "." and "," are accepted in the name part too.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas) ||
         ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.Language == FormatStyle::LK_JavaScript &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier whose text is not entirely upper case is assumed not to
    // be a macro, so a "(" after it is not consumed as macro arguments below.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip over the clause introduced by ":" or "<" until the record body, a
    // ";" or end of input is found.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        // A "{" that does not parse as a braced list ends the clause; it is
        // handled as the record body below.
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      // A ";" here means there is no body to parse.
      if (FormatTok->Tok.is(tok::semi))
        return;
      // A C# "where" starts a generic type constraint, which is placed on a
      // line of its own and parsed separately.
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
2367 
2368 void UnwrappedLineParser::parseObjCMethod() {
2369   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2370          "'(' or identifier expected.");
2371   do {
2372     if (FormatTok->Tok.is(tok::semi)) {
2373       nextToken();
2374       addUnwrappedLine();
2375       return;
2376     } else if (FormatTok->Tok.is(tok::l_brace)) {
2377       if (Style.BraceWrapping.AfterFunction)
2378         addUnwrappedLine();
2379       parseBlock(/*MustBeDeclaration=*/false);
2380       addUnwrappedLine();
2381       return;
2382     } else {
2383       nextToken();
2384     }
2385   } while (!eof());
2386 }
2387 
2388 void UnwrappedLineParser::parseObjCProtocolList() {
2389   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2390   do {
2391     nextToken();
2392     // Early exit in case someone forgot a close angle.
2393     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2394         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2395       return;
2396   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2397   nextToken(); // Skip '>'.
2398 }
2399 
2400 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2401   do {
2402     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2403       nextToken();
2404       addUnwrappedLine();
2405       break;
2406     }
2407     if (FormatTok->is(tok::l_brace)) {
2408       parseBlock(/*MustBeDeclaration=*/false);
2409       // In ObjC interfaces, nothing should be following the "}".
2410       addUnwrappedLine();
2411     } else if (FormatTok->is(tok::r_brace)) {
2412       // Ignore stray "}". parseStructuralElement doesn't consume them.
2413       nextToken();
2414       addUnwrappedLine();
2415     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2416       nextToken();
2417       parseObjCMethod();
2418     } else {
2419       parseStructuralElement();
2420     }
2421   } while (!eof());
2422 }
2423 
2424 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2425   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2426          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2427   nextToken();
2428   nextToken(); // interface name
2429 
2430   // @interface can be followed by a lightweight generic
2431   // specialization list, then either a base class or a category.
2432   if (FormatTok->Tok.is(tok::less)) {
2433     // Unlike protocol lists, generic parameterizations support
2434     // nested angles:
2435     //
2436     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2437     //     NSObject <NSCopying, NSSecureCoding>
2438     //
2439     // so we need to count how many open angles we have left.
2440     unsigned NumOpenAngles = 1;
2441     do {
2442       nextToken();
2443       // Early exit in case someone forgot a close angle.
2444       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2445           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2446         break;
2447       if (FormatTok->Tok.is(tok::less))
2448         ++NumOpenAngles;
2449       else if (FormatTok->Tok.is(tok::greater)) {
2450         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2451         --NumOpenAngles;
2452       }
2453     } while (!eof() && NumOpenAngles != 0);
2454     nextToken(); // Skip '>'.
2455   }
2456   if (FormatTok->Tok.is(tok::colon)) {
2457     nextToken();
2458     nextToken(); // base class name
2459   } else if (FormatTok->Tok.is(tok::l_paren))
2460     // Skip category, if present.
2461     parseParens();
2462 
2463   if (FormatTok->Tok.is(tok::less))
2464     parseObjCProtocolList();
2465 
2466   if (FormatTok->Tok.is(tok::l_brace)) {
2467     if (Style.BraceWrapping.AfterObjCDeclaration)
2468       addUnwrappedLine();
2469     parseBlock(/*MustBeDeclaration=*/true);
2470   }
2471 
2472   // With instance variables, this puts '}' on its own line.  Without instance
2473   // variables, this ends the @interface line.
2474   addUnwrappedLine();
2475 
2476   parseObjCUntilAtEnd();
2477 }
2478 
2479 // Returns true for the declaration/definition form of @protocol,
2480 // false for the expression form.
2481 bool UnwrappedLineParser::parseObjCProtocol() {
2482   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2483   nextToken();
2484 
2485   if (FormatTok->is(tok::l_paren))
2486     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2487     return false;
2488 
2489   // The definition/declaration form,
2490   // @protocol Foo
2491   // - (int)someMethod;
2492   // @end
2493 
2494   nextToken(); // protocol name
2495 
2496   if (FormatTok->Tok.is(tok::less))
2497     parseObjCProtocolList();
2498 
2499   // Check for protocol declaration.
2500   if (FormatTok->Tok.is(tok::semi)) {
2501     nextToken();
2502     addUnwrappedLine();
2503     return true;
2504   }
2505 
2506   addUnwrappedLine();
2507   parseObjCUntilAtEnd();
2508   return true;
2509 }
2510 
2511 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2512   bool IsImport = FormatTok->is(Keywords.kw_import);
2513   assert(IsImport || FormatTok->is(tok::kw_export));
2514   nextToken();
2515 
2516   // Consume the "default" in "export default class/function".
2517   if (FormatTok->is(tok::kw_default))
2518     nextToken();
2519 
2520   // Consume "async function", "function" and "default function", so that these
2521   // get parsed as free-standing JS functions, i.e. do not require a trailing
2522   // semicolon.
2523   if (FormatTok->is(Keywords.kw_async))
2524     nextToken();
2525   if (FormatTok->is(Keywords.kw_function)) {
2526     nextToken();
2527     return;
2528   }
2529 
2530   // For imports, `export *`, `export {...}`, consume the rest of the line up
2531   // to the terminating `;`. For everything else, just return and continue
2532   // parsing the structural element, i.e. the declaration or expression for
2533   // `export default`.
2534   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2535       !FormatTok->isStringLiteral())
2536     return;
2537 
2538   while (!eof()) {
2539     if (FormatTok->is(tok::semi))
2540       return;
2541     if (Line->Tokens.empty()) {
2542       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2543       // import statement should terminate.
2544       return;
2545     }
2546     if (FormatTok->is(tok::l_brace)) {
2547       FormatTok->BlockKind = BK_Block;
2548       nextToken();
2549       parseBracedList();
2550     } else {
2551       nextToken();
2552     }
2553   }
2554 }
2555 
2556 void UnwrappedLineParser::parseStatementMacro() {
2557   nextToken();
2558   if (FormatTok->is(tok::l_paren))
2559     parseParens();
2560   if (FormatTok->is(tok::semi))
2561     nextToken();
2562   addUnwrappedLine();
2563 }
2564 
2565 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2566                                                  StringRef Prefix = "") {
2567   llvm::dbgs() << Prefix << "Line(" << Line.Level
2568                << ", FSC=" << Line.FirstStartColumn << ")"
2569                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2570   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2571                                                     E = Line.Tokens.end();
2572        I != E; ++I) {
2573     llvm::dbgs() << I->Tok->Tok.getName() << "["
2574                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2575                  << "] ";
2576   }
2577   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2578                                                     E = Line.Tokens.end();
2579        I != E; ++I) {
2580     const UnwrappedLineNode &Node = *I;
2581     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2582              I = Node.Children.begin(),
2583              E = Node.Children.end();
2584          I != E; ++I) {
2585       printDebugInfo(*I, "\nChild: ");
2586     }
2587   }
2588   llvm::dbgs() << "\n";
2589 }
2590 
2591 void UnwrappedLineParser::addUnwrappedLine() {
2592   if (Line->Tokens.empty())
2593     return;
2594   LLVM_DEBUG({
2595     if (CurrentLines == &Lines)
2596       printDebugInfo(*Line);
2597   });
2598   CurrentLines->push_back(std::move(*Line));
2599   Line->Tokens.clear();
2600   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2601   Line->FirstStartColumn = 0;
2602   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2603     CurrentLines->append(
2604         std::make_move_iterator(PreprocessorDirectives.begin()),
2605         std::make_move_iterator(PreprocessorDirectives.end()));
2606     PreprocessorDirectives.clear();
2607   }
2608   // Disconnect the current token from the last token on the previous line.
2609   FormatTok->Previous = nullptr;
2610 }
2611 
2612 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2613 
2614 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2615   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2616          FormatTok.NewlinesBefore > 0;
2617 }
2618 
2619 // Checks if \p FormatTok is a line comment that continues the line comment
2620 // section on \p Line.
2621 static bool
2622 continuesLineCommentSection(const FormatToken &FormatTok,
2623                             const UnwrappedLine &Line,
2624                             const llvm::Regex &CommentPragmasRegex) {
2625   if (Line.Tokens.empty())
2626     return false;
2627 
2628   StringRef IndentContent = FormatTok.TokenText;
2629   if (FormatTok.TokenText.startswith("//") ||
2630       FormatTok.TokenText.startswith("/*"))
2631     IndentContent = FormatTok.TokenText.substr(2);
2632   if (CommentPragmasRegex.match(IndentContent))
2633     return false;
2634 
2635   // If Line starts with a line comment, then FormatTok continues the comment
2636   // section if its original column is greater or equal to the original start
2637   // column of the line.
2638   //
2639   // Define the min column token of a line as follows: if a line ends in '{' or
2640   // contains a '{' followed by a line comment, then the min column token is
2641   // that '{'. Otherwise, the min column token of the line is the first token of
2642   // the line.
2643   //
2644   // If Line starts with a token other than a line comment, then FormatTok
2645   // continues the comment section if its original column is greater than the
2646   // original start column of the min column token of the line.
2647   //
2648   // For example, the second line comment continues the first in these cases:
2649   //
2650   // // first line
2651   // // second line
2652   //
2653   // and:
2654   //
2655   // // first line
2656   //  // second line
2657   //
2658   // and:
2659   //
2660   // int i; // first line
2661   //  // second line
2662   //
2663   // and:
2664   //
2665   // do { // first line
2666   //      // second line
2667   //   int i;
2668   // } while (true);
2669   //
2670   // and:
2671   //
2672   // enum {
2673   //   a, // first line
2674   //    // second line
2675   //   b
2676   // };
2677   //
2678   // The second line comment doesn't continue the first in these cases:
2679   //
2680   //   // first line
2681   //  // second line
2682   //
2683   // and:
2684   //
2685   // int i; // first line
2686   // // second line
2687   //
2688   // and:
2689   //
2690   // do { // first line
2691   //   // second line
2692   //   int i;
2693   // } while (true);
2694   //
2695   // and:
2696   //
2697   // enum {
2698   //   a, // first line
2699   //   // second line
2700   // };
2701   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2702 
2703   // Scan for '{//'. If found, use the column of '{' as a min column for line
2704   // comment section continuation.
2705   const FormatToken *PreviousToken = nullptr;
2706   for (const UnwrappedLineNode &Node : Line.Tokens) {
2707     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2708         isLineComment(*Node.Tok)) {
2709       MinColumnToken = PreviousToken;
2710       break;
2711     }
2712     PreviousToken = Node.Tok;
2713 
2714     // Grab the last newline preceding a token in this unwrapped line.
2715     if (Node.Tok->NewlinesBefore > 0) {
2716       MinColumnToken = Node.Tok;
2717     }
2718   }
2719   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2720     MinColumnToken = PreviousToken;
2721   }
2722 
2723   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2724                               MinColumnToken);
2725 }
2726 
2727 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2728   bool JustComments = Line->Tokens.empty();
2729   for (SmallVectorImpl<FormatToken *>::const_iterator
2730            I = CommentsBeforeNextToken.begin(),
2731            E = CommentsBeforeNextToken.end();
2732        I != E; ++I) {
2733     // Line comments that belong to the same line comment section are put on the
2734     // same line since later we might want to reflow content between them.
2735     // Additional fine-grained breaking of line comment sections is controlled
2736     // by the class BreakableLineCommentSection in case it is desirable to keep
2737     // several line comment sections in the same unwrapped line.
2738     //
2739     // FIXME: Consider putting separate line comment sections as children to the
2740     // unwrapped line instead.
2741     (*I)->ContinuesLineCommentSection =
2742         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2743     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2744       addUnwrappedLine();
2745     pushToken(*I);
2746   }
2747   if (NewlineBeforeNext && JustComments)
2748     addUnwrappedLine();
2749   CommentsBeforeNextToken.clear();
2750 }
2751 
2752 void UnwrappedLineParser::nextToken(int LevelDifference) {
2753   if (eof())
2754     return;
2755   flushComments(isOnNewLine(*FormatTok));
2756   pushToken(FormatTok);
2757   FormatToken *Previous = FormatTok;
2758   if (Style.Language != FormatStyle::LK_JavaScript)
2759     readToken(LevelDifference);
2760   else
2761     readTokenWithJavaScriptASI();
2762   FormatTok->Previous = Previous;
2763 }
2764 
2765 void UnwrappedLineParser::distributeComments(
2766     const SmallVectorImpl<FormatToken *> &Comments,
2767     const FormatToken *NextTok) {
2768   // Whether or not a line comment token continues a line is controlled by
2769   // the method continuesLineCommentSection, with the following caveat:
2770   //
2771   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2772   // that each comment line from the trail is aligned with the next token, if
2773   // the next token exists. If a trail exists, the beginning of the maximal
2774   // trail is marked as a start of a new comment section.
2775   //
2776   // For example in this code:
2777   //
2778   // int a; // line about a
2779   //   // line 1 about b
2780   //   // line 2 about b
2781   //   int b;
2782   //
2783   // the two lines about b form a maximal trail, so there are two sections, the
2784   // first one consisting of the single comment "// line about a" and the
2785   // second one consisting of the next two comments.
2786   if (Comments.empty())
2787     return;
2788   bool ShouldPushCommentsInCurrentLine = true;
2789   bool HasTrailAlignedWithNextToken = false;
2790   unsigned StartOfTrailAlignedWithNextToken = 0;
2791   if (NextTok) {
2792     // We are skipping the first element intentionally.
2793     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2794       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2795         HasTrailAlignedWithNextToken = true;
2796         StartOfTrailAlignedWithNextToken = i;
2797       }
2798     }
2799   }
2800   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2801     FormatToken *FormatTok = Comments[i];
2802     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2803       FormatTok->ContinuesLineCommentSection = false;
2804     } else {
2805       FormatTok->ContinuesLineCommentSection =
2806           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2807     }
2808     if (!FormatTok->ContinuesLineCommentSection &&
2809         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2810       ShouldPushCommentsInCurrentLine = false;
2811     }
2812     if (ShouldPushCommentsInCurrentLine) {
2813       pushToken(FormatTok);
2814     } else {
2815       CommentsBeforeNextToken.push_back(FormatTok);
2816     }
2817   }
2818 }
2819 
2820 void UnwrappedLineParser::readToken(int LevelDifference) {
2821   SmallVector<FormatToken *, 1> Comments;
2822   do {
2823     FormatTok = Tokens->getNextToken();
2824     assert(FormatTok);
2825     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2826            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2827       distributeComments(Comments, FormatTok);
2828       Comments.clear();
2829       // If there is an unfinished unwrapped line, we flush the preprocessor
2830       // directives only after that unwrapped line was finished later.
2831       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2832       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2833       assert((LevelDifference >= 0 ||
2834               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2835              "LevelDifference makes Line->Level negative");
2836       Line->Level += LevelDifference;
2837       // Comments stored before the preprocessor directive need to be output
2838       // before the preprocessor directive, at the same level as the
2839       // preprocessor directive, as we consider them to apply to the directive.
2840       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
2841           PPBranchLevel > 0)
2842         Line->Level += PPBranchLevel;
2843       flushComments(isOnNewLine(*FormatTok));
2844       parsePPDirective();
2845     }
2846     while (FormatTok->Type == TT_ConflictStart ||
2847            FormatTok->Type == TT_ConflictEnd ||
2848            FormatTok->Type == TT_ConflictAlternative) {
2849       if (FormatTok->Type == TT_ConflictStart) {
2850         conditionalCompilationStart(/*Unreachable=*/false);
2851       } else if (FormatTok->Type == TT_ConflictAlternative) {
2852         conditionalCompilationAlternative();
2853       } else if (FormatTok->Type == TT_ConflictEnd) {
2854         conditionalCompilationEnd();
2855       }
2856       FormatTok = Tokens->getNextToken();
2857       FormatTok->MustBreakBefore = true;
2858     }
2859 
2860     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2861         !Line->InPPDirective) {
2862       continue;
2863     }
2864 
2865     if (!FormatTok->Tok.is(tok::comment)) {
2866       distributeComments(Comments, FormatTok);
2867       Comments.clear();
2868       return;
2869     }
2870 
2871     Comments.push_back(FormatTok);
2872   } while (!eof());
2873 
2874   distributeComments(Comments, nullptr);
2875   Comments.clear();
2876 }
2877 
2878 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2879   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2880   if (MustBreakBeforeNextToken) {
2881     Line->Tokens.back().Tok->MustBreakBefore = true;
2882     MustBreakBeforeNextToken = false;
2883   }
2884 }
2885 
2886 } // end namespace format
2887 } // end namespace clang
2888