1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <algorithm>
21 
22 #define DEBUG_TYPE "format-parser"
23 
24 namespace clang {
25 namespace format {
26 
27 class FormatTokenSource {
28 public:
29   virtual ~FormatTokenSource() {}
30   virtual FormatToken *getNextToken() = 0;
31 
32   virtual unsigned getPosition() = 0;
33   virtual FormatToken *setPosition(unsigned Position) = 0;
34 };
35 
36 namespace {
37 
38 class ScopedDeclarationState {
39 public:
40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
41                          bool MustBeDeclaration)
42       : Line(Line), Stack(Stack) {
43     Line.MustBeDeclaration = MustBeDeclaration;
44     Stack.push_back(MustBeDeclaration);
45   }
46   ~ScopedDeclarationState() {
47     Stack.pop_back();
48     if (!Stack.empty())
49       Line.MustBeDeclaration = Stack.back();
50     else
51       Line.MustBeDeclaration = true;
52   }
53 
54 private:
55   UnwrappedLine &Line;
56   std::vector<bool> &Stack;
57 };
58 
59 static bool isLineComment(const FormatToken &FormatTok) {
60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     FakeEOF.Tok.startToken();
86     FakeEOF.Tok.setKind(tok::eof);
87     TokenSource = this;
88     Line.Level = 0;
89     Line.InPPDirective = true;
90   }
91 
92   ~ScopedMacroState() override {
93     TokenSource = PreviousTokenSource;
94     ResetToken = Token;
95     Line.InPPDirective = false;
96     Line.Level = PreviousLineLevel;
97   }
98 
99   FormatToken *getNextToken() override {
100     // The \c UnwrappedLineParser guards against this by never calling
101     // \c getNextToken() after it has encountered the first eof token.
102     assert(!eof());
103     PreviousToken = Token;
104     Token = PreviousTokenSource->getNextToken();
105     if (eof())
106       return &FakeEOF;
107     return Token;
108   }
109 
110   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
111 
112   FormatToken *setPosition(unsigned Position) override {
113     PreviousToken = nullptr;
114     Token = PreviousTokenSource->setPosition(Position);
115     return Token;
116   }
117 
118 private:
119   bool eof() {
120     return Token && Token->HasUnescapedNewline &&
121            !continuesLineComment(*Token, PreviousToken,
122                                  /*MinColumnToken=*/PreviousToken);
123   }
124 
125   FormatToken FakeEOF;
126   UnwrappedLine &Line;
127   FormatTokenSource *&TokenSource;
128   FormatToken *&ResetToken;
129   unsigned PreviousLineLevel;
130   FormatTokenSource *PreviousTokenSource;
131 
132   FormatToken *Token;
133   FormatToken *PreviousToken;
134 };
135 
136 } // end anonymous namespace
137 
138 class ScopedLineState {
139 public:
140   ScopedLineState(UnwrappedLineParser &Parser,
141                   bool SwitchToPreprocessorLines = false)
142       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
143     if (SwitchToPreprocessorLines)
144       Parser.CurrentLines = &Parser.PreprocessorDirectives;
145     else if (!Parser.Line->Tokens.empty())
146       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
147     PreBlockLine = std::move(Parser.Line);
148     Parser.Line = std::make_unique<UnwrappedLine>();
149     Parser.Line->Level = PreBlockLine->Level;
150     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
151   }
152 
153   ~ScopedLineState() {
154     if (!Parser.Line->Tokens.empty()) {
155       Parser.addUnwrappedLine();
156     }
157     assert(Parser.Line->Tokens.empty());
158     Parser.Line = std::move(PreBlockLine);
159     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
160       Parser.MustBreakBeforeNextToken = true;
161     Parser.CurrentLines = OriginalLines;
162   }
163 
164 private:
165   UnwrappedLineParser &Parser;
166 
167   std::unique_ptr<UnwrappedLine> PreBlockLine;
168   SmallVectorImpl<UnwrappedLine> *OriginalLines;
169 };
170 
171 class CompoundStatementIndenter {
172 public:
173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
174                             const FormatStyle &Style, unsigned &LineLevel)
175       : CompoundStatementIndenter(Parser, LineLevel,
176                                   Style.BraceWrapping.AfterControlStatement,
177                                   Style.BraceWrapping.IndentBraces) {}
178   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
179                             bool WrapBrace, bool IndentBrace)
180       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
181     if (WrapBrace)
182       Parser->addUnwrappedLine();
183     if (IndentBrace)
184       ++LineLevel;
185   }
186   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
187 
188 private:
189   unsigned &LineLevel;
190   unsigned OldLineLevel;
191 };
192 
193 namespace {
194 
195 class IndexedTokenSource : public FormatTokenSource {
196 public:
197   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
198       : Tokens(Tokens), Position(-1) {}
199 
200   FormatToken *getNextToken() override {
201     ++Position;
202     return Tokens[Position];
203   }
204 
205   unsigned getPosition() override {
206     assert(Position >= 0);
207     return Position;
208   }
209 
210   FormatToken *setPosition(unsigned P) override {
211     Position = P;
212     return Tokens[Position];
213   }
214 
215   void reset() { Position = -1; }
216 
217 private:
218   ArrayRef<FormatToken *> Tokens;
219   int Position;
220 };
221 
222 } // end anonymous namespace
223 
224 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
225                                          const AdditionalKeywords &Keywords,
226                                          unsigned FirstStartColumn,
227                                          ArrayRef<FormatToken *> Tokens,
228                                          UnwrappedLineConsumer &Callback)
229     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
230       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
231       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
232       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
233       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
234                        ? IG_Rejected
235                        : IG_Inited),
236       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
241                      ? IG_Rejected
242                      : IG_Inited;
243   IncludeGuardToken = nullptr;
244   Line.reset(new UnwrappedLine);
245   CommentsBeforeNextToken.clear();
246   FormatTok = nullptr;
247   MustBreakBeforeNextToken = false;
248   PreprocessorDirectives.clear();
249   CurrentLines = &Lines;
250   DeclarationScopeStack.clear();
251   PPStack.clear();
252   Line->FirstStartColumn = FirstStartColumn;
253 }
254 
255 void UnwrappedLineParser::parse() {
256   IndexedTokenSource TokenSource(AllTokens);
257   Line->FirstStartColumn = FirstStartColumn;
258   do {
259     LLVM_DEBUG(llvm::dbgs() << "----\n");
260     reset();
261     Tokens = &TokenSource;
262     TokenSource.reset();
263 
264     readToken();
265     parseFile();
266 
267     // If we found an include guard then all preprocessor directives (other than
268     // the guard) are over-indented by one.
269     if (IncludeGuard == IG_Found)
270       for (auto &Line : Lines)
271         if (Line.InPPDirective && Line.Level > 0)
272           --Line.Level;
273 
274     // Create line with eof token.
275     pushToken(FormatTok);
276     addUnwrappedLine();
277 
278     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
279                                                   E = Lines.end();
280          I != E; ++I) {
281       Callback.consumeUnwrappedLine(*I);
282     }
283     Callback.finishRun();
284     Lines.clear();
285     while (!PPLevelBranchIndex.empty() &&
286            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
287       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
288       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
289     }
290     if (!PPLevelBranchIndex.empty()) {
291       ++PPLevelBranchIndex.back();
292       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
293       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
294     }
295   } while (!PPLevelBranchIndex.empty());
296 }
297 
298 void UnwrappedLineParser::parseFile() {
299   // The top-level context in a file always has declarations, except for pre-
300   // processor directives and JavaScript files.
301   bool MustBeDeclaration =
302       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
303   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
304                                           MustBeDeclaration);
305   if (Style.Language == FormatStyle::LK_TextProto)
306     parseBracedList();
307   else
308     parseLevel(/*HasOpeningBrace=*/false);
309   // Make sure to format the remaining tokens.
310   //
311   // LK_TextProto is special since its top-level is parsed as the body of a
312   // braced list, which does not necessarily have natural line separators such
313   // as a semicolon. Comments after the last entry that have been determined to
314   // not belong to that line, as in:
315   //   key: value
316   //   // endfile comment
317   // do not have a chance to be put on a line of their own until this point.
318   // Here we add this newline before end-of-file comments.
319   if (Style.Language == FormatStyle::LK_TextProto &&
320       !CommentsBeforeNextToken.empty())
321     addUnwrappedLine();
322   flushComments(true);
323   addUnwrappedLine();
324 }
325 
326 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
327   do {
328     switch (FormatTok->Tok.getKind()) {
329     case tok::l_brace:
330       return;
331     default:
332       if (FormatTok->is(Keywords.kw_where)) {
333         addUnwrappedLine();
334         nextToken();
335         parseCSharpGenericTypeConstraint();
336         break;
337       }
338       nextToken();
339       break;
340     }
341   } while (!eof());
342 }
343 
344 void UnwrappedLineParser::parseCSharpAttribute() {
345   int UnpairedSquareBrackets = 1;
346   do {
347     switch (FormatTok->Tok.getKind()) {
348     case tok::r_square:
349       nextToken();
350       --UnpairedSquareBrackets;
351       if (UnpairedSquareBrackets == 0) {
352         addUnwrappedLine();
353         return;
354       }
355       break;
356     case tok::l_square:
357       ++UnpairedSquareBrackets;
358       nextToken();
359       break;
360     default:
361       nextToken();
362       break;
363     }
364   } while (!eof());
365 }
366 
367 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
368   bool SwitchLabelEncountered = false;
369   do {
370     tok::TokenKind kind = FormatTok->Tok.getKind();
371     if (FormatTok->Type == TT_MacroBlockBegin) {
372       kind = tok::l_brace;
373     } else if (FormatTok->Type == TT_MacroBlockEnd) {
374       kind = tok::r_brace;
375     }
376 
377     switch (kind) {
378     case tok::comment:
379       nextToken();
380       addUnwrappedLine();
381       break;
382     case tok::l_brace:
383       // FIXME: Add parameter whether this can happen - if this happens, we must
384       // be in a non-declaration context.
385       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
386         continue;
387       parseBlock(/*MustBeDeclaration=*/false);
388       addUnwrappedLine();
389       break;
390     case tok::r_brace:
391       if (HasOpeningBrace)
392         return;
393       nextToken();
394       addUnwrappedLine();
395       break;
396     case tok::kw_default: {
397       unsigned StoredPosition = Tokens->getPosition();
398       FormatToken *Next;
399       do {
400         Next = Tokens->getNextToken();
401       } while (Next && Next->is(tok::comment));
402       FormatTok = Tokens->setPosition(StoredPosition);
403       if (Next && Next->isNot(tok::colon)) {
404         // default not followed by ':' is not a case label; treat it like
405         // an identifier.
406         parseStructuralElement();
407         break;
408       }
409       // Else, if it is 'default:', fall through to the case handling.
410       LLVM_FALLTHROUGH;
411     }
412     case tok::kw_case:
413       if (Style.Language == FormatStyle::LK_JavaScript &&
414           Line->MustBeDeclaration) {
415         // A 'case: string' style field declaration.
416         parseStructuralElement();
417         break;
418       }
419       if (!SwitchLabelEncountered &&
420           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
421         ++Line->Level;
422       SwitchLabelEncountered = true;
423       parseStructuralElement();
424       break;
425     case tok::l_square:
426       if (Style.isCSharp()) {
427         nextToken();
428         parseCSharpAttribute();
429         break;
430       }
431       LLVM_FALLTHROUGH;
432     default:
433       parseStructuralElement();
434       break;
435     }
436   } while (!eof());
437 }
438 
439 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
440   // We'll parse forward through the tokens until we hit
441   // a closing brace or eof - note that getNextToken() will
442   // parse macros, so this will magically work inside macro
443   // definitions, too.
444   unsigned StoredPosition = Tokens->getPosition();
445   FormatToken *Tok = FormatTok;
446   const FormatToken *PrevTok = Tok->Previous;
447   // Keep a stack of positions of lbrace tokens. We will
448   // update information about whether an lbrace starts a
449   // braced init list or a different block during the loop.
450   SmallVector<FormatToken *, 8> LBraceStack;
451   assert(Tok->Tok.is(tok::l_brace));
452   do {
453     // Get next non-comment token.
454     FormatToken *NextTok;
455     unsigned ReadTokens = 0;
456     do {
457       NextTok = Tokens->getNextToken();
458       ++ReadTokens;
459     } while (NextTok->is(tok::comment));
460 
461     switch (Tok->Tok.getKind()) {
462     case tok::l_brace:
463       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
464         if (PrevTok->isOneOf(tok::colon, tok::less))
465           // A ':' indicates this code is in a type, or a braced list
466           // following a label in an object literal ({a: {b: 1}}).
467           // A '<' could be an object used in a comparison, but that is nonsense
468           // code (can never return true), so more likely it is a generic type
469           // argument (`X<{a: string; b: number}>`).
470           // The code below could be confused by semicolons between the
471           // individual members in a type member list, which would normally
472           // trigger BK_Block. In both cases, this must be parsed as an inline
473           // braced init.
474           Tok->BlockKind = BK_BracedInit;
475         else if (PrevTok->is(tok::r_paren))
476           // `) { }` can only occur in function or method declarations in JS.
477           Tok->BlockKind = BK_Block;
478       } else {
479         Tok->BlockKind = BK_Unknown;
480       }
481       LBraceStack.push_back(Tok);
482       break;
483     case tok::r_brace:
484       if (LBraceStack.empty())
485         break;
486       if (LBraceStack.back()->BlockKind == BK_Unknown) {
487         bool ProbablyBracedList = false;
488         if (Style.Language == FormatStyle::LK_Proto) {
489           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
490         } else {
491           // Using OriginalColumn to distinguish between ObjC methods and
492           // binary operators is a bit hacky.
493           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
494                                   NextTok->OriginalColumn == 0;
495 
496           // If there is a comma, semicolon or right paren after the closing
497           // brace, we assume this is a braced initializer list.  Note that
498           // regardless how we mark inner braces here, we will overwrite the
499           // BlockKind later if we parse a braced list (where all blocks
500           // inside are by default braced lists), or when we explicitly detect
501           // blocks (for example while parsing lambdas).
502           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
503           // braced list in JS.
504           ProbablyBracedList =
505               (Style.Language == FormatStyle::LK_JavaScript &&
506                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
507                                 Keywords.kw_as)) ||
508               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
509               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
510                                tok::r_paren, tok::r_square, tok::l_brace,
511                                tok::ellipsis) ||
512               (NextTok->is(tok::identifier) &&
513                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
514               (NextTok->is(tok::semi) &&
515                (!ExpectClassBody || LBraceStack.size() != 1)) ||
516               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
517           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
518             // We can have an array subscript after a braced init
519             // list, but C++11 attributes are expected after blocks.
520             NextTok = Tokens->getNextToken();
521             ++ReadTokens;
522             ProbablyBracedList = NextTok->isNot(tok::l_square);
523           }
524         }
525         if (ProbablyBracedList) {
526           Tok->BlockKind = BK_BracedInit;
527           LBraceStack.back()->BlockKind = BK_BracedInit;
528         } else {
529           Tok->BlockKind = BK_Block;
530           LBraceStack.back()->BlockKind = BK_Block;
531         }
532       }
533       LBraceStack.pop_back();
534       break;
535     case tok::identifier:
536       if (!Tok->is(TT_StatementMacro))
537         break;
538       LLVM_FALLTHROUGH;
539     case tok::at:
540     case tok::semi:
541     case tok::kw_if:
542     case tok::kw_while:
543     case tok::kw_for:
544     case tok::kw_switch:
545     case tok::kw_try:
546     case tok::kw___try:
547       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
548         LBraceStack.back()->BlockKind = BK_Block;
549       break;
550     default:
551       break;
552     }
553     PrevTok = Tok;
554     Tok = NextTok;
555   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
556 
557   // Assume other blocks for all unclosed opening braces.
558   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
559     if (LBraceStack[i]->BlockKind == BK_Unknown)
560       LBraceStack[i]->BlockKind = BK_Block;
561   }
562 
563   FormatTok = Tokens->setPosition(StoredPosition);
564 }
565 
// Folds the std::hash value of \p v into \p seed, modifying \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Hashed = std::hash<T>()(v);
  const std::size_t Mixed = Hashed + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
571 
572 size_t UnwrappedLineParser::computePPHash() const {
573   size_t h = 0;
574   for (const auto &i : PPStack) {
575     hash_combine(h, size_t(i.Kind));
576     hash_combine(h, i.Line);
577   }
578   return h;
579 }
580 
581 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
582                                      bool MunchSemi) {
583   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
584          "'{' or macro block token expected");
585   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
586   FormatTok->BlockKind = BK_Block;
587 
588   size_t PPStartHash = computePPHash();
589 
590   unsigned InitialLevel = Line->Level;
591   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
592 
593   if (MacroBlock && FormatTok->is(tok::l_paren))
594     parseParens();
595 
596   size_t NbPreprocessorDirectives =
597       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
598   addUnwrappedLine();
599   size_t OpeningLineIndex =
600       CurrentLines->empty()
601           ? (UnwrappedLine::kInvalidIndex)
602           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
603 
604   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
605                                           MustBeDeclaration);
606   if (AddLevel)
607     ++Line->Level;
608   parseLevel(/*HasOpeningBrace=*/true);
609 
610   if (eof())
611     return;
612 
613   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
614                  : !FormatTok->is(tok::r_brace)) {
615     Line->Level = InitialLevel;
616     FormatTok->BlockKind = BK_Block;
617     return;
618   }
619 
620   size_t PPEndHash = computePPHash();
621 
622   // Munch the closing brace.
623   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
624 
625   if (MacroBlock && FormatTok->is(tok::l_paren))
626     parseParens();
627 
628   if (MunchSemi && FormatTok->Tok.is(tok::semi))
629     nextToken();
630   Line->Level = InitialLevel;
631 
632   if (PPStartHash == PPEndHash) {
633     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
634     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
635       // Update the opening line to add the forward reference as well
636       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
637           CurrentLines->size() - 1;
638     }
639   }
640 }
641 
642 static bool isGoogScope(const UnwrappedLine &Line) {
643   // FIXME: Closure-library specific stuff should not be hard-coded but be
644   // configurable.
645   if (Line.Tokens.size() < 4)
646     return false;
647   auto I = Line.Tokens.begin();
648   if (I->Tok->TokenText != "goog")
649     return false;
650   ++I;
651   if (I->Tok->isNot(tok::period))
652     return false;
653   ++I;
654   if (I->Tok->TokenText != "scope")
655     return false;
656   ++I;
657   return I->Tok->is(tok::l_paren);
658 }
659 
660 static bool isIIFE(const UnwrappedLine &Line,
661                    const AdditionalKeywords &Keywords) {
662   // Look for the start of an immediately invoked anonymous function.
663   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
664   // This is commonly done in JavaScript to create a new, anonymous scope.
665   // Example: (function() { ... })()
666   if (Line.Tokens.size() < 3)
667     return false;
668   auto I = Line.Tokens.begin();
669   if (I->Tok->isNot(tok::l_paren))
670     return false;
671   ++I;
672   if (I->Tok->isNot(Keywords.kw_function))
673     return false;
674   ++I;
675   return I->Tok->is(tok::l_paren);
676 }
677 
678 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
679                                    const FormatToken &InitialToken) {
680   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
681     return Style.BraceWrapping.AfterNamespace;
682   if (InitialToken.is(tok::kw_class))
683     return Style.BraceWrapping.AfterClass;
684   if (InitialToken.is(tok::kw_union))
685     return Style.BraceWrapping.AfterUnion;
686   if (InitialToken.is(tok::kw_struct))
687     return Style.BraceWrapping.AfterStruct;
688   return false;
689 }
690 
691 void UnwrappedLineParser::parseChildBlock() {
692   FormatTok->BlockKind = BK_Block;
693   nextToken();
694   {
695     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
696                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
697     ScopedLineState LineState(*this);
698     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
699                                             /*MustBeDeclaration=*/false);
700     Line->Level += SkipIndent ? 0 : 1;
701     parseLevel(/*HasOpeningBrace=*/true);
702     flushComments(isOnNewLine(*FormatTok));
703     Line->Level -= SkipIndent ? 0 : 1;
704   }
705   nextToken();
706 }
707 
708 void UnwrappedLineParser::parsePPDirective() {
709   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
710   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
711 
712   nextToken();
713 
714   if (!FormatTok->Tok.getIdentifierInfo()) {
715     parsePPUnknown();
716     return;
717   }
718 
719   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
720   case tok::pp_define:
721     parsePPDefine();
722     return;
723   case tok::pp_if:
724     parsePPIf(/*IfDef=*/false);
725     break;
726   case tok::pp_ifdef:
727   case tok::pp_ifndef:
728     parsePPIf(/*IfDef=*/true);
729     break;
730   case tok::pp_else:
731     parsePPElse();
732     break;
733   case tok::pp_elif:
734     parsePPElIf();
735     break;
736   case tok::pp_endif:
737     parsePPEndIf();
738     break;
739   default:
740     parsePPUnknown();
741     break;
742   }
743 }
744 
745 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
746   size_t Line = CurrentLines->size();
747   if (CurrentLines == &PreprocessorDirectives)
748     Line += Lines.size();
749 
750   if (Unreachable ||
751       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
752     PPStack.push_back({PP_Unreachable, Line});
753   else
754     PPStack.push_back({PP_Conditional, Line});
755 }
756 
757 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
758   ++PPBranchLevel;
759   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
760   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
761     PPLevelBranchIndex.push_back(0);
762     PPLevelBranchCount.push_back(0);
763   }
764   PPChainBranchIndex.push(0);
765   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
766   conditionalCompilationCondition(Unreachable || Skip);
767 }
768 
769 void UnwrappedLineParser::conditionalCompilationAlternative() {
770   if (!PPStack.empty())
771     PPStack.pop_back();
772   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
773   if (!PPChainBranchIndex.empty())
774     ++PPChainBranchIndex.top();
775   conditionalCompilationCondition(
776       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
777       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
778 }
779 
780 void UnwrappedLineParser::conditionalCompilationEnd() {
781   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
782   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
783     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
784       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
785     }
786   }
787   // Guard against #endif's without #if.
788   if (PPBranchLevel > -1)
789     --PPBranchLevel;
790   if (!PPChainBranchIndex.empty())
791     PPChainBranchIndex.pop();
792   if (!PPStack.empty())
793     PPStack.pop_back();
794 }
795 
796 void UnwrappedLineParser::parsePPIf(bool IfDef) {
797   bool IfNDef = FormatTok->is(tok::pp_ifndef);
798   nextToken();
799   bool Unreachable = false;
800   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
801     Unreachable = true;
802   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
803     Unreachable = true;
804   conditionalCompilationStart(Unreachable);
805   FormatToken *IfCondition = FormatTok;
806   // If there's a #ifndef on the first line, and the only lines before it are
807   // comments, it could be an include guard.
808   bool MaybeIncludeGuard = IfNDef;
809   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
810     for (auto &Line : Lines) {
811       if (!Line.Tokens.front().Tok->is(tok::comment)) {
812         MaybeIncludeGuard = false;
813         IncludeGuard = IG_Rejected;
814         break;
815       }
816     }
817   --PPBranchLevel;
818   parsePPUnknown();
819   ++PPBranchLevel;
820   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
821     IncludeGuard = IG_IfNdefed;
822     IncludeGuardToken = IfCondition;
823   }
824 }
825 
826 void UnwrappedLineParser::parsePPElse() {
827   // If a potential include guard has an #else, it's not an include guard.
828   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
829     IncludeGuard = IG_Rejected;
830   conditionalCompilationAlternative();
831   if (PPBranchLevel > -1)
832     --PPBranchLevel;
833   parsePPUnknown();
834   ++PPBranchLevel;
835 }
836 
837 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
838 
839 void UnwrappedLineParser::parsePPEndIf() {
840   conditionalCompilationEnd();
841   parsePPUnknown();
842   // If the #endif of a potential include guard is the last thing in the file,
843   // then we found an include guard.
844   unsigned TokenPosition = Tokens->getPosition();
845   FormatToken *PeekNext = AllTokens[TokenPosition];
846   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
847       PeekNext->is(tok::eof) &&
848       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
849     IncludeGuard = IG_Found;
850 }
851 
852 void UnwrappedLineParser::parsePPDefine() {
853   nextToken();
854 
855   if (!FormatTok->Tok.getIdentifierInfo()) {
856     IncludeGuard = IG_Rejected;
857     IncludeGuardToken = nullptr;
858     parsePPUnknown();
859     return;
860   }
861 
862   if (IncludeGuard == IG_IfNdefed &&
863       IncludeGuardToken->TokenText == FormatTok->TokenText) {
864     IncludeGuard = IG_Defined;
865     IncludeGuardToken = nullptr;
866     for (auto &Line : Lines) {
867       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
868         IncludeGuard = IG_Rejected;
869         break;
870       }
871     }
872   }
873 
874   nextToken();
875   if (FormatTok->Tok.getKind() == tok::l_paren &&
876       FormatTok->WhitespaceRange.getBegin() ==
877           FormatTok->WhitespaceRange.getEnd()) {
878     parseParens();
879   }
880   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
881     Line->Level += PPBranchLevel + 1;
882   addUnwrappedLine();
883   ++Line->Level;
884 
885   // Errors during a preprocessor directive can only affect the layout of the
886   // preprocessor directive, and thus we ignore them. An alternative approach
887   // would be to use the same approach we use on the file level (no
888   // re-indentation if there was a structural error) within the macro
889   // definition.
890   parseFile();
891 }
892 
893 void UnwrappedLineParser::parsePPUnknown() {
894   do {
895     nextToken();
896   } while (!eof());
897   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
898     Line->Level += PPBranchLevel + 1;
899   addUnwrappedLine();
900 }
901 
902 // Here we blacklist certain tokens that are not usually the first token in an
903 // unwrapped line. This is used in attempt to distinguish macro calls without
904 // trailing semicolons from other constructs split to several lines.
905 static bool tokenCanStartNewLine(const clang::Token &Tok) {
906   // Semicolon can be a null-statement, l_square can be a start of a macro or
907   // a C++11 attribute, but this doesn't seem to be common.
908   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
909          Tok.isNot(tok::l_square) &&
910          // Tokens that can only be used as binary operators and a part of
911          // overloaded operator names.
912          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
913          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
914          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
915          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
916          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
917          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
918          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
919          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
920          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
921          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
922          Tok.isNot(tok::lesslessequal) &&
923          // Colon is used in labels, base class lists, initializer lists,
924          // range-based for loops, ternary operator, but should never be the
925          // first token in an unwrapped line.
926          Tok.isNot(tok::colon) &&
927          // 'noexcept' is a trailing annotation.
928          Tok.isNot(tok::kw_noexcept);
929 }
930 
931 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
932                           const FormatToken *FormatTok) {
933   // FIXME: This returns true for C/C++ keywords like 'struct'.
934   return FormatTok->is(tok::identifier) &&
935          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
936           !FormatTok->isOneOf(
937               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
938               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
939               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
940               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
941               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
942               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
943               Keywords.kw_from));
944 }
945 
946 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
947                                  const FormatToken *FormatTok) {
948   return FormatTok->Tok.isLiteral() ||
949          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
950          mustBeJSIdent(Keywords, FormatTok);
951 }
952 
953 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
954 // when encountered after a value (see mustBeJSIdentOrValue).
955 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
956                            const FormatToken *FormatTok) {
957   return FormatTok->isOneOf(
958       tok::kw_return, Keywords.kw_yield,
959       // conditionals
960       tok::kw_if, tok::kw_else,
961       // loops
962       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
963       // switch/case
964       tok::kw_switch, tok::kw_case,
965       // exceptions
966       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
967       // declaration
968       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
969       Keywords.kw_async, Keywords.kw_function,
970       // import/export
971       Keywords.kw_import, tok::kw_export);
972 }
973 
974 // readTokenWithJavaScriptASI reads the next token and terminates the current
975 // line if JavaScript Automatic Semicolon Insertion must
976 // happen between the current token and the next token.
977 //
978 // This method is conservative - it cannot cover all edge cases of JavaScript,
979 // but only aims to correctly handle certain well known cases. It *must not*
980 // return true in speculative cases.
981 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
982   FormatToken *Previous = FormatTok;
983   readToken();
984   FormatToken *Next = FormatTok;
985 
986   bool IsOnSameLine =
987       CommentsBeforeNextToken.empty()
988           ? Next->NewlinesBefore == 0
989           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
990   if (IsOnSameLine)
991     return;
992 
993   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
994   bool PreviousStartsTemplateExpr =
995       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
996   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
997     // If the line contains an '@' sign, the previous token might be an
998     // annotation, which can precede another identifier/value.
999     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1000                               [](UnwrappedLineNode &LineNode) {
1001                                 return LineNode.Tok->is(tok::at);
1002                               }) != Line->Tokens.end();
1003     if (HasAt)
1004       return;
1005   }
1006   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1007     return addUnwrappedLine();
1008   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1009   bool NextEndsTemplateExpr =
1010       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1011   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1012       (PreviousMustBeValue ||
1013        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1014                          tok::minusminus)))
1015     return addUnwrappedLine();
1016   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1017       isJSDeclOrStmt(Keywords, Next))
1018     return addUnwrappedLine();
1019 }
1020 
// Parses one structural element starting at the current token.
//
// The function works in two phases:
//  1. A switch on the kind of the first token hands control to a dedicated
//     parser (namespace, if, loops, switch/case, try, extern blocks, ...)
//     where one applies; those cases return directly. Cases that 'break'
//     fall through to phase 2.
//  2. A loop consumes tokens one at a time, delegating nested constructs
//     (parentheses, braced lists, records, enums, lambdas, ...) to their
//     parsers, until a token ends the element (e.g. ';', '}' or a block) or
//     eof() holds.
//
// Must not be called with the current token being '{' (see the assert).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include followed by an optional string literal forms a line
  // of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Mark every token up to the closing brace as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these keywords are simply consumed; in the
    // other languages they are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." {' opens a block; anything else continues in phase 2.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: 'import', an optional 'public', a string literal and an
        // optional ';'.
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // One of the signals/slots keywords followed by ':' forms a line of its
    // own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume tokens until the element is complete or eof() holds.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the structural element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' is not consumed here; only the current line is finished.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', optionally followed by an identifier or simple type specifier, a
      // parenthesized list and a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then restore the stored position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // Only consider the identifier if it is the first token of the line,
      // or the second one after a leading comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Treat it as a macro only if the identifier equals its own upper-case
        // form, is function-like or at least 5 characters long, is followed
        // by a newline, and the next token passes tokenCanStartNewLine.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1484 
1485 bool UnwrappedLineParser::tryToParseLambda() {
1486   if (!Style.isCpp()) {
1487     nextToken();
1488     return false;
1489   }
1490   assert(FormatTok->is(tok::l_square));
1491   FormatToken &LSquare = *FormatTok;
1492   if (!tryToParseLambdaIntroducer())
1493     return false;
1494 
1495   bool SeenArrow = false;
1496 
1497   while (FormatTok->isNot(tok::l_brace)) {
1498     if (FormatTok->isSimpleTypeSpecifier()) {
1499       nextToken();
1500       continue;
1501     }
1502     switch (FormatTok->Tok.getKind()) {
1503     case tok::l_brace:
1504       break;
1505     case tok::l_paren:
1506       parseParens();
1507       break;
1508     case tok::amp:
1509     case tok::star:
1510     case tok::kw_const:
1511     case tok::comma:
1512     case tok::less:
1513     case tok::greater:
1514     case tok::identifier:
1515     case tok::numeric_constant:
1516     case tok::coloncolon:
1517     case tok::kw_class:
1518     case tok::kw_mutable:
1519     case tok::kw_noexcept:
1520     case tok::kw_template:
1521     case tok::kw_typename:
1522       nextToken();
1523       break;
1524     // Specialization of a template with an integer parameter can contain
1525     // arithmetic, logical, comparison and ternary operators.
1526     //
1527     // FIXME: This also accepts sequences of operators that are not in the scope
1528     // of a template argument list.
1529     //
1530     // In a C++ lambda a template type can only occur after an arrow. We use
1531     // this as an heuristic to distinguish between Objective-C expressions
1532     // followed by an `a->b` expression, such as:
1533     // ([obj func:arg] + a->b)
1534     // Otherwise the code below would parse as a lambda.
1535     //
1536     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1537     // explicit template lists: []<bool b = true && false>(U &&u){}
1538     case tok::plus:
1539     case tok::minus:
1540     case tok::exclaim:
1541     case tok::tilde:
1542     case tok::slash:
1543     case tok::percent:
1544     case tok::lessless:
1545     case tok::pipe:
1546     case tok::pipepipe:
1547     case tok::ampamp:
1548     case tok::caret:
1549     case tok::equalequal:
1550     case tok::exclaimequal:
1551     case tok::greaterequal:
1552     case tok::lessequal:
1553     case tok::question:
1554     case tok::colon:
1555     case tok::kw_true:
1556     case tok::kw_false:
1557       if (SeenArrow) {
1558         nextToken();
1559         break;
1560       }
1561       return true;
1562     case tok::arrow:
1563       // This might or might not actually be a lambda arrow (this could be an
1564       // ObjC method invocation followed by a dereferencing arrow). We might
1565       // reset this back to TT_Unknown in TokenAnnotator.
1566       FormatTok->Type = TT_LambdaArrow;
1567       SeenArrow = true;
1568       nextToken();
1569       break;
1570     default:
1571       return true;
1572     }
1573   }
1574   FormatTok->Type = TT_LambdaLBrace;
1575   LSquare.Type = TT_LambdaLSquare;
1576   parseChildBlock();
1577   return true;
1578 }
1579 
1580 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1581   const FormatToken *Previous = FormatTok->Previous;
1582   if (Previous &&
1583       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1584                          tok::kw_delete, tok::l_square) ||
1585        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1586        Previous->isSimpleTypeSpecifier())) {
1587     nextToken();
1588     return false;
1589   }
1590   nextToken();
1591   if (FormatTok->is(tok::l_square)) {
1592     return false;
1593   }
1594   parseSquare(/*LambdaIntroducer=*/true);
1595   return true;
1596 }
1597 
1598 void UnwrappedLineParser::tryToParseJSFunction() {
1599   assert(FormatTok->is(Keywords.kw_function) ||
1600          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1601   if (FormatTok->is(Keywords.kw_async))
1602     nextToken();
1603   // Consume "function".
1604   nextToken();
1605 
1606   // Consume * (generator function). Treat it like C++'s overloaded operators.
1607   if (FormatTok->is(tok::star)) {
1608     FormatTok->Type = TT_OverloadedOperator;
1609     nextToken();
1610   }
1611 
1612   // Consume function name.
1613   if (FormatTok->is(tok::identifier))
1614     nextToken();
1615 
1616   if (FormatTok->isNot(tok::l_paren))
1617     return;
1618 
1619   // Parse formal parameter list.
1620   parseParens();
1621 
1622   if (FormatTok->is(tok::colon)) {
1623     // Parse a type definition.
1624     nextToken();
1625 
1626     // Eat the type declaration. For braced inline object types, balance braces,
1627     // otherwise just parse until finding an l_brace for the function body.
1628     if (FormatTok->is(tok::l_brace))
1629       tryToParseBracedList();
1630     else
1631       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1632         nextToken();
1633   }
1634 
1635   if (FormatTok->is(tok::semi))
1636     return;
1637 
1638   parseChildBlock();
1639 }
1640 
1641 bool UnwrappedLineParser::tryToParseBracedList() {
1642   if (FormatTok->BlockKind == BK_Unknown)
1643     calculateBraceTypes();
1644   assert(FormatTok->BlockKind != BK_Unknown);
1645   if (FormatTok->BlockKind == BK_Block)
1646     return false;
1647   nextToken();
1648   parseBracedList();
1649   return true;
1650 }
1651 
1652 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1653                                           tok::TokenKind ClosingBraceKind) {
1654   bool HasError = false;
1655 
1656   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1657   // replace this by using parseAssigmentExpression() inside.
1658   do {
1659     if (Style.isCSharp()) {
1660       if (FormatTok->is(TT_JsFatArrow)) {
1661         nextToken();
1662         // Fat arrows can be followed by simple expressions or by child blocks
1663         // in curly braces.
1664         if (FormatTok->is(tok::l_brace)) {
1665           parseChildBlock();
1666           continue;
1667         }
1668       }
1669     }
1670     if (Style.Language == FormatStyle::LK_JavaScript) {
1671       if (FormatTok->is(Keywords.kw_function) ||
1672           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1673         tryToParseJSFunction();
1674         continue;
1675       }
1676       if (FormatTok->is(TT_JsFatArrow)) {
1677         nextToken();
1678         // Fat arrows can be followed by simple expressions or by child blocks
1679         // in curly braces.
1680         if (FormatTok->is(tok::l_brace)) {
1681           parseChildBlock();
1682           continue;
1683         }
1684       }
1685       if (FormatTok->is(tok::l_brace)) {
1686         // Could be a method inside of a braced list `{a() { return 1; }}`.
1687         if (tryToParseBracedList())
1688           continue;
1689         parseChildBlock();
1690       }
1691     }
1692     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1693       nextToken();
1694       return !HasError;
1695     }
1696     switch (FormatTok->Tok.getKind()) {
1697     case tok::caret:
1698       nextToken();
1699       if (FormatTok->is(tok::l_brace)) {
1700         parseChildBlock();
1701       }
1702       break;
1703     case tok::l_square:
1704       if (Style.isCSharp())
1705         parseSquare();
1706       else
1707         tryToParseLambda();
1708       break;
1709     case tok::l_paren:
1710       parseParens();
1711       // JavaScript can just have free standing methods and getters/setters in
1712       // object literals. Detect them by a "{" following ")".
1713       if (Style.Language == FormatStyle::LK_JavaScript) {
1714         if (FormatTok->is(tok::l_brace))
1715           parseChildBlock();
1716         break;
1717       }
1718       break;
1719     case tok::l_brace:
1720       // Assume there are no blocks inside a braced init list apart
1721       // from the ones we explicitly parse out (like lambdas).
1722       FormatTok->BlockKind = BK_BracedInit;
1723       nextToken();
1724       parseBracedList();
1725       break;
1726     case tok::less:
1727       if (Style.Language == FormatStyle::LK_Proto) {
1728         nextToken();
1729         parseBracedList(/*ContinueOnSemicolons=*/false,
1730                         /*ClosingBraceKind=*/tok::greater);
1731       } else {
1732         nextToken();
1733       }
1734       break;
1735     case tok::semi:
1736       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1737       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1738       // used for error recovery if we have otherwise determined that this is
1739       // a braced list.
1740       if (Style.Language == FormatStyle::LK_JavaScript) {
1741         nextToken();
1742         break;
1743       }
1744       HasError = true;
1745       if (!ContinueOnSemicolons)
1746         return !HasError;
1747       nextToken();
1748       break;
1749     case tok::comma:
1750       nextToken();
1751       break;
1752     default:
1753       nextToken();
1754       break;
1755     }
1756   } while (!eof());
1757   return false;
1758 }
1759 
1760 void UnwrappedLineParser::parseParens() {
1761   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1762   nextToken();
1763   do {
1764     switch (FormatTok->Tok.getKind()) {
1765     case tok::l_paren:
1766       parseParens();
1767       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1768         parseChildBlock();
1769       break;
1770     case tok::r_paren:
1771       nextToken();
1772       return;
1773     case tok::r_brace:
1774       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1775       return;
1776     case tok::l_square:
1777       tryToParseLambda();
1778       break;
1779     case tok::l_brace:
1780       if (!tryToParseBracedList())
1781         parseChildBlock();
1782       break;
1783     case tok::at:
1784       nextToken();
1785       if (FormatTok->Tok.is(tok::l_brace)) {
1786         nextToken();
1787         parseBracedList();
1788       }
1789       break;
1790     case tok::kw_class:
1791       if (Style.Language == FormatStyle::LK_JavaScript)
1792         parseRecord(/*ParseAsExpr=*/true);
1793       else
1794         nextToken();
1795       break;
1796     case tok::identifier:
1797       if (Style.Language == FormatStyle::LK_JavaScript &&
1798           (FormatTok->is(Keywords.kw_function) ||
1799            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1800         tryToParseJSFunction();
1801       else
1802         nextToken();
1803       break;
1804     default:
1805       nextToken();
1806       break;
1807     }
1808   } while (!eof());
1809 }
1810 
1811 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1812   if (!LambdaIntroducer) {
1813     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1814     if (tryToParseLambda())
1815       return;
1816   }
1817   do {
1818     switch (FormatTok->Tok.getKind()) {
1819     case tok::l_paren:
1820       parseParens();
1821       break;
1822     case tok::r_square:
1823       nextToken();
1824       return;
1825     case tok::r_brace:
1826       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1827       return;
1828     case tok::l_square:
1829       parseSquare();
1830       break;
1831     case tok::l_brace: {
1832       if (!tryToParseBracedList())
1833         parseChildBlock();
1834       break;
1835     }
1836     case tok::at:
1837       nextToken();
1838       if (FormatTok->Tok.is(tok::l_brace)) {
1839         nextToken();
1840         parseBracedList();
1841       }
1842       break;
1843     default:
1844       nextToken();
1845       break;
1846     }
1847   } while (!eof());
1848 }
1849 
1850 void UnwrappedLineParser::parseIfThenElse() {
1851   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1852   nextToken();
1853   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1854     nextToken();
1855   if (FormatTok->Tok.is(tok::l_paren))
1856     parseParens();
1857   bool NeedsUnwrappedLine = false;
1858   if (FormatTok->Tok.is(tok::l_brace)) {
1859     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1860     parseBlock(/*MustBeDeclaration=*/false);
1861     if (Style.BraceWrapping.BeforeElse)
1862       addUnwrappedLine();
1863     else
1864       NeedsUnwrappedLine = true;
1865   } else {
1866     addUnwrappedLine();
1867     ++Line->Level;
1868     parseStructuralElement();
1869     --Line->Level;
1870   }
1871   if (FormatTok->Tok.is(tok::kw_else)) {
1872     nextToken();
1873     if (FormatTok->Tok.is(tok::l_brace)) {
1874       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1875       parseBlock(/*MustBeDeclaration=*/false);
1876       addUnwrappedLine();
1877     } else if (FormatTok->Tok.is(tok::kw_if)) {
1878       parseIfThenElse();
1879     } else {
1880       addUnwrappedLine();
1881       ++Line->Level;
1882       parseStructuralElement();
1883       if (FormatTok->is(tok::eof))
1884         addUnwrappedLine();
1885       --Line->Level;
1886     }
1887   } else if (NeedsUnwrappedLine) {
1888     addUnwrappedLine();
1889   }
1890 }
1891 
1892 void UnwrappedLineParser::parseTryCatch() {
1893   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1894   nextToken();
1895   bool NeedsUnwrappedLine = false;
1896   if (FormatTok->is(tok::colon)) {
1897     // We are in a function try block, what comes is an initializer list.
1898     nextToken();
1899 
1900     // In case identifiers were removed by clang-tidy, what might follow is
1901     // multiple commas in sequence - before the first identifier.
1902     while (FormatTok->is(tok::comma))
1903       nextToken();
1904 
1905     while (FormatTok->is(tok::identifier)) {
1906       nextToken();
1907       if (FormatTok->is(tok::l_paren))
1908         parseParens();
1909 
1910       // In case identifiers were removed by clang-tidy, what might follow is
1911       // multiple commas in sequence - after the first identifier.
1912       while (FormatTok->is(tok::comma))
1913         nextToken();
1914     }
1915   }
1916   // Parse try with resource.
1917   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1918     parseParens();
1919   }
1920   if (FormatTok->is(tok::l_brace)) {
1921     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1922     parseBlock(/*MustBeDeclaration=*/false);
1923     if (Style.BraceWrapping.BeforeCatch) {
1924       addUnwrappedLine();
1925     } else {
1926       NeedsUnwrappedLine = true;
1927     }
1928   } else if (!FormatTok->is(tok::kw_catch)) {
1929     // The C++ standard requires a compound-statement after a try.
1930     // If there's none, we try to assume there's a structuralElement
1931     // and try to continue.
1932     addUnwrappedLine();
1933     ++Line->Level;
1934     parseStructuralElement();
1935     --Line->Level;
1936   }
1937   while (1) {
1938     if (FormatTok->is(tok::at))
1939       nextToken();
1940     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1941                              tok::kw___finally) ||
1942           ((Style.Language == FormatStyle::LK_Java ||
1943             Style.Language == FormatStyle::LK_JavaScript) &&
1944            FormatTok->is(Keywords.kw_finally)) ||
1945           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1946            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1947       break;
1948     nextToken();
1949     while (FormatTok->isNot(tok::l_brace)) {
1950       if (FormatTok->is(tok::l_paren)) {
1951         parseParens();
1952         continue;
1953       }
1954       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1955         return;
1956       nextToken();
1957     }
1958     NeedsUnwrappedLine = false;
1959     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1960     parseBlock(/*MustBeDeclaration=*/false);
1961     if (Style.BraceWrapping.BeforeCatch)
1962       addUnwrappedLine();
1963     else
1964       NeedsUnwrappedLine = true;
1965   }
1966   if (NeedsUnwrappedLine)
1967     addUnwrappedLine();
1968 }
1969 
1970 void UnwrappedLineParser::parseNamespace() {
1971   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
1972          "'namespace' expected");
1973 
1974   const FormatToken &InitialToken = *FormatTok;
1975   nextToken();
1976   if (InitialToken.is(TT_NamespaceMacro)) {
1977     parseParens();
1978   } else {
1979     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
1980                               tok::l_square)) {
1981       if (FormatTok->is(tok::l_square))
1982         parseSquare();
1983       else
1984         nextToken();
1985     }
1986   }
1987   if (FormatTok->Tok.is(tok::l_brace)) {
1988     if (ShouldBreakBeforeBrace(Style, InitialToken))
1989       addUnwrappedLine();
1990 
1991     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1992                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1993                      DeclarationScopeStack.size() > 1);
1994     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1995     // Munch the semicolon after a namespace. This is more common than one would
1996     // think. Putting the semicolon into its own line is very ugly.
1997     if (FormatTok->Tok.is(tok::semi))
1998       nextToken();
1999     addUnwrappedLine();
2000   }
2001   // FIXME: Add error handling.
2002 }
2003 
2004 void UnwrappedLineParser::parseNew() {
2005   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2006   nextToken();
2007 
2008   if (Style.isCSharp()) {
2009     do {
2010       if (FormatTok->is(tok::l_brace))
2011         parseBracedList();
2012 
2013       if (FormatTok->isOneOf(tok::semi, tok::comma))
2014         return;
2015 
2016       nextToken();
2017     } while (!eof());
2018   }
2019 
2020   if (Style.Language != FormatStyle::LK_Java)
2021     return;
2022 
2023   // In Java, we can parse everything up to the parens, which aren't optional.
2024   do {
2025     // There should not be a ;, { or } before the new's open paren.
2026     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2027       return;
2028 
2029     // Consume the parens.
2030     if (FormatTok->is(tok::l_paren)) {
2031       parseParens();
2032 
2033       // If there is a class body of an anonymous class, consume that as child.
2034       if (FormatTok->is(tok::l_brace))
2035         parseChildBlock();
2036       return;
2037     }
2038     nextToken();
2039   } while (!eof());
2040 }
2041 
2042 void UnwrappedLineParser::parseForOrWhileLoop() {
2043   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2044          "'for', 'while' or foreach macro expected");
2045   nextToken();
2046   // JS' for await ( ...
2047   if (Style.Language == FormatStyle::LK_JavaScript &&
2048       FormatTok->is(Keywords.kw_await))
2049     nextToken();
2050   if (FormatTok->Tok.is(tok::l_paren))
2051     parseParens();
2052   if (FormatTok->Tok.is(tok::l_brace)) {
2053     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2054     parseBlock(/*MustBeDeclaration=*/false);
2055     addUnwrappedLine();
2056   } else {
2057     addUnwrappedLine();
2058     ++Line->Level;
2059     parseStructuralElement();
2060     --Line->Level;
2061   }
2062 }
2063 
2064 void UnwrappedLineParser::parseDoWhile() {
2065   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2066   nextToken();
2067   if (FormatTok->Tok.is(tok::l_brace)) {
2068     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2069     parseBlock(/*MustBeDeclaration=*/false);
2070     if (Style.BraceWrapping.IndentBraces)
2071       addUnwrappedLine();
2072   } else {
2073     addUnwrappedLine();
2074     ++Line->Level;
2075     parseStructuralElement();
2076     --Line->Level;
2077   }
2078 
2079   // FIXME: Add error handling.
2080   if (!FormatTok->Tok.is(tok::kw_while)) {
2081     addUnwrappedLine();
2082     return;
2083   }
2084 
2085   nextToken();
2086   parseStructuralElement();
2087 }
2088 
2089 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2090   nextToken();
2091   unsigned OldLineLevel = Line->Level;
2092   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2093     --Line->Level;
2094   if (LeftAlignLabel)
2095     Line->Level = 0;
2096   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2097       FormatTok->Tok.is(tok::l_brace)) {
2098     CompoundStatementIndenter Indenter(this, Line->Level,
2099                                        Style.BraceWrapping.AfterCaseLabel,
2100                                        Style.BraceWrapping.IndentBraces);
2101     parseBlock(/*MustBeDeclaration=*/false);
2102     if (FormatTok->Tok.is(tok::kw_break)) {
2103       if (Style.BraceWrapping.AfterControlStatement ==
2104           FormatStyle::BWACS_Always)
2105         addUnwrappedLine();
2106       parseStructuralElement();
2107     }
2108     addUnwrappedLine();
2109   } else {
2110     if (FormatTok->is(tok::semi))
2111       nextToken();
2112     addUnwrappedLine();
2113   }
2114   Line->Level = OldLineLevel;
2115   if (FormatTok->isNot(tok::l_brace)) {
2116     parseStructuralElement();
2117     addUnwrappedLine();
2118   }
2119 }
2120 
2121 void UnwrappedLineParser::parseCaseLabel() {
2122   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2123   // FIXME: fix handling of complex expressions here.
2124   do {
2125     nextToken();
2126   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2127   parseLabel();
2128 }
2129 
2130 void UnwrappedLineParser::parseSwitch() {
2131   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2132   nextToken();
2133   if (FormatTok->Tok.is(tok::l_paren))
2134     parseParens();
2135   if (FormatTok->Tok.is(tok::l_brace)) {
2136     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2137     parseBlock(/*MustBeDeclaration=*/false);
2138     addUnwrappedLine();
2139   } else {
2140     addUnwrappedLine();
2141     ++Line->Level;
2142     parseStructuralElement();
2143     --Line->Level;
2144   }
2145 }
2146 
2147 void UnwrappedLineParser::parseAccessSpecifier() {
2148   nextToken();
2149   // Understand Qt's slots.
2150   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2151     nextToken();
2152   // Otherwise, we don't know what it is, and we'd better keep the next token.
2153   if (FormatTok->Tok.is(tok::colon))
2154     nextToken();
2155   addUnwrappedLine();
2156 }
2157 
2158 bool UnwrappedLineParser::parseEnum() {
2159   // Won't be 'enum' for NS_ENUMs.
2160   if (FormatTok->Tok.is(tok::kw_enum))
2161     nextToken();
2162 
2163   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2164   // declarations. An "enum" keyword followed by a colon would be a syntax
2165   // error and thus assume it is just an identifier.
2166   if (Style.Language == FormatStyle::LK_JavaScript &&
2167       FormatTok->isOneOf(tok::colon, tok::question))
2168     return false;
2169 
2170   // In protobuf, "enum" can be used as a field name.
2171   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2172     return false;
2173 
2174   // Eat up enum class ...
2175   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2176     nextToken();
2177 
2178   while (FormatTok->Tok.getIdentifierInfo() ||
2179          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2180                             tok::greater, tok::comma, tok::question)) {
2181     nextToken();
2182     // We can have macros or attributes in between 'enum' and the enum name.
2183     if (FormatTok->is(tok::l_paren))
2184       parseParens();
2185     if (FormatTok->is(tok::identifier)) {
2186       nextToken();
2187       // If there are two identifiers in a row, this is likely an elaborate
2188       // return type. In Java, this can be "implements", etc.
2189       if (Style.isCpp() && FormatTok->is(tok::identifier))
2190         return false;
2191     }
2192   }
2193 
2194   // Just a declaration or something is wrong.
2195   if (FormatTok->isNot(tok::l_brace))
2196     return true;
2197   FormatTok->BlockKind = BK_Block;
2198 
2199   if (Style.Language == FormatStyle::LK_Java) {
2200     // Java enums are different.
2201     parseJavaEnumBody();
2202     return true;
2203   }
2204   if (Style.Language == FormatStyle::LK_Proto) {
2205     parseBlock(/*MustBeDeclaration=*/true);
2206     return true;
2207   }
2208 
2209   // Parse enum body.
2210   nextToken();
2211   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2212   if (HasError) {
2213     if (FormatTok->is(tok::semi))
2214       nextToken();
2215     addUnwrappedLine();
2216   }
2217   return true;
2218 
2219   // There is no addUnwrappedLine() here so that we fall through to parsing a
2220   // structural element afterwards. Thus, in "enum A {} n, m;",
2221   // "} n, m;" will end up in one unwrapped line.
2222 }
2223 
2224 void UnwrappedLineParser::parseJavaEnumBody() {
2225   // Determine whether the enum is simple, i.e. does not have a semicolon or
2226   // constants with class bodies. Simple enums can be formatted like braced
2227   // lists, contracted to a single line, etc.
2228   unsigned StoredPosition = Tokens->getPosition();
2229   bool IsSimple = true;
2230   FormatToken *Tok = Tokens->getNextToken();
2231   while (Tok) {
2232     if (Tok->is(tok::r_brace))
2233       break;
2234     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2235       IsSimple = false;
2236       break;
2237     }
2238     // FIXME: This will also mark enums with braces in the arguments to enum
2239     // constants as "not simple". This is probably fine in practice, though.
2240     Tok = Tokens->getNextToken();
2241   }
2242   FormatTok = Tokens->setPosition(StoredPosition);
2243 
2244   if (IsSimple) {
2245     nextToken();
2246     parseBracedList();
2247     addUnwrappedLine();
2248     return;
2249   }
2250 
2251   // Parse the body of a more complex enum.
2252   // First add a line for everything up to the "{".
2253   nextToken();
2254   addUnwrappedLine();
2255   ++Line->Level;
2256 
2257   // Parse the enum constants.
2258   while (FormatTok) {
2259     if (FormatTok->is(tok::l_brace)) {
2260       // Parse the constant's class body.
2261       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2262                  /*MunchSemi=*/false);
2263     } else if (FormatTok->is(tok::l_paren)) {
2264       parseParens();
2265     } else if (FormatTok->is(tok::comma)) {
2266       nextToken();
2267       addUnwrappedLine();
2268     } else if (FormatTok->is(tok::semi)) {
2269       nextToken();
2270       addUnwrappedLine();
2271       break;
2272     } else if (FormatTok->is(tok::r_brace)) {
2273       addUnwrappedLine();
2274       break;
2275     } else {
2276       nextToken();
2277     }
2278   }
2279 
2280   // Parse the class body after the enum's ";" if any.
2281   parseLevel(/*HasOpeningBrace=*/true);
2282   nextToken();
2283   --Line->Level;
2284   addUnwrappedLine();
2285 }
2286 
2287 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2288   const FormatToken &InitialToken = *FormatTok;
2289   nextToken();
2290 
2291   // The actual identifier can be a nested name specifier, and in macros
2292   // it is often token-pasted.
2293   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2294                             tok::kw___attribute, tok::kw___declspec,
2295                             tok::kw_alignas) ||
2296          ((Style.Language == FormatStyle::LK_Java ||
2297            Style.Language == FormatStyle::LK_JavaScript) &&
2298           FormatTok->isOneOf(tok::period, tok::comma))) {
2299     if (Style.Language == FormatStyle::LK_JavaScript &&
2300         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2301       // JavaScript/TypeScript supports inline object types in
2302       // extends/implements positions:
2303       //     class Foo implements {bar: number} { }
2304       nextToken();
2305       if (FormatTok->is(tok::l_brace)) {
2306         tryToParseBracedList();
2307         continue;
2308       }
2309     }
2310     bool IsNonMacroIdentifier =
2311         FormatTok->is(tok::identifier) &&
2312         FormatTok->TokenText != FormatTok->TokenText.upper();
2313     nextToken();
2314     // We can have macros or attributes in between 'class' and the class name.
2315     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2316       parseParens();
2317   }
2318 
2319   // Note that parsing away template declarations here leads to incorrectly
2320   // accepting function declarations as record declarations.
2321   // In general, we cannot solve this problem. Consider:
2322   // class A<int> B() {}
2323   // which can be a function definition or a class definition when B() is a
2324   // macro. If we find enough real-world cases where this is a problem, we
2325   // can parse for the 'template' keyword in the beginning of the statement,
2326   // and thus rule out the record production in case there is no template
2327   // (this would still leave us with an ambiguity between template function
2328   // and class declarations).
2329   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2330     while (!eof()) {
2331       if (FormatTok->is(tok::l_brace)) {
2332         calculateBraceTypes(/*ExpectClassBody=*/true);
2333         if (!tryToParseBracedList())
2334           break;
2335       }
2336       if (FormatTok->Tok.is(tok::semi))
2337         return;
2338       nextToken();
2339     }
2340   }
2341   if (FormatTok->Tok.is(tok::l_brace)) {
2342     if (ParseAsExpr) {
2343       parseChildBlock();
2344     } else {
2345       if (ShouldBreakBeforeBrace(Style, InitialToken))
2346         addUnwrappedLine();
2347 
2348       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2349                  /*MunchSemi=*/false);
2350     }
2351   }
2352   // There is no addUnwrappedLine() here so that we fall through to parsing a
2353   // structural element afterwards. Thus, in "class A {} n, m;",
2354   // "} n, m;" will end up in one unwrapped line.
2355 }
2356 
2357 void UnwrappedLineParser::parseObjCMethod() {
2358   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2359          "'(' or identifier expected.");
2360   do {
2361     if (FormatTok->Tok.is(tok::semi)) {
2362       nextToken();
2363       addUnwrappedLine();
2364       return;
2365     } else if (FormatTok->Tok.is(tok::l_brace)) {
2366       if (Style.BraceWrapping.AfterFunction)
2367         addUnwrappedLine();
2368       parseBlock(/*MustBeDeclaration=*/false);
2369       addUnwrappedLine();
2370       return;
2371     } else {
2372       nextToken();
2373     }
2374   } while (!eof());
2375 }
2376 
2377 void UnwrappedLineParser::parseObjCProtocolList() {
2378   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2379   do {
2380     nextToken();
2381     // Early exit in case someone forgot a close angle.
2382     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2383         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2384       return;
2385   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2386   nextToken(); // Skip '>'.
2387 }
2388 
2389 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2390   do {
2391     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2392       nextToken();
2393       addUnwrappedLine();
2394       break;
2395     }
2396     if (FormatTok->is(tok::l_brace)) {
2397       parseBlock(/*MustBeDeclaration=*/false);
2398       // In ObjC interfaces, nothing should be following the "}".
2399       addUnwrappedLine();
2400     } else if (FormatTok->is(tok::r_brace)) {
2401       // Ignore stray "}". parseStructuralElement doesn't consume them.
2402       nextToken();
2403       addUnwrappedLine();
2404     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2405       nextToken();
2406       parseObjCMethod();
2407     } else {
2408       parseStructuralElement();
2409     }
2410   } while (!eof());
2411 }
2412 
2413 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2414   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2415          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2416   nextToken();
2417   nextToken(); // interface name
2418 
2419   // @interface can be followed by a lightweight generic
2420   // specialization list, then either a base class or a category.
2421   if (FormatTok->Tok.is(tok::less)) {
2422     // Unlike protocol lists, generic parameterizations support
2423     // nested angles:
2424     //
2425     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2426     //     NSObject <NSCopying, NSSecureCoding>
2427     //
2428     // so we need to count how many open angles we have left.
2429     unsigned NumOpenAngles = 1;
2430     do {
2431       nextToken();
2432       // Early exit in case someone forgot a close angle.
2433       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2434           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2435         break;
2436       if (FormatTok->Tok.is(tok::less))
2437         ++NumOpenAngles;
2438       else if (FormatTok->Tok.is(tok::greater)) {
2439         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2440         --NumOpenAngles;
2441       }
2442     } while (!eof() && NumOpenAngles != 0);
2443     nextToken(); // Skip '>'.
2444   }
2445   if (FormatTok->Tok.is(tok::colon)) {
2446     nextToken();
2447     nextToken(); // base class name
2448   } else if (FormatTok->Tok.is(tok::l_paren))
2449     // Skip category, if present.
2450     parseParens();
2451 
2452   if (FormatTok->Tok.is(tok::less))
2453     parseObjCProtocolList();
2454 
2455   if (FormatTok->Tok.is(tok::l_brace)) {
2456     if (Style.BraceWrapping.AfterObjCDeclaration)
2457       addUnwrappedLine();
2458     parseBlock(/*MustBeDeclaration=*/true);
2459   }
2460 
2461   // With instance variables, this puts '}' on its own line.  Without instance
2462   // variables, this ends the @interface line.
2463   addUnwrappedLine();
2464 
2465   parseObjCUntilAtEnd();
2466 }
2467 
2468 // Returns true for the declaration/definition form of @protocol,
2469 // false for the expression form.
2470 bool UnwrappedLineParser::parseObjCProtocol() {
2471   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2472   nextToken();
2473 
2474   if (FormatTok->is(tok::l_paren))
2475     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2476     return false;
2477 
2478   // The definition/declaration form,
2479   // @protocol Foo
2480   // - (int)someMethod;
2481   // @end
2482 
2483   nextToken(); // protocol name
2484 
2485   if (FormatTok->Tok.is(tok::less))
2486     parseObjCProtocolList();
2487 
2488   // Check for protocol declaration.
2489   if (FormatTok->Tok.is(tok::semi)) {
2490     nextToken();
2491     addUnwrappedLine();
2492     return true;
2493   }
2494 
2495   addUnwrappedLine();
2496   parseObjCUntilAtEnd();
2497   return true;
2498 }
2499 
2500 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2501   bool IsImport = FormatTok->is(Keywords.kw_import);
2502   assert(IsImport || FormatTok->is(tok::kw_export));
2503   nextToken();
2504 
2505   // Consume the "default" in "export default class/function".
2506   if (FormatTok->is(tok::kw_default))
2507     nextToken();
2508 
2509   // Consume "async function", "function" and "default function", so that these
2510   // get parsed as free-standing JS functions, i.e. do not require a trailing
2511   // semicolon.
2512   if (FormatTok->is(Keywords.kw_async))
2513     nextToken();
2514   if (FormatTok->is(Keywords.kw_function)) {
2515     nextToken();
2516     return;
2517   }
2518 
2519   // For imports, `export *`, `export {...}`, consume the rest of the line up
2520   // to the terminating `;`. For everything else, just return and continue
2521   // parsing the structural element, i.e. the declaration or expression for
2522   // `export default`.
2523   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2524       !FormatTok->isStringLiteral())
2525     return;
2526 
2527   while (!eof()) {
2528     if (FormatTok->is(tok::semi))
2529       return;
2530     if (Line->Tokens.empty()) {
2531       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2532       // import statement should terminate.
2533       return;
2534     }
2535     if (FormatTok->is(tok::l_brace)) {
2536       FormatTok->BlockKind = BK_Block;
2537       nextToken();
2538       parseBracedList();
2539     } else {
2540       nextToken();
2541     }
2542   }
2543 }
2544 
2545 void UnwrappedLineParser::parseStatementMacro() {
2546   nextToken();
2547   if (FormatTok->is(tok::l_paren))
2548     parseParens();
2549   if (FormatTok->is(tok::semi))
2550     nextToken();
2551   addUnwrappedLine();
2552 }
2553 
2554 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2555                                                  StringRef Prefix = "") {
2556   llvm::dbgs() << Prefix << "Line(" << Line.Level
2557                << ", FSC=" << Line.FirstStartColumn << ")"
2558                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2559   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2560                                                     E = Line.Tokens.end();
2561        I != E; ++I) {
2562     llvm::dbgs() << I->Tok->Tok.getName() << "["
2563                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2564                  << "] ";
2565   }
2566   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2567                                                     E = Line.Tokens.end();
2568        I != E; ++I) {
2569     const UnwrappedLineNode &Node = *I;
2570     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2571              I = Node.Children.begin(),
2572              E = Node.Children.end();
2573          I != E; ++I) {
2574       printDebugInfo(*I, "\nChild: ");
2575     }
2576   }
2577   llvm::dbgs() << "\n";
2578 }
2579 
2580 void UnwrappedLineParser::addUnwrappedLine() {
2581   if (Line->Tokens.empty())
2582     return;
2583   LLVM_DEBUG({
2584     if (CurrentLines == &Lines)
2585       printDebugInfo(*Line);
2586   });
2587   CurrentLines->push_back(std::move(*Line));
2588   Line->Tokens.clear();
2589   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2590   Line->FirstStartColumn = 0;
2591   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2592     CurrentLines->append(
2593         std::make_move_iterator(PreprocessorDirectives.begin()),
2594         std::make_move_iterator(PreprocessorDirectives.end()));
2595     PreprocessorDirectives.clear();
2596   }
2597   // Disconnect the current token from the last token on the previous line.
2598   FormatTok->Previous = nullptr;
2599 }
2600 
2601 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2602 
2603 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2604   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2605          FormatTok.NewlinesBefore > 0;
2606 }
2607 
2608 // Checks if \p FormatTok is a line comment that continues the line comment
2609 // section on \p Line.
2610 static bool
2611 continuesLineCommentSection(const FormatToken &FormatTok,
2612                             const UnwrappedLine &Line,
2613                             const llvm::Regex &CommentPragmasRegex) {
2614   if (Line.Tokens.empty())
2615     return false;
2616 
2617   StringRef IndentContent = FormatTok.TokenText;
2618   if (FormatTok.TokenText.startswith("//") ||
2619       FormatTok.TokenText.startswith("/*"))
2620     IndentContent = FormatTok.TokenText.substr(2);
2621   if (CommentPragmasRegex.match(IndentContent))
2622     return false;
2623 
2624   // If Line starts with a line comment, then FormatTok continues the comment
2625   // section if its original column is greater or equal to the original start
2626   // column of the line.
2627   //
2628   // Define the min column token of a line as follows: if a line ends in '{' or
2629   // contains a '{' followed by a line comment, then the min column token is
2630   // that '{'. Otherwise, the min column token of the line is the first token of
2631   // the line.
2632   //
2633   // If Line starts with a token other than a line comment, then FormatTok
2634   // continues the comment section if its original column is greater than the
2635   // original start column of the min column token of the line.
2636   //
2637   // For example, the second line comment continues the first in these cases:
2638   //
2639   // // first line
2640   // // second line
2641   //
2642   // and:
2643   //
2644   // // first line
2645   //  // second line
2646   //
2647   // and:
2648   //
2649   // int i; // first line
2650   //  // second line
2651   //
2652   // and:
2653   //
2654   // do { // first line
2655   //      // second line
2656   //   int i;
2657   // } while (true);
2658   //
2659   // and:
2660   //
2661   // enum {
2662   //   a, // first line
2663   //    // second line
2664   //   b
2665   // };
2666   //
2667   // The second line comment doesn't continue the first in these cases:
2668   //
2669   //   // first line
2670   //  // second line
2671   //
2672   // and:
2673   //
2674   // int i; // first line
2675   // // second line
2676   //
2677   // and:
2678   //
2679   // do { // first line
2680   //   // second line
2681   //   int i;
2682   // } while (true);
2683   //
2684   // and:
2685   //
2686   // enum {
2687   //   a, // first line
2688   //   // second line
2689   // };
2690   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2691 
2692   // Scan for '{//'. If found, use the column of '{' as a min column for line
2693   // comment section continuation.
2694   const FormatToken *PreviousToken = nullptr;
2695   for (const UnwrappedLineNode &Node : Line.Tokens) {
2696     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2697         isLineComment(*Node.Tok)) {
2698       MinColumnToken = PreviousToken;
2699       break;
2700     }
2701     PreviousToken = Node.Tok;
2702 
2703     // Grab the last newline preceding a token in this unwrapped line.
2704     if (Node.Tok->NewlinesBefore > 0) {
2705       MinColumnToken = Node.Tok;
2706     }
2707   }
2708   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2709     MinColumnToken = PreviousToken;
2710   }
2711 
2712   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2713                               MinColumnToken);
2714 }
2715 
2716 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2717   bool JustComments = Line->Tokens.empty();
2718   for (SmallVectorImpl<FormatToken *>::const_iterator
2719            I = CommentsBeforeNextToken.begin(),
2720            E = CommentsBeforeNextToken.end();
2721        I != E; ++I) {
2722     // Line comments that belong to the same line comment section are put on the
2723     // same line since later we might want to reflow content between them.
2724     // Additional fine-grained breaking of line comment sections is controlled
2725     // by the class BreakableLineCommentSection in case it is desirable to keep
2726     // several line comment sections in the same unwrapped line.
2727     //
2728     // FIXME: Consider putting separate line comment sections as children to the
2729     // unwrapped line instead.
2730     (*I)->ContinuesLineCommentSection =
2731         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2732     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2733       addUnwrappedLine();
2734     pushToken(*I);
2735   }
2736   if (NewlineBeforeNext && JustComments)
2737     addUnwrappedLine();
2738   CommentsBeforeNextToken.clear();
2739 }
2740 
2741 void UnwrappedLineParser::nextToken(int LevelDifference) {
2742   if (eof())
2743     return;
2744   flushComments(isOnNewLine(*FormatTok));
2745   pushToken(FormatTok);
2746   FormatToken *Previous = FormatTok;
2747   if (Style.Language != FormatStyle::LK_JavaScript)
2748     readToken(LevelDifference);
2749   else
2750     readTokenWithJavaScriptASI();
2751   FormatTok->Previous = Previous;
2752 }
2753 
2754 void UnwrappedLineParser::distributeComments(
2755     const SmallVectorImpl<FormatToken *> &Comments,
2756     const FormatToken *NextTok) {
2757   // Whether or not a line comment token continues a line is controlled by
2758   // the method continuesLineCommentSection, with the following caveat:
2759   //
2760   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2761   // that each comment line from the trail is aligned with the next token, if
2762   // the next token exists. If a trail exists, the beginning of the maximal
2763   // trail is marked as a start of a new comment section.
2764   //
2765   // For example in this code:
2766   //
2767   // int a; // line about a
2768   //   // line 1 about b
2769   //   // line 2 about b
2770   //   int b;
2771   //
2772   // the two lines about b form a maximal trail, so there are two sections, the
2773   // first one consisting of the single comment "// line about a" and the
2774   // second one consisting of the next two comments.
2775   if (Comments.empty())
2776     return;
2777   bool ShouldPushCommentsInCurrentLine = true;
2778   bool HasTrailAlignedWithNextToken = false;
2779   unsigned StartOfTrailAlignedWithNextToken = 0;
2780   if (NextTok) {
2781     // We are skipping the first element intentionally.
2782     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2783       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2784         HasTrailAlignedWithNextToken = true;
2785         StartOfTrailAlignedWithNextToken = i;
2786       }
2787     }
2788   }
2789   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2790     FormatToken *FormatTok = Comments[i];
2791     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2792       FormatTok->ContinuesLineCommentSection = false;
2793     } else {
2794       FormatTok->ContinuesLineCommentSection =
2795           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2796     }
2797     if (!FormatTok->ContinuesLineCommentSection &&
2798         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2799       ShouldPushCommentsInCurrentLine = false;
2800     }
2801     if (ShouldPushCommentsInCurrentLine) {
2802       pushToken(FormatTok);
2803     } else {
2804       CommentsBeforeNextToken.push_back(FormatTok);
2805     }
2806   }
2807 }
2808 
2809 void UnwrappedLineParser::readToken(int LevelDifference) {
2810   SmallVector<FormatToken *, 1> Comments;
2811   do {
2812     FormatTok = Tokens->getNextToken();
2813     assert(FormatTok);
2814     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2815            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2816       distributeComments(Comments, FormatTok);
2817       Comments.clear();
2818       // If there is an unfinished unwrapped line, we flush the preprocessor
2819       // directives only after that unwrapped line was finished later.
2820       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2821       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2822       assert((LevelDifference >= 0 ||
2823               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2824              "LevelDifference makes Line->Level negative");
2825       Line->Level += LevelDifference;
2826       // Comments stored before the preprocessor directive need to be output
2827       // before the preprocessor directive, at the same level as the
2828       // preprocessor directive, as we consider them to apply to the directive.
2829       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
2830           PPBranchLevel > 0)
2831         Line->Level += PPBranchLevel;
2832       flushComments(isOnNewLine(*FormatTok));
2833       parsePPDirective();
2834     }
2835     while (FormatTok->Type == TT_ConflictStart ||
2836            FormatTok->Type == TT_ConflictEnd ||
2837            FormatTok->Type == TT_ConflictAlternative) {
2838       if (FormatTok->Type == TT_ConflictStart) {
2839         conditionalCompilationStart(/*Unreachable=*/false);
2840       } else if (FormatTok->Type == TT_ConflictAlternative) {
2841         conditionalCompilationAlternative();
2842       } else if (FormatTok->Type == TT_ConflictEnd) {
2843         conditionalCompilationEnd();
2844       }
2845       FormatTok = Tokens->getNextToken();
2846       FormatTok->MustBreakBefore = true;
2847     }
2848 
2849     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2850         !Line->InPPDirective) {
2851       continue;
2852     }
2853 
2854     if (!FormatTok->Tok.is(tok::comment)) {
2855       distributeComments(Comments, FormatTok);
2856       Comments.clear();
2857       return;
2858     }
2859 
2860     Comments.push_back(FormatTok);
2861   } while (!eof());
2862 
2863   distributeComments(Comments, nullptr);
2864   Comments.clear();
2865 }
2866 
2867 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2868   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2869   if (MustBreakBeforeNextToken) {
2870     Line->Tokens.back().Tok->MustBreakBefore = true;
2871     MustBreakBeforeNextToken = false;
2872   }
2873 }
2874 
2875 } // end namespace format
2876 } // end namespace clang
2877