1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31   virtual FormatToken *getNextToken() = 0;
32 
33   virtual unsigned getPosition() = 0;
34   virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
41   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42                          bool MustBeDeclaration)
43       : Line(Line), Stack(Stack) {
44     Line.MustBeDeclaration = MustBeDeclaration;
45     Stack.push_back(MustBeDeclaration);
46   }
47   ~ScopedDeclarationState() {
48     Stack.pop_back();
49     if (!Stack.empty())
50       Line.MustBeDeclaration = Stack.back();
51     else
52       Line.MustBeDeclaration = true;
53   }
54 
55 private:
56   UnwrappedLine &Line;
57   std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68                                  const FormatToken *Previous,
69                                  const FormatToken *MinColumnToken) {
70   if (!Previous || !MinColumnToken)
71     return false;
72   unsigned MinContinueColumn =
73       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75          isLineComment(*Previous) &&
76          FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
81   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82                    FormatToken *&ResetToken)
83       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85         Token(nullptr), PreviousToken(nullptr) {
86     FakeEOF.Tok.startToken();
87     FakeEOF.Tok.setKind(tok::eof);
88     TokenSource = this;
89     Line.Level = 0;
90     Line.InPPDirective = true;
91   }
92 
93   ~ScopedMacroState() override {
94     TokenSource = PreviousTokenSource;
95     ResetToken = Token;
96     Line.InPPDirective = false;
97     Line.Level = PreviousLineLevel;
98   }
99 
100   FormatToken *getNextToken() override {
101     // The \c UnwrappedLineParser guards against this by never calling
102     // \c getNextToken() after it has encountered the first eof token.
103     assert(!eof());
104     PreviousToken = Token;
105     Token = PreviousTokenSource->getNextToken();
106     if (eof())
107       return &FakeEOF;
108     return Token;
109   }
110 
111   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
113   FormatToken *setPosition(unsigned Position) override {
114     PreviousToken = nullptr;
115     Token = PreviousTokenSource->setPosition(Position);
116     return Token;
117   }
118 
119 private:
120   bool eof() {
121     return Token && Token->HasUnescapedNewline &&
122            !continuesLineComment(*Token, PreviousToken,
123                                  /*MinColumnToken=*/PreviousToken);
124   }
125 
126   FormatToken FakeEOF;
127   UnwrappedLine &Line;
128   FormatTokenSource *&TokenSource;
129   FormatToken *&ResetToken;
130   unsigned PreviousLineLevel;
131   FormatTokenSource *PreviousTokenSource;
132 
133   FormatToken *Token;
134   FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
139 class ScopedLineState {
140 public:
141   ScopedLineState(UnwrappedLineParser &Parser,
142                   bool SwitchToPreprocessorLines = false)
143       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144     if (SwitchToPreprocessorLines)
145       Parser.CurrentLines = &Parser.PreprocessorDirectives;
146     else if (!Parser.Line->Tokens.empty())
147       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148     PreBlockLine = std::move(Parser.Line);
149     Parser.Line = std::make_unique<UnwrappedLine>();
150     Parser.Line->Level = PreBlockLine->Level;
151     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152   }
153 
154   ~ScopedLineState() {
155     if (!Parser.Line->Tokens.empty()) {
156       Parser.addUnwrappedLine();
157     }
158     assert(Parser.Line->Tokens.empty());
159     Parser.Line = std::move(PreBlockLine);
160     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161       Parser.MustBreakBeforeNextToken = true;
162     Parser.CurrentLines = OriginalLines;
163   }
164 
165 private:
166   UnwrappedLineParser &Parser;
167 
168   std::unique_ptr<UnwrappedLine> PreBlockLine;
169   SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
172 class CompoundStatementIndenter {
173 public:
174   CompoundStatementIndenter(UnwrappedLineParser *Parser,
175                             const FormatStyle &Style, unsigned &LineLevel)
176       : CompoundStatementIndenter(Parser, LineLevel,
177                                   Style.BraceWrapping.AfterControlStatement,
178                                   Style.BraceWrapping.IndentBraces) {}
179   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
180                             bool WrapBrace, bool IndentBrace)
181       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182     if (WrapBrace)
183       Parser->addUnwrappedLine();
184     if (IndentBrace)
185       ++LineLevel;
186   }
187   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188 
189 private:
190   unsigned &LineLevel;
191   unsigned OldLineLevel;
192 };
193 
194 namespace {
195 
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
198   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199       : Tokens(Tokens), Position(-1) {}
200 
201   FormatToken *getNextToken() override {
202     ++Position;
203     return Tokens[Position];
204   }
205 
206   unsigned getPosition() override {
207     assert(Position >= 0);
208     return Position;
209   }
210 
211   FormatToken *setPosition(unsigned P) override {
212     Position = P;
213     return Tokens[Position];
214   }
215 
216   void reset() { Position = -1; }
217 
218 private:
219   ArrayRef<FormatToken *> Tokens;
220   int Position;
221 };
222 
223 } // end anonymous namespace
224 
225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
226                                          const AdditionalKeywords &Keywords,
227                                          unsigned FirstStartColumn,
228                                          ArrayRef<FormatToken *> Tokens,
229                                          UnwrappedLineConsumer &Callback)
230     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235                        ? IG_Rejected
236                        : IG_Inited),
237       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238 
239 void UnwrappedLineParser::reset() {
240   PPBranchLevel = -1;
241   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242                      ? IG_Rejected
243                      : IG_Inited;
244   IncludeGuardToken = nullptr;
245   Line.reset(new UnwrappedLine);
246   CommentsBeforeNextToken.clear();
247   FormatTok = nullptr;
248   MustBreakBeforeNextToken = false;
249   PreprocessorDirectives.clear();
250   CurrentLines = &Lines;
251   DeclarationScopeStack.clear();
252   PPStack.clear();
253   Line->FirstStartColumn = FirstStartColumn;
254 }
255 
256 void UnwrappedLineParser::parse() {
257   IndexedTokenSource TokenSource(AllTokens);
258   Line->FirstStartColumn = FirstStartColumn;
259   do {
260     LLVM_DEBUG(llvm::dbgs() << "----\n");
261     reset();
262     Tokens = &TokenSource;
263     TokenSource.reset();
264 
265     readToken();
266     parseFile();
267 
268     // If we found an include guard then all preprocessor directives (other than
269     // the guard) are over-indented by one.
270     if (IncludeGuard == IG_Found)
271       for (auto &Line : Lines)
272         if (Line.InPPDirective && Line.Level > 0)
273           --Line.Level;
274 
275     // Create line with eof token.
276     pushToken(FormatTok);
277     addUnwrappedLine();
278 
279     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280                                                   E = Lines.end();
281          I != E; ++I) {
282       Callback.consumeUnwrappedLine(*I);
283     }
284     Callback.finishRun();
285     Lines.clear();
286     while (!PPLevelBranchIndex.empty() &&
287            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290     }
291     if (!PPLevelBranchIndex.empty()) {
292       ++PPLevelBranchIndex.back();
293       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295     }
296   } while (!PPLevelBranchIndex.empty());
297 }
298 
299 void UnwrappedLineParser::parseFile() {
300   // The top-level context in a file always has declarations, except for pre-
301   // processor directives and JavaScript files.
302   bool MustBeDeclaration =
303       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305                                           MustBeDeclaration);
306   if (Style.Language == FormatStyle::LK_TextProto)
307     parseBracedList();
308   else
309     parseLevel(/*HasOpeningBrace=*/false);
310   // Make sure to format the remaining tokens.
311   //
312   // LK_TextProto is special since its top-level is parsed as the body of a
313   // braced list, which does not necessarily have natural line separators such
314   // as a semicolon. Comments after the last entry that have been determined to
315   // not belong to that line, as in:
316   //   key: value
317   //   // endfile comment
318   // do not have a chance to be put on a line of their own until this point.
319   // Here we add this newline before end-of-file comments.
320   if (Style.Language == FormatStyle::LK_TextProto &&
321       !CommentsBeforeNextToken.empty())
322     addUnwrappedLine();
323   flushComments(true);
324   addUnwrappedLine();
325 }
326 
327 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328   do {
329     switch (FormatTok->Tok.getKind()) {
330     case tok::l_brace:
331       return;
332     default:
333       if (FormatTok->is(Keywords.kw_where)) {
334         addUnwrappedLine();
335         nextToken();
336         parseCSharpGenericTypeConstraint();
337         break;
338       }
339       nextToken();
340       break;
341     }
342   } while (!eof());
343 }
344 
345 void UnwrappedLineParser::parseCSharpAttribute() {
346   int UnpairedSquareBrackets = 1;
347   do {
348     switch (FormatTok->Tok.getKind()) {
349     case tok::r_square:
350       nextToken();
351       --UnpairedSquareBrackets;
352       if (UnpairedSquareBrackets == 0) {
353         addUnwrappedLine();
354         return;
355       }
356       break;
357     case tok::l_square:
358       ++UnpairedSquareBrackets;
359       nextToken();
360       break;
361     default:
362       nextToken();
363       break;
364     }
365   } while (!eof());
366 }
367 
// Parses a sequence of structural elements until eof is reached or, if
// \p HasOpeningBrace is set, until the closing brace of the enclosing block
// is the current token (the brace itself is left for the caller).
//
// Macro block begin/end tokens are treated like '{' and '}' respectively.
void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  // Set once the first case/default label of this level has been seen so that
  // the level is raised for labels at most once.
  bool SwitchLabelEncountered = false;
  do {
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin) {
      kind = tok::l_brace;
    } else if (FormatTok->getType() == TT_MacroBlockEnd) {
      kind = tok::r_brace;
    }

    switch (kind) {
    case tok::comment:
      // A comment at this level becomes a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      // Note: 'continue' in a do/while jumps to the loop condition.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace)
        return;
      // Unmatched closing brace: consume it and put it on its own line.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Peek past any comments to see what follows 'default', then rewind.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
      } while (Next && Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next && Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        // Consume the '[' here; parseCSharpAttribute() handles the rest.
        nextToken();
        parseCSharpAttribute();
        break;
      }
      LLVM_FALLTHROUGH;
    default:
      parseStructuralElement();
      break;
    }
  } while (!eof());
}
439 
// Classifies the brace the parser currently stands on, and every brace nested
// inside it, as either a block (BK_Block) or a braced initializer list
// (BK_BracedInit).
//
// The function scans forward from the current '{' until the brace stack is
// empty again or eof is hit, deciding each pair mostly by looking at the token
// that follows the closing brace. Afterwards the token source is rewound to
// where it was on entry, so no tokens are consumed from the caller's view.
//
// \p ExpectClassBody: when set, a ';' directly after the outermost closing
// brace does not count as evidence for a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->Tok.is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    // NOTE(review): ReadTokens is only ever incremented in this function and
    // never read.
    unsigned ReadTokens = 0;
    do {
      NextTok = Tokens->getNextToken();
      ++ReadTokens;
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
        // Note: if neither branch below matches, the token keeps whatever
        // BlockKind it already had.
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->BlockKind = BK_BracedInit;
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->BlockKind = BK_Block;
      } else {
        Tok->BlockKind = BK_Unknown;
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      // A closing brace without a recorded opening brace is ignored.
      if (LBraceStack.empty())
        break;
      if (LBraceStack.back()->BlockKind == BK_Unknown) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.  Note that
          // regardless how we mark inner braces here, we will overwrite the
          // BlockKind later if we parse a braced list (where all blocks
          // inside are by default braced lists), or when we explicitly detect
          // blocks (for example while parsing lambdas).
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              (Style.Language == FormatStyle::LK_JavaScript &&
               NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                Keywords.kw_as)) ||
              (Style.isCpp() && NextTok->is(tok::l_paren)) ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis) ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
              (NextTok->is(tok::semi) &&
               (!ExpectClassBody || LBraceStack.size() != 1)) ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);
          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ++ReadTokens;
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the verdict on both the closing and the matching opening
        // brace.
        if (ProbablyBracedList) {
          Tok->BlockKind = BK_BracedInit;
          LBraceStack.back()->BlockKind = BK_BracedInit;
        } else {
          Tok->BlockKind = BK_Block;
          LBraceStack.back()->BlockKind = BK_Block;
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only identifiers annotated as statement macros count as evidence for
      // a block.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-undecided brace marks it as a
      // block.
      if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
        LBraceStack.back()->BlockKind = BK_Block;
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
    if (LBraceStack[i]->BlockKind == BK_Unknown)
      LBraceStack[i]->BlockKind = BK_Block;
  }

  // Rewind so that the caller continues from where it was.
  FormatTok = Tokens->setPosition(StoredPosition);
}
566 
// Mixes the std::hash value of \p v into the running hash \p seed, in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
572 
573 size_t UnwrappedLineParser::computePPHash() const {
574   size_t h = 0;
575   for (const auto &i : PPStack) {
576     hash_combine(h, size_t(i.Kind));
577     hash_combine(h, i.Line);
578   }
579   return h;
580 }
581 
// Parses a block that starts at the current '{' (or macro block begin token)
// up to and including its closing token.
//
// \p MustBeDeclaration is the declaration state applied to lines inside the
// block. \p AddLevel controls whether the contents are nested one level
// deeper. \p MunchSemi controls whether a ';' directly after the closing
// brace is consumed as well.
//
// If the preprocessor branch state is the same at both braces, the opening
// and closing lines are linked to each other by index.
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
                                     bool MunchSemi) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->BlockKind = BK_Block;

  // Remember the preprocessor branch state at the opening brace.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);

  // A macro block begin may carry an argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Sampled before addUnwrappedLine() and subtracted from the index below.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevel)
    ++Line->Level;
  parseLevel(/*HasOpeningBrace=*/true);

  // Note: on this early exit the line level is not reset to InitialLevel.
  if (eof())
    return;

  // parseLevel() stopped on something other than the expected closing token.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->BlockKind = BK_Block;
    return;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();
  Line->Level = InitialLevel;

  // Only link the two lines when both braces were seen under the same
  // preprocessor branch state.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }
}
642 
643 static bool isGoogScope(const UnwrappedLine &Line) {
644   // FIXME: Closure-library specific stuff should not be hard-coded but be
645   // configurable.
646   if (Line.Tokens.size() < 4)
647     return false;
648   auto I = Line.Tokens.begin();
649   if (I->Tok->TokenText != "goog")
650     return false;
651   ++I;
652   if (I->Tok->isNot(tok::period))
653     return false;
654   ++I;
655   if (I->Tok->TokenText != "scope")
656     return false;
657   ++I;
658   return I->Tok->is(tok::l_paren);
659 }
660 
661 static bool isIIFE(const UnwrappedLine &Line,
662                    const AdditionalKeywords &Keywords) {
663   // Look for the start of an immediately invoked anonymous function.
664   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
665   // This is commonly done in JavaScript to create a new, anonymous scope.
666   // Example: (function() { ... })()
667   if (Line.Tokens.size() < 3)
668     return false;
669   auto I = Line.Tokens.begin();
670   if (I->Tok->isNot(tok::l_paren))
671     return false;
672   ++I;
673   if (I->Tok->isNot(Keywords.kw_function))
674     return false;
675   ++I;
676   return I->Tok->is(tok::l_paren);
677 }
678 
679 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
680                                    const FormatToken &InitialToken) {
681   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
682     return Style.BraceWrapping.AfterNamespace;
683   if (InitialToken.is(tok::kw_class))
684     return Style.BraceWrapping.AfterClass;
685   if (InitialToken.is(tok::kw_union))
686     return Style.BraceWrapping.AfterUnion;
687   if (InitialToken.is(tok::kw_struct))
688     return Style.BraceWrapping.AfterStruct;
689   return false;
690 }
691 
692 void UnwrappedLineParser::parseChildBlock() {
693   FormatTok->BlockKind = BK_Block;
694   nextToken();
695   {
696     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
697                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
698     ScopedLineState LineState(*this);
699     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
700                                             /*MustBeDeclaration=*/false);
701     Line->Level += SkipIndent ? 0 : 1;
702     parseLevel(/*HasOpeningBrace=*/true);
703     flushComments(isOnNewLine(*FormatTok));
704     Line->Level -= SkipIndent ? 0 : 1;
705   }
706   nextToken();
707 }
708 
709 void UnwrappedLineParser::parsePPDirective() {
710   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
711   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
712 
713   nextToken();
714 
715   if (!FormatTok->Tok.getIdentifierInfo()) {
716     parsePPUnknown();
717     return;
718   }
719 
720   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
721   case tok::pp_define:
722     parsePPDefine();
723     return;
724   case tok::pp_if:
725     parsePPIf(/*IfDef=*/false);
726     break;
727   case tok::pp_ifdef:
728   case tok::pp_ifndef:
729     parsePPIf(/*IfDef=*/true);
730     break;
731   case tok::pp_else:
732     parsePPElse();
733     break;
734   case tok::pp_elif:
735     parsePPElIf();
736     break;
737   case tok::pp_endif:
738     parsePPEndIf();
739     break;
740   default:
741     parsePPUnknown();
742     break;
743   }
744 }
745 
746 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
747   size_t Line = CurrentLines->size();
748   if (CurrentLines == &PreprocessorDirectives)
749     Line += Lines.size();
750 
751   if (Unreachable ||
752       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
753     PPStack.push_back({PP_Unreachable, Line});
754   else
755     PPStack.push_back({PP_Conditional, Line});
756 }
757 
758 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
759   ++PPBranchLevel;
760   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
761   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
762     PPLevelBranchIndex.push_back(0);
763     PPLevelBranchCount.push_back(0);
764   }
765   PPChainBranchIndex.push(0);
766   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
767   conditionalCompilationCondition(Unreachable || Skip);
768 }
769 
770 void UnwrappedLineParser::conditionalCompilationAlternative() {
771   if (!PPStack.empty())
772     PPStack.pop_back();
773   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
774   if (!PPChainBranchIndex.empty())
775     ++PPChainBranchIndex.top();
776   conditionalCompilationCondition(
777       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
778       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
779 }
780 
781 void UnwrappedLineParser::conditionalCompilationEnd() {
782   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
783   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
784     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
785       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
786     }
787   }
788   // Guard against #endif's without #if.
789   if (PPBranchLevel > -1)
790     --PPBranchLevel;
791   if (!PPChainBranchIndex.empty())
792     PPChainBranchIndex.pop();
793   if (!PPStack.empty())
794     PPStack.pop_back();
795 }
796 
797 void UnwrappedLineParser::parsePPIf(bool IfDef) {
798   bool IfNDef = FormatTok->is(tok::pp_ifndef);
799   nextToken();
800   bool Unreachable = false;
801   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
802     Unreachable = true;
803   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
804     Unreachable = true;
805   conditionalCompilationStart(Unreachable);
806   FormatToken *IfCondition = FormatTok;
807   // If there's a #ifndef on the first line, and the only lines before it are
808   // comments, it could be an include guard.
809   bool MaybeIncludeGuard = IfNDef;
810   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
811     for (auto &Line : Lines) {
812       if (!Line.Tokens.front().Tok->is(tok::comment)) {
813         MaybeIncludeGuard = false;
814         IncludeGuard = IG_Rejected;
815         break;
816       }
817     }
818   --PPBranchLevel;
819   parsePPUnknown();
820   ++PPBranchLevel;
821   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
822     IncludeGuard = IG_IfNdefed;
823     IncludeGuardToken = IfCondition;
824   }
825 }
826 
827 void UnwrappedLineParser::parsePPElse() {
828   // If a potential include guard has an #else, it's not an include guard.
829   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
830     IncludeGuard = IG_Rejected;
831   conditionalCompilationAlternative();
832   if (PPBranchLevel > -1)
833     --PPBranchLevel;
834   parsePPUnknown();
835   ++PPBranchLevel;
836 }
837 
838 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
839 
840 void UnwrappedLineParser::parsePPEndIf() {
841   conditionalCompilationEnd();
842   parsePPUnknown();
843   // If the #endif of a potential include guard is the last thing in the file,
844   // then we found an include guard.
845   unsigned TokenPosition = Tokens->getPosition();
846   FormatToken *PeekNext = AllTokens[TokenPosition];
847   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
848       PeekNext->is(tok::eof) &&
849       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
850     IncludeGuard = IG_Found;
851 }
852 
853 void UnwrappedLineParser::parsePPDefine() {
854   nextToken();
855 
856   if (!FormatTok->Tok.getIdentifierInfo()) {
857     IncludeGuard = IG_Rejected;
858     IncludeGuardToken = nullptr;
859     parsePPUnknown();
860     return;
861   }
862 
863   if (IncludeGuard == IG_IfNdefed &&
864       IncludeGuardToken->TokenText == FormatTok->TokenText) {
865     IncludeGuard = IG_Defined;
866     IncludeGuardToken = nullptr;
867     for (auto &Line : Lines) {
868       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
869         IncludeGuard = IG_Rejected;
870         break;
871       }
872     }
873   }
874 
875   nextToken();
876   if (FormatTok->Tok.getKind() == tok::l_paren &&
877       FormatTok->WhitespaceRange.getBegin() ==
878           FormatTok->WhitespaceRange.getEnd()) {
879     parseParens();
880   }
881   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
882     Line->Level += PPBranchLevel + 1;
883   addUnwrappedLine();
884   ++Line->Level;
885 
886   // Errors during a preprocessor directive can only affect the layout of the
887   // preprocessor directive, and thus we ignore them. An alternative approach
888   // would be to use the same approach we use on the file level (no
889   // re-indentation if there was a structural error) within the macro
890   // definition.
891   parseFile();
892 }
893 
894 void UnwrappedLineParser::parsePPUnknown() {
895   do {
896     nextToken();
897   } while (!eof());
898   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
899     Line->Level += PPBranchLevel + 1;
900   addUnwrappedLine();
901 }
902 
903 // Here we blacklist certain tokens that are not usually the first token in an
904 // unwrapped line. This is used in attempt to distinguish macro calls without
905 // trailing semicolons from other constructs split to several lines.
906 static bool tokenCanStartNewLine(const FormatToken &Tok) {
907   // Semicolon can be a null-statement, l_square can be a start of a macro or
908   // a C++11 attribute, but this doesn't seem to be common.
909   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
910          Tok.isNot(TT_AttributeSquare) &&
911          // Tokens that can only be used as binary operators and a part of
912          // overloaded operator names.
913          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
914          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
915          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
916          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
917          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
918          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
919          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
920          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
921          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
922          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
923          Tok.isNot(tok::lesslessequal) &&
924          // Colon is used in labels, base class lists, initializer lists,
925          // range-based for loops, ternary operator, but should never be the
926          // first token in an unwrapped line.
927          Tok.isNot(tok::colon) &&
928          // 'noexcept' is a trailing annotation.
929          Tok.isNot(tok::kw_noexcept);
930 }
931 
932 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
933                           const FormatToken *FormatTok) {
934   // FIXME: This returns true for C/C++ keywords like 'struct'.
935   return FormatTok->is(tok::identifier) &&
936          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
937           !FormatTok->isOneOf(
938               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
939               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
940               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
941               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
942               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
943               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
944               Keywords.kw_from));
945 }
946 
947 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
948                                  const FormatToken *FormatTok) {
949   return FormatTok->Tok.isLiteral() ||
950          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
951          mustBeJSIdent(Keywords, FormatTok);
952 }
953 
954 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
955 // when encountered after a value (see mustBeJSIdentOrValue).
956 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
957                            const FormatToken *FormatTok) {
958   return FormatTok->isOneOf(
959       tok::kw_return, Keywords.kw_yield,
960       // conditionals
961       tok::kw_if, tok::kw_else,
962       // loops
963       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
964       // switch/case
965       tok::kw_switch, tok::kw_case,
966       // exceptions
967       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
968       // declaration
969       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
970       Keywords.kw_async, Keywords.kw_function,
971       // import/export
972       Keywords.kw_import, tok::kw_export);
973 }
974 
975 // readTokenWithJavaScriptASI reads the next token and terminates the current
976 // line if JavaScript Automatic Semicolon Insertion must
977 // happen between the current token and the next token.
978 //
979 // This method is conservative - it cannot cover all edge cases of JavaScript,
980 // but only aims to correctly handle certain well known cases. It *must not*
981 // return true in speculative cases.
982 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
983   FormatToken *Previous = FormatTok;
984   readToken();
985   FormatToken *Next = FormatTok;
986 
987   bool IsOnSameLine =
988       CommentsBeforeNextToken.empty()
989           ? Next->NewlinesBefore == 0
990           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
991   if (IsOnSameLine)
992     return;
993 
994   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
995   bool PreviousStartsTemplateExpr =
996       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
997   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
998     // If the line contains an '@' sign, the previous token might be an
999     // annotation, which can precede another identifier/value.
1000     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1001                               [](UnwrappedLineNode &LineNode) {
1002                                 return LineNode.Tok->is(tok::at);
1003                               }) != Line->Tokens.end();
1004     if (HasAt)
1005       return;
1006   }
1007   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1008     return addUnwrappedLine();
1009   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1010   bool NextEndsTemplateExpr =
1011       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1012   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1013       (PreviousMustBeValue ||
1014        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1015                          tok::minusminus)))
1016     return addUnwrappedLine();
1017   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1018       isJSDeclOrStmt(Keywords, Next))
1019     return addUnwrappedLine();
1020 }
1021 
/// Parses one structural element starting at the current token and emits the
/// unwrapped line(s) it consists of.
///
/// The first switch dispatches on the leading token to dedicated parsers for
/// constructs that are recognizable from their first token. Everything that is
/// not fully handled there falls into the loop below, which consumes tokens
/// one at a time until a construct that ends the element has been parsed or
/// eof() is reached.
///
/// Must not be called with the current token being an l_brace.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: "include" optionally followed by a string literal forms a line
  // of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: constructs identified by their first token. A 'return' means the
  // element was handled completely; a 'break' continues with the generic loop
  // below.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Mark every token up to the matching '}' as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the keyword is simply consumed; otherwise it
    // is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "<string>" {' is handled here; every other use of 'extern'
    // continues with the generic loop.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // If IndentExternBlock is unset, the block is wrapped and indented
        // according to BraceWrapping.AfterExternBlock; otherwise it is
        // indented only for IEBS_Indent.
        if (!Style.IndentExternBlock) {
          if (Style.BraceWrapping.AfterExternBlock) {
            addUnwrappedLine();
          }
          parseBlock(/*MustBeDeclaration=*/true,
                     /*AddLevel=*/Style.BraceWrapping.AfterExternBlock);
        } else {
          parseBlock(/*MustBeDeclaration=*/true,
                     /*AddLevel=*/Style.IndentExternBlock ==
                         FormatStyle::IEBS_Indent);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: import [public] "<string>" [;]
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // "signals:", "Q_SIGNALS:", "slots:", "Q_SLOTS:" form a line of their own
    // when directly followed by a colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: generic token-by-token parsing of a declaration or statement.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // "typedef NS_ENUM(...)" and the related macros are parsed as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the structural element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' itself is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', optionally followed by a type/identifier, a parenthesized list
      // and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      // C#: a 'where' in a declaration starts a generic type constraint on a
      // new line.
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token and restore the position afterwards.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // Labels and macro invocations are only recognized if the identifier is
      // the only token on the line so far, apart from an optional leading
      // comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is function-like or at least
        // five characters long, followed by a newline and by a token that can
        // start a line, is treated as a macro invocation on its own line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->BlockKind = BK_BracedInit;
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: "= <" starts a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1495 
1496 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1497   assert(FormatTok->is(tok::l_brace));
1498   if (!Style.isCSharp())
1499     return false;
1500   // See if it's a property accessor.
1501   if (FormatTok->Previous->isNot(tok::identifier))
1502     return false;
1503 
1504   // See if we are inside a property accessor.
1505   //
1506   // Record the current tokenPosition so that we can advance and
1507   // reset the current token. `Next` is not set yet so we need
1508   // another way to advance along the token stream.
1509   unsigned int StoredPosition = Tokens->getPosition();
1510   FormatToken *Tok = Tokens->getNextToken();
1511 
1512   // A trivial property accessor is of the form:
1513   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1514   // Track these as they do not require line breaks to be introduced.
1515   bool HasGetOrSet = false;
1516   bool IsTrivialPropertyAccessor = true;
1517   while (!eof()) {
1518     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1519                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1520                      Keywords.kw_set)) {
1521       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1522         HasGetOrSet = true;
1523       Tok = Tokens->getNextToken();
1524       continue;
1525     }
1526     if (Tok->isNot(tok::r_brace))
1527       IsTrivialPropertyAccessor = false;
1528     break;
1529   }
1530 
1531   if (!HasGetOrSet) {
1532     Tokens->setPosition(StoredPosition);
1533     return false;
1534   }
1535 
1536   // Try to parse the property accessor:
1537   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1538   Tokens->setPosition(StoredPosition);
1539   if (Style.BraceWrapping.AfterFunction == true)
1540     addUnwrappedLine();
1541   nextToken();
1542   do {
1543     switch (FormatTok->Tok.getKind()) {
1544     case tok::r_brace:
1545       nextToken();
1546       if (FormatTok->is(tok::equal)) {
1547         while (!eof() && FormatTok->isNot(tok::semi))
1548           nextToken();
1549         nextToken();
1550       }
1551       addUnwrappedLine();
1552       return true;
1553     case tok::l_brace:
1554       ++Line->Level;
1555       parseBlock(/*MustBeDeclaration=*/true);
1556       addUnwrappedLine();
1557       --Line->Level;
1558       break;
1559     case tok::equal:
1560       if (FormatTok->is(TT_JsFatArrow)) {
1561         ++Line->Level;
1562         do {
1563           nextToken();
1564         } while (!eof() && FormatTok->isNot(tok::semi));
1565         nextToken();
1566         addUnwrappedLine();
1567         --Line->Level;
1568         break;
1569       }
1570       nextToken();
1571       break;
1572     default:
1573       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1574           !IsTrivialPropertyAccessor) {
1575         // Non-trivial get/set needs to be on its own line.
1576         addUnwrappedLine();
1577       }
1578       nextToken();
1579     }
1580   } while (!eof());
1581 
1582   // Unreachable for well-formed code (paired '{' and '}').
1583   return true;
1584 }
1585 
1586 bool UnwrappedLineParser::tryToParseLambda() {
1587   if (!Style.isCpp()) {
1588     nextToken();
1589     return false;
1590   }
1591   assert(FormatTok->is(tok::l_square));
1592   FormatToken &LSquare = *FormatTok;
1593   if (!tryToParseLambdaIntroducer())
1594     return false;
1595 
1596   bool SeenArrow = false;
1597 
1598   while (FormatTok->isNot(tok::l_brace)) {
1599     if (FormatTok->isSimpleTypeSpecifier()) {
1600       nextToken();
1601       continue;
1602     }
1603     switch (FormatTok->Tok.getKind()) {
1604     case tok::l_brace:
1605       break;
1606     case tok::l_paren:
1607       parseParens();
1608       break;
1609     case tok::amp:
1610     case tok::star:
1611     case tok::kw_const:
1612     case tok::comma:
1613     case tok::less:
1614     case tok::greater:
1615     case tok::identifier:
1616     case tok::numeric_constant:
1617     case tok::coloncolon:
1618     case tok::kw_class:
1619     case tok::kw_mutable:
1620     case tok::kw_noexcept:
1621     case tok::kw_template:
1622     case tok::kw_typename:
1623       nextToken();
1624       break;
1625     // Specialization of a template with an integer parameter can contain
1626     // arithmetic, logical, comparison and ternary operators.
1627     //
1628     // FIXME: This also accepts sequences of operators that are not in the scope
1629     // of a template argument list.
1630     //
1631     // In a C++ lambda a template type can only occur after an arrow. We use
1632     // this as an heuristic to distinguish between Objective-C expressions
1633     // followed by an `a->b` expression, such as:
1634     // ([obj func:arg] + a->b)
1635     // Otherwise the code below would parse as a lambda.
1636     //
1637     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1638     // explicit template lists: []<bool b = true && false>(U &&u){}
1639     case tok::plus:
1640     case tok::minus:
1641     case tok::exclaim:
1642     case tok::tilde:
1643     case tok::slash:
1644     case tok::percent:
1645     case tok::lessless:
1646     case tok::pipe:
1647     case tok::pipepipe:
1648     case tok::ampamp:
1649     case tok::caret:
1650     case tok::equalequal:
1651     case tok::exclaimequal:
1652     case tok::greaterequal:
1653     case tok::lessequal:
1654     case tok::question:
1655     case tok::colon:
1656     case tok::ellipsis:
1657     case tok::kw_true:
1658     case tok::kw_false:
1659       if (SeenArrow) {
1660         nextToken();
1661         break;
1662       }
1663       return true;
1664     case tok::arrow:
1665       // This might or might not actually be a lambda arrow (this could be an
1666       // ObjC method invocation followed by a dereferencing arrow). We might
1667       // reset this back to TT_Unknown in TokenAnnotator.
1668       FormatTok->setType(TT_LambdaArrow);
1669       SeenArrow = true;
1670       nextToken();
1671       break;
1672     default:
1673       return true;
1674     }
1675   }
1676   FormatTok->setType(TT_LambdaLBrace);
1677   LSquare.setType(TT_LambdaLSquare);
1678   parseChildBlock();
1679   return true;
1680 }
1681 
1682 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1683   const FormatToken *Previous = FormatTok->Previous;
1684   if (Previous &&
1685       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1686                          tok::kw_delete, tok::l_square) ||
1687        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1688        Previous->isSimpleTypeSpecifier())) {
1689     nextToken();
1690     return false;
1691   }
1692   nextToken();
1693   if (FormatTok->is(tok::l_square)) {
1694     return false;
1695   }
1696   parseSquare(/*LambdaIntroducer=*/true);
1697   return true;
1698 }
1699 
1700 void UnwrappedLineParser::tryToParseJSFunction() {
1701   assert(FormatTok->is(Keywords.kw_function) ||
1702          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1703   if (FormatTok->is(Keywords.kw_async))
1704     nextToken();
1705   // Consume "function".
1706   nextToken();
1707 
1708   // Consume * (generator function). Treat it like C++'s overloaded operators.
1709   if (FormatTok->is(tok::star)) {
1710     FormatTok->setType(TT_OverloadedOperator);
1711     nextToken();
1712   }
1713 
1714   // Consume function name.
1715   if (FormatTok->is(tok::identifier))
1716     nextToken();
1717 
1718   if (FormatTok->isNot(tok::l_paren))
1719     return;
1720 
1721   // Parse formal parameter list.
1722   parseParens();
1723 
1724   if (FormatTok->is(tok::colon)) {
1725     // Parse a type definition.
1726     nextToken();
1727 
1728     // Eat the type declaration. For braced inline object types, balance braces,
1729     // otherwise just parse until finding an l_brace for the function body.
1730     if (FormatTok->is(tok::l_brace))
1731       tryToParseBracedList();
1732     else
1733       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1734         nextToken();
1735   }
1736 
1737   if (FormatTok->is(tok::semi))
1738     return;
1739 
1740   parseChildBlock();
1741 }
1742 
1743 bool UnwrappedLineParser::tryToParseBracedList() {
1744   if (FormatTok->BlockKind == BK_Unknown)
1745     calculateBraceTypes();
1746   assert(FormatTok->BlockKind != BK_Unknown);
1747   if (FormatTok->BlockKind == BK_Block)
1748     return false;
1749   nextToken();
1750   parseBracedList();
1751   return true;
1752 }
1753 
// Parses the contents of a braced list whose opening token has already been
// consumed, up to and including the token of kind \p ClosingBraceKind.
//
// \p ContinueOnSemicolons: when false, a ';' (outside JavaScript) aborts the
// parse immediately; when true the ';' is skipped and parsing goes on. Either
// way the list is then reported as erroneous.
// \p IsEnum: when set and Style.AllowShortEnumsOnASingleLine is off, an
// unwrapped line is emitted after every ',' and before the closing token.
// \p ClosingBraceKind: the token kind that terminates the list.
//
// Returns true if the closing token was reached without hitting an
// unexpected ';'. Returns false on such a ';' or when the end of the file is
// reached first.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          bool IsEnum,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    if (Style.isCSharp()) {
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        // Fat arrows can be followed by simple expressions or by child blocks
        // in curly braces.
        if (FormatTok->is(tok::l_brace)) {
          parseChildBlock();
          continue;
        }
      }
    }
    if (Style.Language == FormatStyle::LK_JavaScript) {
      // Function expressions are parsed as a unit, including their body.
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        // Fat arrows can be followed by simple expressions or by child blocks
        // in curly braces.
        if (FormatTok->is(tok::l_brace)) {
          parseChildBlock();
          continue;
        }
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    // The closing token ends the list; it is consumed before returning.
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::caret:
      // A '^' directly followed by '{' has its braces parsed as a child
      // block.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        parseChildBlock();
      }
      break;
    case tok::l_square:
      // In C# square brackets are parsed as such; otherwise they may start a
      // lambda.
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->BlockKind = BK_BracedInit;
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // In Proto files '<' opens a nested list that is closed by '>'.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.Language == FormatStyle::LK_JavaScript) {
        nextToken();
        break;
      }
      // Anywhere else a ';' means this was not really a braced list. The ';'
      // is left unconsumed when bailing out.
      HasError = true;
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      // With short enums disabled every enumerator ends its own line.
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // Ran out of tokens before finding the closing token.
  return false;
}
1866 
1867 void UnwrappedLineParser::parseParens() {
1868   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1869   nextToken();
1870   do {
1871     switch (FormatTok->Tok.getKind()) {
1872     case tok::l_paren:
1873       parseParens();
1874       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1875         parseChildBlock();
1876       break;
1877     case tok::r_paren:
1878       nextToken();
1879       return;
1880     case tok::r_brace:
1881       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1882       return;
1883     case tok::l_square:
1884       tryToParseLambda();
1885       break;
1886     case tok::l_brace:
1887       if (!tryToParseBracedList())
1888         parseChildBlock();
1889       break;
1890     case tok::at:
1891       nextToken();
1892       if (FormatTok->Tok.is(tok::l_brace)) {
1893         nextToken();
1894         parseBracedList();
1895       }
1896       break;
1897     case tok::kw_class:
1898       if (Style.Language == FormatStyle::LK_JavaScript)
1899         parseRecord(/*ParseAsExpr=*/true);
1900       else
1901         nextToken();
1902       break;
1903     case tok::identifier:
1904       if (Style.Language == FormatStyle::LK_JavaScript &&
1905           (FormatTok->is(Keywords.kw_function) ||
1906            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1907         tryToParseJSFunction();
1908       else
1909         nextToken();
1910       break;
1911     default:
1912       nextToken();
1913       break;
1914     }
1915   } while (!eof());
1916 }
1917 
1918 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1919   if (!LambdaIntroducer) {
1920     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1921     if (tryToParseLambda())
1922       return;
1923   }
1924   do {
1925     switch (FormatTok->Tok.getKind()) {
1926     case tok::l_paren:
1927       parseParens();
1928       break;
1929     case tok::r_square:
1930       nextToken();
1931       return;
1932     case tok::r_brace:
1933       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1934       return;
1935     case tok::l_square:
1936       parseSquare();
1937       break;
1938     case tok::l_brace: {
1939       if (!tryToParseBracedList())
1940         parseChildBlock();
1941       break;
1942     }
1943     case tok::at:
1944       nextToken();
1945       if (FormatTok->Tok.is(tok::l_brace)) {
1946         nextToken();
1947         parseBracedList();
1948       }
1949       break;
1950     default:
1951       nextToken();
1952       break;
1953     }
1954   } while (!eof());
1955 }
1956 
1957 void UnwrappedLineParser::parseIfThenElse() {
1958   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1959   nextToken();
1960   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1961     nextToken();
1962   if (FormatTok->Tok.is(tok::l_paren))
1963     parseParens();
1964   // handle [[likely]] / [[unlikely]]
1965   if (FormatTok->is(tok::l_square))
1966     parseSquare();
1967   bool NeedsUnwrappedLine = false;
1968   if (FormatTok->Tok.is(tok::l_brace)) {
1969     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1970     parseBlock(/*MustBeDeclaration=*/false);
1971     if (Style.BraceWrapping.BeforeElse)
1972       addUnwrappedLine();
1973     else
1974       NeedsUnwrappedLine = true;
1975   } else {
1976     addUnwrappedLine();
1977     ++Line->Level;
1978     parseStructuralElement();
1979     --Line->Level;
1980   }
1981   if (FormatTok->Tok.is(tok::kw_else)) {
1982     nextToken();
1983     // handle [[likely]] / [[unlikely]]
1984     if (FormatTok->is(tok::l_square))
1985       parseSquare();
1986     if (FormatTok->Tok.is(tok::l_brace)) {
1987       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1988       parseBlock(/*MustBeDeclaration=*/false);
1989       addUnwrappedLine();
1990     } else if (FormatTok->Tok.is(tok::kw_if)) {
1991       parseIfThenElse();
1992     } else {
1993       addUnwrappedLine();
1994       ++Line->Level;
1995       parseStructuralElement();
1996       if (FormatTok->is(tok::eof))
1997         addUnwrappedLine();
1998       --Line->Level;
1999     }
2000   } else if (NeedsUnwrappedLine) {
2001     addUnwrappedLine();
2002   }
2003 }
2004 
2005 void UnwrappedLineParser::parseTryCatch() {
2006   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2007   nextToken();
2008   bool NeedsUnwrappedLine = false;
2009   if (FormatTok->is(tok::colon)) {
2010     // We are in a function try block, what comes is an initializer list.
2011     nextToken();
2012 
2013     // In case identifiers were removed by clang-tidy, what might follow is
2014     // multiple commas in sequence - before the first identifier.
2015     while (FormatTok->is(tok::comma))
2016       nextToken();
2017 
2018     while (FormatTok->is(tok::identifier)) {
2019       nextToken();
2020       if (FormatTok->is(tok::l_paren))
2021         parseParens();
2022 
2023       // In case identifiers were removed by clang-tidy, what might follow is
2024       // multiple commas in sequence - after the first identifier.
2025       while (FormatTok->is(tok::comma))
2026         nextToken();
2027     }
2028   }
2029   // Parse try with resource.
2030   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2031     parseParens();
2032   }
2033   if (FormatTok->is(tok::l_brace)) {
2034     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2035     parseBlock(/*MustBeDeclaration=*/false);
2036     if (Style.BraceWrapping.BeforeCatch) {
2037       addUnwrappedLine();
2038     } else {
2039       NeedsUnwrappedLine = true;
2040     }
2041   } else if (!FormatTok->is(tok::kw_catch)) {
2042     // The C++ standard requires a compound-statement after a try.
2043     // If there's none, we try to assume there's a structuralElement
2044     // and try to continue.
2045     addUnwrappedLine();
2046     ++Line->Level;
2047     parseStructuralElement();
2048     --Line->Level;
2049   }
2050   while (1) {
2051     if (FormatTok->is(tok::at))
2052       nextToken();
2053     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2054                              tok::kw___finally) ||
2055           ((Style.Language == FormatStyle::LK_Java ||
2056             Style.Language == FormatStyle::LK_JavaScript) &&
2057            FormatTok->is(Keywords.kw_finally)) ||
2058           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2059            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2060       break;
2061     nextToken();
2062     while (FormatTok->isNot(tok::l_brace)) {
2063       if (FormatTok->is(tok::l_paren)) {
2064         parseParens();
2065         continue;
2066       }
2067       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2068         return;
2069       nextToken();
2070     }
2071     NeedsUnwrappedLine = false;
2072     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2073     parseBlock(/*MustBeDeclaration=*/false);
2074     if (Style.BraceWrapping.BeforeCatch)
2075       addUnwrappedLine();
2076     else
2077       NeedsUnwrappedLine = true;
2078   }
2079   if (NeedsUnwrappedLine)
2080     addUnwrappedLine();
2081 }
2082 
2083 void UnwrappedLineParser::parseNamespace() {
2084   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2085          "'namespace' expected");
2086 
2087   const FormatToken &InitialToken = *FormatTok;
2088   nextToken();
2089   if (InitialToken.is(TT_NamespaceMacro)) {
2090     parseParens();
2091   } else {
2092     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2093                               tok::l_square)) {
2094       if (FormatTok->is(tok::l_square))
2095         parseSquare();
2096       else
2097         nextToken();
2098     }
2099   }
2100   if (FormatTok->Tok.is(tok::l_brace)) {
2101     if (ShouldBreakBeforeBrace(Style, InitialToken))
2102       addUnwrappedLine();
2103 
2104     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
2105                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2106                      DeclarationScopeStack.size() > 1);
2107     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
2108     // Munch the semicolon after a namespace. This is more common than one would
2109     // think. Putting the semicolon into its own line is very ugly.
2110     if (FormatTok->Tok.is(tok::semi))
2111       nextToken();
2112     addUnwrappedLine();
2113   }
2114   // FIXME: Add error handling.
2115 }
2116 
2117 void UnwrappedLineParser::parseNew() {
2118   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2119   nextToken();
2120 
2121   if (Style.isCSharp()) {
2122     do {
2123       if (FormatTok->is(tok::l_brace))
2124         parseBracedList();
2125 
2126       if (FormatTok->isOneOf(tok::semi, tok::comma))
2127         return;
2128 
2129       nextToken();
2130     } while (!eof());
2131   }
2132 
2133   if (Style.Language != FormatStyle::LK_Java)
2134     return;
2135 
2136   // In Java, we can parse everything up to the parens, which aren't optional.
2137   do {
2138     // There should not be a ;, { or } before the new's open paren.
2139     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2140       return;
2141 
2142     // Consume the parens.
2143     if (FormatTok->is(tok::l_paren)) {
2144       parseParens();
2145 
2146       // If there is a class body of an anonymous class, consume that as child.
2147       if (FormatTok->is(tok::l_brace))
2148         parseChildBlock();
2149       return;
2150     }
2151     nextToken();
2152   } while (!eof());
2153 }
2154 
2155 void UnwrappedLineParser::parseForOrWhileLoop() {
2156   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2157          "'for', 'while' or foreach macro expected");
2158   nextToken();
2159   // JS' for await ( ...
2160   if (Style.Language == FormatStyle::LK_JavaScript &&
2161       FormatTok->is(Keywords.kw_await))
2162     nextToken();
2163   if (FormatTok->Tok.is(tok::l_paren))
2164     parseParens();
2165   if (FormatTok->Tok.is(tok::l_brace)) {
2166     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2167     parseBlock(/*MustBeDeclaration=*/false);
2168     addUnwrappedLine();
2169   } else {
2170     addUnwrappedLine();
2171     ++Line->Level;
2172     parseStructuralElement();
2173     --Line->Level;
2174   }
2175 }
2176 
2177 void UnwrappedLineParser::parseDoWhile() {
2178   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2179   nextToken();
2180   if (FormatTok->Tok.is(tok::l_brace)) {
2181     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2182     parseBlock(/*MustBeDeclaration=*/false);
2183     if (Style.BraceWrapping.BeforeWhile)
2184       addUnwrappedLine();
2185   } else {
2186     addUnwrappedLine();
2187     ++Line->Level;
2188     parseStructuralElement();
2189     --Line->Level;
2190   }
2191 
2192   // FIXME: Add error handling.
2193   if (!FormatTok->Tok.is(tok::kw_while)) {
2194     addUnwrappedLine();
2195     return;
2196   }
2197 
2198   nextToken();
2199   parseStructuralElement();
2200 }
2201 
2202 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2203   nextToken();
2204   unsigned OldLineLevel = Line->Level;
2205   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2206     --Line->Level;
2207   if (LeftAlignLabel)
2208     Line->Level = 0;
2209   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2210       FormatTok->Tok.is(tok::l_brace)) {
2211     CompoundStatementIndenter Indenter(this, Line->Level,
2212                                        Style.BraceWrapping.AfterCaseLabel,
2213                                        Style.BraceWrapping.IndentBraces);
2214     parseBlock(/*MustBeDeclaration=*/false);
2215     if (FormatTok->Tok.is(tok::kw_break)) {
2216       if (Style.BraceWrapping.AfterControlStatement ==
2217           FormatStyle::BWACS_Always)
2218         addUnwrappedLine();
2219       parseStructuralElement();
2220     }
2221     addUnwrappedLine();
2222   } else {
2223     if (FormatTok->is(tok::semi))
2224       nextToken();
2225     addUnwrappedLine();
2226   }
2227   Line->Level = OldLineLevel;
2228   if (FormatTok->isNot(tok::l_brace)) {
2229     parseStructuralElement();
2230     addUnwrappedLine();
2231   }
2232 }
2233 
2234 void UnwrappedLineParser::parseCaseLabel() {
2235   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2236   // FIXME: fix handling of complex expressions here.
2237   do {
2238     nextToken();
2239   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2240   parseLabel();
2241 }
2242 
2243 void UnwrappedLineParser::parseSwitch() {
2244   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2245   nextToken();
2246   if (FormatTok->Tok.is(tok::l_paren))
2247     parseParens();
2248   if (FormatTok->Tok.is(tok::l_brace)) {
2249     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2250     parseBlock(/*MustBeDeclaration=*/false);
2251     addUnwrappedLine();
2252   } else {
2253     addUnwrappedLine();
2254     ++Line->Level;
2255     parseStructuralElement();
2256     --Line->Level;
2257   }
2258 }
2259 
2260 void UnwrappedLineParser::parseAccessSpecifier() {
2261   nextToken();
2262   // Understand Qt's slots.
2263   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2264     nextToken();
2265   // Otherwise, we don't know what it is, and we'd better keep the next token.
2266   if (FormatTok->Tok.is(tok::colon))
2267     nextToken();
2268   addUnwrappedLine();
2269 }
2270 
2271 bool UnwrappedLineParser::parseEnum() {
2272   // Won't be 'enum' for NS_ENUMs.
2273   if (FormatTok->Tok.is(tok::kw_enum))
2274     nextToken();
2275 
2276   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2277   // declarations. An "enum" keyword followed by a colon would be a syntax
2278   // error and thus assume it is just an identifier.
2279   if (Style.Language == FormatStyle::LK_JavaScript &&
2280       FormatTok->isOneOf(tok::colon, tok::question))
2281     return false;
2282 
2283   // In protobuf, "enum" can be used as a field name.
2284   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2285     return false;
2286 
2287   // Eat up enum class ...
2288   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2289     nextToken();
2290 
2291   while (FormatTok->Tok.getIdentifierInfo() ||
2292          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2293                             tok::greater, tok::comma, tok::question)) {
2294     nextToken();
2295     // We can have macros or attributes in between 'enum' and the enum name.
2296     if (FormatTok->is(tok::l_paren))
2297       parseParens();
2298     if (FormatTok->is(tok::identifier)) {
2299       nextToken();
2300       // If there are two identifiers in a row, this is likely an elaborate
2301       // return type. In Java, this can be "implements", etc.
2302       if (Style.isCpp() && FormatTok->is(tok::identifier))
2303         return false;
2304     }
2305   }
2306 
2307   // Just a declaration or something is wrong.
2308   if (FormatTok->isNot(tok::l_brace))
2309     return true;
2310   FormatTok->BlockKind = BK_Block;
2311 
2312   if (Style.Language == FormatStyle::LK_Java) {
2313     // Java enums are different.
2314     parseJavaEnumBody();
2315     return true;
2316   }
2317   if (Style.Language == FormatStyle::LK_Proto) {
2318     parseBlock(/*MustBeDeclaration=*/true);
2319     return true;
2320   }
2321 
2322   if (!Style.AllowShortEnumsOnASingleLine)
2323     addUnwrappedLine();
2324   // Parse enum body.
2325   nextToken();
2326   if (!Style.AllowShortEnumsOnASingleLine) {
2327     addUnwrappedLine();
2328     Line->Level += 1;
2329   }
2330   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2331                                    /*IsEnum=*/true);
2332   if (!Style.AllowShortEnumsOnASingleLine)
2333     Line->Level -= 1;
2334   if (HasError) {
2335     if (FormatTok->is(tok::semi))
2336       nextToken();
2337     addUnwrappedLine();
2338   }
2339   return true;
2340 
2341   // There is no addUnwrappedLine() here so that we fall through to parsing a
2342   // structural element afterwards. Thus, in "enum A {} n, m;",
2343   // "} n, m;" will end up in one unwrapped line.
2344 }
2345 
2346 void UnwrappedLineParser::parseJavaEnumBody() {
2347   // Determine whether the enum is simple, i.e. does not have a semicolon or
2348   // constants with class bodies. Simple enums can be formatted like braced
2349   // lists, contracted to a single line, etc.
2350   unsigned StoredPosition = Tokens->getPosition();
2351   bool IsSimple = true;
2352   FormatToken *Tok = Tokens->getNextToken();
2353   while (Tok) {
2354     if (Tok->is(tok::r_brace))
2355       break;
2356     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2357       IsSimple = false;
2358       break;
2359     }
2360     // FIXME: This will also mark enums with braces in the arguments to enum
2361     // constants as "not simple". This is probably fine in practice, though.
2362     Tok = Tokens->getNextToken();
2363   }
2364   FormatTok = Tokens->setPosition(StoredPosition);
2365 
2366   if (IsSimple) {
2367     nextToken();
2368     parseBracedList();
2369     addUnwrappedLine();
2370     return;
2371   }
2372 
2373   // Parse the body of a more complex enum.
2374   // First add a line for everything up to the "{".
2375   nextToken();
2376   addUnwrappedLine();
2377   ++Line->Level;
2378 
2379   // Parse the enum constants.
2380   while (FormatTok) {
2381     if (FormatTok->is(tok::l_brace)) {
2382       // Parse the constant's class body.
2383       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2384                  /*MunchSemi=*/false);
2385     } else if (FormatTok->is(tok::l_paren)) {
2386       parseParens();
2387     } else if (FormatTok->is(tok::comma)) {
2388       nextToken();
2389       addUnwrappedLine();
2390     } else if (FormatTok->is(tok::semi)) {
2391       nextToken();
2392       addUnwrappedLine();
2393       break;
2394     } else if (FormatTok->is(tok::r_brace)) {
2395       addUnwrappedLine();
2396       break;
2397     } else {
2398       nextToken();
2399     }
2400   }
2401 
2402   // Parse the class body after the enum's ";" if any.
2403   parseLevel(/*HasOpeningBrace=*/true);
2404   nextToken();
2405   --Line->Level;
2406   addUnwrappedLine();
2407 }
2408 
// Parses a record-like declaration (the caller is positioned on its
// introducing keyword, e.g. 'class') up to and including its body.
//
// \p ParseAsExpr: when set, the body is parsed as a child block of the
// current line (parseParens() does this for JavaScript class expressions)
// instead of as a declaration block.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Keep the introducing token; it decides the brace wrapping further down.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.Language == FormatStyle::LK_JavaScript &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier containing at least one non-upper-case character is
    // taken to be a real name rather than a macro; only macro-like tokens may
    // be followed by an argument list or attribute below.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip what follows the ':' or '<' until the body, a ';' or the end of
    // the file is reached.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        // Braced lists on the way are parsed and skipped; the first '{' that
        // is a real block is the record body.
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      // A ';' means there is no body to parse.
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // A C# 'where' clause starts a new unwrapped line and is parsed
        // separately.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
2493 
2494 void UnwrappedLineParser::parseObjCMethod() {
2495   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2496          "'(' or identifier expected.");
2497   do {
2498     if (FormatTok->Tok.is(tok::semi)) {
2499       nextToken();
2500       addUnwrappedLine();
2501       return;
2502     } else if (FormatTok->Tok.is(tok::l_brace)) {
2503       if (Style.BraceWrapping.AfterFunction)
2504         addUnwrappedLine();
2505       parseBlock(/*MustBeDeclaration=*/false);
2506       addUnwrappedLine();
2507       return;
2508     } else {
2509       nextToken();
2510     }
2511   } while (!eof());
2512 }
2513 
2514 void UnwrappedLineParser::parseObjCProtocolList() {
2515   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2516   do {
2517     nextToken();
2518     // Early exit in case someone forgot a close angle.
2519     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2520         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2521       return;
2522   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2523   nextToken(); // Skip '>'.
2524 }
2525 
2526 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2527   do {
2528     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2529       nextToken();
2530       addUnwrappedLine();
2531       break;
2532     }
2533     if (FormatTok->is(tok::l_brace)) {
2534       parseBlock(/*MustBeDeclaration=*/false);
2535       // In ObjC interfaces, nothing should be following the "}".
2536       addUnwrappedLine();
2537     } else if (FormatTok->is(tok::r_brace)) {
2538       // Ignore stray "}". parseStructuralElement doesn't consume them.
2539       nextToken();
2540       addUnwrappedLine();
2541     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2542       nextToken();
2543       parseObjCMethod();
2544     } else {
2545       parseStructuralElement();
2546     }
2547   } while (!eof());
2548 }
2549 
2550 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2551   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2552          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2553   nextToken();
2554   nextToken(); // interface name
2555 
2556   // @interface can be followed by a lightweight generic
2557   // specialization list, then either a base class or a category.
2558   if (FormatTok->Tok.is(tok::less)) {
2559     // Unlike protocol lists, generic parameterizations support
2560     // nested angles:
2561     //
2562     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2563     //     NSObject <NSCopying, NSSecureCoding>
2564     //
2565     // so we need to count how many open angles we have left.
2566     unsigned NumOpenAngles = 1;
2567     do {
2568       nextToken();
2569       // Early exit in case someone forgot a close angle.
2570       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2571           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2572         break;
2573       if (FormatTok->Tok.is(tok::less))
2574         ++NumOpenAngles;
2575       else if (FormatTok->Tok.is(tok::greater)) {
2576         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2577         --NumOpenAngles;
2578       }
2579     } while (!eof() && NumOpenAngles != 0);
2580     nextToken(); // Skip '>'.
2581   }
2582   if (FormatTok->Tok.is(tok::colon)) {
2583     nextToken();
2584     nextToken(); // base class name
2585   } else if (FormatTok->Tok.is(tok::l_paren))
2586     // Skip category, if present.
2587     parseParens();
2588 
2589   if (FormatTok->Tok.is(tok::less))
2590     parseObjCProtocolList();
2591 
2592   if (FormatTok->Tok.is(tok::l_brace)) {
2593     if (Style.BraceWrapping.AfterObjCDeclaration)
2594       addUnwrappedLine();
2595     parseBlock(/*MustBeDeclaration=*/true);
2596   }
2597 
2598   // With instance variables, this puts '}' on its own line.  Without instance
2599   // variables, this ends the @interface line.
2600   addUnwrappedLine();
2601 
2602   parseObjCUntilAtEnd();
2603 }
2604 
2605 // Returns true for the declaration/definition form of @protocol,
2606 // false for the expression form.
2607 bool UnwrappedLineParser::parseObjCProtocol() {
2608   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2609   nextToken();
2610 
2611   if (FormatTok->is(tok::l_paren))
2612     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2613     return false;
2614 
2615   // The definition/declaration form,
2616   // @protocol Foo
2617   // - (int)someMethod;
2618   // @end
2619 
2620   nextToken(); // protocol name
2621 
2622   if (FormatTok->Tok.is(tok::less))
2623     parseObjCProtocolList();
2624 
2625   // Check for protocol declaration.
2626   if (FormatTok->Tok.is(tok::semi)) {
2627     nextToken();
2628     addUnwrappedLine();
2629     return true;
2630   }
2631 
2632   addUnwrappedLine();
2633   parseObjCUntilAtEnd();
2634   return true;
2635 }
2636 
2637 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2638   bool IsImport = FormatTok->is(Keywords.kw_import);
2639   assert(IsImport || FormatTok->is(tok::kw_export));
2640   nextToken();
2641 
2642   // Consume the "default" in "export default class/function".
2643   if (FormatTok->is(tok::kw_default))
2644     nextToken();
2645 
2646   // Consume "async function", "function" and "default function", so that these
2647   // get parsed as free-standing JS functions, i.e. do not require a trailing
2648   // semicolon.
2649   if (FormatTok->is(Keywords.kw_async))
2650     nextToken();
2651   if (FormatTok->is(Keywords.kw_function)) {
2652     nextToken();
2653     return;
2654   }
2655 
2656   // For imports, `export *`, `export {...}`, consume the rest of the line up
2657   // to the terminating `;`. For everything else, just return and continue
2658   // parsing the structural element, i.e. the declaration or expression for
2659   // `export default`.
2660   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2661       !FormatTok->isStringLiteral())
2662     return;
2663 
2664   while (!eof()) {
2665     if (FormatTok->is(tok::semi))
2666       return;
2667     if (Line->Tokens.empty()) {
2668       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2669       // import statement should terminate.
2670       return;
2671     }
2672     if (FormatTok->is(tok::l_brace)) {
2673       FormatTok->BlockKind = BK_Block;
2674       nextToken();
2675       parseBracedList();
2676     } else {
2677       nextToken();
2678     }
2679   }
2680 }
2681 
2682 void UnwrappedLineParser::parseStatementMacro() {
2683   nextToken();
2684   if (FormatTok->is(tok::l_paren))
2685     parseParens();
2686   if (FormatTok->is(tok::semi))
2687     nextToken();
2688   addUnwrappedLine();
2689 }
2690 
2691 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2692                                                  StringRef Prefix = "") {
2693   llvm::dbgs() << Prefix << "Line(" << Line.Level
2694                << ", FSC=" << Line.FirstStartColumn << ")"
2695                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2696   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2697                                                     E = Line.Tokens.end();
2698        I != E; ++I) {
2699     llvm::dbgs() << I->Tok->Tok.getName() << "["
2700                  << "T=" << I->Tok->getType()
2701                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2702   }
2703   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2704                                                     E = Line.Tokens.end();
2705        I != E; ++I) {
2706     const UnwrappedLineNode &Node = *I;
2707     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2708              I = Node.Children.begin(),
2709              E = Node.Children.end();
2710          I != E; ++I) {
2711       printDebugInfo(*I, "\nChild: ");
2712     }
2713   }
2714   llvm::dbgs() << "\n";
2715 }
2716 
2717 void UnwrappedLineParser::addUnwrappedLine() {
2718   if (Line->Tokens.empty())
2719     return;
2720   LLVM_DEBUG({
2721     if (CurrentLines == &Lines)
2722       printDebugInfo(*Line);
2723   });
2724   CurrentLines->push_back(std::move(*Line));
2725   Line->Tokens.clear();
2726   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2727   Line->FirstStartColumn = 0;
2728   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2729     CurrentLines->append(
2730         std::make_move_iterator(PreprocessorDirectives.begin()),
2731         std::make_move_iterator(PreprocessorDirectives.end()));
2732     PreprocessorDirectives.clear();
2733   }
2734   // Disconnect the current token from the last token on the previous line.
2735   FormatTok->Previous = nullptr;
2736 }
2737 
2738 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2739 
2740 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2741   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2742          FormatTok.NewlinesBefore > 0;
2743 }
2744 
2745 // Checks if \p FormatTok is a line comment that continues the line comment
2746 // section on \p Line.
2747 static bool
2748 continuesLineCommentSection(const FormatToken &FormatTok,
2749                             const UnwrappedLine &Line,
2750                             const llvm::Regex &CommentPragmasRegex) {
2751   if (Line.Tokens.empty())
2752     return false;
2753 
2754   StringRef IndentContent = FormatTok.TokenText;
2755   if (FormatTok.TokenText.startswith("//") ||
2756       FormatTok.TokenText.startswith("/*"))
2757     IndentContent = FormatTok.TokenText.substr(2);
2758   if (CommentPragmasRegex.match(IndentContent))
2759     return false;
2760 
2761   // If Line starts with a line comment, then FormatTok continues the comment
2762   // section if its original column is greater or equal to the original start
2763   // column of the line.
2764   //
2765   // Define the min column token of a line as follows: if a line ends in '{' or
2766   // contains a '{' followed by a line comment, then the min column token is
2767   // that '{'. Otherwise, the min column token of the line is the first token of
2768   // the line.
2769   //
2770   // If Line starts with a token other than a line comment, then FormatTok
2771   // continues the comment section if its original column is greater than the
2772   // original start column of the min column token of the line.
2773   //
2774   // For example, the second line comment continues the first in these cases:
2775   //
2776   // // first line
2777   // // second line
2778   //
2779   // and:
2780   //
2781   // // first line
2782   //  // second line
2783   //
2784   // and:
2785   //
2786   // int i; // first line
2787   //  // second line
2788   //
2789   // and:
2790   //
2791   // do { // first line
2792   //      // second line
2793   //   int i;
2794   // } while (true);
2795   //
2796   // and:
2797   //
2798   // enum {
2799   //   a, // first line
2800   //    // second line
2801   //   b
2802   // };
2803   //
2804   // The second line comment doesn't continue the first in these cases:
2805   //
2806   //   // first line
2807   //  // second line
2808   //
2809   // and:
2810   //
2811   // int i; // first line
2812   // // second line
2813   //
2814   // and:
2815   //
2816   // do { // first line
2817   //   // second line
2818   //   int i;
2819   // } while (true);
2820   //
2821   // and:
2822   //
2823   // enum {
2824   //   a, // first line
2825   //   // second line
2826   // };
2827   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2828 
2829   // Scan for '{//'. If found, use the column of '{' as a min column for line
2830   // comment section continuation.
2831   const FormatToken *PreviousToken = nullptr;
2832   for (const UnwrappedLineNode &Node : Line.Tokens) {
2833     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2834         isLineComment(*Node.Tok)) {
2835       MinColumnToken = PreviousToken;
2836       break;
2837     }
2838     PreviousToken = Node.Tok;
2839 
2840     // Grab the last newline preceding a token in this unwrapped line.
2841     if (Node.Tok->NewlinesBefore > 0) {
2842       MinColumnToken = Node.Tok;
2843     }
2844   }
2845   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2846     MinColumnToken = PreviousToken;
2847   }
2848 
2849   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2850                               MinColumnToken);
2851 }
2852 
2853 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2854   bool JustComments = Line->Tokens.empty();
2855   for (SmallVectorImpl<FormatToken *>::const_iterator
2856            I = CommentsBeforeNextToken.begin(),
2857            E = CommentsBeforeNextToken.end();
2858        I != E; ++I) {
2859     // Line comments that belong to the same line comment section are put on the
2860     // same line since later we might want to reflow content between them.
2861     // Additional fine-grained breaking of line comment sections is controlled
2862     // by the class BreakableLineCommentSection in case it is desirable to keep
2863     // several line comment sections in the same unwrapped line.
2864     //
2865     // FIXME: Consider putting separate line comment sections as children to the
2866     // unwrapped line instead.
2867     (*I)->ContinuesLineCommentSection =
2868         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2869     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2870       addUnwrappedLine();
2871     pushToken(*I);
2872   }
2873   if (NewlineBeforeNext && JustComments)
2874     addUnwrappedLine();
2875   CommentsBeforeNextToken.clear();
2876 }
2877 
2878 void UnwrappedLineParser::nextToken(int LevelDifference) {
2879   if (eof())
2880     return;
2881   flushComments(isOnNewLine(*FormatTok));
2882   pushToken(FormatTok);
2883   FormatToken *Previous = FormatTok;
2884   if (Style.Language != FormatStyle::LK_JavaScript)
2885     readToken(LevelDifference);
2886   else
2887     readTokenWithJavaScriptASI();
2888   FormatTok->Previous = Previous;
2889 }
2890 
2891 void UnwrappedLineParser::distributeComments(
2892     const SmallVectorImpl<FormatToken *> &Comments,
2893     const FormatToken *NextTok) {
2894   // Whether or not a line comment token continues a line is controlled by
2895   // the method continuesLineCommentSection, with the following caveat:
2896   //
2897   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2898   // that each comment line from the trail is aligned with the next token, if
2899   // the next token exists. If a trail exists, the beginning of the maximal
2900   // trail is marked as a start of a new comment section.
2901   //
2902   // For example in this code:
2903   //
2904   // int a; // line about a
2905   //   // line 1 about b
2906   //   // line 2 about b
2907   //   int b;
2908   //
2909   // the two lines about b form a maximal trail, so there are two sections, the
2910   // first one consisting of the single comment "// line about a" and the
2911   // second one consisting of the next two comments.
2912   if (Comments.empty())
2913     return;
2914   bool ShouldPushCommentsInCurrentLine = true;
2915   bool HasTrailAlignedWithNextToken = false;
2916   unsigned StartOfTrailAlignedWithNextToken = 0;
2917   if (NextTok) {
2918     // We are skipping the first element intentionally.
2919     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2920       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2921         HasTrailAlignedWithNextToken = true;
2922         StartOfTrailAlignedWithNextToken = i;
2923       }
2924     }
2925   }
2926   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2927     FormatToken *FormatTok = Comments[i];
2928     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2929       FormatTok->ContinuesLineCommentSection = false;
2930     } else {
2931       FormatTok->ContinuesLineCommentSection =
2932           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2933     }
2934     if (!FormatTok->ContinuesLineCommentSection &&
2935         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2936       ShouldPushCommentsInCurrentLine = false;
2937     }
2938     if (ShouldPushCommentsInCurrentLine) {
2939       pushToken(FormatTok);
2940     } else {
2941       CommentsBeforeNextToken.push_back(FormatTok);
2942     }
2943   }
2944 }
2945 
// Reads tokens from the token source until a non-comment token is found and
// leaves that token in FormatTok.
//
// While doing so:
//  - preprocessor directives that start at the token just read are parsed via
//    parsePPDirective();
//  - tokens of type TT_ConflictStart / TT_ConflictAlternative / TT_ConflictEnd
//    are routed to the conditionalCompilation*() handlers and replaced by the
//    token that follows them;
//  - tokens are skipped while the innermost PPStack entry is PP_Unreachable
//    (unless the current line is a preprocessor directive);
//  - comment tokens are collected and handed to distributeComments() together
//    with the token that follows them.
//
// \p LevelDifference is added to Line->Level before each preprocessor
// directive handled here is parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments seen since the last non-comment token.
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' starts a preprocessor directive only when we are not already inside
    // one and the '#' begins a line (or is the first token). This is a loop so
    // that whatever parsePPDirective() leaves in FormatTok is checked again.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      // The comments collected so far precede the directive; distribute them
      // relative to the '#' token.
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      // NOTE(review): ScopedLineState is defined elsewhere; presumably it
      // installs a fresh Line for the directive and restores the previous one
      // when BlockState goes out of scope -- confirm.
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict marker tokens are not kept: each one is dispatched to the
    // matching conditionalCompilation*() handler and then replaced by the next
    // token, which is forced to start on a new line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->getType() == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->getType() == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
      FormatTok->MustBreakAlignBefore = true;
    }

    // Inside an unreachable preprocessor branch the token is dropped, unless it
    // belongs to a preprocessor directive line. `continue` jumps to the loop
    // condition, so reading stops if the dropped token was eof.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // A non-comment token ends the search: place the collected comments
    // relative to it and leave it in FormatTok for the caller.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    // A comment: keep collecting until the next non-comment token is known.
    Comments.push_back(FormatTok);
  } while (!eof());

  // Reached only when the loop stopped on eof without returning above, i.e.
  // the eof token was skipped by the `continue`. There is no next token to
  // align the remaining comments with.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
3004 
3005 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3006   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3007   if (MustBreakBeforeNextToken) {
3008     Line->Tokens.back().Tok->MustBreakBefore = true;
3009     Line->Tokens.back().Tok->MustBreakAlignBefore = true;
3010     MustBreakBeforeNextToken = false;
3011   }
3012 }
3013 
3014 } // end namespace format
3015 } // end namespace clang
3016