1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31   virtual FormatToken *getNextToken() = 0;
32 
33   virtual unsigned getPosition() = 0;
34   virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
41   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42                          bool MustBeDeclaration)
43       : Line(Line), Stack(Stack) {
44     Line.MustBeDeclaration = MustBeDeclaration;
45     Stack.push_back(MustBeDeclaration);
46   }
47   ~ScopedDeclarationState() {
48     Stack.pop_back();
49     if (!Stack.empty())
50       Line.MustBeDeclaration = Stack.back();
51     else
52       Line.MustBeDeclaration = true;
53   }
54 
55 private:
56   UnwrappedLine &Line;
57   std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68                                  const FormatToken *Previous,
69                                  const FormatToken *MinColumnToken) {
70   if (!Previous || !MinColumnToken)
71     return false;
72   unsigned MinContinueColumn =
73       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75          isLineComment(*Previous) &&
76          FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
81   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82                    FormatToken *&ResetToken)
83       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85         Token(nullptr), PreviousToken(nullptr) {
86     FakeEOF.Tok.startToken();
87     FakeEOF.Tok.setKind(tok::eof);
88     TokenSource = this;
89     Line.Level = 0;
90     Line.InPPDirective = true;
91   }
92 
93   ~ScopedMacroState() override {
94     TokenSource = PreviousTokenSource;
95     ResetToken = Token;
96     Line.InPPDirective = false;
97     Line.Level = PreviousLineLevel;
98   }
99 
100   FormatToken *getNextToken() override {
101     // The \c UnwrappedLineParser guards against this by never calling
102     // \c getNextToken() after it has encountered the first eof token.
103     assert(!eof());
104     PreviousToken = Token;
105     Token = PreviousTokenSource->getNextToken();
106     if (eof())
107       return &FakeEOF;
108     return Token;
109   }
110 
111   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112 
113   FormatToken *setPosition(unsigned Position) override {
114     PreviousToken = nullptr;
115     Token = PreviousTokenSource->setPosition(Position);
116     return Token;
117   }
118 
119 private:
120   bool eof() {
121     return Token && Token->HasUnescapedNewline &&
122            !continuesLineComment(*Token, PreviousToken,
123                                  /*MinColumnToken=*/PreviousToken);
124   }
125 
126   FormatToken FakeEOF;
127   UnwrappedLine &Line;
128   FormatTokenSource *&TokenSource;
129   FormatToken *&ResetToken;
130   unsigned PreviousLineLevel;
131   FormatTokenSource *PreviousTokenSource;
132 
133   FormatToken *Token;
134   FormatToken *PreviousToken;
135 };
136 
137 } // end anonymous namespace
138 
139 class ScopedLineState {
140 public:
141   ScopedLineState(UnwrappedLineParser &Parser,
142                   bool SwitchToPreprocessorLines = false)
143       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144     if (SwitchToPreprocessorLines)
145       Parser.CurrentLines = &Parser.PreprocessorDirectives;
146     else if (!Parser.Line->Tokens.empty())
147       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148     PreBlockLine = std::move(Parser.Line);
149     Parser.Line = std::make_unique<UnwrappedLine>();
150     Parser.Line->Level = PreBlockLine->Level;
151     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152   }
153 
154   ~ScopedLineState() {
155     if (!Parser.Line->Tokens.empty()) {
156       Parser.addUnwrappedLine();
157     }
158     assert(Parser.Line->Tokens.empty());
159     Parser.Line = std::move(PreBlockLine);
160     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161       Parser.MustBreakBeforeNextToken = true;
162     Parser.CurrentLines = OriginalLines;
163   }
164 
165 private:
166   UnwrappedLineParser &Parser;
167 
168   std::unique_ptr<UnwrappedLine> PreBlockLine;
169   SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171 
172 class CompoundStatementIndenter {
173 public:
174   CompoundStatementIndenter(UnwrappedLineParser *Parser,
175                             const FormatStyle &Style, unsigned &LineLevel)
176       : CompoundStatementIndenter(Parser, LineLevel,
177                                   Style.BraceWrapping.AfterControlStatement,
178                                   Style.BraceWrapping.IndentBraces) {}
179   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
180                             bool WrapBrace, bool IndentBrace)
181       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182     if (WrapBrace)
183       Parser->addUnwrappedLine();
184     if (IndentBrace)
185       ++LineLevel;
186   }
187   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188 
189 private:
190   unsigned &LineLevel;
191   unsigned OldLineLevel;
192 };
193 
194 namespace {
195 
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
198   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199       : Tokens(Tokens), Position(-1) {}
200 
201   FormatToken *getNextToken() override {
202     ++Position;
203     return Tokens[Position];
204   }
205 
206   unsigned getPosition() override {
207     assert(Position >= 0);
208     return Position;
209   }
210 
211   FormatToken *setPosition(unsigned P) override {
212     Position = P;
213     return Tokens[Position];
214   }
215 
216   void reset() { Position = -1; }
217 
218 private:
219   ArrayRef<FormatToken *> Tokens;
220   int Position;
221 };
222 
223 } // end anonymous namespace
224 
225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
226                                          const AdditionalKeywords &Keywords,
227                                          unsigned FirstStartColumn,
228                                          ArrayRef<FormatToken *> Tokens,
229                                          UnwrappedLineConsumer &Callback)
230     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235                        ? IG_Rejected
236                        : IG_Inited),
237       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238 
239 void UnwrappedLineParser::reset() {
240   PPBranchLevel = -1;
241   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242                      ? IG_Rejected
243                      : IG_Inited;
244   IncludeGuardToken = nullptr;
245   Line.reset(new UnwrappedLine);
246   CommentsBeforeNextToken.clear();
247   FormatTok = nullptr;
248   MustBreakBeforeNextToken = false;
249   PreprocessorDirectives.clear();
250   CurrentLines = &Lines;
251   DeclarationScopeStack.clear();
252   PPStack.clear();
253   Line->FirstStartColumn = FirstStartColumn;
254 }
255 
256 void UnwrappedLineParser::parse() {
257   IndexedTokenSource TokenSource(AllTokens);
258   Line->FirstStartColumn = FirstStartColumn;
259   do {
260     LLVM_DEBUG(llvm::dbgs() << "----\n");
261     reset();
262     Tokens = &TokenSource;
263     TokenSource.reset();
264 
265     readToken();
266     parseFile();
267 
268     // If we found an include guard then all preprocessor directives (other than
269     // the guard) are over-indented by one.
270     if (IncludeGuard == IG_Found)
271       for (auto &Line : Lines)
272         if (Line.InPPDirective && Line.Level > 0)
273           --Line.Level;
274 
275     // Create line with eof token.
276     pushToken(FormatTok);
277     addUnwrappedLine();
278 
279     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280                                                   E = Lines.end();
281          I != E; ++I) {
282       Callback.consumeUnwrappedLine(*I);
283     }
284     Callback.finishRun();
285     Lines.clear();
286     while (!PPLevelBranchIndex.empty() &&
287            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290     }
291     if (!PPLevelBranchIndex.empty()) {
292       ++PPLevelBranchIndex.back();
293       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295     }
296   } while (!PPLevelBranchIndex.empty());
297 }
298 
299 void UnwrappedLineParser::parseFile() {
300   // The top-level context in a file always has declarations, except for pre-
301   // processor directives and JavaScript files.
302   bool MustBeDeclaration =
303       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305                                           MustBeDeclaration);
306   if (Style.Language == FormatStyle::LK_TextProto)
307     parseBracedList();
308   else
309     parseLevel(/*HasOpeningBrace=*/false);
310   // Make sure to format the remaining tokens.
311   //
312   // LK_TextProto is special since its top-level is parsed as the body of a
313   // braced list, which does not necessarily have natural line separators such
314   // as a semicolon. Comments after the last entry that have been determined to
315   // not belong to that line, as in:
316   //   key: value
317   //   // endfile comment
318   // do not have a chance to be put on a line of their own until this point.
319   // Here we add this newline before end-of-file comments.
320   if (Style.Language == FormatStyle::LK_TextProto &&
321       !CommentsBeforeNextToken.empty())
322     addUnwrappedLine();
323   flushComments(true);
324   addUnwrappedLine();
325 }
326 
327 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328   do {
329     switch (FormatTok->Tok.getKind()) {
330     case tok::l_brace:
331       return;
332     default:
333       if (FormatTok->is(Keywords.kw_where)) {
334         addUnwrappedLine();
335         nextToken();
336         parseCSharpGenericTypeConstraint();
337         break;
338       }
339       nextToken();
340       break;
341     }
342   } while (!eof());
343 }
344 
345 void UnwrappedLineParser::parseCSharpAttribute() {
346   int UnpairedSquareBrackets = 1;
347   do {
348     switch (FormatTok->Tok.getKind()) {
349     case tok::r_square:
350       nextToken();
351       --UnpairedSquareBrackets;
352       if (UnpairedSquareBrackets == 0) {
353         addUnwrappedLine();
354         return;
355       }
356       break;
357     case tok::l_square:
358       ++UnpairedSquareBrackets;
359       nextToken();
360       break;
361     default:
362       nextToken();
363       break;
364     }
365   } while (!eof());
366 }
367 
368 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
369   bool SwitchLabelEncountered = false;
370   do {
371     tok::TokenKind kind = FormatTok->Tok.getKind();
372     if (FormatTok->getType() == TT_MacroBlockBegin) {
373       kind = tok::l_brace;
374     } else if (FormatTok->getType() == TT_MacroBlockEnd) {
375       kind = tok::r_brace;
376     }
377 
378     switch (kind) {
379     case tok::comment:
380       nextToken();
381       addUnwrappedLine();
382       break;
383     case tok::l_brace:
384       // FIXME: Add parameter whether this can happen - if this happens, we must
385       // be in a non-declaration context.
386       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
387         continue;
388       parseBlock(/*MustBeDeclaration=*/false);
389       addUnwrappedLine();
390       break;
391     case tok::r_brace:
392       if (HasOpeningBrace)
393         return;
394       nextToken();
395       addUnwrappedLine();
396       break;
397     case tok::kw_default: {
398       unsigned StoredPosition = Tokens->getPosition();
399       FormatToken *Next;
400       do {
401         Next = Tokens->getNextToken();
402       } while (Next && Next->is(tok::comment));
403       FormatTok = Tokens->setPosition(StoredPosition);
404       if (Next && Next->isNot(tok::colon)) {
405         // default not followed by ':' is not a case label; treat it like
406         // an identifier.
407         parseStructuralElement();
408         break;
409       }
410       // Else, if it is 'default:', fall through to the case handling.
411       LLVM_FALLTHROUGH;
412     }
413     case tok::kw_case:
414       if (Style.Language == FormatStyle::LK_JavaScript &&
415           Line->MustBeDeclaration) {
416         // A 'case: string' style field declaration.
417         parseStructuralElement();
418         break;
419       }
420       if (!SwitchLabelEncountered &&
421           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
422         ++Line->Level;
423       SwitchLabelEncountered = true;
424       parseStructuralElement();
425       break;
426     case tok::l_square:
427       if (Style.isCSharp()) {
428         nextToken();
429         parseCSharpAttribute();
430         break;
431       }
432       LLVM_FALLTHROUGH;
433     default:
434       parseStructuralElement();
435       break;
436     }
437   } while (!eof());
438 }
439 
440 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
441   // We'll parse forward through the tokens until we hit
442   // a closing brace or eof - note that getNextToken() will
443   // parse macros, so this will magically work inside macro
444   // definitions, too.
445   unsigned StoredPosition = Tokens->getPosition();
446   FormatToken *Tok = FormatTok;
447   const FormatToken *PrevTok = Tok->Previous;
448   // Keep a stack of positions of lbrace tokens. We will
449   // update information about whether an lbrace starts a
450   // braced init list or a different block during the loop.
451   SmallVector<FormatToken *, 8> LBraceStack;
452   assert(Tok->Tok.is(tok::l_brace));
453   do {
454     // Get next non-comment token.
455     FormatToken *NextTok;
456     unsigned ReadTokens = 0;
457     do {
458       NextTok = Tokens->getNextToken();
459       ++ReadTokens;
460     } while (NextTok->is(tok::comment));
461 
462     switch (Tok->Tok.getKind()) {
463     case tok::l_brace:
464       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
465         if (PrevTok->isOneOf(tok::colon, tok::less))
466           // A ':' indicates this code is in a type, or a braced list
467           // following a label in an object literal ({a: {b: 1}}).
468           // A '<' could be an object used in a comparison, but that is nonsense
469           // code (can never return true), so more likely it is a generic type
470           // argument (`X<{a: string; b: number}>`).
471           // The code below could be confused by semicolons between the
472           // individual members in a type member list, which would normally
473           // trigger BK_Block. In both cases, this must be parsed as an inline
474           // braced init.
475           Tok->BlockKind = BK_BracedInit;
476         else if (PrevTok->is(tok::r_paren))
477           // `) { }` can only occur in function or method declarations in JS.
478           Tok->BlockKind = BK_Block;
479       } else {
480         Tok->BlockKind = BK_Unknown;
481       }
482       LBraceStack.push_back(Tok);
483       break;
484     case tok::r_brace:
485       if (LBraceStack.empty())
486         break;
487       if (LBraceStack.back()->BlockKind == BK_Unknown) {
488         bool ProbablyBracedList = false;
489         if (Style.Language == FormatStyle::LK_Proto) {
490           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
491         } else {
492           // Using OriginalColumn to distinguish between ObjC methods and
493           // binary operators is a bit hacky.
494           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
495                                   NextTok->OriginalColumn == 0;
496 
497           // If there is a comma, semicolon or right paren after the closing
498           // brace, we assume this is a braced initializer list.  Note that
499           // regardless how we mark inner braces here, we will overwrite the
500           // BlockKind later if we parse a braced list (where all blocks
501           // inside are by default braced lists), or when we explicitly detect
502           // blocks (for example while parsing lambdas).
503           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
504           // braced list in JS.
505           ProbablyBracedList =
506               (Style.Language == FormatStyle::LK_JavaScript &&
507                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
508                                 Keywords.kw_as)) ||
509               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
510               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
511                                tok::r_paren, tok::r_square, tok::l_brace,
512                                tok::ellipsis) ||
513               (NextTok->is(tok::identifier) &&
514                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
515               (NextTok->is(tok::semi) &&
516                (!ExpectClassBody || LBraceStack.size() != 1)) ||
517               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
518           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
519             // We can have an array subscript after a braced init
520             // list, but C++11 attributes are expected after blocks.
521             NextTok = Tokens->getNextToken();
522             ++ReadTokens;
523             ProbablyBracedList = NextTok->isNot(tok::l_square);
524           }
525         }
526         if (ProbablyBracedList) {
527           Tok->BlockKind = BK_BracedInit;
528           LBraceStack.back()->BlockKind = BK_BracedInit;
529         } else {
530           Tok->BlockKind = BK_Block;
531           LBraceStack.back()->BlockKind = BK_Block;
532         }
533       }
534       LBraceStack.pop_back();
535       break;
536     case tok::identifier:
537       if (!Tok->is(TT_StatementMacro))
538         break;
539       LLVM_FALLTHROUGH;
540     case tok::at:
541     case tok::semi:
542     case tok::kw_if:
543     case tok::kw_while:
544     case tok::kw_for:
545     case tok::kw_switch:
546     case tok::kw_try:
547     case tok::kw___try:
548       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
549         LBraceStack.back()->BlockKind = BK_Block;
550       break;
551     default:
552       break;
553     }
554     PrevTok = Tok;
555     Tok = NextTok;
556   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
557 
558   // Assume other blocks for all unclosed opening braces.
559   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
560     if (LBraceStack[i]->BlockKind == BK_Unknown)
561       LBraceStack[i]->BlockKind = BK_Block;
562   }
563 
564   FormatTok = Tokens->setPosition(StoredPosition);
565 }
566 
/// Folds the \c std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
572 
573 size_t UnwrappedLineParser::computePPHash() const {
574   size_t h = 0;
575   for (const auto &i : PPStack) {
576     hash_combine(h, size_t(i.Kind));
577     hash_combine(h, i.Line);
578   }
579   return h;
580 }
581 
582 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
583                                      bool MunchSemi) {
584   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
585          "'{' or macro block token expected");
586   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
587   FormatTok->BlockKind = BK_Block;
588 
589   size_t PPStartHash = computePPHash();
590 
591   unsigned InitialLevel = Line->Level;
592   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
593 
594   if (MacroBlock && FormatTok->is(tok::l_paren))
595     parseParens();
596 
597   size_t NbPreprocessorDirectives =
598       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
599   addUnwrappedLine();
600   size_t OpeningLineIndex =
601       CurrentLines->empty()
602           ? (UnwrappedLine::kInvalidIndex)
603           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
604 
605   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
606                                           MustBeDeclaration);
607   if (AddLevel)
608     ++Line->Level;
609   parseLevel(/*HasOpeningBrace=*/true);
610 
611   if (eof())
612     return;
613 
614   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
615                  : !FormatTok->is(tok::r_brace)) {
616     Line->Level = InitialLevel;
617     FormatTok->BlockKind = BK_Block;
618     return;
619   }
620 
621   size_t PPEndHash = computePPHash();
622 
623   // Munch the closing brace.
624   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
625 
626   if (MacroBlock && FormatTok->is(tok::l_paren))
627     parseParens();
628 
629   if (MunchSemi && FormatTok->Tok.is(tok::semi))
630     nextToken();
631   Line->Level = InitialLevel;
632 
633   if (PPStartHash == PPEndHash) {
634     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
635     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
636       // Update the opening line to add the forward reference as well
637       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
638           CurrentLines->size() - 1;
639     }
640   }
641 }
642 
643 static bool isGoogScope(const UnwrappedLine &Line) {
644   // FIXME: Closure-library specific stuff should not be hard-coded but be
645   // configurable.
646   if (Line.Tokens.size() < 4)
647     return false;
648   auto I = Line.Tokens.begin();
649   if (I->Tok->TokenText != "goog")
650     return false;
651   ++I;
652   if (I->Tok->isNot(tok::period))
653     return false;
654   ++I;
655   if (I->Tok->TokenText != "scope")
656     return false;
657   ++I;
658   return I->Tok->is(tok::l_paren);
659 }
660 
661 static bool isIIFE(const UnwrappedLine &Line,
662                    const AdditionalKeywords &Keywords) {
663   // Look for the start of an immediately invoked anonymous function.
664   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
665   // This is commonly done in JavaScript to create a new, anonymous scope.
666   // Example: (function() { ... })()
667   if (Line.Tokens.size() < 3)
668     return false;
669   auto I = Line.Tokens.begin();
670   if (I->Tok->isNot(tok::l_paren))
671     return false;
672   ++I;
673   if (I->Tok->isNot(Keywords.kw_function))
674     return false;
675   ++I;
676   return I->Tok->is(tok::l_paren);
677 }
678 
679 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
680                                    const FormatToken &InitialToken) {
681   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
682     return Style.BraceWrapping.AfterNamespace;
683   if (InitialToken.is(tok::kw_class))
684     return Style.BraceWrapping.AfterClass;
685   if (InitialToken.is(tok::kw_union))
686     return Style.BraceWrapping.AfterUnion;
687   if (InitialToken.is(tok::kw_struct))
688     return Style.BraceWrapping.AfterStruct;
689   return false;
690 }
691 
692 void UnwrappedLineParser::parseChildBlock() {
693   FormatTok->BlockKind = BK_Block;
694   nextToken();
695   {
696     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
697                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
698     ScopedLineState LineState(*this);
699     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
700                                             /*MustBeDeclaration=*/false);
701     Line->Level += SkipIndent ? 0 : 1;
702     parseLevel(/*HasOpeningBrace=*/true);
703     flushComments(isOnNewLine(*FormatTok));
704     Line->Level -= SkipIndent ? 0 : 1;
705   }
706   nextToken();
707 }
708 
709 void UnwrappedLineParser::parsePPDirective() {
710   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
711   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
712 
713   nextToken();
714 
715   if (!FormatTok->Tok.getIdentifierInfo()) {
716     parsePPUnknown();
717     return;
718   }
719 
720   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
721   case tok::pp_define:
722     parsePPDefine();
723     return;
724   case tok::pp_if:
725     parsePPIf(/*IfDef=*/false);
726     break;
727   case tok::pp_ifdef:
728   case tok::pp_ifndef:
729     parsePPIf(/*IfDef=*/true);
730     break;
731   case tok::pp_else:
732     parsePPElse();
733     break;
734   case tok::pp_elif:
735     parsePPElIf();
736     break;
737   case tok::pp_endif:
738     parsePPEndIf();
739     break;
740   default:
741     parsePPUnknown();
742     break;
743   }
744 }
745 
746 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
747   size_t Line = CurrentLines->size();
748   if (CurrentLines == &PreprocessorDirectives)
749     Line += Lines.size();
750 
751   if (Unreachable ||
752       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
753     PPStack.push_back({PP_Unreachable, Line});
754   else
755     PPStack.push_back({PP_Conditional, Line});
756 }
757 
758 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
759   ++PPBranchLevel;
760   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
761   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
762     PPLevelBranchIndex.push_back(0);
763     PPLevelBranchCount.push_back(0);
764   }
765   PPChainBranchIndex.push(0);
766   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
767   conditionalCompilationCondition(Unreachable || Skip);
768 }
769 
770 void UnwrappedLineParser::conditionalCompilationAlternative() {
771   if (!PPStack.empty())
772     PPStack.pop_back();
773   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
774   if (!PPChainBranchIndex.empty())
775     ++PPChainBranchIndex.top();
776   conditionalCompilationCondition(
777       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
778       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
779 }
780 
781 void UnwrappedLineParser::conditionalCompilationEnd() {
782   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
783   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
784     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
785       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
786     }
787   }
788   // Guard against #endif's without #if.
789   if (PPBranchLevel > -1)
790     --PPBranchLevel;
791   if (!PPChainBranchIndex.empty())
792     PPChainBranchIndex.pop();
793   if (!PPStack.empty())
794     PPStack.pop_back();
795 }
796 
797 void UnwrappedLineParser::parsePPIf(bool IfDef) {
798   bool IfNDef = FormatTok->is(tok::pp_ifndef);
799   nextToken();
800   bool Unreachable = false;
801   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
802     Unreachable = true;
803   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
804     Unreachable = true;
805   conditionalCompilationStart(Unreachable);
806   FormatToken *IfCondition = FormatTok;
807   // If there's a #ifndef on the first line, and the only lines before it are
808   // comments, it could be an include guard.
809   bool MaybeIncludeGuard = IfNDef;
810   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
811     for (auto &Line : Lines) {
812       if (!Line.Tokens.front().Tok->is(tok::comment)) {
813         MaybeIncludeGuard = false;
814         IncludeGuard = IG_Rejected;
815         break;
816       }
817     }
818   --PPBranchLevel;
819   parsePPUnknown();
820   ++PPBranchLevel;
821   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
822     IncludeGuard = IG_IfNdefed;
823     IncludeGuardToken = IfCondition;
824   }
825 }
826 
827 void UnwrappedLineParser::parsePPElse() {
828   // If a potential include guard has an #else, it's not an include guard.
829   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
830     IncludeGuard = IG_Rejected;
831   conditionalCompilationAlternative();
832   if (PPBranchLevel > -1)
833     --PPBranchLevel;
834   parsePPUnknown();
835   ++PPBranchLevel;
836 }
837 
838 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
839 
840 void UnwrappedLineParser::parsePPEndIf() {
841   conditionalCompilationEnd();
842   parsePPUnknown();
843   // If the #endif of a potential include guard is the last thing in the file,
844   // then we found an include guard.
845   unsigned TokenPosition = Tokens->getPosition();
846   FormatToken *PeekNext = AllTokens[TokenPosition];
847   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
848       PeekNext->is(tok::eof) &&
849       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
850     IncludeGuard = IG_Found;
851 }
852 
853 void UnwrappedLineParser::parsePPDefine() {
854   nextToken();
855 
856   if (!FormatTok->Tok.getIdentifierInfo()) {
857     IncludeGuard = IG_Rejected;
858     IncludeGuardToken = nullptr;
859     parsePPUnknown();
860     return;
861   }
862 
863   if (IncludeGuard == IG_IfNdefed &&
864       IncludeGuardToken->TokenText == FormatTok->TokenText) {
865     IncludeGuard = IG_Defined;
866     IncludeGuardToken = nullptr;
867     for (auto &Line : Lines) {
868       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
869         IncludeGuard = IG_Rejected;
870         break;
871       }
872     }
873   }
874 
875   nextToken();
876   if (FormatTok->Tok.getKind() == tok::l_paren &&
877       FormatTok->WhitespaceRange.getBegin() ==
878           FormatTok->WhitespaceRange.getEnd()) {
879     parseParens();
880   }
881   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
882     Line->Level += PPBranchLevel + 1;
883   addUnwrappedLine();
884   ++Line->Level;
885 
886   // Errors during a preprocessor directive can only affect the layout of the
887   // preprocessor directive, and thus we ignore them. An alternative approach
888   // would be to use the same approach we use on the file level (no
889   // re-indentation if there was a structural error) within the macro
890   // definition.
891   parseFile();
892 }
893 
894 void UnwrappedLineParser::parsePPUnknown() {
895   do {
896     nextToken();
897   } while (!eof());
898   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
899     Line->Level += PPBranchLevel + 1;
900   addUnwrappedLine();
901 }
902 
903 // Here we blacklist certain tokens that are not usually the first token in an
904 // unwrapped line. This is used in attempt to distinguish macro calls without
905 // trailing semicolons from other constructs split to several lines.
906 static bool tokenCanStartNewLine(const FormatToken &Tok) {
907   // Semicolon can be a null-statement, l_square can be a start of a macro or
908   // a C++11 attribute, but this doesn't seem to be common.
909   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
910          Tok.isNot(TT_AttributeSquare) &&
911          // Tokens that can only be used as binary operators and a part of
912          // overloaded operator names.
913          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
914          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
915          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
916          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
917          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
918          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
919          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
920          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
921          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
922          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
923          Tok.isNot(tok::lesslessequal) &&
924          // Colon is used in labels, base class lists, initializer lists,
925          // range-based for loops, ternary operator, but should never be the
926          // first token in an unwrapped line.
927          Tok.isNot(tok::colon) &&
928          // 'noexcept' is a trailing annotation.
929          Tok.isNot(tok::kw_noexcept);
930 }
931 
932 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
933                           const FormatToken *FormatTok) {
934   // FIXME: This returns true for C/C++ keywords like 'struct'.
935   return FormatTok->is(tok::identifier) &&
936          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
937           !FormatTok->isOneOf(
938               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
939               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
940               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
941               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
942               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
943               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
944               Keywords.kw_from));
945 }
946 
947 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
948                                  const FormatToken *FormatTok) {
949   return FormatTok->Tok.isLiteral() ||
950          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
951          mustBeJSIdent(Keywords, FormatTok);
952 }
953 
954 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
955 // when encountered after a value (see mustBeJSIdentOrValue).
956 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
957                            const FormatToken *FormatTok) {
958   return FormatTok->isOneOf(
959       tok::kw_return, Keywords.kw_yield,
960       // conditionals
961       tok::kw_if, tok::kw_else,
962       // loops
963       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
964       // switch/case
965       tok::kw_switch, tok::kw_case,
966       // exceptions
967       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
968       // declaration
969       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
970       Keywords.kw_async, Keywords.kw_function,
971       // import/export
972       Keywords.kw_import, tok::kw_export);
973 }
974 
975 // readTokenWithJavaScriptASI reads the next token and terminates the current
976 // line if JavaScript Automatic Semicolon Insertion must
977 // happen between the current token and the next token.
978 //
979 // This method is conservative - it cannot cover all edge cases of JavaScript,
980 // but only aims to correctly handle certain well known cases. It *must not*
981 // return true in speculative cases.
982 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
983   FormatToken *Previous = FormatTok;
984   readToken();
985   FormatToken *Next = FormatTok;
986 
987   bool IsOnSameLine =
988       CommentsBeforeNextToken.empty()
989           ? Next->NewlinesBefore == 0
990           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
991   if (IsOnSameLine)
992     return;
993 
994   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
995   bool PreviousStartsTemplateExpr =
996       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
997   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
998     // If the line contains an '@' sign, the previous token might be an
999     // annotation, which can precede another identifier/value.
1000     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1001                               [](UnwrappedLineNode &LineNode) {
1002                                 return LineNode.Tok->is(tok::at);
1003                               }) != Line->Tokens.end();
1004     if (HasAt)
1005       return;
1006   }
1007   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1008     return addUnwrappedLine();
1009   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1010   bool NextEndsTemplateExpr =
1011       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1012   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1013       (PreviousMustBeValue ||
1014        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1015                          tok::minusminus)))
1016     return addUnwrappedLine();
1017   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1018       isJSDeclOrStmt(Keywords, Next))
1019     return addUnwrappedLine();
1020 }
1021 
1022 void UnwrappedLineParser::parseStructuralElement() {
1023   assert(!FormatTok->is(tok::l_brace));
1024   if (Style.Language == FormatStyle::LK_TableGen &&
1025       FormatTok->is(tok::pp_include)) {
1026     nextToken();
1027     if (FormatTok->is(tok::string_literal))
1028       nextToken();
1029     addUnwrappedLine();
1030     return;
1031   }
1032   switch (FormatTok->Tok.getKind()) {
1033   case tok::kw_asm:
1034     nextToken();
1035     if (FormatTok->is(tok::l_brace)) {
1036       FormatTok->setType(TT_InlineASMBrace);
1037       nextToken();
1038       while (FormatTok && FormatTok->isNot(tok::eof)) {
1039         if (FormatTok->is(tok::r_brace)) {
1040           FormatTok->setType(TT_InlineASMBrace);
1041           nextToken();
1042           addUnwrappedLine();
1043           break;
1044         }
1045         FormatTok->Finalized = true;
1046         nextToken();
1047       }
1048     }
1049     break;
1050   case tok::kw_namespace:
1051     parseNamespace();
1052     return;
1053   case tok::kw_public:
1054   case tok::kw_protected:
1055   case tok::kw_private:
1056     if (Style.Language == FormatStyle::LK_Java ||
1057         Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
1058       nextToken();
1059     else
1060       parseAccessSpecifier();
1061     return;
1062   case tok::kw_if:
1063     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1064       // field/method declaration.
1065       break;
1066     parseIfThenElse();
1067     return;
1068   case tok::kw_for:
1069   case tok::kw_while:
1070     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1071       // field/method declaration.
1072       break;
1073     parseForOrWhileLoop();
1074     return;
1075   case tok::kw_do:
1076     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1077       // field/method declaration.
1078       break;
1079     parseDoWhile();
1080     return;
1081   case tok::kw_switch:
1082     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1083       // 'switch: string' field declaration.
1084       break;
1085     parseSwitch();
1086     return;
1087   case tok::kw_default:
1088     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1089       // 'default: string' field declaration.
1090       break;
1091     nextToken();
1092     if (FormatTok->is(tok::colon)) {
1093       parseLabel();
1094       return;
1095     }
1096     // e.g. "default void f() {}" in a Java interface.
1097     break;
1098   case tok::kw_case:
1099     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1100       // 'case: string' field declaration.
1101       break;
1102     parseCaseLabel();
1103     return;
1104   case tok::kw_try:
1105   case tok::kw___try:
1106     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1107       // field/method declaration.
1108       break;
1109     parseTryCatch();
1110     return;
1111   case tok::kw_extern:
1112     nextToken();
1113     if (FormatTok->Tok.is(tok::string_literal)) {
1114       nextToken();
1115       if (FormatTok->Tok.is(tok::l_brace)) {
1116         if (Style.BraceWrapping.AfterExternBlock) {
1117           addUnwrappedLine();
1118           parseBlock(/*MustBeDeclaration=*/true);
1119         } else {
1120           parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
1121         }
1122         addUnwrappedLine();
1123         return;
1124       }
1125     }
1126     break;
1127   case tok::kw_export:
1128     if (Style.Language == FormatStyle::LK_JavaScript) {
1129       parseJavaScriptEs6ImportExport();
1130       return;
1131     }
1132     if (!Style.isCpp())
1133       break;
1134     // Handle C++ "(inline|export) namespace".
1135     LLVM_FALLTHROUGH;
1136   case tok::kw_inline:
1137     nextToken();
1138     if (FormatTok->Tok.is(tok::kw_namespace)) {
1139       parseNamespace();
1140       return;
1141     }
1142     break;
1143   case tok::identifier:
1144     if (FormatTok->is(TT_ForEachMacro)) {
1145       parseForOrWhileLoop();
1146       return;
1147     }
1148     if (FormatTok->is(TT_MacroBlockBegin)) {
1149       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1150                  /*MunchSemi=*/false);
1151       return;
1152     }
1153     if (FormatTok->is(Keywords.kw_import)) {
1154       if (Style.Language == FormatStyle::LK_JavaScript) {
1155         parseJavaScriptEs6ImportExport();
1156         return;
1157       }
1158       if (Style.Language == FormatStyle::LK_Proto) {
1159         nextToken();
1160         if (FormatTok->is(tok::kw_public))
1161           nextToken();
1162         if (!FormatTok->is(tok::string_literal))
1163           return;
1164         nextToken();
1165         if (FormatTok->is(tok::semi))
1166           nextToken();
1167         addUnwrappedLine();
1168         return;
1169       }
1170     }
1171     if (Style.isCpp() &&
1172         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1173                            Keywords.kw_slots, Keywords.kw_qslots)) {
1174       nextToken();
1175       if (FormatTok->is(tok::colon)) {
1176         nextToken();
1177         addUnwrappedLine();
1178         return;
1179       }
1180     }
1181     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1182       parseStatementMacro();
1183       return;
1184     }
1185     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1186       parseNamespace();
1187       return;
1188     }
1189     // In all other cases, parse the declaration.
1190     break;
1191   default:
1192     break;
1193   }
1194   do {
1195     const FormatToken *Previous = FormatTok->Previous;
1196     switch (FormatTok->Tok.getKind()) {
1197     case tok::at:
1198       nextToken();
1199       if (FormatTok->Tok.is(tok::l_brace)) {
1200         nextToken();
1201         parseBracedList();
1202         break;
1203       } else if (Style.Language == FormatStyle::LK_Java &&
1204                  FormatTok->is(Keywords.kw_interface)) {
1205         nextToken();
1206         break;
1207       }
1208       switch (FormatTok->Tok.getObjCKeywordID()) {
1209       case tok::objc_public:
1210       case tok::objc_protected:
1211       case tok::objc_package:
1212       case tok::objc_private:
1213         return parseAccessSpecifier();
1214       case tok::objc_interface:
1215       case tok::objc_implementation:
1216         return parseObjCInterfaceOrImplementation();
1217       case tok::objc_protocol:
1218         if (parseObjCProtocol())
1219           return;
1220         break;
1221       case tok::objc_end:
1222         return; // Handled by the caller.
1223       case tok::objc_optional:
1224       case tok::objc_required:
1225         nextToken();
1226         addUnwrappedLine();
1227         return;
1228       case tok::objc_autoreleasepool:
1229         nextToken();
1230         if (FormatTok->Tok.is(tok::l_brace)) {
1231           if (Style.BraceWrapping.AfterControlStatement ==
1232               FormatStyle::BWACS_Always)
1233             addUnwrappedLine();
1234           parseBlock(/*MustBeDeclaration=*/false);
1235         }
1236         addUnwrappedLine();
1237         return;
1238       case tok::objc_synchronized:
1239         nextToken();
1240         if (FormatTok->Tok.is(tok::l_paren))
1241           // Skip synchronization object
1242           parseParens();
1243         if (FormatTok->Tok.is(tok::l_brace)) {
1244           if (Style.BraceWrapping.AfterControlStatement ==
1245               FormatStyle::BWACS_Always)
1246             addUnwrappedLine();
1247           parseBlock(/*MustBeDeclaration=*/false);
1248         }
1249         addUnwrappedLine();
1250         return;
1251       case tok::objc_try:
1252         // This branch isn't strictly necessary (the kw_try case below would
1253         // do this too after the tok::at is parsed above).  But be explicit.
1254         parseTryCatch();
1255         return;
1256       default:
1257         break;
1258       }
1259       break;
1260     case tok::kw_enum:
1261       // Ignore if this is part of "template <enum ...".
1262       if (Previous && Previous->is(tok::less)) {
1263         nextToken();
1264         break;
1265       }
1266 
1267       // parseEnum falls through and does not yet add an unwrapped line as an
1268       // enum definition can start a structural element.
1269       if (!parseEnum())
1270         break;
1271       // This only applies for C++.
1272       if (!Style.isCpp()) {
1273         addUnwrappedLine();
1274         return;
1275       }
1276       break;
1277     case tok::kw_typedef:
1278       nextToken();
1279       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1280                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1281                              Keywords.kw_CF_CLOSED_ENUM,
1282                              Keywords.kw_NS_CLOSED_ENUM))
1283         parseEnum();
1284       break;
1285     case tok::kw_struct:
1286     case tok::kw_union:
1287     case tok::kw_class:
1288       // parseRecord falls through and does not yet add an unwrapped line as a
1289       // record declaration or definition can start a structural element.
1290       parseRecord();
1291       // This does not apply for Java, JavaScript and C#.
1292       if (Style.Language == FormatStyle::LK_Java ||
1293           Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
1294         if (FormatTok->is(tok::semi))
1295           nextToken();
1296         addUnwrappedLine();
1297         return;
1298       }
1299       break;
1300     case tok::period:
1301       nextToken();
1302       // In Java, classes have an implicit static member "class".
1303       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1304           FormatTok->is(tok::kw_class))
1305         nextToken();
1306       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1307           FormatTok->Tok.getIdentifierInfo())
1308         // JavaScript only has pseudo keywords, all keywords are allowed to
1309         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1310         nextToken();
1311       break;
1312     case tok::semi:
1313       nextToken();
1314       addUnwrappedLine();
1315       return;
1316     case tok::r_brace:
1317       addUnwrappedLine();
1318       return;
1319     case tok::l_paren:
1320       parseParens();
1321       break;
1322     case tok::kw_operator:
1323       nextToken();
1324       if (FormatTok->isBinaryOperator())
1325         nextToken();
1326       break;
1327     case tok::caret:
1328       nextToken();
1329       if (FormatTok->Tok.isAnyIdentifier() ||
1330           FormatTok->isSimpleTypeSpecifier())
1331         nextToken();
1332       if (FormatTok->is(tok::l_paren))
1333         parseParens();
1334       if (FormatTok->is(tok::l_brace))
1335         parseChildBlock();
1336       break;
1337     case tok::l_brace:
1338       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1339         // A block outside of parentheses must be the last part of a
1340         // structural element.
1341         // FIXME: Figure out cases where this is not true, and add projections
1342         // for them (the one we know is missing are lambdas).
1343         if (Style.BraceWrapping.AfterFunction)
1344           addUnwrappedLine();
1345         FormatTok->setType(TT_FunctionLBrace);
1346         parseBlock(/*MustBeDeclaration=*/false);
1347         addUnwrappedLine();
1348         return;
1349       }
1350       // Otherwise this was a braced init list, and the structural
1351       // element continues.
1352       break;
1353     case tok::kw_try:
1354       if (Style.Language == FormatStyle::LK_JavaScript &&
1355           Line->MustBeDeclaration) {
1356         // field/method declaration.
1357         nextToken();
1358         break;
1359       }
1360       // We arrive here when parsing function-try blocks.
1361       if (Style.BraceWrapping.AfterFunction)
1362         addUnwrappedLine();
1363       parseTryCatch();
1364       return;
1365     case tok::identifier: {
1366       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1367           Line->MustBeDeclaration) {
1368         addUnwrappedLine();
1369         parseCSharpGenericTypeConstraint();
1370         break;
1371       }
1372       if (FormatTok->is(TT_MacroBlockEnd)) {
1373         addUnwrappedLine();
1374         return;
1375       }
1376 
1377       // Function declarations (as opposed to function expressions) are parsed
1378       // on their own unwrapped line by continuing this loop. Function
1379       // expressions (functions that are not on their own line) must not create
1380       // a new unwrapped line, so they are special cased below.
1381       size_t TokenCount = Line->Tokens.size();
1382       if (Style.Language == FormatStyle::LK_JavaScript &&
1383           FormatTok->is(Keywords.kw_function) &&
1384           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1385                                                      Keywords.kw_async)))) {
1386         tryToParseJSFunction();
1387         break;
1388       }
1389       if ((Style.Language == FormatStyle::LK_JavaScript ||
1390            Style.Language == FormatStyle::LK_Java) &&
1391           FormatTok->is(Keywords.kw_interface)) {
1392         if (Style.Language == FormatStyle::LK_JavaScript) {
1393           // In JavaScript/TypeScript, "interface" can be used as a standalone
1394           // identifier, e.g. in `var interface = 1;`. If "interface" is
1395           // followed by another identifier, it is very like to be an actual
1396           // interface declaration.
1397           unsigned StoredPosition = Tokens->getPosition();
1398           FormatToken *Next = Tokens->getNextToken();
1399           FormatTok = Tokens->setPosition(StoredPosition);
1400           if (Next && !mustBeJSIdent(Keywords, Next)) {
1401             nextToken();
1402             break;
1403           }
1404         }
1405         parseRecord();
1406         addUnwrappedLine();
1407         return;
1408       }
1409 
1410       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1411         parseStatementMacro();
1412         return;
1413       }
1414 
1415       // See if the following token should start a new unwrapped line.
1416       StringRef Text = FormatTok->TokenText;
1417       nextToken();
1418 
1419       // JS doesn't have macros, and within classes colons indicate fields, not
1420       // labels.
1421       if (Style.Language == FormatStyle::LK_JavaScript)
1422         break;
1423 
1424       TokenCount = Line->Tokens.size();
1425       if (TokenCount == 1 ||
1426           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1427         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1428           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1429           parseLabel(!Style.IndentGotoLabels);
1430           return;
1431         }
1432         // Recognize function-like macro usages without trailing semicolon as
1433         // well as free-standing macros like Q_OBJECT.
1434         bool FunctionLike = FormatTok->is(tok::l_paren);
1435         if (FunctionLike)
1436           parseParens();
1437 
1438         bool FollowedByNewline =
1439             CommentsBeforeNextToken.empty()
1440                 ? FormatTok->NewlinesBefore > 0
1441                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1442 
1443         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1444             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1445           addUnwrappedLine();
1446           return;
1447         }
1448       }
1449       break;
1450     }
1451     case tok::equal:
1452       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1453       // TT_JsFatArrow. The always start an expression or a child block if
1454       // followed by a curly.
1455       if (FormatTok->is(TT_JsFatArrow)) {
1456         nextToken();
1457         if (FormatTok->is(tok::l_brace))
1458           parseChildBlock();
1459         break;
1460       }
1461 
1462       nextToken();
1463       if (FormatTok->Tok.is(tok::l_brace)) {
1464         // Block kind should probably be set to BK_BracedInit for any language.
1465         // C# needs this change to ensure that array initialisers and object
1466         // initialisers are indented the same way.
1467         if (Style.isCSharp())
1468           FormatTok->BlockKind = BK_BracedInit;
1469         nextToken();
1470         parseBracedList();
1471       } else if (Style.Language == FormatStyle::LK_Proto &&
1472                  FormatTok->Tok.is(tok::less)) {
1473         nextToken();
1474         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1475                         /*ClosingBraceKind=*/tok::greater);
1476       }
1477       break;
1478     case tok::l_square:
1479       parseSquare();
1480       break;
1481     case tok::kw_new:
1482       parseNew();
1483       break;
1484     default:
1485       nextToken();
1486       break;
1487     }
1488   } while (!eof());
1489 }
1490 
1491 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1492   assert(FormatTok->is(tok::l_brace));
1493   if (!Style.isCSharp())
1494     return false;
1495   // See if it's a property accessor.
1496   if (FormatTok->Previous->isNot(tok::identifier))
1497     return false;
1498 
1499   // See if we are inside a property accessor.
1500   //
1501   // Record the current tokenPosition so that we can advance and
1502   // reset the current token. `Next` is not set yet so we need
1503   // another way to advance along the token stream.
1504   unsigned int StoredPosition = Tokens->getPosition();
1505   FormatToken *Tok = Tokens->getNextToken();
1506 
1507   // A trivial property accessor is of the form:
1508   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1509   // Track these as they do not require line breaks to be introduced.
1510   bool HasGetOrSet = false;
1511   bool IsTrivialPropertyAccessor = true;
1512   while (!eof()) {
1513     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1514                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1515                      Keywords.kw_set)) {
1516       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1517         HasGetOrSet = true;
1518       Tok = Tokens->getNextToken();
1519       continue;
1520     }
1521     if (Tok->isNot(tok::r_brace))
1522       IsTrivialPropertyAccessor = false;
1523     break;
1524   }
1525 
1526   if (!HasGetOrSet) {
1527     Tokens->setPosition(StoredPosition);
1528     return false;
1529   }
1530 
1531   // Try to parse the property accessor:
1532   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1533   Tokens->setPosition(StoredPosition);
1534   if (Style.BraceWrapping.AfterFunction == true)
1535     addUnwrappedLine();
1536   nextToken();
1537   do {
1538     switch (FormatTok->Tok.getKind()) {
1539     case tok::r_brace:
1540       nextToken();
1541       if (FormatTok->is(tok::equal)) {
1542         while (!eof() && FormatTok->isNot(tok::semi))
1543           nextToken();
1544         nextToken();
1545       }
1546       addUnwrappedLine();
1547       return true;
1548     case tok::l_brace:
1549       ++Line->Level;
1550       parseBlock(/*MustBeDeclaration=*/true);
1551       addUnwrappedLine();
1552       --Line->Level;
1553       break;
1554     case tok::equal:
1555       if (FormatTok->is(TT_JsFatArrow)) {
1556         ++Line->Level;
1557         do {
1558           nextToken();
1559         } while (!eof() && FormatTok->isNot(tok::semi));
1560         nextToken();
1561         addUnwrappedLine();
1562         --Line->Level;
1563         break;
1564       }
1565       nextToken();
1566       break;
1567     default:
1568       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1569           !IsTrivialPropertyAccessor) {
1570         // Non-trivial get/set needs to be on its own line.
1571         addUnwrappedLine();
1572       }
1573       nextToken();
1574     }
1575   } while (!eof());
1576 
1577   // Unreachable for well-formed code (paired '{' and '}').
1578   return true;
1579 }
1580 
1581 bool UnwrappedLineParser::tryToParseLambda() {
1582   if (!Style.isCpp()) {
1583     nextToken();
1584     return false;
1585   }
1586   assert(FormatTok->is(tok::l_square));
1587   FormatToken &LSquare = *FormatTok;
1588   if (!tryToParseLambdaIntroducer())
1589     return false;
1590 
1591   bool SeenArrow = false;
1592 
1593   while (FormatTok->isNot(tok::l_brace)) {
1594     if (FormatTok->isSimpleTypeSpecifier()) {
1595       nextToken();
1596       continue;
1597     }
1598     switch (FormatTok->Tok.getKind()) {
1599     case tok::l_brace:
1600       break;
1601     case tok::l_paren:
1602       parseParens();
1603       break;
1604     case tok::amp:
1605     case tok::star:
1606     case tok::kw_const:
1607     case tok::comma:
1608     case tok::less:
1609     case tok::greater:
1610     case tok::identifier:
1611     case tok::numeric_constant:
1612     case tok::coloncolon:
1613     case tok::kw_class:
1614     case tok::kw_mutable:
1615     case tok::kw_noexcept:
1616     case tok::kw_template:
1617     case tok::kw_typename:
1618       nextToken();
1619       break;
1620     // Specialization of a template with an integer parameter can contain
1621     // arithmetic, logical, comparison and ternary operators.
1622     //
1623     // FIXME: This also accepts sequences of operators that are not in the scope
1624     // of a template argument list.
1625     //
1626     // In a C++ lambda a template type can only occur after an arrow. We use
1627     // this as an heuristic to distinguish between Objective-C expressions
1628     // followed by an `a->b` expression, such as:
1629     // ([obj func:arg] + a->b)
1630     // Otherwise the code below would parse as a lambda.
1631     //
1632     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1633     // explicit template lists: []<bool b = true && false>(U &&u){}
1634     case tok::plus:
1635     case tok::minus:
1636     case tok::exclaim:
1637     case tok::tilde:
1638     case tok::slash:
1639     case tok::percent:
1640     case tok::lessless:
1641     case tok::pipe:
1642     case tok::pipepipe:
1643     case tok::ampamp:
1644     case tok::caret:
1645     case tok::equalequal:
1646     case tok::exclaimequal:
1647     case tok::greaterequal:
1648     case tok::lessequal:
1649     case tok::question:
1650     case tok::colon:
1651     case tok::ellipsis:
1652     case tok::kw_true:
1653     case tok::kw_false:
1654       if (SeenArrow) {
1655         nextToken();
1656         break;
1657       }
1658       return true;
1659     case tok::arrow:
1660       // This might or might not actually be a lambda arrow (this could be an
1661       // ObjC method invocation followed by a dereferencing arrow). We might
1662       // reset this back to TT_Unknown in TokenAnnotator.
1663       FormatTok->setType(TT_LambdaArrow);
1664       SeenArrow = true;
1665       nextToken();
1666       break;
1667     default:
1668       return true;
1669     }
1670   }
1671   FormatTok->setType(TT_LambdaLBrace);
1672   LSquare.setType(TT_LambdaLSquare);
1673   parseChildBlock();
1674   return true;
1675 }
1676 
1677 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1678   const FormatToken *Previous = FormatTok->Previous;
1679   if (Previous &&
1680       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1681                          tok::kw_delete, tok::l_square) ||
1682        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1683        Previous->isSimpleTypeSpecifier())) {
1684     nextToken();
1685     return false;
1686   }
1687   nextToken();
1688   if (FormatTok->is(tok::l_square)) {
1689     return false;
1690   }
1691   parseSquare(/*LambdaIntroducer=*/true);
1692   return true;
1693 }
1694 
1695 void UnwrappedLineParser::tryToParseJSFunction() {
1696   assert(FormatTok->is(Keywords.kw_function) ||
1697          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1698   if (FormatTok->is(Keywords.kw_async))
1699     nextToken();
1700   // Consume "function".
1701   nextToken();
1702 
1703   // Consume * (generator function). Treat it like C++'s overloaded operators.
1704   if (FormatTok->is(tok::star)) {
1705     FormatTok->setType(TT_OverloadedOperator);
1706     nextToken();
1707   }
1708 
1709   // Consume function name.
1710   if (FormatTok->is(tok::identifier))
1711     nextToken();
1712 
1713   if (FormatTok->isNot(tok::l_paren))
1714     return;
1715 
1716   // Parse formal parameter list.
1717   parseParens();
1718 
1719   if (FormatTok->is(tok::colon)) {
1720     // Parse a type definition.
1721     nextToken();
1722 
1723     // Eat the type declaration. For braced inline object types, balance braces,
1724     // otherwise just parse until finding an l_brace for the function body.
1725     if (FormatTok->is(tok::l_brace))
1726       tryToParseBracedList();
1727     else
1728       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1729         nextToken();
1730   }
1731 
1732   if (FormatTok->is(tok::semi))
1733     return;
1734 
1735   parseChildBlock();
1736 }
1737 
1738 bool UnwrappedLineParser::tryToParseBracedList() {
1739   if (FormatTok->BlockKind == BK_Unknown)
1740     calculateBraceTypes();
1741   assert(FormatTok->BlockKind != BK_Unknown);
1742   if (FormatTok->BlockKind == BK_Block)
1743     return false;
1744   nextToken();
1745   parseBracedList();
1746   return true;
1747 }
1748 
1749 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1750                                           bool IsEnum,
1751                                           tok::TokenKind ClosingBraceKind) {
1752   bool HasError = false;
1753 
1754   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1755   // replace this by using parseAssigmentExpression() inside.
1756   do {
1757     if (Style.isCSharp()) {
1758       if (FormatTok->is(TT_JsFatArrow)) {
1759         nextToken();
1760         // Fat arrows can be followed by simple expressions or by child blocks
1761         // in curly braces.
1762         if (FormatTok->is(tok::l_brace)) {
1763           parseChildBlock();
1764           continue;
1765         }
1766       }
1767     }
1768     if (Style.Language == FormatStyle::LK_JavaScript) {
1769       if (FormatTok->is(Keywords.kw_function) ||
1770           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1771         tryToParseJSFunction();
1772         continue;
1773       }
1774       if (FormatTok->is(TT_JsFatArrow)) {
1775         nextToken();
1776         // Fat arrows can be followed by simple expressions or by child blocks
1777         // in curly braces.
1778         if (FormatTok->is(tok::l_brace)) {
1779           parseChildBlock();
1780           continue;
1781         }
1782       }
1783       if (FormatTok->is(tok::l_brace)) {
1784         // Could be a method inside of a braced list `{a() { return 1; }}`.
1785         if (tryToParseBracedList())
1786           continue;
1787         parseChildBlock();
1788       }
1789     }
1790     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1791       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1792         addUnwrappedLine();
1793       nextToken();
1794       return !HasError;
1795     }
1796     switch (FormatTok->Tok.getKind()) {
1797     case tok::caret:
1798       nextToken();
1799       if (FormatTok->is(tok::l_brace)) {
1800         parseChildBlock();
1801       }
1802       break;
1803     case tok::l_square:
1804       if (Style.isCSharp())
1805         parseSquare();
1806       else
1807         tryToParseLambda();
1808       break;
1809     case tok::l_paren:
1810       parseParens();
1811       // JavaScript can just have free standing methods and getters/setters in
1812       // object literals. Detect them by a "{" following ")".
1813       if (Style.Language == FormatStyle::LK_JavaScript) {
1814         if (FormatTok->is(tok::l_brace))
1815           parseChildBlock();
1816         break;
1817       }
1818       break;
1819     case tok::l_brace:
1820       // Assume there are no blocks inside a braced init list apart
1821       // from the ones we explicitly parse out (like lambdas).
1822       FormatTok->BlockKind = BK_BracedInit;
1823       nextToken();
1824       parseBracedList();
1825       break;
1826     case tok::less:
1827       if (Style.Language == FormatStyle::LK_Proto) {
1828         nextToken();
1829         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1830                         /*ClosingBraceKind=*/tok::greater);
1831       } else {
1832         nextToken();
1833       }
1834       break;
1835     case tok::semi:
1836       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1837       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1838       // used for error recovery if we have otherwise determined that this is
1839       // a braced list.
1840       if (Style.Language == FormatStyle::LK_JavaScript) {
1841         nextToken();
1842         break;
1843       }
1844       HasError = true;
1845       if (!ContinueOnSemicolons)
1846         return !HasError;
1847       nextToken();
1848       break;
1849     case tok::comma:
1850       nextToken();
1851       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1852         addUnwrappedLine();
1853       break;
1854     default:
1855       nextToken();
1856       break;
1857     }
1858   } while (!eof());
1859   return false;
1860 }
1861 
1862 void UnwrappedLineParser::parseParens() {
1863   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1864   nextToken();
1865   do {
1866     switch (FormatTok->Tok.getKind()) {
1867     case tok::l_paren:
1868       parseParens();
1869       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1870         parseChildBlock();
1871       break;
1872     case tok::r_paren:
1873       nextToken();
1874       return;
1875     case tok::r_brace:
1876       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1877       return;
1878     case tok::l_square:
1879       tryToParseLambda();
1880       break;
1881     case tok::l_brace:
1882       if (!tryToParseBracedList())
1883         parseChildBlock();
1884       break;
1885     case tok::at:
1886       nextToken();
1887       if (FormatTok->Tok.is(tok::l_brace)) {
1888         nextToken();
1889         parseBracedList();
1890       }
1891       break;
1892     case tok::kw_class:
1893       if (Style.Language == FormatStyle::LK_JavaScript)
1894         parseRecord(/*ParseAsExpr=*/true);
1895       else
1896         nextToken();
1897       break;
1898     case tok::identifier:
1899       if (Style.Language == FormatStyle::LK_JavaScript &&
1900           (FormatTok->is(Keywords.kw_function) ||
1901            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1902         tryToParseJSFunction();
1903       else
1904         nextToken();
1905       break;
1906     default:
1907       nextToken();
1908       break;
1909     }
1910   } while (!eof());
1911 }
1912 
1913 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1914   if (!LambdaIntroducer) {
1915     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1916     if (tryToParseLambda())
1917       return;
1918   }
1919   do {
1920     switch (FormatTok->Tok.getKind()) {
1921     case tok::l_paren:
1922       parseParens();
1923       break;
1924     case tok::r_square:
1925       nextToken();
1926       return;
1927     case tok::r_brace:
1928       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1929       return;
1930     case tok::l_square:
1931       parseSquare();
1932       break;
1933     case tok::l_brace: {
1934       if (!tryToParseBracedList())
1935         parseChildBlock();
1936       break;
1937     }
1938     case tok::at:
1939       nextToken();
1940       if (FormatTok->Tok.is(tok::l_brace)) {
1941         nextToken();
1942         parseBracedList();
1943       }
1944       break;
1945     default:
1946       nextToken();
1947       break;
1948     }
1949   } while (!eof());
1950 }
1951 
1952 void UnwrappedLineParser::parseIfThenElse() {
1953   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1954   nextToken();
1955   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1956     nextToken();
1957   if (FormatTok->Tok.is(tok::l_paren))
1958     parseParens();
1959   // handle [[likely]] / [[unlikely]]
1960   if (FormatTok->is(tok::l_square))
1961     parseSquare();
1962   bool NeedsUnwrappedLine = false;
1963   if (FormatTok->Tok.is(tok::l_brace)) {
1964     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1965     parseBlock(/*MustBeDeclaration=*/false);
1966     if (Style.BraceWrapping.BeforeElse)
1967       addUnwrappedLine();
1968     else
1969       NeedsUnwrappedLine = true;
1970   } else {
1971     addUnwrappedLine();
1972     ++Line->Level;
1973     parseStructuralElement();
1974     --Line->Level;
1975   }
1976   if (FormatTok->Tok.is(tok::kw_else)) {
1977     nextToken();
1978     // handle [[likely]] / [[unlikely]]
1979     if (FormatTok->is(tok::l_square))
1980       parseSquare();
1981     if (FormatTok->Tok.is(tok::l_brace)) {
1982       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1983       parseBlock(/*MustBeDeclaration=*/false);
1984       addUnwrappedLine();
1985     } else if (FormatTok->Tok.is(tok::kw_if)) {
1986       parseIfThenElse();
1987     } else {
1988       addUnwrappedLine();
1989       ++Line->Level;
1990       parseStructuralElement();
1991       if (FormatTok->is(tok::eof))
1992         addUnwrappedLine();
1993       --Line->Level;
1994     }
1995   } else if (NeedsUnwrappedLine) {
1996     addUnwrappedLine();
1997   }
1998 }
1999 
2000 void UnwrappedLineParser::parseTryCatch() {
2001   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2002   nextToken();
2003   bool NeedsUnwrappedLine = false;
2004   if (FormatTok->is(tok::colon)) {
2005     // We are in a function try block, what comes is an initializer list.
2006     nextToken();
2007 
2008     // In case identifiers were removed by clang-tidy, what might follow is
2009     // multiple commas in sequence - before the first identifier.
2010     while (FormatTok->is(tok::comma))
2011       nextToken();
2012 
2013     while (FormatTok->is(tok::identifier)) {
2014       nextToken();
2015       if (FormatTok->is(tok::l_paren))
2016         parseParens();
2017 
2018       // In case identifiers were removed by clang-tidy, what might follow is
2019       // multiple commas in sequence - after the first identifier.
2020       while (FormatTok->is(tok::comma))
2021         nextToken();
2022     }
2023   }
2024   // Parse try with resource.
2025   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2026     parseParens();
2027   }
2028   if (FormatTok->is(tok::l_brace)) {
2029     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2030     parseBlock(/*MustBeDeclaration=*/false);
2031     if (Style.BraceWrapping.BeforeCatch) {
2032       addUnwrappedLine();
2033     } else {
2034       NeedsUnwrappedLine = true;
2035     }
2036   } else if (!FormatTok->is(tok::kw_catch)) {
2037     // The C++ standard requires a compound-statement after a try.
2038     // If there's none, we try to assume there's a structuralElement
2039     // and try to continue.
2040     addUnwrappedLine();
2041     ++Line->Level;
2042     parseStructuralElement();
2043     --Line->Level;
2044   }
2045   while (1) {
2046     if (FormatTok->is(tok::at))
2047       nextToken();
2048     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2049                              tok::kw___finally) ||
2050           ((Style.Language == FormatStyle::LK_Java ||
2051             Style.Language == FormatStyle::LK_JavaScript) &&
2052            FormatTok->is(Keywords.kw_finally)) ||
2053           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2054            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2055       break;
2056     nextToken();
2057     while (FormatTok->isNot(tok::l_brace)) {
2058       if (FormatTok->is(tok::l_paren)) {
2059         parseParens();
2060         continue;
2061       }
2062       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2063         return;
2064       nextToken();
2065     }
2066     NeedsUnwrappedLine = false;
2067     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2068     parseBlock(/*MustBeDeclaration=*/false);
2069     if (Style.BraceWrapping.BeforeCatch)
2070       addUnwrappedLine();
2071     else
2072       NeedsUnwrappedLine = true;
2073   }
2074   if (NeedsUnwrappedLine)
2075     addUnwrappedLine();
2076 }
2077 
2078 void UnwrappedLineParser::parseNamespace() {
2079   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2080          "'namespace' expected");
2081 
2082   const FormatToken &InitialToken = *FormatTok;
2083   nextToken();
2084   if (InitialToken.is(TT_NamespaceMacro)) {
2085     parseParens();
2086   } else {
2087     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2088                               tok::l_square)) {
2089       if (FormatTok->is(tok::l_square))
2090         parseSquare();
2091       else
2092         nextToken();
2093     }
2094   }
2095   if (FormatTok->Tok.is(tok::l_brace)) {
2096     if (ShouldBreakBeforeBrace(Style, InitialToken))
2097       addUnwrappedLine();
2098 
2099     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
2100                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2101                      DeclarationScopeStack.size() > 1);
2102     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
2103     // Munch the semicolon after a namespace. This is more common than one would
2104     // think. Putting the semicolon into its own line is very ugly.
2105     if (FormatTok->Tok.is(tok::semi))
2106       nextToken();
2107     addUnwrappedLine();
2108   }
2109   // FIXME: Add error handling.
2110 }
2111 
2112 void UnwrappedLineParser::parseNew() {
2113   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2114   nextToken();
2115 
2116   if (Style.isCSharp()) {
2117     do {
2118       if (FormatTok->is(tok::l_brace))
2119         parseBracedList();
2120 
2121       if (FormatTok->isOneOf(tok::semi, tok::comma))
2122         return;
2123 
2124       nextToken();
2125     } while (!eof());
2126   }
2127 
2128   if (Style.Language != FormatStyle::LK_Java)
2129     return;
2130 
2131   // In Java, we can parse everything up to the parens, which aren't optional.
2132   do {
2133     // There should not be a ;, { or } before the new's open paren.
2134     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2135       return;
2136 
2137     // Consume the parens.
2138     if (FormatTok->is(tok::l_paren)) {
2139       parseParens();
2140 
2141       // If there is a class body of an anonymous class, consume that as child.
2142       if (FormatTok->is(tok::l_brace))
2143         parseChildBlock();
2144       return;
2145     }
2146     nextToken();
2147   } while (!eof());
2148 }
2149 
2150 void UnwrappedLineParser::parseForOrWhileLoop() {
2151   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2152          "'for', 'while' or foreach macro expected");
2153   nextToken();
2154   // JS' for await ( ...
2155   if (Style.Language == FormatStyle::LK_JavaScript &&
2156       FormatTok->is(Keywords.kw_await))
2157     nextToken();
2158   if (FormatTok->Tok.is(tok::l_paren))
2159     parseParens();
2160   if (FormatTok->Tok.is(tok::l_brace)) {
2161     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2162     parseBlock(/*MustBeDeclaration=*/false);
2163     addUnwrappedLine();
2164   } else {
2165     addUnwrappedLine();
2166     ++Line->Level;
2167     parseStructuralElement();
2168     --Line->Level;
2169   }
2170 }
2171 
2172 void UnwrappedLineParser::parseDoWhile() {
2173   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2174   nextToken();
2175   if (FormatTok->Tok.is(tok::l_brace)) {
2176     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2177     parseBlock(/*MustBeDeclaration=*/false);
2178     if (Style.BraceWrapping.BeforeWhile)
2179       addUnwrappedLine();
2180   } else {
2181     addUnwrappedLine();
2182     ++Line->Level;
2183     parseStructuralElement();
2184     --Line->Level;
2185   }
2186 
2187   // FIXME: Add error handling.
2188   if (!FormatTok->Tok.is(tok::kw_while)) {
2189     addUnwrappedLine();
2190     return;
2191   }
2192 
2193   nextToken();
2194   parseStructuralElement();
2195 }
2196 
2197 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2198   nextToken();
2199   unsigned OldLineLevel = Line->Level;
2200   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2201     --Line->Level;
2202   if (LeftAlignLabel)
2203     Line->Level = 0;
2204   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2205       FormatTok->Tok.is(tok::l_brace)) {
2206     CompoundStatementIndenter Indenter(this, Line->Level,
2207                                        Style.BraceWrapping.AfterCaseLabel,
2208                                        Style.BraceWrapping.IndentBraces);
2209     parseBlock(/*MustBeDeclaration=*/false);
2210     if (FormatTok->Tok.is(tok::kw_break)) {
2211       if (Style.BraceWrapping.AfterControlStatement ==
2212           FormatStyle::BWACS_Always)
2213         addUnwrappedLine();
2214       parseStructuralElement();
2215     }
2216     addUnwrappedLine();
2217   } else {
2218     if (FormatTok->is(tok::semi))
2219       nextToken();
2220     addUnwrappedLine();
2221   }
2222   Line->Level = OldLineLevel;
2223   if (FormatTok->isNot(tok::l_brace)) {
2224     parseStructuralElement();
2225     addUnwrappedLine();
2226   }
2227 }
2228 
2229 void UnwrappedLineParser::parseCaseLabel() {
2230   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2231   // FIXME: fix handling of complex expressions here.
2232   do {
2233     nextToken();
2234   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2235   parseLabel();
2236 }
2237 
2238 void UnwrappedLineParser::parseSwitch() {
2239   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2240   nextToken();
2241   if (FormatTok->Tok.is(tok::l_paren))
2242     parseParens();
2243   if (FormatTok->Tok.is(tok::l_brace)) {
2244     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2245     parseBlock(/*MustBeDeclaration=*/false);
2246     addUnwrappedLine();
2247   } else {
2248     addUnwrappedLine();
2249     ++Line->Level;
2250     parseStructuralElement();
2251     --Line->Level;
2252   }
2253 }
2254 
2255 void UnwrappedLineParser::parseAccessSpecifier() {
2256   nextToken();
2257   // Understand Qt's slots.
2258   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2259     nextToken();
2260   // Otherwise, we don't know what it is, and we'd better keep the next token.
2261   if (FormatTok->Tok.is(tok::colon))
2262     nextToken();
2263   addUnwrappedLine();
2264 }
2265 
2266 bool UnwrappedLineParser::parseEnum() {
2267   // Won't be 'enum' for NS_ENUMs.
2268   if (FormatTok->Tok.is(tok::kw_enum))
2269     nextToken();
2270 
2271   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2272   // declarations. An "enum" keyword followed by a colon would be a syntax
2273   // error and thus assume it is just an identifier.
2274   if (Style.Language == FormatStyle::LK_JavaScript &&
2275       FormatTok->isOneOf(tok::colon, tok::question))
2276     return false;
2277 
2278   // In protobuf, "enum" can be used as a field name.
2279   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2280     return false;
2281 
2282   // Eat up enum class ...
2283   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2284     nextToken();
2285 
2286   while (FormatTok->Tok.getIdentifierInfo() ||
2287          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2288                             tok::greater, tok::comma, tok::question)) {
2289     nextToken();
2290     // We can have macros or attributes in between 'enum' and the enum name.
2291     if (FormatTok->is(tok::l_paren))
2292       parseParens();
2293     if (FormatTok->is(tok::identifier)) {
2294       nextToken();
2295       // If there are two identifiers in a row, this is likely an elaborate
2296       // return type. In Java, this can be "implements", etc.
2297       if (Style.isCpp() && FormatTok->is(tok::identifier))
2298         return false;
2299     }
2300   }
2301 
2302   // Just a declaration or something is wrong.
2303   if (FormatTok->isNot(tok::l_brace))
2304     return true;
2305   FormatTok->BlockKind = BK_Block;
2306 
2307   if (Style.Language == FormatStyle::LK_Java) {
2308     // Java enums are different.
2309     parseJavaEnumBody();
2310     return true;
2311   }
2312   if (Style.Language == FormatStyle::LK_Proto) {
2313     parseBlock(/*MustBeDeclaration=*/true);
2314     return true;
2315   }
2316 
2317   if (!Style.AllowShortEnumsOnASingleLine)
2318     addUnwrappedLine();
2319   // Parse enum body.
2320   nextToken();
2321   if (!Style.AllowShortEnumsOnASingleLine) {
2322     addUnwrappedLine();
2323     Line->Level += 1;
2324   }
2325   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2326                                    /*IsEnum=*/true);
2327   if (!Style.AllowShortEnumsOnASingleLine)
2328     Line->Level -= 1;
2329   if (HasError) {
2330     if (FormatTok->is(tok::semi))
2331       nextToken();
2332     addUnwrappedLine();
2333   }
2334   return true;
2335 
2336   // There is no addUnwrappedLine() here so that we fall through to parsing a
2337   // structural element afterwards. Thus, in "enum A {} n, m;",
2338   // "} n, m;" will end up in one unwrapped line.
2339 }
2340 
2341 void UnwrappedLineParser::parseJavaEnumBody() {
2342   // Determine whether the enum is simple, i.e. does not have a semicolon or
2343   // constants with class bodies. Simple enums can be formatted like braced
2344   // lists, contracted to a single line, etc.
2345   unsigned StoredPosition = Tokens->getPosition();
2346   bool IsSimple = true;
2347   FormatToken *Tok = Tokens->getNextToken();
2348   while (Tok) {
2349     if (Tok->is(tok::r_brace))
2350       break;
2351     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2352       IsSimple = false;
2353       break;
2354     }
2355     // FIXME: This will also mark enums with braces in the arguments to enum
2356     // constants as "not simple". This is probably fine in practice, though.
2357     Tok = Tokens->getNextToken();
2358   }
2359   FormatTok = Tokens->setPosition(StoredPosition);
2360 
2361   if (IsSimple) {
2362     nextToken();
2363     parseBracedList();
2364     addUnwrappedLine();
2365     return;
2366   }
2367 
2368   // Parse the body of a more complex enum.
2369   // First add a line for everything up to the "{".
2370   nextToken();
2371   addUnwrappedLine();
2372   ++Line->Level;
2373 
2374   // Parse the enum constants.
2375   while (FormatTok) {
2376     if (FormatTok->is(tok::l_brace)) {
2377       // Parse the constant's class body.
2378       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2379                  /*MunchSemi=*/false);
2380     } else if (FormatTok->is(tok::l_paren)) {
2381       parseParens();
2382     } else if (FormatTok->is(tok::comma)) {
2383       nextToken();
2384       addUnwrappedLine();
2385     } else if (FormatTok->is(tok::semi)) {
2386       nextToken();
2387       addUnwrappedLine();
2388       break;
2389     } else if (FormatTok->is(tok::r_brace)) {
2390       addUnwrappedLine();
2391       break;
2392     } else {
2393       nextToken();
2394     }
2395   }
2396 
2397   // Parse the class body after the enum's ";" if any.
2398   parseLevel(/*HasOpeningBrace=*/true);
2399   nextToken();
2400   --Line->Level;
2401   addUnwrappedLine();
2402 }
2403 
2404 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2405   const FormatToken &InitialToken = *FormatTok;
2406   nextToken();
2407 
2408   // The actual identifier can be a nested name specifier, and in macros
2409   // it is often token-pasted.
2410   // An [[attribute]] can be before the identifier.
2411   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2412                             tok::kw___attribute, tok::kw___declspec,
2413                             tok::kw_alignas, tok::l_square, tok::r_square) ||
2414          ((Style.Language == FormatStyle::LK_Java ||
2415            Style.Language == FormatStyle::LK_JavaScript) &&
2416           FormatTok->isOneOf(tok::period, tok::comma))) {
2417     if (Style.Language == FormatStyle::LK_JavaScript &&
2418         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2419       // JavaScript/TypeScript supports inline object types in
2420       // extends/implements positions:
2421       //     class Foo implements {bar: number} { }
2422       nextToken();
2423       if (FormatTok->is(tok::l_brace)) {
2424         tryToParseBracedList();
2425         continue;
2426       }
2427     }
2428     bool IsNonMacroIdentifier =
2429         FormatTok->is(tok::identifier) &&
2430         FormatTok->TokenText != FormatTok->TokenText.upper();
2431     nextToken();
2432     // We can have macros or attributes in between 'class' and the class name.
2433     if (!IsNonMacroIdentifier) {
2434       if (FormatTok->Tok.is(tok::l_paren)) {
2435         parseParens();
2436       } else if (FormatTok->is(TT_AttributeSquare)) {
2437         parseSquare();
2438         // Consume the closing TT_AttributeSquare.
2439         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
2440           nextToken();
2441       }
2442     }
2443   }
2444 
2445   // Note that parsing away template declarations here leads to incorrectly
2446   // accepting function declarations as record declarations.
2447   // In general, we cannot solve this problem. Consider:
2448   // class A<int> B() {}
2449   // which can be a function definition or a class definition when B() is a
2450   // macro. If we find enough real-world cases where this is a problem, we
2451   // can parse for the 'template' keyword in the beginning of the statement,
2452   // and thus rule out the record production in case there is no template
2453   // (this would still leave us with an ambiguity between template function
2454   // and class declarations).
2455   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2456     while (!eof()) {
2457       if (FormatTok->is(tok::l_brace)) {
2458         calculateBraceTypes(/*ExpectClassBody=*/true);
2459         if (!tryToParseBracedList())
2460           break;
2461       }
2462       if (FormatTok->Tok.is(tok::semi))
2463         return;
2464       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
2465         addUnwrappedLine();
2466         nextToken();
2467         parseCSharpGenericTypeConstraint();
2468         break;
2469       }
2470       nextToken();
2471     }
2472   }
2473   if (FormatTok->Tok.is(tok::l_brace)) {
2474     if (ParseAsExpr) {
2475       parseChildBlock();
2476     } else {
2477       if (ShouldBreakBeforeBrace(Style, InitialToken))
2478         addUnwrappedLine();
2479 
2480       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2481                  /*MunchSemi=*/false);
2482     }
2483   }
2484   // There is no addUnwrappedLine() here so that we fall through to parsing a
2485   // structural element afterwards. Thus, in "class A {} n, m;",
2486   // "} n, m;" will end up in one unwrapped line.
2487 }
2488 
2489 void UnwrappedLineParser::parseObjCMethod() {
2490   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2491          "'(' or identifier expected.");
2492   do {
2493     if (FormatTok->Tok.is(tok::semi)) {
2494       nextToken();
2495       addUnwrappedLine();
2496       return;
2497     } else if (FormatTok->Tok.is(tok::l_brace)) {
2498       if (Style.BraceWrapping.AfterFunction)
2499         addUnwrappedLine();
2500       parseBlock(/*MustBeDeclaration=*/false);
2501       addUnwrappedLine();
2502       return;
2503     } else {
2504       nextToken();
2505     }
2506   } while (!eof());
2507 }
2508 
2509 void UnwrappedLineParser::parseObjCProtocolList() {
2510   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2511   do {
2512     nextToken();
2513     // Early exit in case someone forgot a close angle.
2514     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2515         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2516       return;
2517   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2518   nextToken(); // Skip '>'.
2519 }
2520 
2521 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2522   do {
2523     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2524       nextToken();
2525       addUnwrappedLine();
2526       break;
2527     }
2528     if (FormatTok->is(tok::l_brace)) {
2529       parseBlock(/*MustBeDeclaration=*/false);
2530       // In ObjC interfaces, nothing should be following the "}".
2531       addUnwrappedLine();
2532     } else if (FormatTok->is(tok::r_brace)) {
2533       // Ignore stray "}". parseStructuralElement doesn't consume them.
2534       nextToken();
2535       addUnwrappedLine();
2536     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2537       nextToken();
2538       parseObjCMethod();
2539     } else {
2540       parseStructuralElement();
2541     }
2542   } while (!eof());
2543 }
2544 
2545 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2546   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2547          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2548   nextToken();
2549   nextToken(); // interface name
2550 
2551   // @interface can be followed by a lightweight generic
2552   // specialization list, then either a base class or a category.
2553   if (FormatTok->Tok.is(tok::less)) {
2554     // Unlike protocol lists, generic parameterizations support
2555     // nested angles:
2556     //
2557     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2558     //     NSObject <NSCopying, NSSecureCoding>
2559     //
2560     // so we need to count how many open angles we have left.
2561     unsigned NumOpenAngles = 1;
2562     do {
2563       nextToken();
2564       // Early exit in case someone forgot a close angle.
2565       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2566           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2567         break;
2568       if (FormatTok->Tok.is(tok::less))
2569         ++NumOpenAngles;
2570       else if (FormatTok->Tok.is(tok::greater)) {
2571         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2572         --NumOpenAngles;
2573       }
2574     } while (!eof() && NumOpenAngles != 0);
2575     nextToken(); // Skip '>'.
2576   }
2577   if (FormatTok->Tok.is(tok::colon)) {
2578     nextToken();
2579     nextToken(); // base class name
2580   } else if (FormatTok->Tok.is(tok::l_paren))
2581     // Skip category, if present.
2582     parseParens();
2583 
2584   if (FormatTok->Tok.is(tok::less))
2585     parseObjCProtocolList();
2586 
2587   if (FormatTok->Tok.is(tok::l_brace)) {
2588     if (Style.BraceWrapping.AfterObjCDeclaration)
2589       addUnwrappedLine();
2590     parseBlock(/*MustBeDeclaration=*/true);
2591   }
2592 
2593   // With instance variables, this puts '}' on its own line.  Without instance
2594   // variables, this ends the @interface line.
2595   addUnwrappedLine();
2596 
2597   parseObjCUntilAtEnd();
2598 }
2599 
2600 // Returns true for the declaration/definition form of @protocol,
2601 // false for the expression form.
2602 bool UnwrappedLineParser::parseObjCProtocol() {
2603   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2604   nextToken();
2605 
2606   if (FormatTok->is(tok::l_paren))
2607     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2608     return false;
2609 
2610   // The definition/declaration form,
2611   // @protocol Foo
2612   // - (int)someMethod;
2613   // @end
2614 
2615   nextToken(); // protocol name
2616 
2617   if (FormatTok->Tok.is(tok::less))
2618     parseObjCProtocolList();
2619 
2620   // Check for protocol declaration.
2621   if (FormatTok->Tok.is(tok::semi)) {
2622     nextToken();
2623     addUnwrappedLine();
2624     return true;
2625   }
2626 
2627   addUnwrappedLine();
2628   parseObjCUntilAtEnd();
2629   return true;
2630 }
2631 
2632 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2633   bool IsImport = FormatTok->is(Keywords.kw_import);
2634   assert(IsImport || FormatTok->is(tok::kw_export));
2635   nextToken();
2636 
2637   // Consume the "default" in "export default class/function".
2638   if (FormatTok->is(tok::kw_default))
2639     nextToken();
2640 
2641   // Consume "async function", "function" and "default function", so that these
2642   // get parsed as free-standing JS functions, i.e. do not require a trailing
2643   // semicolon.
2644   if (FormatTok->is(Keywords.kw_async))
2645     nextToken();
2646   if (FormatTok->is(Keywords.kw_function)) {
2647     nextToken();
2648     return;
2649   }
2650 
2651   // For imports, `export *`, `export {...}`, consume the rest of the line up
2652   // to the terminating `;`. For everything else, just return and continue
2653   // parsing the structural element, i.e. the declaration or expression for
2654   // `export default`.
2655   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2656       !FormatTok->isStringLiteral())
2657     return;
2658 
2659   while (!eof()) {
2660     if (FormatTok->is(tok::semi))
2661       return;
2662     if (Line->Tokens.empty()) {
2663       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2664       // import statement should terminate.
2665       return;
2666     }
2667     if (FormatTok->is(tok::l_brace)) {
2668       FormatTok->BlockKind = BK_Block;
2669       nextToken();
2670       parseBracedList();
2671     } else {
2672       nextToken();
2673     }
2674   }
2675 }
2676 
2677 void UnwrappedLineParser::parseStatementMacro() {
2678   nextToken();
2679   if (FormatTok->is(tok::l_paren))
2680     parseParens();
2681   if (FormatTok->is(tok::semi))
2682     nextToken();
2683   addUnwrappedLine();
2684 }
2685 
2686 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2687                                                  StringRef Prefix = "") {
2688   llvm::dbgs() << Prefix << "Line(" << Line.Level
2689                << ", FSC=" << Line.FirstStartColumn << ")"
2690                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2691   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2692                                                     E = Line.Tokens.end();
2693        I != E; ++I) {
2694     llvm::dbgs() << I->Tok->Tok.getName() << "["
2695                  << "T=" << I->Tok->getType()
2696                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2697   }
2698   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2699                                                     E = Line.Tokens.end();
2700        I != E; ++I) {
2701     const UnwrappedLineNode &Node = *I;
2702     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2703              I = Node.Children.begin(),
2704              E = Node.Children.end();
2705          I != E; ++I) {
2706       printDebugInfo(*I, "\nChild: ");
2707     }
2708   }
2709   llvm::dbgs() << "\n";
2710 }
2711 
2712 void UnwrappedLineParser::addUnwrappedLine() {
2713   if (Line->Tokens.empty())
2714     return;
2715   LLVM_DEBUG({
2716     if (CurrentLines == &Lines)
2717       printDebugInfo(*Line);
2718   });
2719   CurrentLines->push_back(std::move(*Line));
2720   Line->Tokens.clear();
2721   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2722   Line->FirstStartColumn = 0;
2723   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2724     CurrentLines->append(
2725         std::make_move_iterator(PreprocessorDirectives.begin()),
2726         std::make_move_iterator(PreprocessorDirectives.end()));
2727     PreprocessorDirectives.clear();
2728   }
2729   // Disconnect the current token from the last token on the previous line.
2730   FormatTok->Previous = nullptr;
2731 }
2732 
2733 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2734 
2735 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2736   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2737          FormatTok.NewlinesBefore > 0;
2738 }
2739 
2740 // Checks if \p FormatTok is a line comment that continues the line comment
2741 // section on \p Line.
2742 static bool
2743 continuesLineCommentSection(const FormatToken &FormatTok,
2744                             const UnwrappedLine &Line,
2745                             const llvm::Regex &CommentPragmasRegex) {
2746   if (Line.Tokens.empty())
2747     return false;
2748 
2749   StringRef IndentContent = FormatTok.TokenText;
2750   if (FormatTok.TokenText.startswith("//") ||
2751       FormatTok.TokenText.startswith("/*"))
2752     IndentContent = FormatTok.TokenText.substr(2);
2753   if (CommentPragmasRegex.match(IndentContent))
2754     return false;
2755 
2756   // If Line starts with a line comment, then FormatTok continues the comment
2757   // section if its original column is greater or equal to the original start
2758   // column of the line.
2759   //
2760   // Define the min column token of a line as follows: if a line ends in '{' or
2761   // contains a '{' followed by a line comment, then the min column token is
2762   // that '{'. Otherwise, the min column token of the line is the first token of
2763   // the line.
2764   //
2765   // If Line starts with a token other than a line comment, then FormatTok
2766   // continues the comment section if its original column is greater than the
2767   // original start column of the min column token of the line.
2768   //
2769   // For example, the second line comment continues the first in these cases:
2770   //
2771   // // first line
2772   // // second line
2773   //
2774   // and:
2775   //
2776   // // first line
2777   //  // second line
2778   //
2779   // and:
2780   //
2781   // int i; // first line
2782   //  // second line
2783   //
2784   // and:
2785   //
2786   // do { // first line
2787   //      // second line
2788   //   int i;
2789   // } while (true);
2790   //
2791   // and:
2792   //
2793   // enum {
2794   //   a, // first line
2795   //    // second line
2796   //   b
2797   // };
2798   //
2799   // The second line comment doesn't continue the first in these cases:
2800   //
2801   //   // first line
2802   //  // second line
2803   //
2804   // and:
2805   //
2806   // int i; // first line
2807   // // second line
2808   //
2809   // and:
2810   //
2811   // do { // first line
2812   //   // second line
2813   //   int i;
2814   // } while (true);
2815   //
2816   // and:
2817   //
2818   // enum {
2819   //   a, // first line
2820   //   // second line
2821   // };
2822   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2823 
2824   // Scan for '{//'. If found, use the column of '{' as a min column for line
2825   // comment section continuation.
2826   const FormatToken *PreviousToken = nullptr;
2827   for (const UnwrappedLineNode &Node : Line.Tokens) {
2828     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2829         isLineComment(*Node.Tok)) {
2830       MinColumnToken = PreviousToken;
2831       break;
2832     }
2833     PreviousToken = Node.Tok;
2834 
2835     // Grab the last newline preceding a token in this unwrapped line.
2836     if (Node.Tok->NewlinesBefore > 0) {
2837       MinColumnToken = Node.Tok;
2838     }
2839   }
2840   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2841     MinColumnToken = PreviousToken;
2842   }
2843 
2844   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2845                               MinColumnToken);
2846 }
2847 
2848 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2849   bool JustComments = Line->Tokens.empty();
2850   for (SmallVectorImpl<FormatToken *>::const_iterator
2851            I = CommentsBeforeNextToken.begin(),
2852            E = CommentsBeforeNextToken.end();
2853        I != E; ++I) {
2854     // Line comments that belong to the same line comment section are put on the
2855     // same line since later we might want to reflow content between them.
2856     // Additional fine-grained breaking of line comment sections is controlled
2857     // by the class BreakableLineCommentSection in case it is desirable to keep
2858     // several line comment sections in the same unwrapped line.
2859     //
2860     // FIXME: Consider putting separate line comment sections as children to the
2861     // unwrapped line instead.
2862     (*I)->ContinuesLineCommentSection =
2863         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2864     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2865       addUnwrappedLine();
2866     pushToken(*I);
2867   }
2868   if (NewlineBeforeNext && JustComments)
2869     addUnwrappedLine();
2870   CommentsBeforeNextToken.clear();
2871 }
2872 
2873 void UnwrappedLineParser::nextToken(int LevelDifference) {
2874   if (eof())
2875     return;
2876   flushComments(isOnNewLine(*FormatTok));
2877   pushToken(FormatTok);
2878   FormatToken *Previous = FormatTok;
2879   if (Style.Language != FormatStyle::LK_JavaScript)
2880     readToken(LevelDifference);
2881   else
2882     readTokenWithJavaScriptASI();
2883   FormatTok->Previous = Previous;
2884 }
2885 
2886 void UnwrappedLineParser::distributeComments(
2887     const SmallVectorImpl<FormatToken *> &Comments,
2888     const FormatToken *NextTok) {
2889   // Whether or not a line comment token continues a line is controlled by
2890   // the method continuesLineCommentSection, with the following caveat:
2891   //
2892   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2893   // that each comment line from the trail is aligned with the next token, if
2894   // the next token exists. If a trail exists, the beginning of the maximal
2895   // trail is marked as a start of a new comment section.
2896   //
2897   // For example in this code:
2898   //
2899   // int a; // line about a
2900   //   // line 1 about b
2901   //   // line 2 about b
2902   //   int b;
2903   //
2904   // the two lines about b form a maximal trail, so there are two sections, the
2905   // first one consisting of the single comment "// line about a" and the
2906   // second one consisting of the next two comments.
2907   if (Comments.empty())
2908     return;
2909   bool ShouldPushCommentsInCurrentLine = true;
2910   bool HasTrailAlignedWithNextToken = false;
2911   unsigned StartOfTrailAlignedWithNextToken = 0;
2912   if (NextTok) {
2913     // We are skipping the first element intentionally.
2914     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2915       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2916         HasTrailAlignedWithNextToken = true;
2917         StartOfTrailAlignedWithNextToken = i;
2918       }
2919     }
2920   }
2921   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2922     FormatToken *FormatTok = Comments[i];
2923     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2924       FormatTok->ContinuesLineCommentSection = false;
2925     } else {
2926       FormatTok->ContinuesLineCommentSection =
2927           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2928     }
2929     if (!FormatTok->ContinuesLineCommentSection &&
2930         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2931       ShouldPushCommentsInCurrentLine = false;
2932     }
2933     if (ShouldPushCommentsInCurrentLine) {
2934       pushToken(FormatTok);
2935     } else {
2936       CommentsBeforeNextToken.push_back(FormatTok);
2937     }
2938   }
2939 }
2940 
// Advances FormatTok to the next token that is not a comment, or to the last
// token fetched when the end of input is reached.
//
// Comments encountered on the way are collected and handed to
// distributeComments. Lines starting with '#' outside of a preprocessor
// directive are parsed via parsePPDirective, and conflict marker tokens are
// dispatched to the conditionalCompilation* handlers. While the innermost
// PPStack entry is PP_Unreachable (and no directive is being parsed), fetched
// tokens are skipped.
//
// \p LevelDifference is added to Line->Level while a preprocessor directive
// encountered here is parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments seen since the last call to distributeComments.
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' that follows an unescaped newline (or is the first token) and is
    // not already inside a directive starts a preprocessor directive. This is
    // a loop, so consecutive directives are all handled here.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      // NOTE(review): ScopedLineState is defined outside this chunk;
      // presumably it saves the current line state and restores it at the end
      // of each loop iteration - confirm against its definition.
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict marker tokens are handled like conditional compilation: each
    // marker is dispatched to the matching handler and then replaced by the
    // following token, which is forced onto a new line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->getType() == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->getType() == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
      FormatTok->MustBreakAlignBefore = true;
    }

    // Skip this token if the innermost preprocessor branch is unreachable and
    // no directive is being parsed. The 'continue' still evaluates the
    // '!eof()' loop condition, so skipping stops at the end of input.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // A non-comment token ends the search: distribute the collected comments
    // relative to it and leave it in FormatTok.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // End of input reached while collecting comments (or skipping tokens):
  // there is no next token to align the remaining comments with.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
2999 
3000 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3001   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3002   if (MustBreakBeforeNextToken) {
3003     Line->Tokens.back().Tok->MustBreakBefore = true;
3004     Line->Tokens.back().Tok->MustBreakAlignBefore = true;
3005     MustBreakBeforeNextToken = false;
3006   }
3007 }
3008 
3009 } // end namespace format
3010 } // end namespace clang
3011