1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <algorithm>
21 
22 #define DEBUG_TYPE "format-parser"
23 
24 namespace clang {
25 namespace format {
26 
27 class FormatTokenSource {
28 public:
29   virtual ~FormatTokenSource() {}
30   virtual FormatToken *getNextToken() = 0;
31 
32   virtual unsigned getPosition() = 0;
33   virtual FormatToken *setPosition(unsigned Position) = 0;
34 };
35 
36 namespace {
37 
38 class ScopedDeclarationState {
39 public:
40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
41                          bool MustBeDeclaration)
42       : Line(Line), Stack(Stack) {
43     Line.MustBeDeclaration = MustBeDeclaration;
44     Stack.push_back(MustBeDeclaration);
45   }
46   ~ScopedDeclarationState() {
47     Stack.pop_back();
48     if (!Stack.empty())
49       Line.MustBeDeclaration = Stack.back();
50     else
51       Line.MustBeDeclaration = true;
52   }
53 
54 private:
55   UnwrappedLine &Line;
56   std::vector<bool> &Stack;
57 };
58 
59 static bool isLineComment(const FormatToken &FormatTok) {
60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     FakeEOF.Tok.startToken();
86     FakeEOF.Tok.setKind(tok::eof);
87     TokenSource = this;
88     Line.Level = 0;
89     Line.InPPDirective = true;
90   }
91 
92   ~ScopedMacroState() override {
93     TokenSource = PreviousTokenSource;
94     ResetToken = Token;
95     Line.InPPDirective = false;
96     Line.Level = PreviousLineLevel;
97   }
98 
99   FormatToken *getNextToken() override {
100     // The \c UnwrappedLineParser guards against this by never calling
101     // \c getNextToken() after it has encountered the first eof token.
102     assert(!eof());
103     PreviousToken = Token;
104     Token = PreviousTokenSource->getNextToken();
105     if (eof())
106       return &FakeEOF;
107     return Token;
108   }
109 
110   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
111 
112   FormatToken *setPosition(unsigned Position) override {
113     PreviousToken = nullptr;
114     Token = PreviousTokenSource->setPosition(Position);
115     return Token;
116   }
117 
118 private:
119   bool eof() {
120     return Token && Token->HasUnescapedNewline &&
121            !continuesLineComment(*Token, PreviousToken,
122                                  /*MinColumnToken=*/PreviousToken);
123   }
124 
125   FormatToken FakeEOF;
126   UnwrappedLine &Line;
127   FormatTokenSource *&TokenSource;
128   FormatToken *&ResetToken;
129   unsigned PreviousLineLevel;
130   FormatTokenSource *PreviousTokenSource;
131 
132   FormatToken *Token;
133   FormatToken *PreviousToken;
134 };
135 
136 } // end anonymous namespace
137 
138 class ScopedLineState {
139 public:
140   ScopedLineState(UnwrappedLineParser &Parser,
141                   bool SwitchToPreprocessorLines = false)
142       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
143     if (SwitchToPreprocessorLines)
144       Parser.CurrentLines = &Parser.PreprocessorDirectives;
145     else if (!Parser.Line->Tokens.empty())
146       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
147     PreBlockLine = std::move(Parser.Line);
148     Parser.Line = std::make_unique<UnwrappedLine>();
149     Parser.Line->Level = PreBlockLine->Level;
150     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
151   }
152 
153   ~ScopedLineState() {
154     if (!Parser.Line->Tokens.empty()) {
155       Parser.addUnwrappedLine();
156     }
157     assert(Parser.Line->Tokens.empty());
158     Parser.Line = std::move(PreBlockLine);
159     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
160       Parser.MustBreakBeforeNextToken = true;
161     Parser.CurrentLines = OriginalLines;
162   }
163 
164 private:
165   UnwrappedLineParser &Parser;
166 
167   std::unique_ptr<UnwrappedLine> PreBlockLine;
168   SmallVectorImpl<UnwrappedLine> *OriginalLines;
169 };
170 
171 class CompoundStatementIndenter {
172 public:
173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
174                             const FormatStyle &Style, unsigned &LineLevel)
175       : CompoundStatementIndenter(Parser, LineLevel,
176                                   Style.BraceWrapping.AfterControlStatement,
177                                   Style.BraceWrapping.IndentBraces) {}
178   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
179                             bool WrapBrace, bool IndentBrace)
180       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
181     if (WrapBrace)
182       Parser->addUnwrappedLine();
183     if (IndentBrace)
184       ++LineLevel;
185   }
186   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
187 
188 private:
189   unsigned &LineLevel;
190   unsigned OldLineLevel;
191 };
192 
193 namespace {
194 
195 class IndexedTokenSource : public FormatTokenSource {
196 public:
197   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
198       : Tokens(Tokens), Position(-1) {}
199 
200   FormatToken *getNextToken() override {
201     ++Position;
202     return Tokens[Position];
203   }
204 
205   unsigned getPosition() override {
206     assert(Position >= 0);
207     return Position;
208   }
209 
210   FormatToken *setPosition(unsigned P) override {
211     Position = P;
212     return Tokens[Position];
213   }
214 
215   void reset() { Position = -1; }
216 
217 private:
218   ArrayRef<FormatToken *> Tokens;
219   int Position;
220 };
221 
222 } // end anonymous namespace
223 
224 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
225                                          const AdditionalKeywords &Keywords,
226                                          unsigned FirstStartColumn,
227                                          ArrayRef<FormatToken *> Tokens,
228                                          UnwrappedLineConsumer &Callback)
229     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
230       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
231       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
232       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
233       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
234                        ? IG_Rejected
235                        : IG_Inited),
236       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
241                      ? IG_Rejected
242                      : IG_Inited;
243   IncludeGuardToken = nullptr;
244   Line.reset(new UnwrappedLine);
245   CommentsBeforeNextToken.clear();
246   FormatTok = nullptr;
247   MustBreakBeforeNextToken = false;
248   PreprocessorDirectives.clear();
249   CurrentLines = &Lines;
250   DeclarationScopeStack.clear();
251   PPStack.clear();
252   Line->FirstStartColumn = FirstStartColumn;
253 }
254 
255 void UnwrappedLineParser::parse() {
256   IndexedTokenSource TokenSource(AllTokens);
257   Line->FirstStartColumn = FirstStartColumn;
258   do {
259     LLVM_DEBUG(llvm::dbgs() << "----\n");
260     reset();
261     Tokens = &TokenSource;
262     TokenSource.reset();
263 
264     readToken();
265     parseFile();
266 
267     // If we found an include guard then all preprocessor directives (other than
268     // the guard) are over-indented by one.
269     if (IncludeGuard == IG_Found)
270       for (auto &Line : Lines)
271         if (Line.InPPDirective && Line.Level > 0)
272           --Line.Level;
273 
274     // Create line with eof token.
275     pushToken(FormatTok);
276     addUnwrappedLine();
277 
278     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
279                                                   E = Lines.end();
280          I != E; ++I) {
281       Callback.consumeUnwrappedLine(*I);
282     }
283     Callback.finishRun();
284     Lines.clear();
285     while (!PPLevelBranchIndex.empty() &&
286            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
287       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
288       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
289     }
290     if (!PPLevelBranchIndex.empty()) {
291       ++PPLevelBranchIndex.back();
292       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
293       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
294     }
295   } while (!PPLevelBranchIndex.empty());
296 }
297 
298 void UnwrappedLineParser::parseFile() {
299   // The top-level context in a file always has declarations, except for pre-
300   // processor directives and JavaScript files.
301   bool MustBeDeclaration =
302       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
303   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
304                                           MustBeDeclaration);
305   if (Style.Language == FormatStyle::LK_TextProto)
306     parseBracedList();
307   else
308     parseLevel(/*HasOpeningBrace=*/false);
309   // Make sure to format the remaining tokens.
310   //
311   // LK_TextProto is special since its top-level is parsed as the body of a
312   // braced list, which does not necessarily have natural line separators such
313   // as a semicolon. Comments after the last entry that have been determined to
314   // not belong to that line, as in:
315   //   key: value
316   //   // endfile comment
317   // do not have a chance to be put on a line of their own until this point.
318   // Here we add this newline before end-of-file comments.
319   if (Style.Language == FormatStyle::LK_TextProto &&
320       !CommentsBeforeNextToken.empty())
321     addUnwrappedLine();
322   flushComments(true);
323   addUnwrappedLine();
324 }
325 
326 void UnwrappedLineParser::parseCSharpAttribute() {
327   int UnpairedSquareBrackets = 1;
328   do {
329     switch (FormatTok->Tok.getKind()) {
330     case tok::r_square:
331       nextToken();
332       --UnpairedSquareBrackets;
333       if (UnpairedSquareBrackets == 0) {
334         addUnwrappedLine();
335         return;
336       }
337       break;
338     case tok::l_square:
339       ++UnpairedSquareBrackets;
340       nextToken();
341       break;
342     default:
343       nextToken();
344       break;
345     }
346   } while (!eof());
347 }
348 
349 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
350   bool SwitchLabelEncountered = false;
351   do {
352     tok::TokenKind kind = FormatTok->Tok.getKind();
353     if (FormatTok->Type == TT_MacroBlockBegin) {
354       kind = tok::l_brace;
355     } else if (FormatTok->Type == TT_MacroBlockEnd) {
356       kind = tok::r_brace;
357     }
358 
359     switch (kind) {
360     case tok::comment:
361       nextToken();
362       addUnwrappedLine();
363       break;
364     case tok::l_brace:
365       // FIXME: Add parameter whether this can happen - if this happens, we must
366       // be in a non-declaration context.
367       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
368         continue;
369       parseBlock(/*MustBeDeclaration=*/false);
370       addUnwrappedLine();
371       break;
372     case tok::r_brace:
373       if (HasOpeningBrace)
374         return;
375       nextToken();
376       addUnwrappedLine();
377       break;
378     case tok::kw_default: {
379       unsigned StoredPosition = Tokens->getPosition();
380       FormatToken *Next;
381       do {
382         Next = Tokens->getNextToken();
383       } while (Next && Next->is(tok::comment));
384       FormatTok = Tokens->setPosition(StoredPosition);
385       if (Next && Next->isNot(tok::colon)) {
386         // default not followed by ':' is not a case label; treat it like
387         // an identifier.
388         parseStructuralElement();
389         break;
390       }
391       // Else, if it is 'default:', fall through to the case handling.
392       LLVM_FALLTHROUGH;
393     }
394     case tok::kw_case:
395       if (Style.Language == FormatStyle::LK_JavaScript &&
396           Line->MustBeDeclaration) {
397         // A 'case: string' style field declaration.
398         parseStructuralElement();
399         break;
400       }
401       if (!SwitchLabelEncountered &&
402           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
403         ++Line->Level;
404       SwitchLabelEncountered = true;
405       parseStructuralElement();
406       break;
407     case tok::l_square:
408       if (Style.isCSharp()) {
409         nextToken();
410         parseCSharpAttribute();
411         break;
412       }
413       LLVM_FALLTHROUGH;
414     default:
415       parseStructuralElement();
416       break;
417     }
418   } while (!eof());
419 }
420 
421 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
422   // We'll parse forward through the tokens until we hit
423   // a closing brace or eof - note that getNextToken() will
424   // parse macros, so this will magically work inside macro
425   // definitions, too.
426   unsigned StoredPosition = Tokens->getPosition();
427   FormatToken *Tok = FormatTok;
428   const FormatToken *PrevTok = Tok->Previous;
429   // Keep a stack of positions of lbrace tokens. We will
430   // update information about whether an lbrace starts a
431   // braced init list or a different block during the loop.
432   SmallVector<FormatToken *, 8> LBraceStack;
433   assert(Tok->Tok.is(tok::l_brace));
434   do {
435     // Get next non-comment token.
436     FormatToken *NextTok;
437     unsigned ReadTokens = 0;
438     do {
439       NextTok = Tokens->getNextToken();
440       ++ReadTokens;
441     } while (NextTok->is(tok::comment));
442 
443     switch (Tok->Tok.getKind()) {
444     case tok::l_brace:
445       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
446         if (PrevTok->isOneOf(tok::colon, tok::less))
447           // A ':' indicates this code is in a type, or a braced list
448           // following a label in an object literal ({a: {b: 1}}).
449           // A '<' could be an object used in a comparison, but that is nonsense
450           // code (can never return true), so more likely it is a generic type
451           // argument (`X<{a: string; b: number}>`).
452           // The code below could be confused by semicolons between the
453           // individual members in a type member list, which would normally
454           // trigger BK_Block. In both cases, this must be parsed as an inline
455           // braced init.
456           Tok->BlockKind = BK_BracedInit;
457         else if (PrevTok->is(tok::r_paren))
458           // `) { }` can only occur in function or method declarations in JS.
459           Tok->BlockKind = BK_Block;
460       } else {
461         Tok->BlockKind = BK_Unknown;
462       }
463       LBraceStack.push_back(Tok);
464       break;
465     case tok::r_brace:
466       if (LBraceStack.empty())
467         break;
468       if (LBraceStack.back()->BlockKind == BK_Unknown) {
469         bool ProbablyBracedList = false;
470         if (Style.Language == FormatStyle::LK_Proto) {
471           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
472         } else {
473           // Using OriginalColumn to distinguish between ObjC methods and
474           // binary operators is a bit hacky.
475           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
476                                   NextTok->OriginalColumn == 0;
477 
478           // If there is a comma, semicolon or right paren after the closing
479           // brace, we assume this is a braced initializer list.  Note that
480           // regardless how we mark inner braces here, we will overwrite the
481           // BlockKind later if we parse a braced list (where all blocks
482           // inside are by default braced lists), or when we explicitly detect
483           // blocks (for example while parsing lambdas).
484           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
485           // braced list in JS.
486           ProbablyBracedList =
487               (Style.Language == FormatStyle::LK_JavaScript &&
488                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
489                                 Keywords.kw_as)) ||
490               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
491               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
492                                tok::r_paren, tok::r_square, tok::l_brace,
493                                tok::ellipsis) ||
494               (NextTok->is(tok::identifier) &&
495                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
496               (NextTok->is(tok::semi) &&
497                (!ExpectClassBody || LBraceStack.size() != 1)) ||
498               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
499           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
500             // We can have an array subscript after a braced init
501             // list, but C++11 attributes are expected after blocks.
502             NextTok = Tokens->getNextToken();
503             ++ReadTokens;
504             ProbablyBracedList = NextTok->isNot(tok::l_square);
505           }
506         }
507         if (ProbablyBracedList) {
508           Tok->BlockKind = BK_BracedInit;
509           LBraceStack.back()->BlockKind = BK_BracedInit;
510         } else {
511           Tok->BlockKind = BK_Block;
512           LBraceStack.back()->BlockKind = BK_Block;
513         }
514       }
515       LBraceStack.pop_back();
516       break;
517     case tok::identifier:
518       if (!Tok->is(TT_StatementMacro))
519         break;
520       LLVM_FALLTHROUGH;
521     case tok::at:
522     case tok::semi:
523     case tok::kw_if:
524     case tok::kw_while:
525     case tok::kw_for:
526     case tok::kw_switch:
527     case tok::kw_try:
528     case tok::kw___try:
529       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
530         LBraceStack.back()->BlockKind = BK_Block;
531       break;
532     default:
533       break;
534     }
535     PrevTok = Tok;
536     Tok = NextTok;
537   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
538 
539   // Assume other blocks for all unclosed opening braces.
540   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
541     if (LBraceStack[i]->BlockKind == BK_Unknown)
542       LBraceStack[i]->BlockKind = BK_Block;
543   }
544 
545   FormatTok = Tokens->setPosition(StoredPosition);
546 }
547 
// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
553 
554 size_t UnwrappedLineParser::computePPHash() const {
555   size_t h = 0;
556   for (const auto &i : PPStack) {
557     hash_combine(h, size_t(i.Kind));
558     hash_combine(h, i.Line);
559   }
560   return h;
561 }
562 
563 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
564                                      bool MunchSemi) {
565   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
566          "'{' or macro block token expected");
567   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
568   FormatTok->BlockKind = BK_Block;
569 
570   size_t PPStartHash = computePPHash();
571 
572   unsigned InitialLevel = Line->Level;
573   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
574 
575   if (MacroBlock && FormatTok->is(tok::l_paren))
576     parseParens();
577 
578   size_t NbPreprocessorDirectives =
579       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
580   addUnwrappedLine();
581   size_t OpeningLineIndex =
582       CurrentLines->empty()
583           ? (UnwrappedLine::kInvalidIndex)
584           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
585 
586   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
587                                           MustBeDeclaration);
588   if (AddLevel)
589     ++Line->Level;
590   parseLevel(/*HasOpeningBrace=*/true);
591 
592   if (eof())
593     return;
594 
595   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
596                  : !FormatTok->is(tok::r_brace)) {
597     Line->Level = InitialLevel;
598     FormatTok->BlockKind = BK_Block;
599     return;
600   }
601 
602   size_t PPEndHash = computePPHash();
603 
604   // Munch the closing brace.
605   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
606 
607   if (MacroBlock && FormatTok->is(tok::l_paren))
608     parseParens();
609 
610   if (MunchSemi && FormatTok->Tok.is(tok::semi))
611     nextToken();
612   Line->Level = InitialLevel;
613 
614   if (PPStartHash == PPEndHash) {
615     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
616     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
617       // Update the opening line to add the forward reference as well
618       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
619           CurrentLines->size() - 1;
620     }
621   }
622 }
623 
624 static bool isGoogScope(const UnwrappedLine &Line) {
625   // FIXME: Closure-library specific stuff should not be hard-coded but be
626   // configurable.
627   if (Line.Tokens.size() < 4)
628     return false;
629   auto I = Line.Tokens.begin();
630   if (I->Tok->TokenText != "goog")
631     return false;
632   ++I;
633   if (I->Tok->isNot(tok::period))
634     return false;
635   ++I;
636   if (I->Tok->TokenText != "scope")
637     return false;
638   ++I;
639   return I->Tok->is(tok::l_paren);
640 }
641 
642 static bool isIIFE(const UnwrappedLine &Line,
643                    const AdditionalKeywords &Keywords) {
644   // Look for the start of an immediately invoked anonymous function.
645   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
646   // This is commonly done in JavaScript to create a new, anonymous scope.
647   // Example: (function() { ... })()
648   if (Line.Tokens.size() < 3)
649     return false;
650   auto I = Line.Tokens.begin();
651   if (I->Tok->isNot(tok::l_paren))
652     return false;
653   ++I;
654   if (I->Tok->isNot(Keywords.kw_function))
655     return false;
656   ++I;
657   return I->Tok->is(tok::l_paren);
658 }
659 
660 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
661                                    const FormatToken &InitialToken) {
662   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
663     return Style.BraceWrapping.AfterNamespace;
664   if (InitialToken.is(tok::kw_class))
665     return Style.BraceWrapping.AfterClass;
666   if (InitialToken.is(tok::kw_union))
667     return Style.BraceWrapping.AfterUnion;
668   if (InitialToken.is(tok::kw_struct))
669     return Style.BraceWrapping.AfterStruct;
670   return false;
671 }
672 
673 void UnwrappedLineParser::parseChildBlock() {
674   FormatTok->BlockKind = BK_Block;
675   nextToken();
676   {
677     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
678                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
679     ScopedLineState LineState(*this);
680     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
681                                             /*MustBeDeclaration=*/false);
682     Line->Level += SkipIndent ? 0 : 1;
683     parseLevel(/*HasOpeningBrace=*/true);
684     flushComments(isOnNewLine(*FormatTok));
685     Line->Level -= SkipIndent ? 0 : 1;
686   }
687   nextToken();
688 }
689 
690 void UnwrappedLineParser::parsePPDirective() {
691   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
692   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
693 
694   nextToken();
695 
696   if (!FormatTok->Tok.getIdentifierInfo()) {
697     parsePPUnknown();
698     return;
699   }
700 
701   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
702   case tok::pp_define:
703     parsePPDefine();
704     return;
705   case tok::pp_if:
706     parsePPIf(/*IfDef=*/false);
707     break;
708   case tok::pp_ifdef:
709   case tok::pp_ifndef:
710     parsePPIf(/*IfDef=*/true);
711     break;
712   case tok::pp_else:
713     parsePPElse();
714     break;
715   case tok::pp_elif:
716     parsePPElIf();
717     break;
718   case tok::pp_endif:
719     parsePPEndIf();
720     break;
721   default:
722     parsePPUnknown();
723     break;
724   }
725 }
726 
727 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
728   size_t Line = CurrentLines->size();
729   if (CurrentLines == &PreprocessorDirectives)
730     Line += Lines.size();
731 
732   if (Unreachable ||
733       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
734     PPStack.push_back({PP_Unreachable, Line});
735   else
736     PPStack.push_back({PP_Conditional, Line});
737 }
738 
739 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
740   ++PPBranchLevel;
741   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
742   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
743     PPLevelBranchIndex.push_back(0);
744     PPLevelBranchCount.push_back(0);
745   }
746   PPChainBranchIndex.push(0);
747   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
748   conditionalCompilationCondition(Unreachable || Skip);
749 }
750 
751 void UnwrappedLineParser::conditionalCompilationAlternative() {
752   if (!PPStack.empty())
753     PPStack.pop_back();
754   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
755   if (!PPChainBranchIndex.empty())
756     ++PPChainBranchIndex.top();
757   conditionalCompilationCondition(
758       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
759       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
760 }
761 
762 void UnwrappedLineParser::conditionalCompilationEnd() {
763   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
764   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
765     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
766       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
767     }
768   }
769   // Guard against #endif's without #if.
770   if (PPBranchLevel > -1)
771     --PPBranchLevel;
772   if (!PPChainBranchIndex.empty())
773     PPChainBranchIndex.pop();
774   if (!PPStack.empty())
775     PPStack.pop_back();
776 }
777 
778 void UnwrappedLineParser::parsePPIf(bool IfDef) {
779   bool IfNDef = FormatTok->is(tok::pp_ifndef);
780   nextToken();
781   bool Unreachable = false;
782   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
783     Unreachable = true;
784   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
785     Unreachable = true;
786   conditionalCompilationStart(Unreachable);
787   FormatToken *IfCondition = FormatTok;
788   // If there's a #ifndef on the first line, and the only lines before it are
789   // comments, it could be an include guard.
790   bool MaybeIncludeGuard = IfNDef;
791   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
792     for (auto &Line : Lines) {
793       if (!Line.Tokens.front().Tok->is(tok::comment)) {
794         MaybeIncludeGuard = false;
795         IncludeGuard = IG_Rejected;
796         break;
797       }
798     }
799   --PPBranchLevel;
800   parsePPUnknown();
801   ++PPBranchLevel;
802   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
803     IncludeGuard = IG_IfNdefed;
804     IncludeGuardToken = IfCondition;
805   }
806 }
807 
808 void UnwrappedLineParser::parsePPElse() {
809   // If a potential include guard has an #else, it's not an include guard.
810   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
811     IncludeGuard = IG_Rejected;
812   conditionalCompilationAlternative();
813   if (PPBranchLevel > -1)
814     --PPBranchLevel;
815   parsePPUnknown();
816   ++PPBranchLevel;
817 }
818 
819 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
820 
821 void UnwrappedLineParser::parsePPEndIf() {
822   conditionalCompilationEnd();
823   parsePPUnknown();
824   // If the #endif of a potential include guard is the last thing in the file,
825   // then we found an include guard.
826   unsigned TokenPosition = Tokens->getPosition();
827   FormatToken *PeekNext = AllTokens[TokenPosition];
828   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
829       PeekNext->is(tok::eof) &&
830       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
831     IncludeGuard = IG_Found;
832 }
833 
834 void UnwrappedLineParser::parsePPDefine() {
835   nextToken();
836 
837   if (!FormatTok->Tok.getIdentifierInfo()) {
838     IncludeGuard = IG_Rejected;
839     IncludeGuardToken = nullptr;
840     parsePPUnknown();
841     return;
842   }
843 
844   if (IncludeGuard == IG_IfNdefed &&
845       IncludeGuardToken->TokenText == FormatTok->TokenText) {
846     IncludeGuard = IG_Defined;
847     IncludeGuardToken = nullptr;
848     for (auto &Line : Lines) {
849       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
850         IncludeGuard = IG_Rejected;
851         break;
852       }
853     }
854   }
855 
856   nextToken();
857   if (FormatTok->Tok.getKind() == tok::l_paren &&
858       FormatTok->WhitespaceRange.getBegin() ==
859           FormatTok->WhitespaceRange.getEnd()) {
860     parseParens();
861   }
862   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
863     Line->Level += PPBranchLevel + 1;
864   addUnwrappedLine();
865   ++Line->Level;
866 
867   // Errors during a preprocessor directive can only affect the layout of the
868   // preprocessor directive, and thus we ignore them. An alternative approach
869   // would be to use the same approach we use on the file level (no
870   // re-indentation if there was a structural error) within the macro
871   // definition.
872   parseFile();
873 }
874 
875 void UnwrappedLineParser::parsePPUnknown() {
876   do {
877     nextToken();
878   } while (!eof());
879   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
880     Line->Level += PPBranchLevel + 1;
881   addUnwrappedLine();
882 }
883 
884 // Here we blacklist certain tokens that are not usually the first token in an
885 // unwrapped line. This is used in attempt to distinguish macro calls without
886 // trailing semicolons from other constructs split to several lines.
887 static bool tokenCanStartNewLine(const clang::Token &Tok) {
888   // Semicolon can be a null-statement, l_square can be a start of a macro or
889   // a C++11 attribute, but this doesn't seem to be common.
890   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
891          Tok.isNot(tok::l_square) &&
892          // Tokens that can only be used as binary operators and a part of
893          // overloaded operator names.
894          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
895          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
896          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
897          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
898          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
899          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
900          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
901          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
902          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
903          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
904          Tok.isNot(tok::lesslessequal) &&
905          // Colon is used in labels, base class lists, initializer lists,
906          // range-based for loops, ternary operator, but should never be the
907          // first token in an unwrapped line.
908          Tok.isNot(tok::colon) &&
909          // 'noexcept' is a trailing annotation.
910          Tok.isNot(tok::kw_noexcept);
911 }
912 
913 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
914                           const FormatToken *FormatTok) {
915   // FIXME: This returns true for C/C++ keywords like 'struct'.
916   return FormatTok->is(tok::identifier) &&
917          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
918           !FormatTok->isOneOf(
919               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
920               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
921               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
922               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
923               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
924               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
925               Keywords.kw_from));
926 }
927 
928 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
929                                  const FormatToken *FormatTok) {
930   return FormatTok->Tok.isLiteral() ||
931          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
932          mustBeJSIdent(Keywords, FormatTok);
933 }
934 
935 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
936 // when encountered after a value (see mustBeJSIdentOrValue).
937 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
938                            const FormatToken *FormatTok) {
939   return FormatTok->isOneOf(
940       tok::kw_return, Keywords.kw_yield,
941       // conditionals
942       tok::kw_if, tok::kw_else,
943       // loops
944       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
945       // switch/case
946       tok::kw_switch, tok::kw_case,
947       // exceptions
948       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
949       // declaration
950       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
951       Keywords.kw_async, Keywords.kw_function,
952       // import/export
953       Keywords.kw_import, tok::kw_export);
954 }
955 
956 // readTokenWithJavaScriptASI reads the next token and terminates the current
957 // line if JavaScript Automatic Semicolon Insertion must
958 // happen between the current token and the next token.
959 //
960 // This method is conservative - it cannot cover all edge cases of JavaScript,
961 // but only aims to correctly handle certain well known cases. It *must not*
962 // return true in speculative cases.
963 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
964   FormatToken *Previous = FormatTok;
965   readToken();
966   FormatToken *Next = FormatTok;
967 
968   bool IsOnSameLine =
969       CommentsBeforeNextToken.empty()
970           ? Next->NewlinesBefore == 0
971           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
972   if (IsOnSameLine)
973     return;
974 
975   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
976   bool PreviousStartsTemplateExpr =
977       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
978   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
979     // If the line contains an '@' sign, the previous token might be an
980     // annotation, which can precede another identifier/value.
981     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
982                               [](UnwrappedLineNode &LineNode) {
983                                 return LineNode.Tok->is(tok::at);
984                               }) != Line->Tokens.end();
985     if (HasAt)
986       return;
987   }
988   if (Next->is(tok::exclaim) && PreviousMustBeValue)
989     return addUnwrappedLine();
990   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
991   bool NextEndsTemplateExpr =
992       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
993   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
994       (PreviousMustBeValue ||
995        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
996                          tok::minusminus)))
997     return addUnwrappedLine();
998   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
999       isJSDeclOrStmt(Keywords, Next))
1000     return addUnwrappedLine();
1001 }
1002 
1003 void UnwrappedLineParser::parseStructuralElement() {
1004   assert(!FormatTok->is(tok::l_brace));
1005   if (Style.Language == FormatStyle::LK_TableGen &&
1006       FormatTok->is(tok::pp_include)) {
1007     nextToken();
1008     if (FormatTok->is(tok::string_literal))
1009       nextToken();
1010     addUnwrappedLine();
1011     return;
1012   }
1013   switch (FormatTok->Tok.getKind()) {
1014   case tok::kw_asm:
1015     nextToken();
1016     if (FormatTok->is(tok::l_brace)) {
1017       FormatTok->Type = TT_InlineASMBrace;
1018       nextToken();
1019       while (FormatTok && FormatTok->isNot(tok::eof)) {
1020         if (FormatTok->is(tok::r_brace)) {
1021           FormatTok->Type = TT_InlineASMBrace;
1022           nextToken();
1023           addUnwrappedLine();
1024           break;
1025         }
1026         FormatTok->Finalized = true;
1027         nextToken();
1028       }
1029     }
1030     break;
1031   case tok::kw_namespace:
1032     parseNamespace();
1033     return;
1034   case tok::kw_public:
1035   case tok::kw_protected:
1036   case tok::kw_private:
1037     if (Style.Language == FormatStyle::LK_Java ||
1038         Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
1039       nextToken();
1040     else
1041       parseAccessSpecifier();
1042     return;
1043   case tok::kw_if:
1044     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1045       // field/method declaration.
1046       break;
1047     parseIfThenElse();
1048     return;
1049   case tok::kw_for:
1050   case tok::kw_while:
1051     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1052       // field/method declaration.
1053       break;
1054     parseForOrWhileLoop();
1055     return;
1056   case tok::kw_do:
1057     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1058       // field/method declaration.
1059       break;
1060     parseDoWhile();
1061     return;
1062   case tok::kw_switch:
1063     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1064       // 'switch: string' field declaration.
1065       break;
1066     parseSwitch();
1067     return;
1068   case tok::kw_default:
1069     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1070       // 'default: string' field declaration.
1071       break;
1072     nextToken();
1073     if (FormatTok->is(tok::colon)) {
1074       parseLabel();
1075       return;
1076     }
1077     // e.g. "default void f() {}" in a Java interface.
1078     break;
1079   case tok::kw_case:
1080     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1081       // 'case: string' field declaration.
1082       break;
1083     parseCaseLabel();
1084     return;
1085   case tok::kw_try:
1086   case tok::kw___try:
1087     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1088       // field/method declaration.
1089       break;
1090     parseTryCatch();
1091     return;
1092   case tok::kw_extern:
1093     nextToken();
1094     if (FormatTok->Tok.is(tok::string_literal)) {
1095       nextToken();
1096       if (FormatTok->Tok.is(tok::l_brace)) {
1097         if (Style.BraceWrapping.AfterExternBlock) {
1098           addUnwrappedLine();
1099           parseBlock(/*MustBeDeclaration=*/true);
1100         } else {
1101           parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
1102         }
1103         addUnwrappedLine();
1104         return;
1105       }
1106     }
1107     break;
1108   case tok::kw_export:
1109     if (Style.Language == FormatStyle::LK_JavaScript) {
1110       parseJavaScriptEs6ImportExport();
1111       return;
1112     }
1113     if (!Style.isCpp())
1114       break;
1115     // Handle C++ "(inline|export) namespace".
1116     LLVM_FALLTHROUGH;
1117   case tok::kw_inline:
1118     nextToken();
1119     if (FormatTok->Tok.is(tok::kw_namespace)) {
1120       parseNamespace();
1121       return;
1122     }
1123     break;
1124   case tok::identifier:
1125     if (FormatTok->is(TT_ForEachMacro)) {
1126       parseForOrWhileLoop();
1127       return;
1128     }
1129     if (FormatTok->is(TT_MacroBlockBegin)) {
1130       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1131                  /*MunchSemi=*/false);
1132       return;
1133     }
1134     if (FormatTok->is(Keywords.kw_import)) {
1135       if (Style.Language == FormatStyle::LK_JavaScript) {
1136         parseJavaScriptEs6ImportExport();
1137         return;
1138       }
1139       if (Style.Language == FormatStyle::LK_Proto) {
1140         nextToken();
1141         if (FormatTok->is(tok::kw_public))
1142           nextToken();
1143         if (!FormatTok->is(tok::string_literal))
1144           return;
1145         nextToken();
1146         if (FormatTok->is(tok::semi))
1147           nextToken();
1148         addUnwrappedLine();
1149         return;
1150       }
1151     }
1152     if (Style.isCpp() &&
1153         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1154                            Keywords.kw_slots, Keywords.kw_qslots)) {
1155       nextToken();
1156       if (FormatTok->is(tok::colon)) {
1157         nextToken();
1158         addUnwrappedLine();
1159         return;
1160       }
1161     }
1162     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1163       parseStatementMacro();
1164       return;
1165     }
1166     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1167       parseNamespace();
1168       return;
1169     }
1170     // In all other cases, parse the declaration.
1171     break;
1172   default:
1173     break;
1174   }
1175   do {
1176     const FormatToken *Previous = FormatTok->Previous;
1177     switch (FormatTok->Tok.getKind()) {
1178     case tok::at:
1179       nextToken();
1180       if (FormatTok->Tok.is(tok::l_brace)) {
1181         nextToken();
1182         parseBracedList();
1183         break;
1184       } else if (Style.Language == FormatStyle::LK_Java &&
1185                  FormatTok->is(Keywords.kw_interface)) {
1186         nextToken();
1187         break;
1188       }
1189       switch (FormatTok->Tok.getObjCKeywordID()) {
1190       case tok::objc_public:
1191       case tok::objc_protected:
1192       case tok::objc_package:
1193       case tok::objc_private:
1194         return parseAccessSpecifier();
1195       case tok::objc_interface:
1196       case tok::objc_implementation:
1197         return parseObjCInterfaceOrImplementation();
1198       case tok::objc_protocol:
1199         if (parseObjCProtocol())
1200           return;
1201         break;
1202       case tok::objc_end:
1203         return; // Handled by the caller.
1204       case tok::objc_optional:
1205       case tok::objc_required:
1206         nextToken();
1207         addUnwrappedLine();
1208         return;
1209       case tok::objc_autoreleasepool:
1210         nextToken();
1211         if (FormatTok->Tok.is(tok::l_brace)) {
1212           if (Style.BraceWrapping.AfterControlStatement ==
1213               FormatStyle::BWACS_Always)
1214             addUnwrappedLine();
1215           parseBlock(/*MustBeDeclaration=*/false);
1216         }
1217         addUnwrappedLine();
1218         return;
1219       case tok::objc_synchronized:
1220         nextToken();
1221         if (FormatTok->Tok.is(tok::l_paren))
1222           // Skip synchronization object
1223           parseParens();
1224         if (FormatTok->Tok.is(tok::l_brace)) {
1225           if (Style.BraceWrapping.AfterControlStatement ==
1226               FormatStyle::BWACS_Always)
1227             addUnwrappedLine();
1228           parseBlock(/*MustBeDeclaration=*/false);
1229         }
1230         addUnwrappedLine();
1231         return;
1232       case tok::objc_try:
1233         // This branch isn't strictly necessary (the kw_try case below would
1234         // do this too after the tok::at is parsed above).  But be explicit.
1235         parseTryCatch();
1236         return;
1237       default:
1238         break;
1239       }
1240       break;
1241     case tok::kw_enum:
1242       // Ignore if this is part of "template <enum ...".
1243       if (Previous && Previous->is(tok::less)) {
1244         nextToken();
1245         break;
1246       }
1247 
1248       // parseEnum falls through and does not yet add an unwrapped line as an
1249       // enum definition can start a structural element.
1250       if (!parseEnum())
1251         break;
1252       // This only applies for C++.
1253       if (!Style.isCpp()) {
1254         addUnwrappedLine();
1255         return;
1256       }
1257       break;
1258     case tok::kw_typedef:
1259       nextToken();
1260       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1261                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1262                              Keywords.kw_CF_CLOSED_ENUM,
1263                              Keywords.kw_NS_CLOSED_ENUM))
1264         parseEnum();
1265       break;
1266     case tok::kw_struct:
1267     case tok::kw_union:
1268     case tok::kw_class:
1269       // parseRecord falls through and does not yet add an unwrapped line as a
1270       // record declaration or definition can start a structural element.
1271       parseRecord();
1272       // This does not apply for Java, JavaScript and C#.
1273       if (Style.Language == FormatStyle::LK_Java ||
1274           Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
1275         if (FormatTok->is(tok::semi))
1276           nextToken();
1277         addUnwrappedLine();
1278         return;
1279       }
1280       break;
1281     case tok::period:
1282       nextToken();
1283       // In Java, classes have an implicit static member "class".
1284       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1285           FormatTok->is(tok::kw_class))
1286         nextToken();
1287       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1288           FormatTok->Tok.getIdentifierInfo())
1289         // JavaScript only has pseudo keywords, all keywords are allowed to
1290         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1291         nextToken();
1292       break;
1293     case tok::semi:
1294       nextToken();
1295       addUnwrappedLine();
1296       return;
1297     case tok::r_brace:
1298       addUnwrappedLine();
1299       return;
1300     case tok::l_paren:
1301       parseParens();
1302       break;
1303     case tok::kw_operator:
1304       nextToken();
1305       if (FormatTok->isBinaryOperator())
1306         nextToken();
1307       break;
1308     case tok::caret:
1309       nextToken();
1310       if (FormatTok->Tok.isAnyIdentifier() ||
1311           FormatTok->isSimpleTypeSpecifier())
1312         nextToken();
1313       if (FormatTok->is(tok::l_paren))
1314         parseParens();
1315       if (FormatTok->is(tok::l_brace))
1316         parseChildBlock();
1317       break;
1318     case tok::l_brace:
1319       if (!tryToParseBracedList()) {
1320         // A block outside of parentheses must be the last part of a
1321         // structural element.
1322         // FIXME: Figure out cases where this is not true, and add projections
1323         // for them (the one we know is missing are lambdas).
1324         if (Style.BraceWrapping.AfterFunction)
1325           addUnwrappedLine();
1326         FormatTok->Type = TT_FunctionLBrace;
1327         parseBlock(/*MustBeDeclaration=*/false);
1328         addUnwrappedLine();
1329         return;
1330       }
1331       // Otherwise this was a braced init list, and the structural
1332       // element continues.
1333       break;
1334     case tok::kw_try:
1335       if (Style.Language == FormatStyle::LK_JavaScript &&
1336           Line->MustBeDeclaration) {
1337         // field/method declaration.
1338         nextToken();
1339         break;
1340       }
1341       // We arrive here when parsing function-try blocks.
1342       if (Style.BraceWrapping.AfterFunction)
1343         addUnwrappedLine();
1344       parseTryCatch();
1345       return;
1346     case tok::identifier: {
1347       if (FormatTok->is(TT_MacroBlockEnd)) {
1348         addUnwrappedLine();
1349         return;
1350       }
1351 
1352       // Function declarations (as opposed to function expressions) are parsed
1353       // on their own unwrapped line by continuing this loop. Function
1354       // expressions (functions that are not on their own line) must not create
1355       // a new unwrapped line, so they are special cased below.
1356       size_t TokenCount = Line->Tokens.size();
1357       if (Style.Language == FormatStyle::LK_JavaScript &&
1358           FormatTok->is(Keywords.kw_function) &&
1359           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1360                                                      Keywords.kw_async)))) {
1361         tryToParseJSFunction();
1362         break;
1363       }
1364       if ((Style.Language == FormatStyle::LK_JavaScript ||
1365            Style.Language == FormatStyle::LK_Java) &&
1366           FormatTok->is(Keywords.kw_interface)) {
1367         if (Style.Language == FormatStyle::LK_JavaScript) {
1368           // In JavaScript/TypeScript, "interface" can be used as a standalone
1369           // identifier, e.g. in `var interface = 1;`. If "interface" is
1370           // followed by another identifier, it is very like to be an actual
1371           // interface declaration.
1372           unsigned StoredPosition = Tokens->getPosition();
1373           FormatToken *Next = Tokens->getNextToken();
1374           FormatTok = Tokens->setPosition(StoredPosition);
1375           if (Next && !mustBeJSIdent(Keywords, Next)) {
1376             nextToken();
1377             break;
1378           }
1379         }
1380         parseRecord();
1381         addUnwrappedLine();
1382         return;
1383       }
1384 
1385       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1386         parseStatementMacro();
1387         return;
1388       }
1389 
1390       // See if the following token should start a new unwrapped line.
1391       StringRef Text = FormatTok->TokenText;
1392       nextToken();
1393 
1394       // JS doesn't have macros, and within classes colons indicate fields, not
1395       // labels.
1396       if (Style.Language == FormatStyle::LK_JavaScript)
1397         break;
1398 
1399       TokenCount = Line->Tokens.size();
1400       if (TokenCount == 1 ||
1401           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1402         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1403           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1404           parseLabel(!Style.IndentGotoLabels);
1405           return;
1406         }
1407         // Recognize function-like macro usages without trailing semicolon as
1408         // well as free-standing macros like Q_OBJECT.
1409         bool FunctionLike = FormatTok->is(tok::l_paren);
1410         if (FunctionLike)
1411           parseParens();
1412 
1413         bool FollowedByNewline =
1414             CommentsBeforeNextToken.empty()
1415                 ? FormatTok->NewlinesBefore > 0
1416                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1417 
1418         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1419             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1420           addUnwrappedLine();
1421           return;
1422         }
1423       }
1424       break;
1425     }
1426     case tok::equal:
1427       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1428       // TT_JsFatArrow. The always start an expression or a child block if
1429       // followed by a curly.
1430       if (FormatTok->is(TT_JsFatArrow)) {
1431         nextToken();
1432         if (FormatTok->is(tok::l_brace))
1433           parseChildBlock();
1434         break;
1435       }
1436 
1437       nextToken();
1438       if (FormatTok->Tok.is(tok::l_brace)) {
1439         nextToken();
1440         parseBracedList();
1441       } else if (Style.Language == FormatStyle::LK_Proto &&
1442                  FormatTok->Tok.is(tok::less)) {
1443         nextToken();
1444         parseBracedList(/*ContinueOnSemicolons=*/false,
1445                         /*ClosingBraceKind=*/tok::greater);
1446       }
1447       break;
1448     case tok::l_square:
1449       parseSquare();
1450       break;
1451     case tok::kw_new:
1452       parseNew();
1453       break;
1454     default:
1455       nextToken();
1456       break;
1457     }
1458   } while (!eof());
1459 }
1460 
1461 bool UnwrappedLineParser::tryToParseLambda() {
1462   if (!Style.isCpp()) {
1463     nextToken();
1464     return false;
1465   }
1466   assert(FormatTok->is(tok::l_square));
1467   FormatToken &LSquare = *FormatTok;
1468   if (!tryToParseLambdaIntroducer())
1469     return false;
1470 
1471   bool SeenArrow = false;
1472 
1473   while (FormatTok->isNot(tok::l_brace)) {
1474     if (FormatTok->isSimpleTypeSpecifier()) {
1475       nextToken();
1476       continue;
1477     }
1478     switch (FormatTok->Tok.getKind()) {
1479     case tok::l_brace:
1480       break;
1481     case tok::l_paren:
1482       parseParens();
1483       break;
1484     case tok::amp:
1485     case tok::star:
1486     case tok::kw_const:
1487     case tok::comma:
1488     case tok::less:
1489     case tok::greater:
1490     case tok::identifier:
1491     case tok::numeric_constant:
1492     case tok::coloncolon:
1493     case tok::kw_class:
1494     case tok::kw_mutable:
1495     case tok::kw_noexcept:
1496     case tok::kw_template:
1497     case tok::kw_typename:
1498       nextToken();
1499       break;
1500     // Specialization of a template with an integer parameter can contain
1501     // arithmetic, logical, comparison and ternary operators.
1502     //
1503     // FIXME: This also accepts sequences of operators that are not in the scope
1504     // of a template argument list.
1505     //
1506     // In a C++ lambda a template type can only occur after an arrow. We use
1507     // this as an heuristic to distinguish between Objective-C expressions
1508     // followed by an `a->b` expression, such as:
1509     // ([obj func:arg] + a->b)
1510     // Otherwise the code below would parse as a lambda.
1511     //
1512     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1513     // explicit template lists: []<bool b = true && false>(U &&u){}
1514     case tok::plus:
1515     case tok::minus:
1516     case tok::exclaim:
1517     case tok::tilde:
1518     case tok::slash:
1519     case tok::percent:
1520     case tok::lessless:
1521     case tok::pipe:
1522     case tok::pipepipe:
1523     case tok::ampamp:
1524     case tok::caret:
1525     case tok::equalequal:
1526     case tok::exclaimequal:
1527     case tok::greaterequal:
1528     case tok::lessequal:
1529     case tok::question:
1530     case tok::colon:
1531     case tok::kw_true:
1532     case tok::kw_false:
1533       if (SeenArrow) {
1534         nextToken();
1535         break;
1536       }
1537       return true;
1538     case tok::arrow:
1539       // This might or might not actually be a lambda arrow (this could be an
1540       // ObjC method invocation followed by a dereferencing arrow). We might
1541       // reset this back to TT_Unknown in TokenAnnotator.
1542       FormatTok->Type = TT_LambdaArrow;
1543       SeenArrow = true;
1544       nextToken();
1545       break;
1546     default:
1547       return true;
1548     }
1549   }
1550   FormatTok->Type = TT_LambdaLBrace;
1551   LSquare.Type = TT_LambdaLSquare;
1552   parseChildBlock();
1553   return true;
1554 }
1555 
1556 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1557   const FormatToken *Previous = FormatTok->Previous;
1558   if (Previous &&
1559       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1560                          tok::kw_delete, tok::l_square) ||
1561        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1562        Previous->isSimpleTypeSpecifier())) {
1563     nextToken();
1564     return false;
1565   }
1566   nextToken();
1567   if (FormatTok->is(tok::l_square)) {
1568     return false;
1569   }
1570   parseSquare(/*LambdaIntroducer=*/true);
1571   return true;
1572 }
1573 
1574 void UnwrappedLineParser::tryToParseJSFunction() {
1575   assert(FormatTok->is(Keywords.kw_function) ||
1576          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1577   if (FormatTok->is(Keywords.kw_async))
1578     nextToken();
1579   // Consume "function".
1580   nextToken();
1581 
1582   // Consume * (generator function). Treat it like C++'s overloaded operators.
1583   if (FormatTok->is(tok::star)) {
1584     FormatTok->Type = TT_OverloadedOperator;
1585     nextToken();
1586   }
1587 
1588   // Consume function name.
1589   if (FormatTok->is(tok::identifier))
1590     nextToken();
1591 
1592   if (FormatTok->isNot(tok::l_paren))
1593     return;
1594 
1595   // Parse formal parameter list.
1596   parseParens();
1597 
1598   if (FormatTok->is(tok::colon)) {
1599     // Parse a type definition.
1600     nextToken();
1601 
1602     // Eat the type declaration. For braced inline object types, balance braces,
1603     // otherwise just parse until finding an l_brace for the function body.
1604     if (FormatTok->is(tok::l_brace))
1605       tryToParseBracedList();
1606     else
1607       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1608         nextToken();
1609   }
1610 
1611   if (FormatTok->is(tok::semi))
1612     return;
1613 
1614   parseChildBlock();
1615 }
1616 
1617 bool UnwrappedLineParser::tryToParseBracedList() {
1618   if (FormatTok->BlockKind == BK_Unknown)
1619     calculateBraceTypes();
1620   assert(FormatTok->BlockKind != BK_Unknown);
1621   if (FormatTok->BlockKind == BK_Block)
1622     return false;
1623   nextToken();
1624   parseBracedList();
1625   return true;
1626 }
1627 
1628 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1629                                           tok::TokenKind ClosingBraceKind) {
1630   bool HasError = false;
1631 
1632   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1633   // replace this by using parseAssigmentExpression() inside.
1634   do {
1635     if (Style.isCSharp()) {
1636       if (FormatTok->is(TT_JsFatArrow)) {
1637         nextToken();
1638         // Fat arrows can be followed by simple expressions or by child blocks
1639         // in curly braces.
1640         if (FormatTok->is(tok::l_brace)) {
1641           parseChildBlock();
1642           continue;
1643         }
1644       }
1645     }
1646     if (Style.Language == FormatStyle::LK_JavaScript) {
1647       if (FormatTok->is(Keywords.kw_function) ||
1648           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1649         tryToParseJSFunction();
1650         continue;
1651       }
1652       if (FormatTok->is(TT_JsFatArrow)) {
1653         nextToken();
1654         // Fat arrows can be followed by simple expressions or by child blocks
1655         // in curly braces.
1656         if (FormatTok->is(tok::l_brace)) {
1657           parseChildBlock();
1658           continue;
1659         }
1660       }
1661       if (FormatTok->is(tok::l_brace)) {
1662         // Could be a method inside of a braced list `{a() { return 1; }}`.
1663         if (tryToParseBracedList())
1664           continue;
1665         parseChildBlock();
1666       }
1667     }
1668     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1669       nextToken();
1670       return !HasError;
1671     }
1672     switch (FormatTok->Tok.getKind()) {
1673     case tok::caret:
1674       nextToken();
1675       if (FormatTok->is(tok::l_brace)) {
1676         parseChildBlock();
1677       }
1678       break;
1679     case tok::l_square:
1680       if (Style.isCSharp())
1681         parseSquare();
1682       else
1683         tryToParseLambda();
1684       break;
1685     case tok::l_paren:
1686       parseParens();
1687       // JavaScript can just have free standing methods and getters/setters in
1688       // object literals. Detect them by a "{" following ")".
1689       if (Style.Language == FormatStyle::LK_JavaScript) {
1690         if (FormatTok->is(tok::l_brace))
1691           parseChildBlock();
1692         break;
1693       }
1694       break;
1695     case tok::l_brace:
1696       // Assume there are no blocks inside a braced init list apart
1697       // from the ones we explicitly parse out (like lambdas).
1698       FormatTok->BlockKind = BK_BracedInit;
1699       nextToken();
1700       parseBracedList();
1701       break;
1702     case tok::less:
1703       if (Style.Language == FormatStyle::LK_Proto) {
1704         nextToken();
1705         parseBracedList(/*ContinueOnSemicolons=*/false,
1706                         /*ClosingBraceKind=*/tok::greater);
1707       } else {
1708         nextToken();
1709       }
1710       break;
1711     case tok::semi:
1712       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1713       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1714       // used for error recovery if we have otherwise determined that this is
1715       // a braced list.
1716       if (Style.Language == FormatStyle::LK_JavaScript) {
1717         nextToken();
1718         break;
1719       }
1720       HasError = true;
1721       if (!ContinueOnSemicolons)
1722         return !HasError;
1723       nextToken();
1724       break;
1725     case tok::comma:
1726       nextToken();
1727       break;
1728     default:
1729       nextToken();
1730       break;
1731     }
1732   } while (!eof());
1733   return false;
1734 }
1735 
1736 void UnwrappedLineParser::parseParens() {
1737   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1738   nextToken();
1739   do {
1740     switch (FormatTok->Tok.getKind()) {
1741     case tok::l_paren:
1742       parseParens();
1743       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1744         parseChildBlock();
1745       break;
1746     case tok::r_paren:
1747       nextToken();
1748       return;
1749     case tok::r_brace:
1750       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1751       return;
1752     case tok::l_square:
1753       tryToParseLambda();
1754       break;
1755     case tok::l_brace:
1756       if (!tryToParseBracedList())
1757         parseChildBlock();
1758       break;
1759     case tok::at:
1760       nextToken();
1761       if (FormatTok->Tok.is(tok::l_brace)) {
1762         nextToken();
1763         parseBracedList();
1764       }
1765       break;
1766     case tok::kw_class:
1767       if (Style.Language == FormatStyle::LK_JavaScript)
1768         parseRecord(/*ParseAsExpr=*/true);
1769       else
1770         nextToken();
1771       break;
1772     case tok::identifier:
1773       if (Style.Language == FormatStyle::LK_JavaScript &&
1774           (FormatTok->is(Keywords.kw_function) ||
1775            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1776         tryToParseJSFunction();
1777       else
1778         nextToken();
1779       break;
1780     default:
1781       nextToken();
1782       break;
1783     }
1784   } while (!eof());
1785 }
1786 
1787 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1788   if (!LambdaIntroducer) {
1789     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1790     if (tryToParseLambda())
1791       return;
1792   }
1793   do {
1794     switch (FormatTok->Tok.getKind()) {
1795     case tok::l_paren:
1796       parseParens();
1797       break;
1798     case tok::r_square:
1799       nextToken();
1800       return;
1801     case tok::r_brace:
1802       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1803       return;
1804     case tok::l_square:
1805       parseSquare();
1806       break;
1807     case tok::l_brace: {
1808       if (!tryToParseBracedList())
1809         parseChildBlock();
1810       break;
1811     }
1812     case tok::at:
1813       nextToken();
1814       if (FormatTok->Tok.is(tok::l_brace)) {
1815         nextToken();
1816         parseBracedList();
1817       }
1818       break;
1819     default:
1820       nextToken();
1821       break;
1822     }
1823   } while (!eof());
1824 }
1825 
1826 void UnwrappedLineParser::parseIfThenElse() {
1827   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1828   nextToken();
1829   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1830     nextToken();
1831   if (FormatTok->Tok.is(tok::l_paren))
1832     parseParens();
1833   bool NeedsUnwrappedLine = false;
1834   if (FormatTok->Tok.is(tok::l_brace)) {
1835     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1836     parseBlock(/*MustBeDeclaration=*/false);
1837     if (Style.BraceWrapping.BeforeElse)
1838       addUnwrappedLine();
1839     else
1840       NeedsUnwrappedLine = true;
1841   } else {
1842     addUnwrappedLine();
1843     ++Line->Level;
1844     parseStructuralElement();
1845     --Line->Level;
1846   }
1847   if (FormatTok->Tok.is(tok::kw_else)) {
1848     nextToken();
1849     if (FormatTok->Tok.is(tok::l_brace)) {
1850       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1851       parseBlock(/*MustBeDeclaration=*/false);
1852       addUnwrappedLine();
1853     } else if (FormatTok->Tok.is(tok::kw_if)) {
1854       parseIfThenElse();
1855     } else {
1856       addUnwrappedLine();
1857       ++Line->Level;
1858       parseStructuralElement();
1859       if (FormatTok->is(tok::eof))
1860         addUnwrappedLine();
1861       --Line->Level;
1862     }
1863   } else if (NeedsUnwrappedLine) {
1864     addUnwrappedLine();
1865   }
1866 }
1867 
// Parses a 'try' / '__try' statement together with every handler clause that
// follows it: 'catch', '__except', '__finally', 'finally' (Java and
// JavaScript only) and the '@catch' / '@finally' Objective-C keywords. A
// function-try-block with an initializer list and a Java try-with-resources
// header are accepted as well.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  // Set while the line ending in the most recent "}" is still open, so that a
  // following handler clause can be attached to it.
  bool NeedsUnwrappedLine = false;
  if (FormatTok->is(tok::colon)) {
    // We are in a function try block, what comes is an initializer list.
    nextToken();

    // In case identifiers were removed by clang-tidy, what might follow is
    // multiple commas in sequence - before the first identifier.
    while (FormatTok->is(tok::comma))
      nextToken();

    // Each initializer is an identifier optionally followed by a
    // parenthesized argument list.
    while (FormatTok->is(tok::identifier)) {
      nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();

      // In case identifiers were removed by clang-tidy, what might follow is
      // multiple commas in sequence - after the first identifier.
      while (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
    parseParens();
  }
  if (FormatTok->is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeCatch) {
      addUnwrappedLine();
    } else {
      NeedsUnwrappedLine = true;
    }
  } else if (!FormatTok->is(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // Consume handler clauses until the current token no longer starts one.
  while (1) {
    // Step over a leading '@' so the checks below look at the keyword itself.
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java ||
            Style.Language == FormatStyle::LK_JavaScript) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
      break;
    nextToken();
    // Skip the handler's header up to its "{". Hitting ';', '}' or eof first
    // means there is no handler body; return right away (without emitting the
    // pending line below).
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
        return;
      nextToken();
    }
    NeedsUnwrappedLine = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    if (Style.BraceWrapping.BeforeCatch)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  }
  // Finish the line that was kept open for a handler that never came.
  if (NeedsUnwrappedLine)
    addUnwrappedLine();
}
1945 
1946 void UnwrappedLineParser::parseNamespace() {
1947   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
1948          "'namespace' expected");
1949 
1950   const FormatToken &InitialToken = *FormatTok;
1951   nextToken();
1952   if (InitialToken.is(TT_NamespaceMacro)) {
1953     parseParens();
1954   } else {
1955     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
1956                               tok::l_square)) {
1957       if (FormatTok->is(tok::l_square))
1958         parseSquare();
1959       else
1960         nextToken();
1961     }
1962   }
1963   if (FormatTok->Tok.is(tok::l_brace)) {
1964     if (ShouldBreakBeforeBrace(Style, InitialToken))
1965       addUnwrappedLine();
1966 
1967     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1968                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1969                      DeclarationScopeStack.size() > 1);
1970     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1971     // Munch the semicolon after a namespace. This is more common than one would
1972     // think. Putting the semicolon into its own line is very ugly.
1973     if (FormatTok->Tok.is(tok::semi))
1974       nextToken();
1975     addUnwrappedLine();
1976   }
1977   // FIXME: Add error handling.
1978 }
1979 
1980 void UnwrappedLineParser::parseNew() {
1981   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1982   nextToken();
1983 
1984   if (Style.isCSharp()) {
1985     do {
1986       if (FormatTok->is(tok::l_brace))
1987         parseBracedList();
1988 
1989       if (FormatTok->isOneOf(tok::semi, tok::comma))
1990         return;
1991 
1992       nextToken();
1993     } while (!eof());
1994   }
1995 
1996   if (Style.Language != FormatStyle::LK_Java)
1997     return;
1998 
1999   // In Java, we can parse everything up to the parens, which aren't optional.
2000   do {
2001     // There should not be a ;, { or } before the new's open paren.
2002     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2003       return;
2004 
2005     // Consume the parens.
2006     if (FormatTok->is(tok::l_paren)) {
2007       parseParens();
2008 
2009       // If there is a class body of an anonymous class, consume that as child.
2010       if (FormatTok->is(tok::l_brace))
2011         parseChildBlock();
2012       return;
2013     }
2014     nextToken();
2015   } while (!eof());
2016 }
2017 
2018 void UnwrappedLineParser::parseForOrWhileLoop() {
2019   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2020          "'for', 'while' or foreach macro expected");
2021   nextToken();
2022   // JS' for await ( ...
2023   if (Style.Language == FormatStyle::LK_JavaScript &&
2024       FormatTok->is(Keywords.kw_await))
2025     nextToken();
2026   if (FormatTok->Tok.is(tok::l_paren))
2027     parseParens();
2028   if (FormatTok->Tok.is(tok::l_brace)) {
2029     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2030     parseBlock(/*MustBeDeclaration=*/false);
2031     addUnwrappedLine();
2032   } else {
2033     addUnwrappedLine();
2034     ++Line->Level;
2035     parseStructuralElement();
2036     --Line->Level;
2037   }
2038 }
2039 
2040 void UnwrappedLineParser::parseDoWhile() {
2041   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2042   nextToken();
2043   if (FormatTok->Tok.is(tok::l_brace)) {
2044     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2045     parseBlock(/*MustBeDeclaration=*/false);
2046     if (Style.BraceWrapping.IndentBraces)
2047       addUnwrappedLine();
2048   } else {
2049     addUnwrappedLine();
2050     ++Line->Level;
2051     parseStructuralElement();
2052     --Line->Level;
2053   }
2054 
2055   // FIXME: Add error handling.
2056   if (!FormatTok->Tok.is(tok::kw_while)) {
2057     addUnwrappedLine();
2058     return;
2059   }
2060 
2061   nextToken();
2062   parseStructuralElement();
2063 }
2064 
2065 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2066   nextToken();
2067   unsigned OldLineLevel = Line->Level;
2068   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2069     --Line->Level;
2070   if (LeftAlignLabel)
2071     Line->Level = 0;
2072   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2073       FormatTok->Tok.is(tok::l_brace)) {
2074     CompoundStatementIndenter Indenter(this, Line->Level,
2075                                        Style.BraceWrapping.AfterCaseLabel,
2076                                        Style.BraceWrapping.IndentBraces);
2077     parseBlock(/*MustBeDeclaration=*/false);
2078     if (FormatTok->Tok.is(tok::kw_break)) {
2079       if (Style.BraceWrapping.AfterControlStatement ==
2080           FormatStyle::BWACS_Always)
2081         addUnwrappedLine();
2082       parseStructuralElement();
2083     }
2084     addUnwrappedLine();
2085   } else {
2086     if (FormatTok->is(tok::semi))
2087       nextToken();
2088     addUnwrappedLine();
2089   }
2090   Line->Level = OldLineLevel;
2091   if (FormatTok->isNot(tok::l_brace)) {
2092     parseStructuralElement();
2093     addUnwrappedLine();
2094   }
2095 }
2096 
2097 void UnwrappedLineParser::parseCaseLabel() {
2098   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2099   // FIXME: fix handling of complex expressions here.
2100   do {
2101     nextToken();
2102   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2103   parseLabel();
2104 }
2105 
2106 void UnwrappedLineParser::parseSwitch() {
2107   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2108   nextToken();
2109   if (FormatTok->Tok.is(tok::l_paren))
2110     parseParens();
2111   if (FormatTok->Tok.is(tok::l_brace)) {
2112     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2113     parseBlock(/*MustBeDeclaration=*/false);
2114     addUnwrappedLine();
2115   } else {
2116     addUnwrappedLine();
2117     ++Line->Level;
2118     parseStructuralElement();
2119     --Line->Level;
2120   }
2121 }
2122 
2123 void UnwrappedLineParser::parseAccessSpecifier() {
2124   nextToken();
2125   // Understand Qt's slots.
2126   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2127     nextToken();
2128   // Otherwise, we don't know what it is, and we'd better keep the next token.
2129   if (FormatTok->Tok.is(tok::colon))
2130     nextToken();
2131   addUnwrappedLine();
2132 }
2133 
// Parses an enum declaration or definition, starting at the 'enum' keyword
// (or just after it for NS_ENUM-style macros).
//
// Returns false when the tokens turn out not to start an enum: 'enum' used as
// a TypeScript property name, 'enum' used as a protobuf field name, or two
// identifiers in a row in C++. Returns true otherwise, whether or not a body
// was found.
bool UnwrappedLineParser::parseEnum() {
  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->Tok.is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.Language == FormatStyle::LK_JavaScript &&
      FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
    nextToken();

  // Skip the enum header: anything carrying identifier info (identifiers and
  // keywords) plus the punctuation listed below.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->BlockKind = BK_Block;

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    // Protobuf enum bodies are parsed as ordinary declaration blocks.
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  // Parse enum body.
  nextToken();
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
  if (HasError) {
    // Error recovery: swallow a ';' if present and end the line here.
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }

  // When the body parsed cleanly there is no addUnwrappedLine() here so that
  // we fall through to parsing a structural element afterwards. Thus, in
  // "enum A {} n, m;", "} n, m;" will end up in one unwrapped line.
  return true;
}
2199 
2200 void UnwrappedLineParser::parseJavaEnumBody() {
2201   // Determine whether the enum is simple, i.e. does not have a semicolon or
2202   // constants with class bodies. Simple enums can be formatted like braced
2203   // lists, contracted to a single line, etc.
2204   unsigned StoredPosition = Tokens->getPosition();
2205   bool IsSimple = true;
2206   FormatToken *Tok = Tokens->getNextToken();
2207   while (Tok) {
2208     if (Tok->is(tok::r_brace))
2209       break;
2210     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2211       IsSimple = false;
2212       break;
2213     }
2214     // FIXME: This will also mark enums with braces in the arguments to enum
2215     // constants as "not simple". This is probably fine in practice, though.
2216     Tok = Tokens->getNextToken();
2217   }
2218   FormatTok = Tokens->setPosition(StoredPosition);
2219 
2220   if (IsSimple) {
2221     nextToken();
2222     parseBracedList();
2223     addUnwrappedLine();
2224     return;
2225   }
2226 
2227   // Parse the body of a more complex enum.
2228   // First add a line for everything up to the "{".
2229   nextToken();
2230   addUnwrappedLine();
2231   ++Line->Level;
2232 
2233   // Parse the enum constants.
2234   while (FormatTok) {
2235     if (FormatTok->is(tok::l_brace)) {
2236       // Parse the constant's class body.
2237       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2238                  /*MunchSemi=*/false);
2239     } else if (FormatTok->is(tok::l_paren)) {
2240       parseParens();
2241     } else if (FormatTok->is(tok::comma)) {
2242       nextToken();
2243       addUnwrappedLine();
2244     } else if (FormatTok->is(tok::semi)) {
2245       nextToken();
2246       addUnwrappedLine();
2247       break;
2248     } else if (FormatTok->is(tok::r_brace)) {
2249       addUnwrappedLine();
2250       break;
2251     } else {
2252       nextToken();
2253     }
2254   }
2255 
2256   // Parse the class body after the enum's ";" if any.
2257   parseLevel(/*HasOpeningBrace=*/true);
2258   nextToken();
2259   --Line->Level;
2260   addUnwrappedLine();
2261 }
2262 
// Parses a record (class-like) declaration or definition, starting at its
// introducing keyword: the name with any attributes/macros, the base-clause
// or template argument list, and the body if there is one.
//
// \p ParseAsExpr selects how the body is parsed: as a child block when true
// (the caller in parseParens() uses this for JavaScript class expressions),
// as a regular declaration block otherwise.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas) ||
         ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.Language == FormatStyle::LK_JavaScript &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier counts as a possible macro only if its text is unchanged
    // by upper-casing.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip the base-clause / template arguments. Braced lists inside them are
    // consumed; the first "{" that is not a braced list is taken to be the
    // record body. A ';' means there is no body at all.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->Tok.is(tok::semi))
        return;
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
2332 
2333 void UnwrappedLineParser::parseObjCMethod() {
2334   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2335          "'(' or identifier expected.");
2336   do {
2337     if (FormatTok->Tok.is(tok::semi)) {
2338       nextToken();
2339       addUnwrappedLine();
2340       return;
2341     } else if (FormatTok->Tok.is(tok::l_brace)) {
2342       if (Style.BraceWrapping.AfterFunction)
2343         addUnwrappedLine();
2344       parseBlock(/*MustBeDeclaration=*/false);
2345       addUnwrappedLine();
2346       return;
2347     } else {
2348       nextToken();
2349     }
2350   } while (!eof());
2351 }
2352 
2353 void UnwrappedLineParser::parseObjCProtocolList() {
2354   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2355   do {
2356     nextToken();
2357     // Early exit in case someone forgot a close angle.
2358     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2359         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2360       return;
2361   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2362   nextToken(); // Skip '>'.
2363 }
2364 
2365 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2366   do {
2367     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2368       nextToken();
2369       addUnwrappedLine();
2370       break;
2371     }
2372     if (FormatTok->is(tok::l_brace)) {
2373       parseBlock(/*MustBeDeclaration=*/false);
2374       // In ObjC interfaces, nothing should be following the "}".
2375       addUnwrappedLine();
2376     } else if (FormatTok->is(tok::r_brace)) {
2377       // Ignore stray "}". parseStructuralElement doesn't consume them.
2378       nextToken();
2379       addUnwrappedLine();
2380     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2381       nextToken();
2382       parseObjCMethod();
2383     } else {
2384       parseStructuralElement();
2385     }
2386   } while (!eof());
2387 }
2388 
2389 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2390   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2391          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2392   nextToken();
2393   nextToken(); // interface name
2394 
2395   // @interface can be followed by a lightweight generic
2396   // specialization list, then either a base class or a category.
2397   if (FormatTok->Tok.is(tok::less)) {
2398     // Unlike protocol lists, generic parameterizations support
2399     // nested angles:
2400     //
2401     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2402     //     NSObject <NSCopying, NSSecureCoding>
2403     //
2404     // so we need to count how many open angles we have left.
2405     unsigned NumOpenAngles = 1;
2406     do {
2407       nextToken();
2408       // Early exit in case someone forgot a close angle.
2409       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2410           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2411         break;
2412       if (FormatTok->Tok.is(tok::less))
2413         ++NumOpenAngles;
2414       else if (FormatTok->Tok.is(tok::greater)) {
2415         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2416         --NumOpenAngles;
2417       }
2418     } while (!eof() && NumOpenAngles != 0);
2419     nextToken(); // Skip '>'.
2420   }
2421   if (FormatTok->Tok.is(tok::colon)) {
2422     nextToken();
2423     nextToken(); // base class name
2424   } else if (FormatTok->Tok.is(tok::l_paren))
2425     // Skip category, if present.
2426     parseParens();
2427 
2428   if (FormatTok->Tok.is(tok::less))
2429     parseObjCProtocolList();
2430 
2431   if (FormatTok->Tok.is(tok::l_brace)) {
2432     if (Style.BraceWrapping.AfterObjCDeclaration)
2433       addUnwrappedLine();
2434     parseBlock(/*MustBeDeclaration=*/true);
2435   }
2436 
2437   // With instance variables, this puts '}' on its own line.  Without instance
2438   // variables, this ends the @interface line.
2439   addUnwrappedLine();
2440 
2441   parseObjCUntilAtEnd();
2442 }
2443 
2444 // Returns true for the declaration/definition form of @protocol,
2445 // false for the expression form.
2446 bool UnwrappedLineParser::parseObjCProtocol() {
2447   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2448   nextToken();
2449 
2450   if (FormatTok->is(tok::l_paren))
2451     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2452     return false;
2453 
2454   // The definition/declaration form,
2455   // @protocol Foo
2456   // - (int)someMethod;
2457   // @end
2458 
2459   nextToken(); // protocol name
2460 
2461   if (FormatTok->Tok.is(tok::less))
2462     parseObjCProtocolList();
2463 
2464   // Check for protocol declaration.
2465   if (FormatTok->Tok.is(tok::semi)) {
2466     nextToken();
2467     addUnwrappedLine();
2468     return true;
2469   }
2470 
2471   addUnwrappedLine();
2472   parseObjCUntilAtEnd();
2473   return true;
2474 }
2475 
2476 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2477   bool IsImport = FormatTok->is(Keywords.kw_import);
2478   assert(IsImport || FormatTok->is(tok::kw_export));
2479   nextToken();
2480 
2481   // Consume the "default" in "export default class/function".
2482   if (FormatTok->is(tok::kw_default))
2483     nextToken();
2484 
2485   // Consume "async function", "function" and "default function", so that these
2486   // get parsed as free-standing JS functions, i.e. do not require a trailing
2487   // semicolon.
2488   if (FormatTok->is(Keywords.kw_async))
2489     nextToken();
2490   if (FormatTok->is(Keywords.kw_function)) {
2491     nextToken();
2492     return;
2493   }
2494 
2495   // For imports, `export *`, `export {...}`, consume the rest of the line up
2496   // to the terminating `;`. For everything else, just return and continue
2497   // parsing the structural element, i.e. the declaration or expression for
2498   // `export default`.
2499   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2500       !FormatTok->isStringLiteral())
2501     return;
2502 
2503   while (!eof()) {
2504     if (FormatTok->is(tok::semi))
2505       return;
2506     if (Line->Tokens.empty()) {
2507       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2508       // import statement should terminate.
2509       return;
2510     }
2511     if (FormatTok->is(tok::l_brace)) {
2512       FormatTok->BlockKind = BK_Block;
2513       nextToken();
2514       parseBracedList();
2515     } else {
2516       nextToken();
2517     }
2518   }
2519 }
2520 
2521 void UnwrappedLineParser::parseStatementMacro() {
2522   nextToken();
2523   if (FormatTok->is(tok::l_paren))
2524     parseParens();
2525   if (FormatTok->is(tok::semi))
2526     nextToken();
2527   addUnwrappedLine();
2528 }
2529 
2530 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2531                                                  StringRef Prefix = "") {
2532   llvm::dbgs() << Prefix << "Line(" << Line.Level
2533                << ", FSC=" << Line.FirstStartColumn << ")"
2534                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2535   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2536                                                     E = Line.Tokens.end();
2537        I != E; ++I) {
2538     llvm::dbgs() << I->Tok->Tok.getName() << "["
2539                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2540                  << "] ";
2541   }
2542   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2543                                                     E = Line.Tokens.end();
2544        I != E; ++I) {
2545     const UnwrappedLineNode &Node = *I;
2546     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2547              I = Node.Children.begin(),
2548              E = Node.Children.end();
2549          I != E; ++I) {
2550       printDebugInfo(*I, "\nChild: ");
2551     }
2552   }
2553   llvm::dbgs() << "\n";
2554 }
2555 
2556 void UnwrappedLineParser::addUnwrappedLine() {
2557   if (Line->Tokens.empty())
2558     return;
2559   LLVM_DEBUG({
2560     if (CurrentLines == &Lines)
2561       printDebugInfo(*Line);
2562   });
2563   CurrentLines->push_back(std::move(*Line));
2564   Line->Tokens.clear();
2565   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2566   Line->FirstStartColumn = 0;
2567   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2568     CurrentLines->append(
2569         std::make_move_iterator(PreprocessorDirectives.begin()),
2570         std::make_move_iterator(PreprocessorDirectives.end()));
2571     PreprocessorDirectives.clear();
2572   }
2573   // Disconnect the current token from the last token on the previous line.
2574   FormatTok->Previous = nullptr;
2575 }
2576 
2577 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2578 
2579 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2580   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2581          FormatTok.NewlinesBefore > 0;
2582 }
2583 
2584 // Checks if \p FormatTok is a line comment that continues the line comment
2585 // section on \p Line.
2586 static bool
2587 continuesLineCommentSection(const FormatToken &FormatTok,
2588                             const UnwrappedLine &Line,
2589                             const llvm::Regex &CommentPragmasRegex) {
2590   if (Line.Tokens.empty())
2591     return false;
2592 
2593   StringRef IndentContent = FormatTok.TokenText;
2594   if (FormatTok.TokenText.startswith("//") ||
2595       FormatTok.TokenText.startswith("/*"))
2596     IndentContent = FormatTok.TokenText.substr(2);
2597   if (CommentPragmasRegex.match(IndentContent))
2598     return false;
2599 
2600   // If Line starts with a line comment, then FormatTok continues the comment
2601   // section if its original column is greater or equal to the original start
2602   // column of the line.
2603   //
2604   // Define the min column token of a line as follows: if a line ends in '{' or
2605   // contains a '{' followed by a line comment, then the min column token is
2606   // that '{'. Otherwise, the min column token of the line is the first token of
2607   // the line.
2608   //
2609   // If Line starts with a token other than a line comment, then FormatTok
2610   // continues the comment section if its original column is greater than the
2611   // original start column of the min column token of the line.
2612   //
2613   // For example, the second line comment continues the first in these cases:
2614   //
2615   // // first line
2616   // // second line
2617   //
2618   // and:
2619   //
2620   // // first line
2621   //  // second line
2622   //
2623   // and:
2624   //
2625   // int i; // first line
2626   //  // second line
2627   //
2628   // and:
2629   //
2630   // do { // first line
2631   //      // second line
2632   //   int i;
2633   // } while (true);
2634   //
2635   // and:
2636   //
2637   // enum {
2638   //   a, // first line
2639   //    // second line
2640   //   b
2641   // };
2642   //
2643   // The second line comment doesn't continue the first in these cases:
2644   //
2645   //   // first line
2646   //  // second line
2647   //
2648   // and:
2649   //
2650   // int i; // first line
2651   // // second line
2652   //
2653   // and:
2654   //
2655   // do { // first line
2656   //   // second line
2657   //   int i;
2658   // } while (true);
2659   //
2660   // and:
2661   //
2662   // enum {
2663   //   a, // first line
2664   //   // second line
2665   // };
2666   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2667 
2668   // Scan for '{//'. If found, use the column of '{' as a min column for line
2669   // comment section continuation.
2670   const FormatToken *PreviousToken = nullptr;
2671   for (const UnwrappedLineNode &Node : Line.Tokens) {
2672     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2673         isLineComment(*Node.Tok)) {
2674       MinColumnToken = PreviousToken;
2675       break;
2676     }
2677     PreviousToken = Node.Tok;
2678 
2679     // Grab the last newline preceding a token in this unwrapped line.
2680     if (Node.Tok->NewlinesBefore > 0) {
2681       MinColumnToken = Node.Tok;
2682     }
2683   }
2684   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2685     MinColumnToken = PreviousToken;
2686   }
2687 
2688   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2689                               MinColumnToken);
2690 }
2691 
2692 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2693   bool JustComments = Line->Tokens.empty();
2694   for (SmallVectorImpl<FormatToken *>::const_iterator
2695            I = CommentsBeforeNextToken.begin(),
2696            E = CommentsBeforeNextToken.end();
2697        I != E; ++I) {
2698     // Line comments that belong to the same line comment section are put on the
2699     // same line since later we might want to reflow content between them.
2700     // Additional fine-grained breaking of line comment sections is controlled
2701     // by the class BreakableLineCommentSection in case it is desirable to keep
2702     // several line comment sections in the same unwrapped line.
2703     //
2704     // FIXME: Consider putting separate line comment sections as children to the
2705     // unwrapped line instead.
2706     (*I)->ContinuesLineCommentSection =
2707         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2708     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2709       addUnwrappedLine();
2710     pushToken(*I);
2711   }
2712   if (NewlineBeforeNext && JustComments)
2713     addUnwrappedLine();
2714   CommentsBeforeNextToken.clear();
2715 }
2716 
2717 void UnwrappedLineParser::nextToken(int LevelDifference) {
2718   if (eof())
2719     return;
2720   flushComments(isOnNewLine(*FormatTok));
2721   pushToken(FormatTok);
2722   FormatToken *Previous = FormatTok;
2723   if (Style.Language != FormatStyle::LK_JavaScript)
2724     readToken(LevelDifference);
2725   else
2726     readTokenWithJavaScriptASI();
2727   FormatTok->Previous = Previous;
2728 }
2729 
2730 void UnwrappedLineParser::distributeComments(
2731     const SmallVectorImpl<FormatToken *> &Comments,
2732     const FormatToken *NextTok) {
2733   // Whether or not a line comment token continues a line is controlled by
2734   // the method continuesLineCommentSection, with the following caveat:
2735   //
2736   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2737   // that each comment line from the trail is aligned with the next token, if
2738   // the next token exists. If a trail exists, the beginning of the maximal
2739   // trail is marked as a start of a new comment section.
2740   //
2741   // For example in this code:
2742   //
2743   // int a; // line about a
2744   //   // line 1 about b
2745   //   // line 2 about b
2746   //   int b;
2747   //
2748   // the two lines about b form a maximal trail, so there are two sections, the
2749   // first one consisting of the single comment "// line about a" and the
2750   // second one consisting of the next two comments.
2751   if (Comments.empty())
2752     return;
2753   bool ShouldPushCommentsInCurrentLine = true;
2754   bool HasTrailAlignedWithNextToken = false;
2755   unsigned StartOfTrailAlignedWithNextToken = 0;
2756   if (NextTok) {
2757     // We are skipping the first element intentionally.
2758     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2759       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2760         HasTrailAlignedWithNextToken = true;
2761         StartOfTrailAlignedWithNextToken = i;
2762       }
2763     }
2764   }
2765   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2766     FormatToken *FormatTok = Comments[i];
2767     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2768       FormatTok->ContinuesLineCommentSection = false;
2769     } else {
2770       FormatTok->ContinuesLineCommentSection =
2771           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2772     }
2773     if (!FormatTok->ContinuesLineCommentSection &&
2774         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2775       ShouldPushCommentsInCurrentLine = false;
2776     }
2777     if (ShouldPushCommentsInCurrentLine) {
2778       pushToken(FormatTok);
2779     } else {
2780       CommentsBeforeNextToken.push_back(FormatTok);
2781     }
2782   }
2783 }
2784 
2785 void UnwrappedLineParser::readToken(int LevelDifference) {
2786   SmallVector<FormatToken *, 1> Comments;
2787   do {
2788     FormatTok = Tokens->getNextToken();
2789     assert(FormatTok);
2790     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2791            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2792       distributeComments(Comments, FormatTok);
2793       Comments.clear();
2794       // If there is an unfinished unwrapped line, we flush the preprocessor
2795       // directives only after that unwrapped line was finished later.
2796       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2797       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2798       assert((LevelDifference >= 0 ||
2799               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2800              "LevelDifference makes Line->Level negative");
2801       Line->Level += LevelDifference;
2802       // Comments stored before the preprocessor directive need to be output
2803       // before the preprocessor directive, at the same level as the
2804       // preprocessor directive, as we consider them to apply to the directive.
2805       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
2806           PPBranchLevel > 0)
2807         Line->Level += PPBranchLevel;
2808       flushComments(isOnNewLine(*FormatTok));
2809       parsePPDirective();
2810     }
2811     while (FormatTok->Type == TT_ConflictStart ||
2812            FormatTok->Type == TT_ConflictEnd ||
2813            FormatTok->Type == TT_ConflictAlternative) {
2814       if (FormatTok->Type == TT_ConflictStart) {
2815         conditionalCompilationStart(/*Unreachable=*/false);
2816       } else if (FormatTok->Type == TT_ConflictAlternative) {
2817         conditionalCompilationAlternative();
2818       } else if (FormatTok->Type == TT_ConflictEnd) {
2819         conditionalCompilationEnd();
2820       }
2821       FormatTok = Tokens->getNextToken();
2822       FormatTok->MustBreakBefore = true;
2823     }
2824 
2825     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2826         !Line->InPPDirective) {
2827       continue;
2828     }
2829 
2830     if (!FormatTok->Tok.is(tok::comment)) {
2831       distributeComments(Comments, FormatTok);
2832       Comments.clear();
2833       return;
2834     }
2835 
2836     Comments.push_back(FormatTok);
2837   } while (!eof());
2838 
2839   distributeComments(Comments, nullptr);
2840   Comments.clear();
2841 }
2842 
2843 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2844   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2845   if (MustBreakBeforeNextToken) {
2846     Line->Tokens.back().Tok->MustBreakBefore = true;
2847     MustBreakBeforeNextToken = false;
2848   }
2849 }
2850 
2851 } // end namespace format
2852 } // end namespace clang
2853