1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <algorithm>
21 
22 #define DEBUG_TYPE "format-parser"
23 
24 namespace clang {
25 namespace format {
26 
27 class FormatTokenSource {
28 public:
29   virtual ~FormatTokenSource() {}
30   virtual FormatToken *getNextToken() = 0;
31 
32   virtual unsigned getPosition() = 0;
33   virtual FormatToken *setPosition(unsigned Position) = 0;
34 };
35 
36 namespace {
37 
38 class ScopedDeclarationState {
39 public:
40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
41                          bool MustBeDeclaration)
42       : Line(Line), Stack(Stack) {
43     Line.MustBeDeclaration = MustBeDeclaration;
44     Stack.push_back(MustBeDeclaration);
45   }
46   ~ScopedDeclarationState() {
47     Stack.pop_back();
48     if (!Stack.empty())
49       Line.MustBeDeclaration = Stack.back();
50     else
51       Line.MustBeDeclaration = true;
52   }
53 
54 private:
55   UnwrappedLine &Line;
56   std::vector<bool> &Stack;
57 };
58 
59 static bool isLineComment(const FormatToken &FormatTok) {
60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     FakeEOF.Tok.startToken();
86     FakeEOF.Tok.setKind(tok::eof);
87     TokenSource = this;
88     Line.Level = 0;
89     Line.InPPDirective = true;
90   }
91 
92   ~ScopedMacroState() override {
93     TokenSource = PreviousTokenSource;
94     ResetToken = Token;
95     Line.InPPDirective = false;
96     Line.Level = PreviousLineLevel;
97   }
98 
99   FormatToken *getNextToken() override {
100     // The \c UnwrappedLineParser guards against this by never calling
101     // \c getNextToken() after it has encountered the first eof token.
102     assert(!eof());
103     PreviousToken = Token;
104     Token = PreviousTokenSource->getNextToken();
105     if (eof())
106       return &FakeEOF;
107     return Token;
108   }
109 
110   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
111 
112   FormatToken *setPosition(unsigned Position) override {
113     PreviousToken = nullptr;
114     Token = PreviousTokenSource->setPosition(Position);
115     return Token;
116   }
117 
118 private:
119   bool eof() {
120     return Token && Token->HasUnescapedNewline &&
121            !continuesLineComment(*Token, PreviousToken,
122                                  /*MinColumnToken=*/PreviousToken);
123   }
124 
125   FormatToken FakeEOF;
126   UnwrappedLine &Line;
127   FormatTokenSource *&TokenSource;
128   FormatToken *&ResetToken;
129   unsigned PreviousLineLevel;
130   FormatTokenSource *PreviousTokenSource;
131 
132   FormatToken *Token;
133   FormatToken *PreviousToken;
134 };
135 
136 } // end anonymous namespace
137 
138 class ScopedLineState {
139 public:
140   ScopedLineState(UnwrappedLineParser &Parser,
141                   bool SwitchToPreprocessorLines = false)
142       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
143     if (SwitchToPreprocessorLines)
144       Parser.CurrentLines = &Parser.PreprocessorDirectives;
145     else if (!Parser.Line->Tokens.empty())
146       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
147     PreBlockLine = std::move(Parser.Line);
148     Parser.Line = std::make_unique<UnwrappedLine>();
149     Parser.Line->Level = PreBlockLine->Level;
150     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
151   }
152 
153   ~ScopedLineState() {
154     if (!Parser.Line->Tokens.empty()) {
155       Parser.addUnwrappedLine();
156     }
157     assert(Parser.Line->Tokens.empty());
158     Parser.Line = std::move(PreBlockLine);
159     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
160       Parser.MustBreakBeforeNextToken = true;
161     Parser.CurrentLines = OriginalLines;
162   }
163 
164 private:
165   UnwrappedLineParser &Parser;
166 
167   std::unique_ptr<UnwrappedLine> PreBlockLine;
168   SmallVectorImpl<UnwrappedLine> *OriginalLines;
169 };
170 
171 class CompoundStatementIndenter {
172 public:
173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
174                             const FormatStyle &Style, unsigned &LineLevel)
175       : CompoundStatementIndenter(Parser, LineLevel,
176                                   Style.BraceWrapping.AfterControlStatement,
177                                   Style.BraceWrapping.IndentBraces) {}
178   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
179                             bool WrapBrace, bool IndentBrace)
180       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
181     if (WrapBrace)
182       Parser->addUnwrappedLine();
183     if (IndentBrace)
184       ++LineLevel;
185   }
186   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
187 
188 private:
189   unsigned &LineLevel;
190   unsigned OldLineLevel;
191 };
192 
193 namespace {
194 
195 class IndexedTokenSource : public FormatTokenSource {
196 public:
197   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
198       : Tokens(Tokens), Position(-1) {}
199 
200   FormatToken *getNextToken() override {
201     ++Position;
202     return Tokens[Position];
203   }
204 
205   unsigned getPosition() override {
206     assert(Position >= 0);
207     return Position;
208   }
209 
210   FormatToken *setPosition(unsigned P) override {
211     Position = P;
212     return Tokens[Position];
213   }
214 
215   void reset() { Position = -1; }
216 
217 private:
218   ArrayRef<FormatToken *> Tokens;
219   int Position;
220 };
221 
222 } // end anonymous namespace
223 
224 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
225                                          const AdditionalKeywords &Keywords,
226                                          unsigned FirstStartColumn,
227                                          ArrayRef<FormatToken *> Tokens,
228                                          UnwrappedLineConsumer &Callback)
229     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
230       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
231       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
232       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
233       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
234                        ? IG_Rejected
235                        : IG_Inited),
236       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
241                      ? IG_Rejected
242                      : IG_Inited;
243   IncludeGuardToken = nullptr;
244   Line.reset(new UnwrappedLine);
245   CommentsBeforeNextToken.clear();
246   FormatTok = nullptr;
247   MustBreakBeforeNextToken = false;
248   PreprocessorDirectives.clear();
249   CurrentLines = &Lines;
250   DeclarationScopeStack.clear();
251   PPStack.clear();
252   Line->FirstStartColumn = FirstStartColumn;
253 }
254 
255 void UnwrappedLineParser::parse() {
256   IndexedTokenSource TokenSource(AllTokens);
257   Line->FirstStartColumn = FirstStartColumn;
258   do {
259     LLVM_DEBUG(llvm::dbgs() << "----\n");
260     reset();
261     Tokens = &TokenSource;
262     TokenSource.reset();
263 
264     readToken();
265     parseFile();
266 
267     // If we found an include guard then all preprocessor directives (other than
268     // the guard) are over-indented by one.
269     if (IncludeGuard == IG_Found)
270       for (auto &Line : Lines)
271         if (Line.InPPDirective && Line.Level > 0)
272           --Line.Level;
273 
274     // Create line with eof token.
275     pushToken(FormatTok);
276     addUnwrappedLine();
277 
278     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
279                                                   E = Lines.end();
280          I != E; ++I) {
281       Callback.consumeUnwrappedLine(*I);
282     }
283     Callback.finishRun();
284     Lines.clear();
285     while (!PPLevelBranchIndex.empty() &&
286            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
287       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
288       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
289     }
290     if (!PPLevelBranchIndex.empty()) {
291       ++PPLevelBranchIndex.back();
292       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
293       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
294     }
295   } while (!PPLevelBranchIndex.empty());
296 }
297 
298 void UnwrappedLineParser::parseFile() {
299   // The top-level context in a file always has declarations, except for pre-
300   // processor directives and JavaScript files.
301   bool MustBeDeclaration =
302       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
303   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
304                                           MustBeDeclaration);
305   if (Style.Language == FormatStyle::LK_TextProto)
306     parseBracedList();
307   else
308     parseLevel(/*HasOpeningBrace=*/false);
309   // Make sure to format the remaining tokens.
310   //
311   // LK_TextProto is special since its top-level is parsed as the body of a
312   // braced list, which does not necessarily have natural line separators such
313   // as a semicolon. Comments after the last entry that have been determined to
314   // not belong to that line, as in:
315   //   key: value
316   //   // endfile comment
317   // do not have a chance to be put on a line of their own until this point.
318   // Here we add this newline before end-of-file comments.
319   if (Style.Language == FormatStyle::LK_TextProto &&
320       !CommentsBeforeNextToken.empty())
321     addUnwrappedLine();
322   flushComments(true);
323   addUnwrappedLine();
324 }
325 
326 void UnwrappedLineParser::parseCSharpAttribute() {
327   int UnpairedSquareBrackets = 1;
328   do {
329     switch (FormatTok->Tok.getKind()) {
330     case tok::r_square:
331       nextToken();
332       --UnpairedSquareBrackets;
333       if (UnpairedSquareBrackets == 0) {
334         addUnwrappedLine();
335         return;
336       }
337       break;
338     case tok::l_square:
339       ++UnpairedSquareBrackets;
340       nextToken();
341       break;
342     default:
343       nextToken();
344       break;
345     }
346   } while (!eof());
347 }
348 
/// Parses a sequence of structural elements at the current nesting level.
///
/// Runs until eof is reached or, if \p HasOpeningBrace is true, until a
/// closing brace (or a token typed \c TT_MacroBlockEnd) is the current
/// token; that token is left unconsumed for the caller.
void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  // Set once the first case/default label of this level has been seen, so
  // that the level is raised for labels at most once.
  bool SwitchLabelEncountered = false;
  do {
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->Type == TT_MacroBlockBegin) {
      kind = tok::l_brace;
    } else if (FormatTok->Type == TT_MacroBlockEnd) {
      kind = tok::r_brace;
    }

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      // 'continue' skips the block handling below and goes straight to the
      // loop condition.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace)
        return;
      // Closing brace without an opening one at this level: consume it and
      // put it on a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Peek at the first non-comment token after 'default', then rewind the
      // token source to where it was.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
      } while (Next && Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next && Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Raise the level for the first label only, when case labels are
      // indented or when inside a preprocessor directive at level 1.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      LLVM_FALLTHROUGH;
    default:
      parseStructuralElement();
      break;
    }
  } while (!eof());
}
420 
/// Scans ahead from the current '{' and assigns a \c BlockKind
/// (\c BK_Block or \c BK_BracedInit) to it and to the braces seen while
/// scanning.
///
/// The token source is rewound to its starting position before returning, so
/// the current token is unchanged for the caller. \p ExpectClassBody
/// influences how a ';' after the closing brace of the outermost brace pair
/// is interpreted.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->Tok.is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    unsigned ReadTokens = 0;
    do {
      NextTok = Tokens->getNextToken();
      ++ReadTokens;
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->BlockKind = BK_BracedInit;
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->BlockKind = BK_Block;
      } else {
        Tok->BlockKind = BK_Unknown;
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      // A closing brace without a recorded opening brace is ignored.
      if (LBraceStack.empty())
        break;
      if (LBraceStack.back()->BlockKind == BK_Unknown) {
        // The matching '{' is still undecided; decide based on the token
        // that follows this '}'.
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.  Note that
          // regardless how we mark inner braces here, we will overwrite the
          // BlockKind later if we parse a braced list (where all blocks
          // inside are by default braced lists), or when we explicitly detect
          // blocks (for example while parsing lambdas).
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              (Style.Language == FormatStyle::LK_JavaScript &&
               NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                Keywords.kw_as)) ||
              (Style.isCpp() && NextTok->is(tok::l_paren)) ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis) ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
              (NextTok->is(tok::semi) &&
               (!ExpectClassBody || LBraceStack.size() != 1)) ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);
          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            // Note that this reads one more token, so NextTok now refers to
            // the token after the '['.
            NextTok = Tokens->getNextToken();
            ++ReadTokens;
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the decision on both braces of the pair.
        if (ProbablyBracedList) {
          Tok->BlockKind = BK_BracedInit;
          LBraceStack.back()->BlockKind = BK_BracedInit;
        } else {
          Tok->BlockKind = BK_Block;
          LBraceStack.back()->BlockKind = BK_Block;
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only identifiers typed as statement macros are handled like the
      // statement tokens below.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-undecided brace marks it as a
      // block.
      if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
        LBraceStack.back()->BlockKind = BK_Block;
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
    if (LBraceStack[i]->BlockKind == BK_Unknown)
      LBraceStack[i]->BlockKind = BK_Block;
  }

  // Rewind so that the look-ahead leaves the token stream untouched.
  FormatTok = Tokens->setPosition(StoredPosition);
}
547 
/// Folds the \c std::hash of \p v into \p seed by XOR-ing the seed with the
/// hash plus the constant 0x9e3779b9 and two shifted copies of the seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
553 
554 size_t UnwrappedLineParser::computePPHash() const {
555   size_t h = 0;
556   for (const auto &i : PPStack) {
557     hash_combine(h, size_t(i.Kind));
558     hash_combine(h, i.Line);
559   }
560   return h;
561 }
562 
563 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
564                                      bool MunchSemi) {
565   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
566          "'{' or macro block token expected");
567   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
568   FormatTok->BlockKind = BK_Block;
569 
570   size_t PPStartHash = computePPHash();
571 
572   unsigned InitialLevel = Line->Level;
573   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
574 
575   if (MacroBlock && FormatTok->is(tok::l_paren))
576     parseParens();
577 
578   size_t NbPreprocessorDirectives =
579       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
580   addUnwrappedLine();
581   size_t OpeningLineIndex =
582       CurrentLines->empty()
583           ? (UnwrappedLine::kInvalidIndex)
584           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
585 
586   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
587                                           MustBeDeclaration);
588   if (AddLevel)
589     ++Line->Level;
590   parseLevel(/*HasOpeningBrace=*/true);
591 
592   if (eof())
593     return;
594 
595   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
596                  : !FormatTok->is(tok::r_brace)) {
597     Line->Level = InitialLevel;
598     FormatTok->BlockKind = BK_Block;
599     return;
600   }
601 
602   size_t PPEndHash = computePPHash();
603 
604   // Munch the closing brace.
605   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
606 
607   if (MacroBlock && FormatTok->is(tok::l_paren))
608     parseParens();
609 
610   if (MunchSemi && FormatTok->Tok.is(tok::semi))
611     nextToken();
612   Line->Level = InitialLevel;
613 
614   if (PPStartHash == PPEndHash) {
615     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
616     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
617       // Update the opening line to add the forward reference as well
618       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
619           CurrentLines->size() - 1;
620     }
621   }
622 }
623 
624 static bool isGoogScope(const UnwrappedLine &Line) {
625   // FIXME: Closure-library specific stuff should not be hard-coded but be
626   // configurable.
627   if (Line.Tokens.size() < 4)
628     return false;
629   auto I = Line.Tokens.begin();
630   if (I->Tok->TokenText != "goog")
631     return false;
632   ++I;
633   if (I->Tok->isNot(tok::period))
634     return false;
635   ++I;
636   if (I->Tok->TokenText != "scope")
637     return false;
638   ++I;
639   return I->Tok->is(tok::l_paren);
640 }
641 
642 static bool isIIFE(const UnwrappedLine &Line,
643                    const AdditionalKeywords &Keywords) {
644   // Look for the start of an immediately invoked anonymous function.
645   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
646   // This is commonly done in JavaScript to create a new, anonymous scope.
647   // Example: (function() { ... })()
648   if (Line.Tokens.size() < 3)
649     return false;
650   auto I = Line.Tokens.begin();
651   if (I->Tok->isNot(tok::l_paren))
652     return false;
653   ++I;
654   if (I->Tok->isNot(Keywords.kw_function))
655     return false;
656   ++I;
657   return I->Tok->is(tok::l_paren);
658 }
659 
660 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
661                                    const FormatToken &InitialToken) {
662   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
663     return Style.BraceWrapping.AfterNamespace;
664   if (InitialToken.is(tok::kw_class))
665     return Style.BraceWrapping.AfterClass;
666   if (InitialToken.is(tok::kw_union))
667     return Style.BraceWrapping.AfterUnion;
668   if (InitialToken.is(tok::kw_struct))
669     return Style.BraceWrapping.AfterStruct;
670   return false;
671 }
672 
673 void UnwrappedLineParser::parseChildBlock() {
674   FormatTok->BlockKind = BK_Block;
675   nextToken();
676   {
677     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
678                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
679     ScopedLineState LineState(*this);
680     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
681                                             /*MustBeDeclaration=*/false);
682     Line->Level += SkipIndent ? 0 : 1;
683     parseLevel(/*HasOpeningBrace=*/true);
684     flushComments(isOnNewLine(*FormatTok));
685     Line->Level -= SkipIndent ? 0 : 1;
686   }
687   nextToken();
688 }
689 
690 void UnwrappedLineParser::parsePPDirective() {
691   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
692   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
693 
694   nextToken();
695 
696   if (!FormatTok->Tok.getIdentifierInfo()) {
697     parsePPUnknown();
698     return;
699   }
700 
701   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
702   case tok::pp_define:
703     parsePPDefine();
704     return;
705   case tok::pp_if:
706     parsePPIf(/*IfDef=*/false);
707     break;
708   case tok::pp_ifdef:
709   case tok::pp_ifndef:
710     parsePPIf(/*IfDef=*/true);
711     break;
712   case tok::pp_else:
713     parsePPElse();
714     break;
715   case tok::pp_elif:
716     parsePPElIf();
717     break;
718   case tok::pp_endif:
719     parsePPEndIf();
720     break;
721   default:
722     parsePPUnknown();
723     break;
724   }
725 }
726 
727 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
728   size_t Line = CurrentLines->size();
729   if (CurrentLines == &PreprocessorDirectives)
730     Line += Lines.size();
731 
732   if (Unreachable ||
733       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
734     PPStack.push_back({PP_Unreachable, Line});
735   else
736     PPStack.push_back({PP_Conditional, Line});
737 }
738 
739 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
740   ++PPBranchLevel;
741   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
742   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
743     PPLevelBranchIndex.push_back(0);
744     PPLevelBranchCount.push_back(0);
745   }
746   PPChainBranchIndex.push(0);
747   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
748   conditionalCompilationCondition(Unreachable || Skip);
749 }
750 
751 void UnwrappedLineParser::conditionalCompilationAlternative() {
752   if (!PPStack.empty())
753     PPStack.pop_back();
754   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
755   if (!PPChainBranchIndex.empty())
756     ++PPChainBranchIndex.top();
757   conditionalCompilationCondition(
758       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
759       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
760 }
761 
762 void UnwrappedLineParser::conditionalCompilationEnd() {
763   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
764   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
765     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
766       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
767     }
768   }
769   // Guard against #endif's without #if.
770   if (PPBranchLevel > -1)
771     --PPBranchLevel;
772   if (!PPChainBranchIndex.empty())
773     PPChainBranchIndex.pop();
774   if (!PPStack.empty())
775     PPStack.pop_back();
776 }
777 
778 void UnwrappedLineParser::parsePPIf(bool IfDef) {
779   bool IfNDef = FormatTok->is(tok::pp_ifndef);
780   nextToken();
781   bool Unreachable = false;
782   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
783     Unreachable = true;
784   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
785     Unreachable = true;
786   conditionalCompilationStart(Unreachable);
787   FormatToken *IfCondition = FormatTok;
788   // If there's a #ifndef on the first line, and the only lines before it are
789   // comments, it could be an include guard.
790   bool MaybeIncludeGuard = IfNDef;
791   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
792     for (auto &Line : Lines) {
793       if (!Line.Tokens.front().Tok->is(tok::comment)) {
794         MaybeIncludeGuard = false;
795         IncludeGuard = IG_Rejected;
796         break;
797       }
798     }
799   --PPBranchLevel;
800   parsePPUnknown();
801   ++PPBranchLevel;
802   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
803     IncludeGuard = IG_IfNdefed;
804     IncludeGuardToken = IfCondition;
805   }
806 }
807 
808 void UnwrappedLineParser::parsePPElse() {
809   // If a potential include guard has an #else, it's not an include guard.
810   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
811     IncludeGuard = IG_Rejected;
812   conditionalCompilationAlternative();
813   if (PPBranchLevel > -1)
814     --PPBranchLevel;
815   parsePPUnknown();
816   ++PPBranchLevel;
817 }
818 
819 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
820 
821 void UnwrappedLineParser::parsePPEndIf() {
822   conditionalCompilationEnd();
823   parsePPUnknown();
824   // If the #endif of a potential include guard is the last thing in the file,
825   // then we found an include guard.
826   unsigned TokenPosition = Tokens->getPosition();
827   FormatToken *PeekNext = AllTokens[TokenPosition];
828   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
829       PeekNext->is(tok::eof) &&
830       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
831     IncludeGuard = IG_Found;
832 }
833 
834 void UnwrappedLineParser::parsePPDefine() {
835   nextToken();
836 
837   if (!FormatTok->Tok.getIdentifierInfo()) {
838     IncludeGuard = IG_Rejected;
839     IncludeGuardToken = nullptr;
840     parsePPUnknown();
841     return;
842   }
843 
844   if (IncludeGuard == IG_IfNdefed &&
845       IncludeGuardToken->TokenText == FormatTok->TokenText) {
846     IncludeGuard = IG_Defined;
847     IncludeGuardToken = nullptr;
848     for (auto &Line : Lines) {
849       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
850         IncludeGuard = IG_Rejected;
851         break;
852       }
853     }
854   }
855 
856   nextToken();
857   if (FormatTok->Tok.getKind() == tok::l_paren &&
858       FormatTok->WhitespaceRange.getBegin() ==
859           FormatTok->WhitespaceRange.getEnd()) {
860     parseParens();
861   }
862   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
863     Line->Level += PPBranchLevel + 1;
864   addUnwrappedLine();
865   ++Line->Level;
866 
867   // Errors during a preprocessor directive can only affect the layout of the
868   // preprocessor directive, and thus we ignore them. An alternative approach
869   // would be to use the same approach we use on the file level (no
870   // re-indentation if there was a structural error) within the macro
871   // definition.
872   parseFile();
873 }
874 
875 void UnwrappedLineParser::parsePPUnknown() {
876   do {
877     nextToken();
878   } while (!eof());
879   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
880     Line->Level += PPBranchLevel + 1;
881   addUnwrappedLine();
882 }
883 
884 // Here we blacklist certain tokens that are not usually the first token in an
885 // unwrapped line. This is used in attempt to distinguish macro calls without
886 // trailing semicolons from other constructs split to several lines.
887 static bool tokenCanStartNewLine(const clang::Token &Tok) {
888   // Semicolon can be a null-statement, l_square can be a start of a macro or
889   // a C++11 attribute, but this doesn't seem to be common.
890   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
891          Tok.isNot(tok::l_square) &&
892          // Tokens that can only be used as binary operators and a part of
893          // overloaded operator names.
894          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
895          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
896          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
897          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
898          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
899          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
900          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
901          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
902          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
903          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
904          Tok.isNot(tok::lesslessequal) &&
905          // Colon is used in labels, base class lists, initializer lists,
906          // range-based for loops, ternary operator, but should never be the
907          // first token in an unwrapped line.
908          Tok.isNot(tok::colon) &&
909          // 'noexcept' is a trailing annotation.
910          Tok.isNot(tok::kw_noexcept);
911 }
912 
913 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
914                           const FormatToken *FormatTok) {
915   // FIXME: This returns true for C/C++ keywords like 'struct'.
916   return FormatTok->is(tok::identifier) &&
917          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
918           !FormatTok->isOneOf(
919               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
920               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
921               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
922               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
923               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
924               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
925               Keywords.kw_from));
926 }
927 
928 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
929                                  const FormatToken *FormatTok) {
930   return FormatTok->Tok.isLiteral() ||
931          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
932          mustBeJSIdent(Keywords, FormatTok);
933 }
934 
935 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
936 // when encountered after a value (see mustBeJSIdentOrValue).
937 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
938                            const FormatToken *FormatTok) {
939   return FormatTok->isOneOf(
940       tok::kw_return, Keywords.kw_yield,
941       // conditionals
942       tok::kw_if, tok::kw_else,
943       // loops
944       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
945       // switch/case
946       tok::kw_switch, tok::kw_case,
947       // exceptions
948       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
949       // declaration
950       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
951       Keywords.kw_async, Keywords.kw_function,
952       // import/export
953       Keywords.kw_import, tok::kw_export);
954 }
955 
956 // readTokenWithJavaScriptASI reads the next token and terminates the current
957 // line if JavaScript Automatic Semicolon Insertion must
958 // happen between the current token and the next token.
959 //
960 // This method is conservative - it cannot cover all edge cases of JavaScript,
961 // but only aims to correctly handle certain well known cases. It *must not*
962 // return true in speculative cases.
963 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
964   FormatToken *Previous = FormatTok;
965   readToken();
966   FormatToken *Next = FormatTok;
967 
968   bool IsOnSameLine =
969       CommentsBeforeNextToken.empty()
970           ? Next->NewlinesBefore == 0
971           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
972   if (IsOnSameLine)
973     return;
974 
975   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
976   bool PreviousStartsTemplateExpr =
977       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
978   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
979     // If the line contains an '@' sign, the previous token might be an
980     // annotation, which can precede another identifier/value.
981     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
982                               [](UnwrappedLineNode &LineNode) {
983                                 return LineNode.Tok->is(tok::at);
984                               }) != Line->Tokens.end();
985     if (HasAt)
986       return;
987   }
988   if (Next->is(tok::exclaim) && PreviousMustBeValue)
989     return addUnwrappedLine();
990   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
991   bool NextEndsTemplateExpr =
992       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
993   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
994       (PreviousMustBeValue ||
995        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
996                          tok::minusminus)))
997     return addUnwrappedLine();
998   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
999       isJSDeclOrStmt(Keywords, Next))
1000     return addUnwrappedLine();
1001 }
1002 
/// Parses one structural element starting at the current token.
///
/// The function works in two phases:
///  1. A switch on the kind of the first token dispatches to dedicated
///     parsers (namespaces, control flow, labels, extern blocks, ...). Cases
///     that fully handle the element return; the others `break` into phase 2.
///  2. A generic loop consumes tokens until something that terminates the
///     element is encountered (e.g. ';', '}', a block) or eof() is reached.
///
/// Must not be called while the current token is a '{' (see the assert).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include token optionally followed by a string literal forms
  // a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Every token up to the matching '}' is marked as finalized; the
      // closing brace ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are just consumed as ordinary tokens;
    // otherwise they are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern' followed by a string literal and '{' opens an extern block;
    // any other 'extern' is handled by the generic loop below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import' ['public'] "file" [';'] forms one line. If no string
      // literal follows, return without emitting a line here.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // The signals/slots keywords followed by ':' end the line right after
    // the colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: generic token loop. Each iteration looks at the current token
  // and either consumes it, hands off to a sub-parser, or ends the element.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // '@{' is parsed as a braced list.
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        // Java '@interface': consume 'interface' and continue with the loop.
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword that follows the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef' followed by one of the NS_/CF_ enum macros is parsed as an
      // enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the element; it stays on the line being emitted.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' is not consumed here; the line collected so far is emitted
      // and the function returns with the brace still current.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type or identifier, a parameter list
      // and a braced body, which is parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek one token ahead, then restore the token source position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      // The identifier's text is saved before advancing because the macro
      // heuristic below inspects it.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // The checks below only apply when the identifier is the first token
      // of the line, optionally preceded by a single comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier is treated as a macro when it is all
        // upper case, is either function-like or at least five characters
        // long, is followed by a newline, and the next token is one that can
        // start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list that is
      // closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1460 
1461 bool UnwrappedLineParser::tryToParseLambda() {
1462   if (!Style.isCpp()) {
1463     nextToken();
1464     return false;
1465   }
1466   assert(FormatTok->is(tok::l_square));
1467   FormatToken &LSquare = *FormatTok;
1468   if (!tryToParseLambdaIntroducer())
1469     return false;
1470 
1471   bool SeenArrow = false;
1472 
1473   while (FormatTok->isNot(tok::l_brace)) {
1474     if (FormatTok->isSimpleTypeSpecifier()) {
1475       nextToken();
1476       continue;
1477     }
1478     switch (FormatTok->Tok.getKind()) {
1479     case tok::l_brace:
1480       break;
1481     case tok::l_paren:
1482       parseParens();
1483       break;
1484     case tok::amp:
1485     case tok::star:
1486     case tok::kw_const:
1487     case tok::comma:
1488     case tok::less:
1489     case tok::greater:
1490     case tok::identifier:
1491     case tok::numeric_constant:
1492     case tok::coloncolon:
1493     case tok::kw_class:
1494     case tok::kw_mutable:
1495     case tok::kw_noexcept:
1496     case tok::kw_template:
1497     case tok::kw_typename:
1498       nextToken();
1499       break;
1500     // Specialization of a template with an integer parameter can contain
1501     // arithmetic, logical, comparison and ternary operators.
1502     //
1503     // FIXME: This also accepts sequences of operators that are not in the scope
1504     // of a template argument list.
1505     //
1506     // In a C++ lambda a template type can only occur after an arrow. We use
1507     // this as an heuristic to distinguish between Objective-C expressions
1508     // followed by an `a->b` expression, such as:
1509     // ([obj func:arg] + a->b)
1510     // Otherwise the code below would parse as a lambda.
1511     //
1512     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1513     // explicit template lists: []<bool b = true && false>(U &&u){}
1514     case tok::plus:
1515     case tok::minus:
1516     case tok::exclaim:
1517     case tok::tilde:
1518     case tok::slash:
1519     case tok::percent:
1520     case tok::lessless:
1521     case tok::pipe:
1522     case tok::pipepipe:
1523     case tok::ampamp:
1524     case tok::caret:
1525     case tok::equalequal:
1526     case tok::exclaimequal:
1527     case tok::greaterequal:
1528     case tok::lessequal:
1529     case tok::question:
1530     case tok::colon:
1531     case tok::kw_true:
1532     case tok::kw_false:
1533       if (SeenArrow) {
1534         nextToken();
1535         break;
1536       }
1537       return true;
1538     case tok::arrow:
1539       // This might or might not actually be a lambda arrow (this could be an
1540       // ObjC method invocation followed by a dereferencing arrow). We might
1541       // reset this back to TT_Unknown in TokenAnnotator.
1542       FormatTok->Type = TT_LambdaArrow;
1543       SeenArrow = true;
1544       nextToken();
1545       break;
1546     default:
1547       return true;
1548     }
1549   }
1550   FormatTok->Type = TT_LambdaLBrace;
1551   LSquare.Type = TT_LambdaLSquare;
1552   parseChildBlock();
1553   return true;
1554 }
1555 
1556 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1557   const FormatToken *Previous = FormatTok->Previous;
1558   if (Previous &&
1559       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1560                          tok::kw_delete, tok::l_square) ||
1561        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1562        Previous->isSimpleTypeSpecifier())) {
1563     nextToken();
1564     return false;
1565   }
1566   nextToken();
1567   if (FormatTok->is(tok::l_square)) {
1568     return false;
1569   }
1570   parseSquare(/*LambdaIntroducer=*/true);
1571   return true;
1572 }
1573 
1574 void UnwrappedLineParser::tryToParseJSFunction() {
1575   assert(FormatTok->is(Keywords.kw_function) ||
1576          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1577   if (FormatTok->is(Keywords.kw_async))
1578     nextToken();
1579   // Consume "function".
1580   nextToken();
1581 
1582   // Consume * (generator function). Treat it like C++'s overloaded operators.
1583   if (FormatTok->is(tok::star)) {
1584     FormatTok->Type = TT_OverloadedOperator;
1585     nextToken();
1586   }
1587 
1588   // Consume function name.
1589   if (FormatTok->is(tok::identifier))
1590     nextToken();
1591 
1592   if (FormatTok->isNot(tok::l_paren))
1593     return;
1594 
1595   // Parse formal parameter list.
1596   parseParens();
1597 
1598   if (FormatTok->is(tok::colon)) {
1599     // Parse a type definition.
1600     nextToken();
1601 
1602     // Eat the type declaration. For braced inline object types, balance braces,
1603     // otherwise just parse until finding an l_brace for the function body.
1604     if (FormatTok->is(tok::l_brace))
1605       tryToParseBracedList();
1606     else
1607       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1608         nextToken();
1609   }
1610 
1611   if (FormatTok->is(tok::semi))
1612     return;
1613 
1614   parseChildBlock();
1615 }
1616 
1617 bool UnwrappedLineParser::tryToParseBracedList() {
1618   if (FormatTok->BlockKind == BK_Unknown)
1619     calculateBraceTypes();
1620   assert(FormatTok->BlockKind != BK_Unknown);
1621   if (FormatTok->BlockKind == BK_Block)
1622     return false;
1623   nextToken();
1624   parseBracedList();
1625   return true;
1626 }
1627 
1628 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1629                                           tok::TokenKind ClosingBraceKind) {
1630   bool HasError = false;
1631 
1632   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1633   // replace this by using parseAssigmentExpression() inside.
1634   do {
1635     if (Style.isCSharp()) {
1636       if (FormatTok->is(TT_JsFatArrow)) {
1637         nextToken();
1638         // Fat arrows can be followed by simple expressions or by child blocks
1639         // in curly braces.
1640         if (FormatTok->is(tok::l_brace)) {
1641           parseChildBlock();
1642           continue;
1643         }
1644       }
1645     }
1646     if (Style.Language == FormatStyle::LK_JavaScript) {
1647       if (FormatTok->is(Keywords.kw_function) ||
1648           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1649         tryToParseJSFunction();
1650         continue;
1651       }
1652       if (FormatTok->is(TT_JsFatArrow)) {
1653         nextToken();
1654         // Fat arrows can be followed by simple expressions or by child blocks
1655         // in curly braces.
1656         if (FormatTok->is(tok::l_brace)) {
1657           parseChildBlock();
1658           continue;
1659         }
1660       }
1661       if (FormatTok->is(tok::l_brace)) {
1662         // Could be a method inside of a braced list `{a() { return 1; }}`.
1663         if (tryToParseBracedList())
1664           continue;
1665         parseChildBlock();
1666       }
1667     }
1668     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1669       nextToken();
1670       return !HasError;
1671     }
1672     switch (FormatTok->Tok.getKind()) {
1673     case tok::caret:
1674       nextToken();
1675       if (FormatTok->is(tok::l_brace)) {
1676         parseChildBlock();
1677       }
1678       break;
1679     case tok::l_square:
1680       tryToParseLambda();
1681       break;
1682     case tok::l_paren:
1683       parseParens();
1684       // JavaScript can just have free standing methods and getters/setters in
1685       // object literals. Detect them by a "{" following ")".
1686       if (Style.Language == FormatStyle::LK_JavaScript) {
1687         if (FormatTok->is(tok::l_brace))
1688           parseChildBlock();
1689         break;
1690       }
1691       break;
1692     case tok::l_brace:
1693       // Assume there are no blocks inside a braced init list apart
1694       // from the ones we explicitly parse out (like lambdas).
1695       FormatTok->BlockKind = BK_BracedInit;
1696       nextToken();
1697       parseBracedList();
1698       break;
1699     case tok::less:
1700       if (Style.Language == FormatStyle::LK_Proto) {
1701         nextToken();
1702         parseBracedList(/*ContinueOnSemicolons=*/false,
1703                         /*ClosingBraceKind=*/tok::greater);
1704       } else {
1705         nextToken();
1706       }
1707       break;
1708     case tok::semi:
1709       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1710       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1711       // used for error recovery if we have otherwise determined that this is
1712       // a braced list.
1713       if (Style.Language == FormatStyle::LK_JavaScript) {
1714         nextToken();
1715         break;
1716       }
1717       HasError = true;
1718       if (!ContinueOnSemicolons)
1719         return !HasError;
1720       nextToken();
1721       break;
1722     case tok::comma:
1723       nextToken();
1724       break;
1725     default:
1726       nextToken();
1727       break;
1728     }
1729   } while (!eof());
1730   return false;
1731 }
1732 
1733 void UnwrappedLineParser::parseParens() {
1734   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1735   nextToken();
1736   do {
1737     switch (FormatTok->Tok.getKind()) {
1738     case tok::l_paren:
1739       parseParens();
1740       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1741         parseChildBlock();
1742       break;
1743     case tok::r_paren:
1744       nextToken();
1745       return;
1746     case tok::r_brace:
1747       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1748       return;
1749     case tok::l_square:
1750       tryToParseLambda();
1751       break;
1752     case tok::l_brace:
1753       if (!tryToParseBracedList())
1754         parseChildBlock();
1755       break;
1756     case tok::at:
1757       nextToken();
1758       if (FormatTok->Tok.is(tok::l_brace)) {
1759         nextToken();
1760         parseBracedList();
1761       }
1762       break;
1763     case tok::kw_class:
1764       if (Style.Language == FormatStyle::LK_JavaScript)
1765         parseRecord(/*ParseAsExpr=*/true);
1766       else
1767         nextToken();
1768       break;
1769     case tok::identifier:
1770       if (Style.Language == FormatStyle::LK_JavaScript &&
1771           (FormatTok->is(Keywords.kw_function) ||
1772            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1773         tryToParseJSFunction();
1774       else
1775         nextToken();
1776       break;
1777     default:
1778       nextToken();
1779       break;
1780     }
1781   } while (!eof());
1782 }
1783 
1784 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1785   if (!LambdaIntroducer) {
1786     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1787     if (tryToParseLambda())
1788       return;
1789   }
1790   do {
1791     switch (FormatTok->Tok.getKind()) {
1792     case tok::l_paren:
1793       parseParens();
1794       break;
1795     case tok::r_square:
1796       nextToken();
1797       return;
1798     case tok::r_brace:
1799       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1800       return;
1801     case tok::l_square:
1802       parseSquare();
1803       break;
1804     case tok::l_brace: {
1805       if (!tryToParseBracedList())
1806         parseChildBlock();
1807       break;
1808     }
1809     case tok::at:
1810       nextToken();
1811       if (FormatTok->Tok.is(tok::l_brace)) {
1812         nextToken();
1813         parseBracedList();
1814       }
1815       break;
1816     default:
1817       nextToken();
1818       break;
1819     }
1820   } while (!eof());
1821 }
1822 
1823 void UnwrappedLineParser::parseIfThenElse() {
1824   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1825   nextToken();
1826   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1827     nextToken();
1828   if (FormatTok->Tok.is(tok::l_paren))
1829     parseParens();
1830   bool NeedsUnwrappedLine = false;
1831   if (FormatTok->Tok.is(tok::l_brace)) {
1832     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1833     parseBlock(/*MustBeDeclaration=*/false);
1834     if (Style.BraceWrapping.BeforeElse)
1835       addUnwrappedLine();
1836     else
1837       NeedsUnwrappedLine = true;
1838   } else {
1839     addUnwrappedLine();
1840     ++Line->Level;
1841     parseStructuralElement();
1842     --Line->Level;
1843   }
1844   if (FormatTok->Tok.is(tok::kw_else)) {
1845     nextToken();
1846     if (FormatTok->Tok.is(tok::l_brace)) {
1847       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1848       parseBlock(/*MustBeDeclaration=*/false);
1849       addUnwrappedLine();
1850     } else if (FormatTok->Tok.is(tok::kw_if)) {
1851       parseIfThenElse();
1852     } else {
1853       addUnwrappedLine();
1854       ++Line->Level;
1855       parseStructuralElement();
1856       if (FormatTok->is(tok::eof))
1857         addUnwrappedLine();
1858       --Line->Level;
1859     }
1860   } else if (NeedsUnwrappedLine) {
1861     addUnwrappedLine();
1862   }
1863 }
1864 
1865 void UnwrappedLineParser::parseTryCatch() {
1866   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1867   nextToken();
1868   bool NeedsUnwrappedLine = false;
1869   if (FormatTok->is(tok::colon)) {
1870     // We are in a function try block, what comes is an initializer list.
1871     nextToken();
1872 
1873     // In case identifiers were removed by clang-tidy, what might follow is
1874     // multiple commas in sequence - before the first identifier.
1875     while (FormatTok->is(tok::comma))
1876       nextToken();
1877 
1878     while (FormatTok->is(tok::identifier)) {
1879       nextToken();
1880       if (FormatTok->is(tok::l_paren))
1881         parseParens();
1882 
1883       // In case identifiers were removed by clang-tidy, what might follow is
1884       // multiple commas in sequence - after the first identifier.
1885       while (FormatTok->is(tok::comma))
1886         nextToken();
1887     }
1888   }
1889   // Parse try with resource.
1890   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1891     parseParens();
1892   }
1893   if (FormatTok->is(tok::l_brace)) {
1894     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1895     parseBlock(/*MustBeDeclaration=*/false);
1896     if (Style.BraceWrapping.BeforeCatch) {
1897       addUnwrappedLine();
1898     } else {
1899       NeedsUnwrappedLine = true;
1900     }
1901   } else if (!FormatTok->is(tok::kw_catch)) {
1902     // The C++ standard requires a compound-statement after a try.
1903     // If there's none, we try to assume there's a structuralElement
1904     // and try to continue.
1905     addUnwrappedLine();
1906     ++Line->Level;
1907     parseStructuralElement();
1908     --Line->Level;
1909   }
1910   while (1) {
1911     if (FormatTok->is(tok::at))
1912       nextToken();
1913     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1914                              tok::kw___finally) ||
1915           ((Style.Language == FormatStyle::LK_Java ||
1916             Style.Language == FormatStyle::LK_JavaScript) &&
1917            FormatTok->is(Keywords.kw_finally)) ||
1918           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1919            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1920       break;
1921     nextToken();
1922     while (FormatTok->isNot(tok::l_brace)) {
1923       if (FormatTok->is(tok::l_paren)) {
1924         parseParens();
1925         continue;
1926       }
1927       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1928         return;
1929       nextToken();
1930     }
1931     NeedsUnwrappedLine = false;
1932     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1933     parseBlock(/*MustBeDeclaration=*/false);
1934     if (Style.BraceWrapping.BeforeCatch)
1935       addUnwrappedLine();
1936     else
1937       NeedsUnwrappedLine = true;
1938   }
1939   if (NeedsUnwrappedLine)
1940     addUnwrappedLine();
1941 }
1942 
1943 void UnwrappedLineParser::parseNamespace() {
1944   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
1945          "'namespace' expected");
1946 
1947   const FormatToken &InitialToken = *FormatTok;
1948   nextToken();
1949   if (InitialToken.is(TT_NamespaceMacro)) {
1950     parseParens();
1951   } else {
1952     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
1953                               tok::l_square)) {
1954       if (FormatTok->is(tok::l_square))
1955         parseSquare();
1956       else
1957         nextToken();
1958     }
1959   }
1960   if (FormatTok->Tok.is(tok::l_brace)) {
1961     if (ShouldBreakBeforeBrace(Style, InitialToken))
1962       addUnwrappedLine();
1963 
1964     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1965                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1966                      DeclarationScopeStack.size() > 1);
1967     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1968     // Munch the semicolon after a namespace. This is more common than one would
1969     // think. Putting the semicolon into its own line is very ugly.
1970     if (FormatTok->Tok.is(tok::semi))
1971       nextToken();
1972     addUnwrappedLine();
1973   }
1974   // FIXME: Add error handling.
1975 }
1976 
1977 void UnwrappedLineParser::parseNew() {
1978   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1979   nextToken();
1980 
1981   if (Style.isCSharp()) {
1982     do {
1983       if (FormatTok->is(tok::l_brace))
1984         parseBracedList();
1985 
1986       if (FormatTok->isOneOf(tok::semi, tok::comma))
1987         return;
1988 
1989       nextToken();
1990     } while (!eof());
1991   }
1992 
1993   if (Style.Language != FormatStyle::LK_Java)
1994     return;
1995 
1996   // In Java, we can parse everything up to the parens, which aren't optional.
1997   do {
1998     // There should not be a ;, { or } before the new's open paren.
1999     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2000       return;
2001 
2002     // Consume the parens.
2003     if (FormatTok->is(tok::l_paren)) {
2004       parseParens();
2005 
2006       // If there is a class body of an anonymous class, consume that as child.
2007       if (FormatTok->is(tok::l_brace))
2008         parseChildBlock();
2009       return;
2010     }
2011     nextToken();
2012   } while (!eof());
2013 }
2014 
2015 void UnwrappedLineParser::parseForOrWhileLoop() {
2016   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2017          "'for', 'while' or foreach macro expected");
2018   nextToken();
2019   // JS' for await ( ...
2020   if (Style.Language == FormatStyle::LK_JavaScript &&
2021       FormatTok->is(Keywords.kw_await))
2022     nextToken();
2023   if (FormatTok->Tok.is(tok::l_paren))
2024     parseParens();
2025   if (FormatTok->Tok.is(tok::l_brace)) {
2026     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2027     parseBlock(/*MustBeDeclaration=*/false);
2028     addUnwrappedLine();
2029   } else {
2030     addUnwrappedLine();
2031     ++Line->Level;
2032     parseStructuralElement();
2033     --Line->Level;
2034   }
2035 }
2036 
2037 void UnwrappedLineParser::parseDoWhile() {
2038   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2039   nextToken();
2040   if (FormatTok->Tok.is(tok::l_brace)) {
2041     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2042     parseBlock(/*MustBeDeclaration=*/false);
2043     if (Style.BraceWrapping.IndentBraces)
2044       addUnwrappedLine();
2045   } else {
2046     addUnwrappedLine();
2047     ++Line->Level;
2048     parseStructuralElement();
2049     --Line->Level;
2050   }
2051 
2052   // FIXME: Add error handling.
2053   if (!FormatTok->Tok.is(tok::kw_while)) {
2054     addUnwrappedLine();
2055     return;
2056   }
2057 
2058   nextToken();
2059   parseStructuralElement();
2060 }
2061 
2062 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2063   nextToken();
2064   unsigned OldLineLevel = Line->Level;
2065   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2066     --Line->Level;
2067   if (LeftAlignLabel)
2068     Line->Level = 0;
2069   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2070       FormatTok->Tok.is(tok::l_brace)) {
2071     CompoundStatementIndenter Indenter(this, Line->Level,
2072                                        Style.BraceWrapping.AfterCaseLabel,
2073                                        Style.BraceWrapping.IndentBraces);
2074     parseBlock(/*MustBeDeclaration=*/false);
2075     if (FormatTok->Tok.is(tok::kw_break)) {
2076       if (Style.BraceWrapping.AfterControlStatement ==
2077           FormatStyle::BWACS_Always)
2078         addUnwrappedLine();
2079       parseStructuralElement();
2080     }
2081     addUnwrappedLine();
2082   } else {
2083     if (FormatTok->is(tok::semi))
2084       nextToken();
2085     addUnwrappedLine();
2086   }
2087   Line->Level = OldLineLevel;
2088   if (FormatTok->isNot(tok::l_brace)) {
2089     parseStructuralElement();
2090     addUnwrappedLine();
2091   }
2092 }
2093 
2094 void UnwrappedLineParser::parseCaseLabel() {
2095   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2096   // FIXME: fix handling of complex expressions here.
2097   do {
2098     nextToken();
2099   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2100   parseLabel();
2101 }
2102 
2103 void UnwrappedLineParser::parseSwitch() {
2104   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2105   nextToken();
2106   if (FormatTok->Tok.is(tok::l_paren))
2107     parseParens();
2108   if (FormatTok->Tok.is(tok::l_brace)) {
2109     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2110     parseBlock(/*MustBeDeclaration=*/false);
2111     addUnwrappedLine();
2112   } else {
2113     addUnwrappedLine();
2114     ++Line->Level;
2115     parseStructuralElement();
2116     --Line->Level;
2117   }
2118 }
2119 
2120 void UnwrappedLineParser::parseAccessSpecifier() {
2121   nextToken();
2122   // Understand Qt's slots.
2123   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2124     nextToken();
2125   // Otherwise, we don't know what it is, and we'd better keep the next token.
2126   if (FormatTok->Tok.is(tok::colon))
2127     nextToken();
2128   addUnwrappedLine();
2129 }
2130 
2131 bool UnwrappedLineParser::parseEnum() {
2132   // Won't be 'enum' for NS_ENUMs.
2133   if (FormatTok->Tok.is(tok::kw_enum))
2134     nextToken();
2135 
2136   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2137   // declarations. An "enum" keyword followed by a colon would be a syntax
2138   // error and thus assume it is just an identifier.
2139   if (Style.Language == FormatStyle::LK_JavaScript &&
2140       FormatTok->isOneOf(tok::colon, tok::question))
2141     return false;
2142 
2143   // In protobuf, "enum" can be used as a field name.
2144   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2145     return false;
2146 
2147   // Eat up enum class ...
2148   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2149     nextToken();
2150 
2151   while (FormatTok->Tok.getIdentifierInfo() ||
2152          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2153                             tok::greater, tok::comma, tok::question)) {
2154     nextToken();
2155     // We can have macros or attributes in between 'enum' and the enum name.
2156     if (FormatTok->is(tok::l_paren))
2157       parseParens();
2158     if (FormatTok->is(tok::identifier)) {
2159       nextToken();
2160       // If there are two identifiers in a row, this is likely an elaborate
2161       // return type. In Java, this can be "implements", etc.
2162       if (Style.isCpp() && FormatTok->is(tok::identifier))
2163         return false;
2164     }
2165   }
2166 
2167   // Just a declaration or something is wrong.
2168   if (FormatTok->isNot(tok::l_brace))
2169     return true;
2170   FormatTok->BlockKind = BK_Block;
2171 
2172   if (Style.Language == FormatStyle::LK_Java) {
2173     // Java enums are different.
2174     parseJavaEnumBody();
2175     return true;
2176   }
2177   if (Style.Language == FormatStyle::LK_Proto) {
2178     parseBlock(/*MustBeDeclaration=*/true);
2179     return true;
2180   }
2181 
2182   // Parse enum body.
2183   nextToken();
2184   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2185   if (HasError) {
2186     if (FormatTok->is(tok::semi))
2187       nextToken();
2188     addUnwrappedLine();
2189   }
2190   return true;
2191 
2192   // There is no addUnwrappedLine() here so that we fall through to parsing a
2193   // structural element afterwards. Thus, in "enum A {} n, m;",
2194   // "} n, m;" will end up in one unwrapped line.
2195 }
2196 
2197 void UnwrappedLineParser::parseJavaEnumBody() {
2198   // Determine whether the enum is simple, i.e. does not have a semicolon or
2199   // constants with class bodies. Simple enums can be formatted like braced
2200   // lists, contracted to a single line, etc.
2201   unsigned StoredPosition = Tokens->getPosition();
2202   bool IsSimple = true;
2203   FormatToken *Tok = Tokens->getNextToken();
2204   while (Tok) {
2205     if (Tok->is(tok::r_brace))
2206       break;
2207     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2208       IsSimple = false;
2209       break;
2210     }
2211     // FIXME: This will also mark enums with braces in the arguments to enum
2212     // constants as "not simple". This is probably fine in practice, though.
2213     Tok = Tokens->getNextToken();
2214   }
2215   FormatTok = Tokens->setPosition(StoredPosition);
2216 
2217   if (IsSimple) {
2218     nextToken();
2219     parseBracedList();
2220     addUnwrappedLine();
2221     return;
2222   }
2223 
2224   // Parse the body of a more complex enum.
2225   // First add a line for everything up to the "{".
2226   nextToken();
2227   addUnwrappedLine();
2228   ++Line->Level;
2229 
2230   // Parse the enum constants.
2231   while (FormatTok) {
2232     if (FormatTok->is(tok::l_brace)) {
2233       // Parse the constant's class body.
2234       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2235                  /*MunchSemi=*/false);
2236     } else if (FormatTok->is(tok::l_paren)) {
2237       parseParens();
2238     } else if (FormatTok->is(tok::comma)) {
2239       nextToken();
2240       addUnwrappedLine();
2241     } else if (FormatTok->is(tok::semi)) {
2242       nextToken();
2243       addUnwrappedLine();
2244       break;
2245     } else if (FormatTok->is(tok::r_brace)) {
2246       addUnwrappedLine();
2247       break;
2248     } else {
2249       nextToken();
2250     }
2251   }
2252 
2253   // Parse the class body after the enum's ";" if any.
2254   parseLevel(/*HasOpeningBrace=*/true);
2255   nextToken();
2256   --Line->Level;
2257   addUnwrappedLine();
2258 }
2259 
2260 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2261   const FormatToken &InitialToken = *FormatTok;
2262   nextToken();
2263 
2264   // The actual identifier can be a nested name specifier, and in macros
2265   // it is often token-pasted.
2266   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2267                             tok::kw___attribute, tok::kw___declspec,
2268                             tok::kw_alignas) ||
2269          ((Style.Language == FormatStyle::LK_Java ||
2270            Style.Language == FormatStyle::LK_JavaScript) &&
2271           FormatTok->isOneOf(tok::period, tok::comma))) {
2272     if (Style.Language == FormatStyle::LK_JavaScript &&
2273         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2274       // JavaScript/TypeScript supports inline object types in
2275       // extends/implements positions:
2276       //     class Foo implements {bar: number} { }
2277       nextToken();
2278       if (FormatTok->is(tok::l_brace)) {
2279         tryToParseBracedList();
2280         continue;
2281       }
2282     }
2283     bool IsNonMacroIdentifier =
2284         FormatTok->is(tok::identifier) &&
2285         FormatTok->TokenText != FormatTok->TokenText.upper();
2286     nextToken();
2287     // We can have macros or attributes in between 'class' and the class name.
2288     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2289       parseParens();
2290   }
2291 
2292   // Note that parsing away template declarations here leads to incorrectly
2293   // accepting function declarations as record declarations.
2294   // In general, we cannot solve this problem. Consider:
2295   // class A<int> B() {}
2296   // which can be a function definition or a class definition when B() is a
2297   // macro. If we find enough real-world cases where this is a problem, we
2298   // can parse for the 'template' keyword in the beginning of the statement,
2299   // and thus rule out the record production in case there is no template
2300   // (this would still leave us with an ambiguity between template function
2301   // and class declarations).
2302   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2303     while (!eof()) {
2304       if (FormatTok->is(tok::l_brace)) {
2305         calculateBraceTypes(/*ExpectClassBody=*/true);
2306         if (!tryToParseBracedList())
2307           break;
2308       }
2309       if (FormatTok->Tok.is(tok::semi))
2310         return;
2311       nextToken();
2312     }
2313   }
2314   if (FormatTok->Tok.is(tok::l_brace)) {
2315     if (ParseAsExpr) {
2316       parseChildBlock();
2317     } else {
2318       if (ShouldBreakBeforeBrace(Style, InitialToken))
2319         addUnwrappedLine();
2320 
2321       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2322                  /*MunchSemi=*/false);
2323     }
2324   }
2325   // There is no addUnwrappedLine() here so that we fall through to parsing a
2326   // structural element afterwards. Thus, in "class A {} n, m;",
2327   // "} n, m;" will end up in one unwrapped line.
2328 }
2329 
2330 void UnwrappedLineParser::parseObjCMethod() {
2331   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2332          "'(' or identifier expected.");
2333   do {
2334     if (FormatTok->Tok.is(tok::semi)) {
2335       nextToken();
2336       addUnwrappedLine();
2337       return;
2338     } else if (FormatTok->Tok.is(tok::l_brace)) {
2339       if (Style.BraceWrapping.AfterFunction)
2340         addUnwrappedLine();
2341       parseBlock(/*MustBeDeclaration=*/false);
2342       addUnwrappedLine();
2343       return;
2344     } else {
2345       nextToken();
2346     }
2347   } while (!eof());
2348 }
2349 
2350 void UnwrappedLineParser::parseObjCProtocolList() {
2351   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2352   do {
2353     nextToken();
2354     // Early exit in case someone forgot a close angle.
2355     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2356         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2357       return;
2358   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2359   nextToken(); // Skip '>'.
2360 }
2361 
2362 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2363   do {
2364     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2365       nextToken();
2366       addUnwrappedLine();
2367       break;
2368     }
2369     if (FormatTok->is(tok::l_brace)) {
2370       parseBlock(/*MustBeDeclaration=*/false);
2371       // In ObjC interfaces, nothing should be following the "}".
2372       addUnwrappedLine();
2373     } else if (FormatTok->is(tok::r_brace)) {
2374       // Ignore stray "}". parseStructuralElement doesn't consume them.
2375       nextToken();
2376       addUnwrappedLine();
2377     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2378       nextToken();
2379       parseObjCMethod();
2380     } else {
2381       parseStructuralElement();
2382     }
2383   } while (!eof());
2384 }
2385 
2386 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2387   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2388          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2389   nextToken();
2390   nextToken(); // interface name
2391 
2392   // @interface can be followed by a lightweight generic
2393   // specialization list, then either a base class or a category.
2394   if (FormatTok->Tok.is(tok::less)) {
2395     // Unlike protocol lists, generic parameterizations support
2396     // nested angles:
2397     //
2398     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2399     //     NSObject <NSCopying, NSSecureCoding>
2400     //
2401     // so we need to count how many open angles we have left.
2402     unsigned NumOpenAngles = 1;
2403     do {
2404       nextToken();
2405       // Early exit in case someone forgot a close angle.
2406       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2407           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2408         break;
2409       if (FormatTok->Tok.is(tok::less))
2410         ++NumOpenAngles;
2411       else if (FormatTok->Tok.is(tok::greater)) {
2412         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2413         --NumOpenAngles;
2414       }
2415     } while (!eof() && NumOpenAngles != 0);
2416     nextToken(); // Skip '>'.
2417   }
2418   if (FormatTok->Tok.is(tok::colon)) {
2419     nextToken();
2420     nextToken(); // base class name
2421   } else if (FormatTok->Tok.is(tok::l_paren))
2422     // Skip category, if present.
2423     parseParens();
2424 
2425   if (FormatTok->Tok.is(tok::less))
2426     parseObjCProtocolList();
2427 
2428   if (FormatTok->Tok.is(tok::l_brace)) {
2429     if (Style.BraceWrapping.AfterObjCDeclaration)
2430       addUnwrappedLine();
2431     parseBlock(/*MustBeDeclaration=*/true);
2432   }
2433 
2434   // With instance variables, this puts '}' on its own line.  Without instance
2435   // variables, this ends the @interface line.
2436   addUnwrappedLine();
2437 
2438   parseObjCUntilAtEnd();
2439 }
2440 
2441 // Returns true for the declaration/definition form of @protocol,
2442 // false for the expression form.
2443 bool UnwrappedLineParser::parseObjCProtocol() {
2444   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2445   nextToken();
2446 
2447   if (FormatTok->is(tok::l_paren))
2448     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2449     return false;
2450 
2451   // The definition/declaration form,
2452   // @protocol Foo
2453   // - (int)someMethod;
2454   // @end
2455 
2456   nextToken(); // protocol name
2457 
2458   if (FormatTok->Tok.is(tok::less))
2459     parseObjCProtocolList();
2460 
2461   // Check for protocol declaration.
2462   if (FormatTok->Tok.is(tok::semi)) {
2463     nextToken();
2464     addUnwrappedLine();
2465     return true;
2466   }
2467 
2468   addUnwrappedLine();
2469   parseObjCUntilAtEnd();
2470   return true;
2471 }
2472 
2473 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2474   bool IsImport = FormatTok->is(Keywords.kw_import);
2475   assert(IsImport || FormatTok->is(tok::kw_export));
2476   nextToken();
2477 
2478   // Consume the "default" in "export default class/function".
2479   if (FormatTok->is(tok::kw_default))
2480     nextToken();
2481 
2482   // Consume "async function", "function" and "default function", so that these
2483   // get parsed as free-standing JS functions, i.e. do not require a trailing
2484   // semicolon.
2485   if (FormatTok->is(Keywords.kw_async))
2486     nextToken();
2487   if (FormatTok->is(Keywords.kw_function)) {
2488     nextToken();
2489     return;
2490   }
2491 
2492   // For imports, `export *`, `export {...}`, consume the rest of the line up
2493   // to the terminating `;`. For everything else, just return and continue
2494   // parsing the structural element, i.e. the declaration or expression for
2495   // `export default`.
2496   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2497       !FormatTok->isStringLiteral())
2498     return;
2499 
2500   while (!eof()) {
2501     if (FormatTok->is(tok::semi))
2502       return;
2503     if (Line->Tokens.empty()) {
2504       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2505       // import statement should terminate.
2506       return;
2507     }
2508     if (FormatTok->is(tok::l_brace)) {
2509       FormatTok->BlockKind = BK_Block;
2510       nextToken();
2511       parseBracedList();
2512     } else {
2513       nextToken();
2514     }
2515   }
2516 }
2517 
2518 void UnwrappedLineParser::parseStatementMacro() {
2519   nextToken();
2520   if (FormatTok->is(tok::l_paren))
2521     parseParens();
2522   if (FormatTok->is(tok::semi))
2523     nextToken();
2524   addUnwrappedLine();
2525 }
2526 
2527 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2528                                                  StringRef Prefix = "") {
2529   llvm::dbgs() << Prefix << "Line(" << Line.Level
2530                << ", FSC=" << Line.FirstStartColumn << ")"
2531                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2532   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2533                                                     E = Line.Tokens.end();
2534        I != E; ++I) {
2535     llvm::dbgs() << I->Tok->Tok.getName() << "["
2536                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2537                  << "] ";
2538   }
2539   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2540                                                     E = Line.Tokens.end();
2541        I != E; ++I) {
2542     const UnwrappedLineNode &Node = *I;
2543     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2544              I = Node.Children.begin(),
2545              E = Node.Children.end();
2546          I != E; ++I) {
2547       printDebugInfo(*I, "\nChild: ");
2548     }
2549   }
2550   llvm::dbgs() << "\n";
2551 }
2552 
2553 void UnwrappedLineParser::addUnwrappedLine() {
2554   if (Line->Tokens.empty())
2555     return;
2556   LLVM_DEBUG({
2557     if (CurrentLines == &Lines)
2558       printDebugInfo(*Line);
2559   });
2560   CurrentLines->push_back(std::move(*Line));
2561   Line->Tokens.clear();
2562   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2563   Line->FirstStartColumn = 0;
2564   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2565     CurrentLines->append(
2566         std::make_move_iterator(PreprocessorDirectives.begin()),
2567         std::make_move_iterator(PreprocessorDirectives.end()));
2568     PreprocessorDirectives.clear();
2569   }
2570   // Disconnect the current token from the last token on the previous line.
2571   FormatTok->Previous = nullptr;
2572 }
2573 
2574 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2575 
2576 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2577   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2578          FormatTok.NewlinesBefore > 0;
2579 }
2580 
2581 // Checks if \p FormatTok is a line comment that continues the line comment
2582 // section on \p Line.
2583 static bool
2584 continuesLineCommentSection(const FormatToken &FormatTok,
2585                             const UnwrappedLine &Line,
2586                             const llvm::Regex &CommentPragmasRegex) {
2587   if (Line.Tokens.empty())
2588     return false;
2589 
2590   StringRef IndentContent = FormatTok.TokenText;
2591   if (FormatTok.TokenText.startswith("//") ||
2592       FormatTok.TokenText.startswith("/*"))
2593     IndentContent = FormatTok.TokenText.substr(2);
2594   if (CommentPragmasRegex.match(IndentContent))
2595     return false;
2596 
2597   // If Line starts with a line comment, then FormatTok continues the comment
2598   // section if its original column is greater or equal to the original start
2599   // column of the line.
2600   //
2601   // Define the min column token of a line as follows: if a line ends in '{' or
2602   // contains a '{' followed by a line comment, then the min column token is
2603   // that '{'. Otherwise, the min column token of the line is the first token of
2604   // the line.
2605   //
2606   // If Line starts with a token other than a line comment, then FormatTok
2607   // continues the comment section if its original column is greater than the
2608   // original start column of the min column token of the line.
2609   //
2610   // For example, the second line comment continues the first in these cases:
2611   //
2612   // // first line
2613   // // second line
2614   //
2615   // and:
2616   //
2617   // // first line
2618   //  // second line
2619   //
2620   // and:
2621   //
2622   // int i; // first line
2623   //  // second line
2624   //
2625   // and:
2626   //
2627   // do { // first line
2628   //      // second line
2629   //   int i;
2630   // } while (true);
2631   //
2632   // and:
2633   //
2634   // enum {
2635   //   a, // first line
2636   //    // second line
2637   //   b
2638   // };
2639   //
2640   // The second line comment doesn't continue the first in these cases:
2641   //
2642   //   // first line
2643   //  // second line
2644   //
2645   // and:
2646   //
2647   // int i; // first line
2648   // // second line
2649   //
2650   // and:
2651   //
2652   // do { // first line
2653   //   // second line
2654   //   int i;
2655   // } while (true);
2656   //
2657   // and:
2658   //
2659   // enum {
2660   //   a, // first line
2661   //   // second line
2662   // };
2663   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2664 
2665   // Scan for '{//'. If found, use the column of '{' as a min column for line
2666   // comment section continuation.
2667   const FormatToken *PreviousToken = nullptr;
2668   for (const UnwrappedLineNode &Node : Line.Tokens) {
2669     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2670         isLineComment(*Node.Tok)) {
2671       MinColumnToken = PreviousToken;
2672       break;
2673     }
2674     PreviousToken = Node.Tok;
2675 
2676     // Grab the last newline preceding a token in this unwrapped line.
2677     if (Node.Tok->NewlinesBefore > 0) {
2678       MinColumnToken = Node.Tok;
2679     }
2680   }
2681   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2682     MinColumnToken = PreviousToken;
2683   }
2684 
2685   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2686                               MinColumnToken);
2687 }
2688 
2689 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2690   bool JustComments = Line->Tokens.empty();
2691   for (SmallVectorImpl<FormatToken *>::const_iterator
2692            I = CommentsBeforeNextToken.begin(),
2693            E = CommentsBeforeNextToken.end();
2694        I != E; ++I) {
2695     // Line comments that belong to the same line comment section are put on the
2696     // same line since later we might want to reflow content between them.
2697     // Additional fine-grained breaking of line comment sections is controlled
2698     // by the class BreakableLineCommentSection in case it is desirable to keep
2699     // several line comment sections in the same unwrapped line.
2700     //
2701     // FIXME: Consider putting separate line comment sections as children to the
2702     // unwrapped line instead.
2703     (*I)->ContinuesLineCommentSection =
2704         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2705     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2706       addUnwrappedLine();
2707     pushToken(*I);
2708   }
2709   if (NewlineBeforeNext && JustComments)
2710     addUnwrappedLine();
2711   CommentsBeforeNextToken.clear();
2712 }
2713 
2714 void UnwrappedLineParser::nextToken(int LevelDifference) {
2715   if (eof())
2716     return;
2717   flushComments(isOnNewLine(*FormatTok));
2718   pushToken(FormatTok);
2719   FormatToken *Previous = FormatTok;
2720   if (Style.Language != FormatStyle::LK_JavaScript)
2721     readToken(LevelDifference);
2722   else
2723     readTokenWithJavaScriptASI();
2724   FormatTok->Previous = Previous;
2725 }
2726 
2727 void UnwrappedLineParser::distributeComments(
2728     const SmallVectorImpl<FormatToken *> &Comments,
2729     const FormatToken *NextTok) {
2730   // Whether or not a line comment token continues a line is controlled by
2731   // the method continuesLineCommentSection, with the following caveat:
2732   //
2733   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2734   // that each comment line from the trail is aligned with the next token, if
2735   // the next token exists. If a trail exists, the beginning of the maximal
2736   // trail is marked as a start of a new comment section.
2737   //
2738   // For example in this code:
2739   //
2740   // int a; // line about a
2741   //   // line 1 about b
2742   //   // line 2 about b
2743   //   int b;
2744   //
2745   // the two lines about b form a maximal trail, so there are two sections, the
2746   // first one consisting of the single comment "// line about a" and the
2747   // second one consisting of the next two comments.
2748   if (Comments.empty())
2749     return;
2750   bool ShouldPushCommentsInCurrentLine = true;
2751   bool HasTrailAlignedWithNextToken = false;
2752   unsigned StartOfTrailAlignedWithNextToken = 0;
2753   if (NextTok) {
2754     // We are skipping the first element intentionally.
2755     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2756       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2757         HasTrailAlignedWithNextToken = true;
2758         StartOfTrailAlignedWithNextToken = i;
2759       }
2760     }
2761   }
2762   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2763     FormatToken *FormatTok = Comments[i];
2764     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2765       FormatTok->ContinuesLineCommentSection = false;
2766     } else {
2767       FormatTok->ContinuesLineCommentSection =
2768           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2769     }
2770     if (!FormatTok->ContinuesLineCommentSection &&
2771         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2772       ShouldPushCommentsInCurrentLine = false;
2773     }
2774     if (ShouldPushCommentsInCurrentLine) {
2775       pushToken(FormatTok);
2776     } else {
2777       CommentsBeforeNextToken.push_back(FormatTok);
2778     }
2779   }
2780 }
2781 
2782 void UnwrappedLineParser::readToken(int LevelDifference) {
2783   SmallVector<FormatToken *, 1> Comments;
2784   do {
2785     FormatTok = Tokens->getNextToken();
2786     assert(FormatTok);
2787     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2788            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2789       distributeComments(Comments, FormatTok);
2790       Comments.clear();
2791       // If there is an unfinished unwrapped line, we flush the preprocessor
2792       // directives only after that unwrapped line was finished later.
2793       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2794       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2795       assert((LevelDifference >= 0 ||
2796               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2797              "LevelDifference makes Line->Level negative");
2798       Line->Level += LevelDifference;
2799       // Comments stored before the preprocessor directive need to be output
2800       // before the preprocessor directive, at the same level as the
2801       // preprocessor directive, as we consider them to apply to the directive.
2802       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
2803           PPBranchLevel > 0)
2804         Line->Level += PPBranchLevel;
2805       flushComments(isOnNewLine(*FormatTok));
2806       parsePPDirective();
2807     }
2808     while (FormatTok->Type == TT_ConflictStart ||
2809            FormatTok->Type == TT_ConflictEnd ||
2810            FormatTok->Type == TT_ConflictAlternative) {
2811       if (FormatTok->Type == TT_ConflictStart) {
2812         conditionalCompilationStart(/*Unreachable=*/false);
2813       } else if (FormatTok->Type == TT_ConflictAlternative) {
2814         conditionalCompilationAlternative();
2815       } else if (FormatTok->Type == TT_ConflictEnd) {
2816         conditionalCompilationEnd();
2817       }
2818       FormatTok = Tokens->getNextToken();
2819       FormatTok->MustBreakBefore = true;
2820     }
2821 
2822     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2823         !Line->InPPDirective) {
2824       continue;
2825     }
2826 
2827     if (!FormatTok->Tok.is(tok::comment)) {
2828       distributeComments(Comments, FormatTok);
2829       Comments.clear();
2830       return;
2831     }
2832 
2833     Comments.push_back(FormatTok);
2834   } while (!eof());
2835 
2836   distributeComments(Comments, nullptr);
2837   Comments.clear();
2838 }
2839 
2840 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2841   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2842   if (MustBreakBeforeNextToken) {
2843     Line->Tokens.back().Tok->MustBreakBefore = true;
2844     MustBreakBeforeNextToken = false;
2845   }
2846 }
2847 
2848 } // end namespace format
2849 } // end namespace clang
2850