1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <algorithm>
21 
22 #define DEBUG_TYPE "format-parser"
23 
24 namespace clang {
25 namespace format {
26 
27 class FormatTokenSource {
28 public:
29   virtual ~FormatTokenSource() {}
30   virtual FormatToken *getNextToken() = 0;
31 
32   virtual unsigned getPosition() = 0;
33   virtual FormatToken *setPosition(unsigned Position) = 0;
34 };
35 
36 namespace {
37 
38 class ScopedDeclarationState {
39 public:
40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
41                          bool MustBeDeclaration)
42       : Line(Line), Stack(Stack) {
43     Line.MustBeDeclaration = MustBeDeclaration;
44     Stack.push_back(MustBeDeclaration);
45   }
46   ~ScopedDeclarationState() {
47     Stack.pop_back();
48     if (!Stack.empty())
49       Line.MustBeDeclaration = Stack.back();
50     else
51       Line.MustBeDeclaration = true;
52   }
53 
54 private:
55   UnwrappedLine &Line;
56   std::vector<bool> &Stack;
57 };
58 
59 static bool isLineComment(const FormatToken &FormatTok) {
60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     FakeEOF.Tok.startToken();
86     FakeEOF.Tok.setKind(tok::eof);
87     TokenSource = this;
88     Line.Level = 0;
89     Line.InPPDirective = true;
90   }
91 
92   ~ScopedMacroState() override {
93     TokenSource = PreviousTokenSource;
94     ResetToken = Token;
95     Line.InPPDirective = false;
96     Line.Level = PreviousLineLevel;
97   }
98 
99   FormatToken *getNextToken() override {
100     // The \c UnwrappedLineParser guards against this by never calling
101     // \c getNextToken() after it has encountered the first eof token.
102     assert(!eof());
103     PreviousToken = Token;
104     Token = PreviousTokenSource->getNextToken();
105     if (eof())
106       return &FakeEOF;
107     return Token;
108   }
109 
110   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
111 
112   FormatToken *setPosition(unsigned Position) override {
113     PreviousToken = nullptr;
114     Token = PreviousTokenSource->setPosition(Position);
115     return Token;
116   }
117 
118 private:
119   bool eof() {
120     return Token && Token->HasUnescapedNewline &&
121            !continuesLineComment(*Token, PreviousToken,
122                                  /*MinColumnToken=*/PreviousToken);
123   }
124 
125   FormatToken FakeEOF;
126   UnwrappedLine &Line;
127   FormatTokenSource *&TokenSource;
128   FormatToken *&ResetToken;
129   unsigned PreviousLineLevel;
130   FormatTokenSource *PreviousTokenSource;
131 
132   FormatToken *Token;
133   FormatToken *PreviousToken;
134 };
135 
136 } // end anonymous namespace
137 
138 class ScopedLineState {
139 public:
140   ScopedLineState(UnwrappedLineParser &Parser,
141                   bool SwitchToPreprocessorLines = false)
142       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
143     if (SwitchToPreprocessorLines)
144       Parser.CurrentLines = &Parser.PreprocessorDirectives;
145     else if (!Parser.Line->Tokens.empty())
146       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
147     PreBlockLine = std::move(Parser.Line);
148     Parser.Line = llvm::make_unique<UnwrappedLine>();
149     Parser.Line->Level = PreBlockLine->Level;
150     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
151   }
152 
153   ~ScopedLineState() {
154     if (!Parser.Line->Tokens.empty()) {
155       Parser.addUnwrappedLine();
156     }
157     assert(Parser.Line->Tokens.empty());
158     Parser.Line = std::move(PreBlockLine);
159     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
160       Parser.MustBreakBeforeNextToken = true;
161     Parser.CurrentLines = OriginalLines;
162   }
163 
164 private:
165   UnwrappedLineParser &Parser;
166 
167   std::unique_ptr<UnwrappedLine> PreBlockLine;
168   SmallVectorImpl<UnwrappedLine> *OriginalLines;
169 };
170 
171 class CompoundStatementIndenter {
172 public:
173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
174                             const FormatStyle &Style, unsigned &LineLevel)
175       : CompoundStatementIndenter(Parser, LineLevel,
176                                   Style.BraceWrapping.AfterControlStatement,
177                                   Style.BraceWrapping.IndentBraces) {
178   }
179   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
180                             bool WrapBrace, bool IndentBrace)
181       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182     if (WrapBrace)
183       Parser->addUnwrappedLine();
184     if (IndentBrace)
185       ++LineLevel;
186   }
187   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188 
189 private:
190   unsigned &LineLevel;
191   unsigned OldLineLevel;
192 };
193 
194 namespace {
195 
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
198   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199       : Tokens(Tokens), Position(-1) {}
200 
201   FormatToken *getNextToken() override {
202     ++Position;
203     return Tokens[Position];
204   }
205 
206   unsigned getPosition() override {
207     assert(Position >= 0);
208     return Position;
209   }
210 
211   FormatToken *setPosition(unsigned P) override {
212     Position = P;
213     return Tokens[Position];
214   }
215 
216   void reset() { Position = -1; }
217 
218 private:
219   ArrayRef<FormatToken *> Tokens;
220   int Position;
221 };
222 
223 } // end anonymous namespace
224 
225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
226                                          const AdditionalKeywords &Keywords,
227                                          unsigned FirstStartColumn,
228                                          ArrayRef<FormatToken *> Tokens,
229                                          UnwrappedLineConsumer &Callback)
230     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235                        ? IG_Rejected
236                        : IG_Inited),
237       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238 
239 void UnwrappedLineParser::reset() {
240   PPBranchLevel = -1;
241   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242                      ? IG_Rejected
243                      : IG_Inited;
244   IncludeGuardToken = nullptr;
245   Line.reset(new UnwrappedLine);
246   CommentsBeforeNextToken.clear();
247   FormatTok = nullptr;
248   MustBreakBeforeNextToken = false;
249   PreprocessorDirectives.clear();
250   CurrentLines = &Lines;
251   DeclarationScopeStack.clear();
252   PPStack.clear();
253   Line->FirstStartColumn = FirstStartColumn;
254 }
255 
256 void UnwrappedLineParser::parse() {
257   IndexedTokenSource TokenSource(AllTokens);
258   Line->FirstStartColumn = FirstStartColumn;
259   do {
260     LLVM_DEBUG(llvm::dbgs() << "----\n");
261     reset();
262     Tokens = &TokenSource;
263     TokenSource.reset();
264 
265     readToken();
266     parseFile();
267 
268     // If we found an include guard then all preprocessor directives (other than
269     // the guard) are over-indented by one.
270     if (IncludeGuard == IG_Found)
271       for (auto &Line : Lines)
272         if (Line.InPPDirective && Line.Level > 0)
273           --Line.Level;
274 
275     // Create line with eof token.
276     pushToken(FormatTok);
277     addUnwrappedLine();
278 
279     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280                                                   E = Lines.end();
281          I != E; ++I) {
282       Callback.consumeUnwrappedLine(*I);
283     }
284     Callback.finishRun();
285     Lines.clear();
286     while (!PPLevelBranchIndex.empty() &&
287            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290     }
291     if (!PPLevelBranchIndex.empty()) {
292       ++PPLevelBranchIndex.back();
293       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295     }
296   } while (!PPLevelBranchIndex.empty());
297 }
298 
299 void UnwrappedLineParser::parseFile() {
300   // The top-level context in a file always has declarations, except for pre-
301   // processor directives and JavaScript files.
302   bool MustBeDeclaration =
303       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305                                           MustBeDeclaration);
306   if (Style.Language == FormatStyle::LK_TextProto)
307     parseBracedList();
308   else
309     parseLevel(/*HasOpeningBrace=*/false);
310   // Make sure to format the remaining tokens.
311   //
312   // LK_TextProto is special since its top-level is parsed as the body of a
313   // braced list, which does not necessarily have natural line separators such
314   // as a semicolon. Comments after the last entry that have been determined to
315   // not belong to that line, as in:
316   //   key: value
317   //   // endfile comment
318   // do not have a chance to be put on a line of their own until this point.
319   // Here we add this newline before end-of-file comments.
320   if (Style.Language == FormatStyle::LK_TextProto &&
321       !CommentsBeforeNextToken.empty())
322     addUnwrappedLine();
323   flushComments(true);
324   addUnwrappedLine();
325 }
326 
327 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
328   bool SwitchLabelEncountered = false;
329   do {
330     tok::TokenKind kind = FormatTok->Tok.getKind();
331     if (FormatTok->Type == TT_MacroBlockBegin) {
332       kind = tok::l_brace;
333     } else if (FormatTok->Type == TT_MacroBlockEnd) {
334       kind = tok::r_brace;
335     }
336 
337     switch (kind) {
338     case tok::comment:
339       nextToken();
340       addUnwrappedLine();
341       break;
342     case tok::l_brace:
343       // FIXME: Add parameter whether this can happen - if this happens, we must
344       // be in a non-declaration context.
345       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
346         continue;
347       parseBlock(/*MustBeDeclaration=*/false);
348       addUnwrappedLine();
349       break;
350     case tok::r_brace:
351       if (HasOpeningBrace)
352         return;
353       nextToken();
354       addUnwrappedLine();
355       break;
356     case tok::kw_default: {
357       unsigned StoredPosition = Tokens->getPosition();
358       FormatToken *Next;
359       do {
360         Next = Tokens->getNextToken();
361       } while (Next && Next->is(tok::comment));
362       FormatTok = Tokens->setPosition(StoredPosition);
363       if (Next && Next->isNot(tok::colon)) {
364         // default not followed by ':' is not a case label; treat it like
365         // an identifier.
366         parseStructuralElement();
367         break;
368       }
369       // Else, if it is 'default:', fall through to the case handling.
370       LLVM_FALLTHROUGH;
371     }
372     case tok::kw_case:
373       if (Style.Language == FormatStyle::LK_JavaScript &&
374           Line->MustBeDeclaration) {
375         // A 'case: string' style field declaration.
376         parseStructuralElement();
377         break;
378       }
379       if (!SwitchLabelEncountered &&
380           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
381         ++Line->Level;
382       SwitchLabelEncountered = true;
383       parseStructuralElement();
384       break;
385     default:
386       parseStructuralElement();
387       break;
388     }
389   } while (!eof());
390 }
391 
392 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
393   // We'll parse forward through the tokens until we hit
394   // a closing brace or eof - note that getNextToken() will
395   // parse macros, so this will magically work inside macro
396   // definitions, too.
397   unsigned StoredPosition = Tokens->getPosition();
398   FormatToken *Tok = FormatTok;
399   const FormatToken *PrevTok = Tok->Previous;
400   // Keep a stack of positions of lbrace tokens. We will
401   // update information about whether an lbrace starts a
402   // braced init list or a different block during the loop.
403   SmallVector<FormatToken *, 8> LBraceStack;
404   assert(Tok->Tok.is(tok::l_brace));
405   do {
406     // Get next non-comment token.
407     FormatToken *NextTok;
408     unsigned ReadTokens = 0;
409     do {
410       NextTok = Tokens->getNextToken();
411       ++ReadTokens;
412     } while (NextTok->is(tok::comment));
413 
414     switch (Tok->Tok.getKind()) {
415     case tok::l_brace:
416       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
417         if (PrevTok->isOneOf(tok::colon, tok::less))
418           // A ':' indicates this code is in a type, or a braced list
419           // following a label in an object literal ({a: {b: 1}}).
420           // A '<' could be an object used in a comparison, but that is nonsense
421           // code (can never return true), so more likely it is a generic type
422           // argument (`X<{a: string; b: number}>`).
423           // The code below could be confused by semicolons between the
424           // individual members in a type member list, which would normally
425           // trigger BK_Block. In both cases, this must be parsed as an inline
426           // braced init.
427           Tok->BlockKind = BK_BracedInit;
428         else if (PrevTok->is(tok::r_paren))
429           // `) { }` can only occur in function or method declarations in JS.
430           Tok->BlockKind = BK_Block;
431       } else {
432         Tok->BlockKind = BK_Unknown;
433       }
434       LBraceStack.push_back(Tok);
435       break;
436     case tok::r_brace:
437       if (LBraceStack.empty())
438         break;
439       if (LBraceStack.back()->BlockKind == BK_Unknown) {
440         bool ProbablyBracedList = false;
441         if (Style.Language == FormatStyle::LK_Proto) {
442           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
443         } else {
444           // Using OriginalColumn to distinguish between ObjC methods and
445           // binary operators is a bit hacky.
446           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
447                                   NextTok->OriginalColumn == 0;
448 
449           // If there is a comma, semicolon or right paren after the closing
450           // brace, we assume this is a braced initializer list.  Note that
451           // regardless how we mark inner braces here, we will overwrite the
452           // BlockKind later if we parse a braced list (where all blocks
453           // inside are by default braced lists), or when we explicitly detect
454           // blocks (for example while parsing lambdas).
455           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
456           // braced list in JS.
457           ProbablyBracedList =
458               (Style.Language == FormatStyle::LK_JavaScript &&
459                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
460                                 Keywords.kw_as)) ||
461               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
462               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
463                                tok::r_paren, tok::r_square, tok::l_brace,
464                                tok::ellipsis) ||
465               (NextTok->is(tok::identifier) &&
466                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
467               (NextTok->is(tok::semi) &&
468                (!ExpectClassBody || LBraceStack.size() != 1)) ||
469               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
470           if (NextTok->is(tok::l_square)) {
471             // We can have an array subscript after a braced init
472             // list, but C++11 attributes are expected after blocks.
473             NextTok = Tokens->getNextToken();
474             ++ReadTokens;
475             ProbablyBracedList = NextTok->isNot(tok::l_square);
476           }
477         }
478         if (ProbablyBracedList) {
479           Tok->BlockKind = BK_BracedInit;
480           LBraceStack.back()->BlockKind = BK_BracedInit;
481         } else {
482           Tok->BlockKind = BK_Block;
483           LBraceStack.back()->BlockKind = BK_Block;
484         }
485       }
486       LBraceStack.pop_back();
487       break;
488     case tok::identifier:
489       if (!Tok->is(TT_StatementMacro))
490         break;
491       LLVM_FALLTHROUGH;
492     case tok::at:
493     case tok::semi:
494     case tok::kw_if:
495     case tok::kw_while:
496     case tok::kw_for:
497     case tok::kw_switch:
498     case tok::kw_try:
499     case tok::kw___try:
500       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
501         LBraceStack.back()->BlockKind = BK_Block;
502       break;
503     default:
504       break;
505     }
506     PrevTok = Tok;
507     Tok = NextTok;
508   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
509 
510   // Assume other blocks for all unclosed opening braces.
511   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
512     if (LBraceStack[i]->BlockKind == BK_Unknown)
513       LBraceStack[i]->BlockKind = BK_Block;
514   }
515 
516   FormatTok = Tokens->setPosition(StoredPosition);
517 }
518 
/// Mixes the \c std::hash value of \p v into the running hash \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
524 
525 size_t UnwrappedLineParser::computePPHash() const {
526   size_t h = 0;
527   for (const auto &i : PPStack) {
528     hash_combine(h, size_t(i.Kind));
529     hash_combine(h, i.Line);
530   }
531   return h;
532 }
533 
534 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
535                                      bool MunchSemi) {
536   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
537          "'{' or macro block token expected");
538   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
539   FormatTok->BlockKind = BK_Block;
540 
541   size_t PPStartHash = computePPHash();
542 
543   unsigned InitialLevel = Line->Level;
544   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
545 
546   if (MacroBlock && FormatTok->is(tok::l_paren))
547     parseParens();
548 
549   size_t NbPreprocessorDirectives =
550       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
551   addUnwrappedLine();
552   size_t OpeningLineIndex =
553       CurrentLines->empty()
554           ? (UnwrappedLine::kInvalidIndex)
555           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
556 
557   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
558                                           MustBeDeclaration);
559   if (AddLevel)
560     ++Line->Level;
561   parseLevel(/*HasOpeningBrace=*/true);
562 
563   if (eof())
564     return;
565 
566   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
567                  : !FormatTok->is(tok::r_brace)) {
568     Line->Level = InitialLevel;
569     FormatTok->BlockKind = BK_Block;
570     return;
571   }
572 
573   size_t PPEndHash = computePPHash();
574 
575   // Munch the closing brace.
576   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
577 
578   if (MacroBlock && FormatTok->is(tok::l_paren))
579     parseParens();
580 
581   if (MunchSemi && FormatTok->Tok.is(tok::semi))
582     nextToken();
583   Line->Level = InitialLevel;
584 
585   if (PPStartHash == PPEndHash) {
586     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
587     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
588       // Update the opening line to add the forward reference as well
589       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
590           CurrentLines->size() - 1;
591     }
592   }
593 }
594 
595 static bool isGoogScope(const UnwrappedLine &Line) {
596   // FIXME: Closure-library specific stuff should not be hard-coded but be
597   // configurable.
598   if (Line.Tokens.size() < 4)
599     return false;
600   auto I = Line.Tokens.begin();
601   if (I->Tok->TokenText != "goog")
602     return false;
603   ++I;
604   if (I->Tok->isNot(tok::period))
605     return false;
606   ++I;
607   if (I->Tok->TokenText != "scope")
608     return false;
609   ++I;
610   return I->Tok->is(tok::l_paren);
611 }
612 
613 static bool isIIFE(const UnwrappedLine &Line,
614                    const AdditionalKeywords &Keywords) {
615   // Look for the start of an immediately invoked anonymous function.
616   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
617   // This is commonly done in JavaScript to create a new, anonymous scope.
618   // Example: (function() { ... })()
619   if (Line.Tokens.size() < 3)
620     return false;
621   auto I = Line.Tokens.begin();
622   if (I->Tok->isNot(tok::l_paren))
623     return false;
624   ++I;
625   if (I->Tok->isNot(Keywords.kw_function))
626     return false;
627   ++I;
628   return I->Tok->is(tok::l_paren);
629 }
630 
631 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
632                                    const FormatToken &InitialToken) {
633   if (InitialToken.is(tok::kw_namespace))
634     return Style.BraceWrapping.AfterNamespace;
635   if (InitialToken.is(tok::kw_class))
636     return Style.BraceWrapping.AfterClass;
637   if (InitialToken.is(tok::kw_union))
638     return Style.BraceWrapping.AfterUnion;
639   if (InitialToken.is(tok::kw_struct))
640     return Style.BraceWrapping.AfterStruct;
641   return false;
642 }
643 
644 void UnwrappedLineParser::parseChildBlock() {
645   FormatTok->BlockKind = BK_Block;
646   nextToken();
647   {
648     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
649                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
650     ScopedLineState LineState(*this);
651     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
652                                             /*MustBeDeclaration=*/false);
653     Line->Level += SkipIndent ? 0 : 1;
654     parseLevel(/*HasOpeningBrace=*/true);
655     flushComments(isOnNewLine(*FormatTok));
656     Line->Level -= SkipIndent ? 0 : 1;
657   }
658   nextToken();
659 }
660 
661 void UnwrappedLineParser::parsePPDirective() {
662   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
663   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
664 
665   nextToken();
666 
667   if (!FormatTok->Tok.getIdentifierInfo()) {
668     parsePPUnknown();
669     return;
670   }
671 
672   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
673   case tok::pp_define:
674     parsePPDefine();
675     return;
676   case tok::pp_if:
677     parsePPIf(/*IfDef=*/false);
678     break;
679   case tok::pp_ifdef:
680   case tok::pp_ifndef:
681     parsePPIf(/*IfDef=*/true);
682     break;
683   case tok::pp_else:
684     parsePPElse();
685     break;
686   case tok::pp_elif:
687     parsePPElIf();
688     break;
689   case tok::pp_endif:
690     parsePPEndIf();
691     break;
692   default:
693     parsePPUnknown();
694     break;
695   }
696 }
697 
698 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
699   size_t Line = CurrentLines->size();
700   if (CurrentLines == &PreprocessorDirectives)
701     Line += Lines.size();
702 
703   if (Unreachable ||
704       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
705     PPStack.push_back({PP_Unreachable, Line});
706   else
707     PPStack.push_back({PP_Conditional, Line});
708 }
709 
710 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
711   ++PPBranchLevel;
712   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
713   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
714     PPLevelBranchIndex.push_back(0);
715     PPLevelBranchCount.push_back(0);
716   }
717   PPChainBranchIndex.push(0);
718   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
719   conditionalCompilationCondition(Unreachable || Skip);
720 }
721 
722 void UnwrappedLineParser::conditionalCompilationAlternative() {
723   if (!PPStack.empty())
724     PPStack.pop_back();
725   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
726   if (!PPChainBranchIndex.empty())
727     ++PPChainBranchIndex.top();
728   conditionalCompilationCondition(
729       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
730       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
731 }
732 
733 void UnwrappedLineParser::conditionalCompilationEnd() {
734   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
735   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
736     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
737       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
738     }
739   }
740   // Guard against #endif's without #if.
741   if (PPBranchLevel > -1)
742     --PPBranchLevel;
743   if (!PPChainBranchIndex.empty())
744     PPChainBranchIndex.pop();
745   if (!PPStack.empty())
746     PPStack.pop_back();
747 }
748 
749 void UnwrappedLineParser::parsePPIf(bool IfDef) {
750   bool IfNDef = FormatTok->is(tok::pp_ifndef);
751   nextToken();
752   bool Unreachable = false;
753   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
754     Unreachable = true;
755   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
756     Unreachable = true;
757   conditionalCompilationStart(Unreachable);
758   FormatToken *IfCondition = FormatTok;
759   // If there's a #ifndef on the first line, and the only lines before it are
760   // comments, it could be an include guard.
761   bool MaybeIncludeGuard = IfNDef;
762   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
763     for (auto &Line : Lines) {
764       if (!Line.Tokens.front().Tok->is(tok::comment)) {
765         MaybeIncludeGuard = false;
766         IncludeGuard = IG_Rejected;
767         break;
768       }
769     }
770   --PPBranchLevel;
771   parsePPUnknown();
772   ++PPBranchLevel;
773   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
774     IncludeGuard = IG_IfNdefed;
775     IncludeGuardToken = IfCondition;
776   }
777 }
778 
779 void UnwrappedLineParser::parsePPElse() {
780   // If a potential include guard has an #else, it's not an include guard.
781   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
782     IncludeGuard = IG_Rejected;
783   conditionalCompilationAlternative();
784   if (PPBranchLevel > -1)
785     --PPBranchLevel;
786   parsePPUnknown();
787   ++PPBranchLevel;
788 }
789 
790 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
791 
792 void UnwrappedLineParser::parsePPEndIf() {
793   conditionalCompilationEnd();
794   parsePPUnknown();
795   // If the #endif of a potential include guard is the last thing in the file,
796   // then we found an include guard.
797   unsigned TokenPosition = Tokens->getPosition();
798   FormatToken *PeekNext = AllTokens[TokenPosition];
799   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
800       PeekNext->is(tok::eof) &&
801       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
802     IncludeGuard = IG_Found;
803 }
804 
805 void UnwrappedLineParser::parsePPDefine() {
806   nextToken();
807 
808   if (!FormatTok->Tok.getIdentifierInfo()) {
809     IncludeGuard = IG_Rejected;
810     IncludeGuardToken = nullptr;
811     parsePPUnknown();
812     return;
813   }
814 
815   if (IncludeGuard == IG_IfNdefed &&
816       IncludeGuardToken->TokenText == FormatTok->TokenText) {
817     IncludeGuard = IG_Defined;
818     IncludeGuardToken = nullptr;
819     for (auto &Line : Lines) {
820       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
821         IncludeGuard = IG_Rejected;
822         break;
823       }
824     }
825   }
826 
827   nextToken();
828   if (FormatTok->Tok.getKind() == tok::l_paren &&
829       FormatTok->WhitespaceRange.getBegin() ==
830           FormatTok->WhitespaceRange.getEnd()) {
831     parseParens();
832   }
833   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
834     Line->Level += PPBranchLevel + 1;
835   addUnwrappedLine();
836   ++Line->Level;
837 
838   // Errors during a preprocessor directive can only affect the layout of the
839   // preprocessor directive, and thus we ignore them. An alternative approach
840   // would be to use the same approach we use on the file level (no
841   // re-indentation if there was a structural error) within the macro
842   // definition.
843   parseFile();
844 }
845 
846 void UnwrappedLineParser::parsePPUnknown() {
847   do {
848     nextToken();
849   } while (!eof());
850   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
851     Line->Level += PPBranchLevel + 1;
852   addUnwrappedLine();
853 }
854 
855 // Here we blacklist certain tokens that are not usually the first token in an
856 // unwrapped line. This is used in attempt to distinguish macro calls without
857 // trailing semicolons from other constructs split to several lines.
858 static bool tokenCanStartNewLine(const clang::Token &Tok) {
859   // Semicolon can be a null-statement, l_square can be a start of a macro or
860   // a C++11 attribute, but this doesn't seem to be common.
861   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
862          Tok.isNot(tok::l_square) &&
863          // Tokens that can only be used as binary operators and a part of
864          // overloaded operator names.
865          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
866          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
867          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
868          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
869          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
870          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
871          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
872          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
873          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
874          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
875          Tok.isNot(tok::lesslessequal) &&
876          // Colon is used in labels, base class lists, initializer lists,
877          // range-based for loops, ternary operator, but should never be the
878          // first token in an unwrapped line.
879          Tok.isNot(tok::colon) &&
880          // 'noexcept' is a trailing annotation.
881          Tok.isNot(tok::kw_noexcept);
882 }
883 
884 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
885                           const FormatToken *FormatTok) {
886   // FIXME: This returns true for C/C++ keywords like 'struct'.
887   return FormatTok->is(tok::identifier) &&
888          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
889           !FormatTok->isOneOf(
890               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
891               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
892               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
893               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
894               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
895               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
896               Keywords.kw_from));
897 }
898 
899 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
900                                  const FormatToken *FormatTok) {
901   return FormatTok->Tok.isLiteral() ||
902          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
903          mustBeJSIdent(Keywords, FormatTok);
904 }
905 
906 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
907 // when encountered after a value (see mustBeJSIdentOrValue).
908 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
909                            const FormatToken *FormatTok) {
910   return FormatTok->isOneOf(
911       tok::kw_return, Keywords.kw_yield,
912       // conditionals
913       tok::kw_if, tok::kw_else,
914       // loops
915       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
916       // switch/case
917       tok::kw_switch, tok::kw_case,
918       // exceptions
919       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
920       // declaration
921       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
922       Keywords.kw_async, Keywords.kw_function,
923       // import/export
924       Keywords.kw_import, tok::kw_export);
925 }
926 
927 // readTokenWithJavaScriptASI reads the next token and terminates the current
928 // line if JavaScript Automatic Semicolon Insertion must
929 // happen between the current token and the next token.
930 //
931 // This method is conservative - it cannot cover all edge cases of JavaScript,
932 // but only aims to correctly handle certain well known cases. It *must not*
933 // return true in speculative cases.
934 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
935   FormatToken *Previous = FormatTok;
936   readToken();
937   FormatToken *Next = FormatTok;
938 
939   bool IsOnSameLine =
940       CommentsBeforeNextToken.empty()
941           ? Next->NewlinesBefore == 0
942           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
943   if (IsOnSameLine)
944     return;
945 
946   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
947   bool PreviousStartsTemplateExpr =
948       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
949   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
950     // If the line contains an '@' sign, the previous token might be an
951     // annotation, which can precede another identifier/value.
952     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
953                               [](UnwrappedLineNode &LineNode) {
954                                 return LineNode.Tok->is(tok::at);
955                               }) != Line->Tokens.end();
956     if (HasAt)
957       return;
958   }
959   if (Next->is(tok::exclaim) && PreviousMustBeValue)
960     return addUnwrappedLine();
961   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
962   bool NextEndsTemplateExpr =
963       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
964   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
965       (PreviousMustBeValue ||
966        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
967                          tok::minusminus)))
968     return addUnwrappedLine();
969   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
970       isJSDeclOrStmt(Keywords, Next))
971     return addUnwrappedLine();
972 }
973 
// Parses one structural element beginning at the current token and emits the
// unwrapped line(s) for it.
//
// The first switch dispatches on the leading token to dedicated parsers
// (namespaces, access specifiers, control statements, labels, extern blocks,
// ...); most of those cases return directly. Whatever falls out of that switch
// is consumed token by token in the loop that follows, until one of the loop's
// cases ends the element (for example at ';', at '}' or after a block) or
// eof() is reached.
//
// Must not be called while the current token is '{' (see the assert).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen include directive: the keyword plus an optional string literal
  // form one unwrapped line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      // Braced inline asm: both braces are typed TT_InlineASMBrace and every
      // token in between is marked Finalized.
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the keyword is simply consumed; otherwise it
    // is handled as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." {': parse the braced block as declarations. The block only
    // adds an indentation level when AfterExternBlock wrapping is enabled.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: 'import' ['public'] "file" [';'] forms one unwrapped line.
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals:' / 'slots:' style sections (and their Q_ variants) in C++.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume the rest of the element token by token. Each case either
  // advances past the construct it recognizes and continues the loop, or ends
  // the element by returning.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // '@{ ... }' is parsed as a braced list.
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        // Java '@interface': only the keyword is consumed here.
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef NS_ENUM(...)' and friends are parsed like enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the element; it stays on the current line.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here; only the current line is
      // finished.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      // Consume the operator symbol together with the 'operator' keyword.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type/identifier, a parameter list and a
      // braced body that is parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // Only consider labels and macros when the identifier was the first
      // token of the line, optionally preceded by a single comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Treat the identifier as a macro on its own line if it is followed
        // by a line break, is either at least five characters long or
        // function-like, is spelled entirely in upper case, and the next token
        // could plausibly start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list that is
      // closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1405 
1406 bool UnwrappedLineParser::tryToParseLambda() {
1407   if (!Style.isCpp()) {
1408     nextToken();
1409     return false;
1410   }
1411   assert(FormatTok->is(tok::l_square));
1412   FormatToken &LSquare = *FormatTok;
1413   if (!tryToParseLambdaIntroducer())
1414     return false;
1415 
1416   bool SeenArrow = false;
1417 
1418   while (FormatTok->isNot(tok::l_brace)) {
1419     if (FormatTok->isSimpleTypeSpecifier()) {
1420       nextToken();
1421       continue;
1422     }
1423     switch (FormatTok->Tok.getKind()) {
1424     case tok::l_brace:
1425       break;
1426     case tok::l_paren:
1427       parseParens();
1428       break;
1429     case tok::amp:
1430     case tok::star:
1431     case tok::kw_const:
1432     case tok::comma:
1433     case tok::less:
1434     case tok::greater:
1435     case tok::identifier:
1436     case tok::numeric_constant:
1437     case tok::coloncolon:
1438     case tok::kw_mutable:
1439     case tok::kw_noexcept:
1440       nextToken();
1441       break;
1442     // Specialization of a template with an integer parameter can contain
1443     // arithmetic, logical, comparison and ternary operators.
1444     //
1445     // FIXME: This also accepts sequences of operators that are not in the scope
1446     // of a template argument list.
1447     //
1448     // In a C++ lambda a template type can only occur after an arrow. We use
1449     // this as an heuristic to distinguish between Objective-C expressions
1450     // followed by an `a->b` expression, such as:
1451     // ([obj func:arg] + a->b)
1452     // Otherwise the code below would parse as a lambda.
1453     case tok::plus:
1454     case tok::minus:
1455     case tok::exclaim:
1456     case tok::tilde:
1457     case tok::slash:
1458     case tok::percent:
1459     case tok::lessless:
1460     case tok::pipe:
1461     case tok::pipepipe:
1462     case tok::ampamp:
1463     case tok::caret:
1464     case tok::equalequal:
1465     case tok::exclaimequal:
1466     case tok::greaterequal:
1467     case tok::lessequal:
1468     case tok::question:
1469     case tok::colon:
1470     case tok::kw_true:
1471     case tok::kw_false:
1472       if (SeenArrow) {
1473         nextToken();
1474         break;
1475       }
1476       return true;
1477     case tok::arrow:
1478       // This might or might not actually be a lambda arrow (this could be an
1479       // ObjC method invocation followed by a dereferencing arrow). We might
1480       // reset this back to TT_Unknown in TokenAnnotator.
1481       FormatTok->Type = TT_LambdaArrow;
1482       SeenArrow = true;
1483       nextToken();
1484       break;
1485     default:
1486       return true;
1487     }
1488   }
1489   FormatTok->Type = TT_LambdaLBrace;
1490   LSquare.Type = TT_LambdaLSquare;
1491   parseChildBlock();
1492   return true;
1493 }
1494 
1495 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1496   const FormatToken *Previous = FormatTok->Previous;
1497   if (Previous &&
1498       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1499                          tok::kw_delete, tok::l_square) ||
1500        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1501        Previous->isSimpleTypeSpecifier())) {
1502     nextToken();
1503     return false;
1504   }
1505   nextToken();
1506   if (FormatTok->is(tok::l_square)) {
1507     return false;
1508   }
1509   parseSquare(/*LambdaIntroducer=*/true);
1510   return true;
1511 }
1512 
1513 void UnwrappedLineParser::tryToParseJSFunction() {
1514   assert(FormatTok->is(Keywords.kw_function) ||
1515          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1516   if (FormatTok->is(Keywords.kw_async))
1517     nextToken();
1518   // Consume "function".
1519   nextToken();
1520 
1521   // Consume * (generator function). Treat it like C++'s overloaded operators.
1522   if (FormatTok->is(tok::star)) {
1523     FormatTok->Type = TT_OverloadedOperator;
1524     nextToken();
1525   }
1526 
1527   // Consume function name.
1528   if (FormatTok->is(tok::identifier))
1529     nextToken();
1530 
1531   if (FormatTok->isNot(tok::l_paren))
1532     return;
1533 
1534   // Parse formal parameter list.
1535   parseParens();
1536 
1537   if (FormatTok->is(tok::colon)) {
1538     // Parse a type definition.
1539     nextToken();
1540 
1541     // Eat the type declaration. For braced inline object types, balance braces,
1542     // otherwise just parse until finding an l_brace for the function body.
1543     if (FormatTok->is(tok::l_brace))
1544       tryToParseBracedList();
1545     else
1546       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1547         nextToken();
1548   }
1549 
1550   if (FormatTok->is(tok::semi))
1551     return;
1552 
1553   parseChildBlock();
1554 }
1555 
1556 bool UnwrappedLineParser::tryToParseBracedList() {
1557   if (FormatTok->BlockKind == BK_Unknown)
1558     calculateBraceTypes();
1559   assert(FormatTok->BlockKind != BK_Unknown);
1560   if (FormatTok->BlockKind == BK_Block)
1561     return false;
1562   nextToken();
1563   parseBracedList();
1564   return true;
1565 }
1566 
1567 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1568                                           tok::TokenKind ClosingBraceKind) {
1569   bool HasError = false;
1570 
1571   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1572   // replace this by using parseAssigmentExpression() inside.
1573   do {
1574     if (Style.Language == FormatStyle::LK_JavaScript) {
1575       if (FormatTok->is(Keywords.kw_function) ||
1576           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1577         tryToParseJSFunction();
1578         continue;
1579       }
1580       if (FormatTok->is(TT_JsFatArrow)) {
1581         nextToken();
1582         // Fat arrows can be followed by simple expressions or by child blocks
1583         // in curly braces.
1584         if (FormatTok->is(tok::l_brace)) {
1585           parseChildBlock();
1586           continue;
1587         }
1588       }
1589       if (FormatTok->is(tok::l_brace)) {
1590         // Could be a method inside of a braced list `{a() { return 1; }}`.
1591         if (tryToParseBracedList())
1592           continue;
1593         parseChildBlock();
1594       }
1595     }
1596     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1597       nextToken();
1598       return !HasError;
1599     }
1600     switch (FormatTok->Tok.getKind()) {
1601     case tok::caret:
1602       nextToken();
1603       if (FormatTok->is(tok::l_brace)) {
1604         parseChildBlock();
1605       }
1606       break;
1607     case tok::l_square:
1608       tryToParseLambda();
1609       break;
1610     case tok::l_paren:
1611       parseParens();
1612       // JavaScript can just have free standing methods and getters/setters in
1613       // object literals. Detect them by a "{" following ")".
1614       if (Style.Language == FormatStyle::LK_JavaScript) {
1615         if (FormatTok->is(tok::l_brace))
1616           parseChildBlock();
1617         break;
1618       }
1619       break;
1620     case tok::l_brace:
1621       // Assume there are no blocks inside a braced init list apart
1622       // from the ones we explicitly parse out (like lambdas).
1623       FormatTok->BlockKind = BK_BracedInit;
1624       nextToken();
1625       parseBracedList();
1626       break;
1627     case tok::less:
1628       if (Style.Language == FormatStyle::LK_Proto) {
1629         nextToken();
1630         parseBracedList(/*ContinueOnSemicolons=*/false,
1631                         /*ClosingBraceKind=*/tok::greater);
1632       } else {
1633         nextToken();
1634       }
1635       break;
1636     case tok::semi:
1637       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1638       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1639       // used for error recovery if we have otherwise determined that this is
1640       // a braced list.
1641       if (Style.Language == FormatStyle::LK_JavaScript) {
1642         nextToken();
1643         break;
1644       }
1645       HasError = true;
1646       if (!ContinueOnSemicolons)
1647         return !HasError;
1648       nextToken();
1649       break;
1650     case tok::comma:
1651       nextToken();
1652       break;
1653     default:
1654       nextToken();
1655       break;
1656     }
1657   } while (!eof());
1658   return false;
1659 }
1660 
1661 void UnwrappedLineParser::parseParens() {
1662   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1663   nextToken();
1664   do {
1665     switch (FormatTok->Tok.getKind()) {
1666     case tok::l_paren:
1667       parseParens();
1668       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1669         parseChildBlock();
1670       break;
1671     case tok::r_paren:
1672       nextToken();
1673       return;
1674     case tok::r_brace:
1675       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1676       return;
1677     case tok::l_square:
1678       tryToParseLambda();
1679       break;
1680     case tok::l_brace:
1681       if (!tryToParseBracedList())
1682         parseChildBlock();
1683       break;
1684     case tok::at:
1685       nextToken();
1686       if (FormatTok->Tok.is(tok::l_brace)) {
1687         nextToken();
1688         parseBracedList();
1689       }
1690       break;
1691     case tok::kw_class:
1692       if (Style.Language == FormatStyle::LK_JavaScript)
1693         parseRecord(/*ParseAsExpr=*/true);
1694       else
1695         nextToken();
1696       break;
1697     case tok::identifier:
1698       if (Style.Language == FormatStyle::LK_JavaScript &&
1699           (FormatTok->is(Keywords.kw_function) ||
1700            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1701         tryToParseJSFunction();
1702       else
1703         nextToken();
1704       break;
1705     default:
1706       nextToken();
1707       break;
1708     }
1709   } while (!eof());
1710 }
1711 
1712 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1713   if (!LambdaIntroducer) {
1714     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1715     if (tryToParseLambda())
1716       return;
1717   }
1718   do {
1719     switch (FormatTok->Tok.getKind()) {
1720     case tok::l_paren:
1721       parseParens();
1722       break;
1723     case tok::r_square:
1724       nextToken();
1725       return;
1726     case tok::r_brace:
1727       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1728       return;
1729     case tok::l_square:
1730       parseSquare();
1731       break;
1732     case tok::l_brace: {
1733       if (!tryToParseBracedList())
1734         parseChildBlock();
1735       break;
1736     }
1737     case tok::at:
1738       nextToken();
1739       if (FormatTok->Tok.is(tok::l_brace)) {
1740         nextToken();
1741         parseBracedList();
1742       }
1743       break;
1744     default:
1745       nextToken();
1746       break;
1747     }
1748   } while (!eof());
1749 }
1750 
1751 void UnwrappedLineParser::parseIfThenElse() {
1752   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1753   nextToken();
1754   if (FormatTok->Tok.is(tok::kw_constexpr))
1755     nextToken();
1756   if (FormatTok->Tok.is(tok::l_paren))
1757     parseParens();
1758   bool NeedsUnwrappedLine = false;
1759   if (FormatTok->Tok.is(tok::l_brace)) {
1760     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1761     parseBlock(/*MustBeDeclaration=*/false);
1762     if (Style.BraceWrapping.BeforeElse)
1763       addUnwrappedLine();
1764     else
1765       NeedsUnwrappedLine = true;
1766   } else {
1767     addUnwrappedLine();
1768     ++Line->Level;
1769     parseStructuralElement();
1770     --Line->Level;
1771   }
1772   if (FormatTok->Tok.is(tok::kw_else)) {
1773     nextToken();
1774     if (FormatTok->Tok.is(tok::l_brace)) {
1775       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1776       parseBlock(/*MustBeDeclaration=*/false);
1777       addUnwrappedLine();
1778     } else if (FormatTok->Tok.is(tok::kw_if)) {
1779       parseIfThenElse();
1780     } else {
1781       addUnwrappedLine();
1782       ++Line->Level;
1783       parseStructuralElement();
1784       if (FormatTok->is(tok::eof))
1785         addUnwrappedLine();
1786       --Line->Level;
1787     }
1788   } else if (NeedsUnwrappedLine) {
1789     addUnwrappedLine();
1790   }
1791 }
1792 
1793 void UnwrappedLineParser::parseTryCatch() {
1794   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1795   nextToken();
1796   bool NeedsUnwrappedLine = false;
1797   if (FormatTok->is(tok::colon)) {
1798     // We are in a function try block, what comes is an initializer list.
1799     nextToken();
1800     while (FormatTok->is(tok::identifier)) {
1801       nextToken();
1802       if (FormatTok->is(tok::l_paren))
1803         parseParens();
1804       if (FormatTok->is(tok::comma))
1805         nextToken();
1806     }
1807   }
1808   // Parse try with resource.
1809   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1810     parseParens();
1811   }
1812   if (FormatTok->is(tok::l_brace)) {
1813     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1814     parseBlock(/*MustBeDeclaration=*/false);
1815     if (Style.BraceWrapping.BeforeCatch) {
1816       addUnwrappedLine();
1817     } else {
1818       NeedsUnwrappedLine = true;
1819     }
1820   } else if (!FormatTok->is(tok::kw_catch)) {
1821     // The C++ standard requires a compound-statement after a try.
1822     // If there's none, we try to assume there's a structuralElement
1823     // and try to continue.
1824     addUnwrappedLine();
1825     ++Line->Level;
1826     parseStructuralElement();
1827     --Line->Level;
1828   }
1829   while (1) {
1830     if (FormatTok->is(tok::at))
1831       nextToken();
1832     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1833                              tok::kw___finally) ||
1834           ((Style.Language == FormatStyle::LK_Java ||
1835             Style.Language == FormatStyle::LK_JavaScript) &&
1836            FormatTok->is(Keywords.kw_finally)) ||
1837           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1838            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1839       break;
1840     nextToken();
1841     while (FormatTok->isNot(tok::l_brace)) {
1842       if (FormatTok->is(tok::l_paren)) {
1843         parseParens();
1844         continue;
1845       }
1846       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1847         return;
1848       nextToken();
1849     }
1850     NeedsUnwrappedLine = false;
1851     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1852     parseBlock(/*MustBeDeclaration=*/false);
1853     if (Style.BraceWrapping.BeforeCatch)
1854       addUnwrappedLine();
1855     else
1856       NeedsUnwrappedLine = true;
1857   }
1858   if (NeedsUnwrappedLine)
1859     addUnwrappedLine();
1860 }
1861 
1862 void UnwrappedLineParser::parseNamespace() {
1863   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1864 
1865   const FormatToken &InitialToken = *FormatTok;
1866   nextToken();
1867   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1868     nextToken();
1869   if (FormatTok->Tok.is(tok::l_brace)) {
1870     if (ShouldBreakBeforeBrace(Style, InitialToken))
1871       addUnwrappedLine();
1872 
1873     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1874                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1875                      DeclarationScopeStack.size() > 1);
1876     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1877     // Munch the semicolon after a namespace. This is more common than one would
1878     // think. Puttin the semicolon into its own line is very ugly.
1879     if (FormatTok->Tok.is(tok::semi))
1880       nextToken();
1881     addUnwrappedLine();
1882   }
1883   // FIXME: Add error handling.
1884 }
1885 
1886 void UnwrappedLineParser::parseNew() {
1887   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1888   nextToken();
1889   if (Style.Language != FormatStyle::LK_Java)
1890     return;
1891 
1892   // In Java, we can parse everything up to the parens, which aren't optional.
1893   do {
1894     // There should not be a ;, { or } before the new's open paren.
1895     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1896       return;
1897 
1898     // Consume the parens.
1899     if (FormatTok->is(tok::l_paren)) {
1900       parseParens();
1901 
1902       // If there is a class body of an anonymous class, consume that as child.
1903       if (FormatTok->is(tok::l_brace))
1904         parseChildBlock();
1905       return;
1906     }
1907     nextToken();
1908   } while (!eof());
1909 }
1910 
1911 void UnwrappedLineParser::parseForOrWhileLoop() {
1912   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1913          "'for', 'while' or foreach macro expected");
1914   nextToken();
1915   // JS' for await ( ...
1916   if (Style.Language == FormatStyle::LK_JavaScript &&
1917       FormatTok->is(Keywords.kw_await))
1918     nextToken();
1919   if (FormatTok->Tok.is(tok::l_paren))
1920     parseParens();
1921   if (FormatTok->Tok.is(tok::l_brace)) {
1922     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1923     parseBlock(/*MustBeDeclaration=*/false);
1924     addUnwrappedLine();
1925   } else {
1926     addUnwrappedLine();
1927     ++Line->Level;
1928     parseStructuralElement();
1929     --Line->Level;
1930   }
1931 }
1932 
1933 void UnwrappedLineParser::parseDoWhile() {
1934   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1935   nextToken();
1936   if (FormatTok->Tok.is(tok::l_brace)) {
1937     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1938     parseBlock(/*MustBeDeclaration=*/false);
1939     if (Style.BraceWrapping.IndentBraces)
1940       addUnwrappedLine();
1941   } else {
1942     addUnwrappedLine();
1943     ++Line->Level;
1944     parseStructuralElement();
1945     --Line->Level;
1946   }
1947 
1948   // FIXME: Add error handling.
1949   if (!FormatTok->Tok.is(tok::kw_while)) {
1950     addUnwrappedLine();
1951     return;
1952   }
1953 
1954   nextToken();
1955   parseStructuralElement();
1956 }
1957 
1958 void UnwrappedLineParser::parseLabel() {
1959   nextToken();
1960   unsigned OldLineLevel = Line->Level;
1961   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1962     --Line->Level;
1963   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1964     CompoundStatementIndenter Indenter(this, Line->Level,
1965                                        Style.BraceWrapping.AfterCaseLabel,
1966                                        Style.BraceWrapping.IndentBraces);
1967     parseBlock(/*MustBeDeclaration=*/false);
1968     if (FormatTok->Tok.is(tok::kw_break)) {
1969       if (Style.BraceWrapping.AfterControlStatement)
1970         addUnwrappedLine();
1971       parseStructuralElement();
1972     }
1973     addUnwrappedLine();
1974   } else {
1975     if (FormatTok->is(tok::semi))
1976       nextToken();
1977     addUnwrappedLine();
1978   }
1979   Line->Level = OldLineLevel;
1980   if (FormatTok->isNot(tok::l_brace)) {
1981     parseStructuralElement();
1982     addUnwrappedLine();
1983   }
1984 }
1985 
1986 void UnwrappedLineParser::parseCaseLabel() {
1987   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1988   // FIXME: fix handling of complex expressions here.
1989   do {
1990     nextToken();
1991   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1992   parseLabel();
1993 }
1994 
1995 void UnwrappedLineParser::parseSwitch() {
1996   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1997   nextToken();
1998   if (FormatTok->Tok.is(tok::l_paren))
1999     parseParens();
2000   if (FormatTok->Tok.is(tok::l_brace)) {
2001     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2002     parseBlock(/*MustBeDeclaration=*/false);
2003     addUnwrappedLine();
2004   } else {
2005     addUnwrappedLine();
2006     ++Line->Level;
2007     parseStructuralElement();
2008     --Line->Level;
2009   }
2010 }
2011 
2012 void UnwrappedLineParser::parseAccessSpecifier() {
2013   nextToken();
2014   // Understand Qt's slots.
2015   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2016     nextToken();
2017   // Otherwise, we don't know what it is, and we'd better keep the next token.
2018   if (FormatTok->Tok.is(tok::colon))
2019     nextToken();
2020   addUnwrappedLine();
2021 }
2022 
2023 bool UnwrappedLineParser::parseEnum() {
2024   // Won't be 'enum' for NS_ENUMs.
2025   if (FormatTok->Tok.is(tok::kw_enum))
2026     nextToken();
2027 
2028   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2029   // declarations. An "enum" keyword followed by a colon would be a syntax
2030   // error and thus assume it is just an identifier.
2031   if (Style.Language == FormatStyle::LK_JavaScript &&
2032       FormatTok->isOneOf(tok::colon, tok::question))
2033     return false;
2034 
2035   // In protobuf, "enum" can be used as a field name.
2036   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2037     return false;
2038 
2039   // Eat up enum class ...
2040   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2041     nextToken();
2042 
2043   while (FormatTok->Tok.getIdentifierInfo() ||
2044          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2045                             tok::greater, tok::comma, tok::question)) {
2046     nextToken();
2047     // We can have macros or attributes in between 'enum' and the enum name.
2048     if (FormatTok->is(tok::l_paren))
2049       parseParens();
2050     if (FormatTok->is(tok::identifier)) {
2051       nextToken();
2052       // If there are two identifiers in a row, this is likely an elaborate
2053       // return type. In Java, this can be "implements", etc.
2054       if (Style.isCpp() && FormatTok->is(tok::identifier))
2055         return false;
2056     }
2057   }
2058 
2059   // Just a declaration or something is wrong.
2060   if (FormatTok->isNot(tok::l_brace))
2061     return true;
2062   FormatTok->BlockKind = BK_Block;
2063 
2064   if (Style.Language == FormatStyle::LK_Java) {
2065     // Java enums are different.
2066     parseJavaEnumBody();
2067     return true;
2068   }
2069   if (Style.Language == FormatStyle::LK_Proto) {
2070     parseBlock(/*MustBeDeclaration=*/true);
2071     return true;
2072   }
2073 
2074   // Parse enum body.
2075   nextToken();
2076   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2077   if (HasError) {
2078     if (FormatTok->is(tok::semi))
2079       nextToken();
2080     addUnwrappedLine();
2081   }
2082   return true;
2083 
2084   // There is no addUnwrappedLine() here so that we fall through to parsing a
2085   // structural element afterwards. Thus, in "enum A {} n, m;",
2086   // "} n, m;" will end up in one unwrapped line.
2087 }
2088 
2089 void UnwrappedLineParser::parseJavaEnumBody() {
2090   // Determine whether the enum is simple, i.e. does not have a semicolon or
2091   // constants with class bodies. Simple enums can be formatted like braced
2092   // lists, contracted to a single line, etc.
2093   unsigned StoredPosition = Tokens->getPosition();
2094   bool IsSimple = true;
2095   FormatToken *Tok = Tokens->getNextToken();
2096   while (Tok) {
2097     if (Tok->is(tok::r_brace))
2098       break;
2099     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2100       IsSimple = false;
2101       break;
2102     }
2103     // FIXME: This will also mark enums with braces in the arguments to enum
2104     // constants as "not simple". This is probably fine in practice, though.
2105     Tok = Tokens->getNextToken();
2106   }
2107   FormatTok = Tokens->setPosition(StoredPosition);
2108 
2109   if (IsSimple) {
2110     nextToken();
2111     parseBracedList();
2112     addUnwrappedLine();
2113     return;
2114   }
2115 
2116   // Parse the body of a more complex enum.
2117   // First add a line for everything up to the "{".
2118   nextToken();
2119   addUnwrappedLine();
2120   ++Line->Level;
2121 
2122   // Parse the enum constants.
2123   while (FormatTok) {
2124     if (FormatTok->is(tok::l_brace)) {
2125       // Parse the constant's class body.
2126       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2127                  /*MunchSemi=*/false);
2128     } else if (FormatTok->is(tok::l_paren)) {
2129       parseParens();
2130     } else if (FormatTok->is(tok::comma)) {
2131       nextToken();
2132       addUnwrappedLine();
2133     } else if (FormatTok->is(tok::semi)) {
2134       nextToken();
2135       addUnwrappedLine();
2136       break;
2137     } else if (FormatTok->is(tok::r_brace)) {
2138       addUnwrappedLine();
2139       break;
2140     } else {
2141       nextToken();
2142     }
2143   }
2144 
2145   // Parse the class body after the enum's ";" if any.
2146   parseLevel(/*HasOpeningBrace=*/true);
2147   nextToken();
2148   --Line->Level;
2149   addUnwrappedLine();
2150 }
2151 
// Parses a record (class-like) declaration starting at its introducing
// keyword: the name with any interleaved macros/attributes, an optional
// base-clause or template argument list, and the body if there is one.
//
// \p ParseAsExpr selects how the body is parsed: as a child block when true,
// as a regular declaration block (preceded by an optional line break before
// the brace) when false.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Remembered for the break-before-brace decision further down.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas) ||
         ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.Language == FormatStyle::LK_JavaScript &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier whose text differs from its upper-cased form, i.e. one
    // that is not spelled entirely in capitals, is not treated as a macro.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip ahead to the body. Braces that parse as braced lists are consumed
    // along the way; the first "{" that does not is left as the current token
    // for the body handling below. A ";" ends the declaration without a body.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->Tok.is(tok::semi))
        return;
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
2221 
2222 void UnwrappedLineParser::parseObjCMethod() {
2223   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2224          "'(' or identifier expected.");
2225   do {
2226     if (FormatTok->Tok.is(tok::semi)) {
2227       nextToken();
2228       addUnwrappedLine();
2229       return;
2230     } else if (FormatTok->Tok.is(tok::l_brace)) {
2231       if (Style.BraceWrapping.AfterFunction)
2232         addUnwrappedLine();
2233       parseBlock(/*MustBeDeclaration=*/false);
2234       addUnwrappedLine();
2235       return;
2236     } else {
2237       nextToken();
2238     }
2239   } while (!eof());
2240 }
2241 
2242 void UnwrappedLineParser::parseObjCProtocolList() {
2243   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2244   do {
2245     nextToken();
2246     // Early exit in case someone forgot a close angle.
2247     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2248         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2249       return;
2250   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2251   nextToken(); // Skip '>'.
2252 }
2253 
2254 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2255   do {
2256     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2257       nextToken();
2258       addUnwrappedLine();
2259       break;
2260     }
2261     if (FormatTok->is(tok::l_brace)) {
2262       parseBlock(/*MustBeDeclaration=*/false);
2263       // In ObjC interfaces, nothing should be following the "}".
2264       addUnwrappedLine();
2265     } else if (FormatTok->is(tok::r_brace)) {
2266       // Ignore stray "}". parseStructuralElement doesn't consume them.
2267       nextToken();
2268       addUnwrappedLine();
2269     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2270       nextToken();
2271       parseObjCMethod();
2272     } else {
2273       parseStructuralElement();
2274     }
2275   } while (!eof());
2276 }
2277 
2278 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2279   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2280          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2281   nextToken();
2282   nextToken(); // interface name
2283 
2284   // @interface can be followed by a lightweight generic
2285   // specialization list, then either a base class or a category.
2286   if (FormatTok->Tok.is(tok::less)) {
2287     // Unlike protocol lists, generic parameterizations support
2288     // nested angles:
2289     //
2290     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2291     //     NSObject <NSCopying, NSSecureCoding>
2292     //
2293     // so we need to count how many open angles we have left.
2294     unsigned NumOpenAngles = 1;
2295     do {
2296       nextToken();
2297       // Early exit in case someone forgot a close angle.
2298       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2299           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2300         break;
2301       if (FormatTok->Tok.is(tok::less))
2302         ++NumOpenAngles;
2303       else if (FormatTok->Tok.is(tok::greater)) {
2304         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2305         --NumOpenAngles;
2306       }
2307     } while (!eof() && NumOpenAngles != 0);
2308     nextToken(); // Skip '>'.
2309   }
2310   if (FormatTok->Tok.is(tok::colon)) {
2311     nextToken();
2312     nextToken(); // base class name
2313   } else if (FormatTok->Tok.is(tok::l_paren))
2314     // Skip category, if present.
2315     parseParens();
2316 
2317   if (FormatTok->Tok.is(tok::less))
2318     parseObjCProtocolList();
2319 
2320   if (FormatTok->Tok.is(tok::l_brace)) {
2321     if (Style.BraceWrapping.AfterObjCDeclaration)
2322       addUnwrappedLine();
2323     parseBlock(/*MustBeDeclaration=*/true);
2324   }
2325 
2326   // With instance variables, this puts '}' on its own line.  Without instance
2327   // variables, this ends the @interface line.
2328   addUnwrappedLine();
2329 
2330   parseObjCUntilAtEnd();
2331 }
2332 
2333 // Returns true for the declaration/definition form of @protocol,
2334 // false for the expression form.
2335 bool UnwrappedLineParser::parseObjCProtocol() {
2336   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2337   nextToken();
2338 
2339   if (FormatTok->is(tok::l_paren))
2340     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2341     return false;
2342 
2343   // The definition/declaration form,
2344   // @protocol Foo
2345   // - (int)someMethod;
2346   // @end
2347 
2348   nextToken(); // protocol name
2349 
2350   if (FormatTok->Tok.is(tok::less))
2351     parseObjCProtocolList();
2352 
2353   // Check for protocol declaration.
2354   if (FormatTok->Tok.is(tok::semi)) {
2355     nextToken();
2356     addUnwrappedLine();
2357     return true;
2358   }
2359 
2360   addUnwrappedLine();
2361   parseObjCUntilAtEnd();
2362   return true;
2363 }
2364 
2365 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2366   bool IsImport = FormatTok->is(Keywords.kw_import);
2367   assert(IsImport || FormatTok->is(tok::kw_export));
2368   nextToken();
2369 
2370   // Consume the "default" in "export default class/function".
2371   if (FormatTok->is(tok::kw_default))
2372     nextToken();
2373 
2374   // Consume "async function", "function" and "default function", so that these
2375   // get parsed as free-standing JS functions, i.e. do not require a trailing
2376   // semicolon.
2377   if (FormatTok->is(Keywords.kw_async))
2378     nextToken();
2379   if (FormatTok->is(Keywords.kw_function)) {
2380     nextToken();
2381     return;
2382   }
2383 
2384   // For imports, `export *`, `export {...}`, consume the rest of the line up
2385   // to the terminating `;`. For everything else, just return and continue
2386   // parsing the structural element, i.e. the declaration or expression for
2387   // `export default`.
2388   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2389       !FormatTok->isStringLiteral())
2390     return;
2391 
2392   while (!eof()) {
2393     if (FormatTok->is(tok::semi))
2394       return;
2395     if (Line->Tokens.empty()) {
2396       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2397       // import statement should terminate.
2398       return;
2399     }
2400     if (FormatTok->is(tok::l_brace)) {
2401       FormatTok->BlockKind = BK_Block;
2402       nextToken();
2403       parseBracedList();
2404     } else {
2405       nextToken();
2406     }
2407   }
2408 }
2409 
2410 void UnwrappedLineParser::parseStatementMacro() {
2411   nextToken();
2412   if (FormatTok->is(tok::l_paren))
2413     parseParens();
2414   if (FormatTok->is(tok::semi))
2415     nextToken();
2416   addUnwrappedLine();
2417 }
2418 
2419 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2420                                                  StringRef Prefix = "") {
2421   llvm::dbgs() << Prefix << "Line(" << Line.Level
2422                << ", FSC=" << Line.FirstStartColumn << ")"
2423                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2424   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2425                                                     E = Line.Tokens.end();
2426        I != E; ++I) {
2427     llvm::dbgs() << I->Tok->Tok.getName() << "["
2428                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2429                  << "] ";
2430   }
2431   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2432                                                     E = Line.Tokens.end();
2433        I != E; ++I) {
2434     const UnwrappedLineNode &Node = *I;
2435     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2436              I = Node.Children.begin(),
2437              E = Node.Children.end();
2438          I != E; ++I) {
2439       printDebugInfo(*I, "\nChild: ");
2440     }
2441   }
2442   llvm::dbgs() << "\n";
2443 }
2444 
2445 void UnwrappedLineParser::addUnwrappedLine() {
2446   if (Line->Tokens.empty())
2447     return;
2448   LLVM_DEBUG({
2449     if (CurrentLines == &Lines)
2450       printDebugInfo(*Line);
2451   });
2452   CurrentLines->push_back(std::move(*Line));
2453   Line->Tokens.clear();
2454   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2455   Line->FirstStartColumn = 0;
2456   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2457     CurrentLines->append(
2458         std::make_move_iterator(PreprocessorDirectives.begin()),
2459         std::make_move_iterator(PreprocessorDirectives.end()));
2460     PreprocessorDirectives.clear();
2461   }
2462   // Disconnect the current token from the last token on the previous line.
2463   FormatTok->Previous = nullptr;
2464 }
2465 
2466 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2467 
2468 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2469   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2470          FormatTok.NewlinesBefore > 0;
2471 }
2472 
2473 // Checks if \p FormatTok is a line comment that continues the line comment
2474 // section on \p Line.
2475 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2476                                         const UnwrappedLine &Line,
2477                                         llvm::Regex &CommentPragmasRegex) {
2478   if (Line.Tokens.empty())
2479     return false;
2480 
2481   StringRef IndentContent = FormatTok.TokenText;
2482   if (FormatTok.TokenText.startswith("//") ||
2483       FormatTok.TokenText.startswith("/*"))
2484     IndentContent = FormatTok.TokenText.substr(2);
2485   if (CommentPragmasRegex.match(IndentContent))
2486     return false;
2487 
2488   // If Line starts with a line comment, then FormatTok continues the comment
2489   // section if its original column is greater or equal to the original start
2490   // column of the line.
2491   //
2492   // Define the min column token of a line as follows: if a line ends in '{' or
2493   // contains a '{' followed by a line comment, then the min column token is
2494   // that '{'. Otherwise, the min column token of the line is the first token of
2495   // the line.
2496   //
2497   // If Line starts with a token other than a line comment, then FormatTok
2498   // continues the comment section if its original column is greater than the
2499   // original start column of the min column token of the line.
2500   //
2501   // For example, the second line comment continues the first in these cases:
2502   //
2503   // // first line
2504   // // second line
2505   //
2506   // and:
2507   //
2508   // // first line
2509   //  // second line
2510   //
2511   // and:
2512   //
2513   // int i; // first line
2514   //  // second line
2515   //
2516   // and:
2517   //
2518   // do { // first line
2519   //      // second line
2520   //   int i;
2521   // } while (true);
2522   //
2523   // and:
2524   //
2525   // enum {
2526   //   a, // first line
2527   //    // second line
2528   //   b
2529   // };
2530   //
2531   // The second line comment doesn't continue the first in these cases:
2532   //
2533   //   // first line
2534   //  // second line
2535   //
2536   // and:
2537   //
2538   // int i; // first line
2539   // // second line
2540   //
2541   // and:
2542   //
2543   // do { // first line
2544   //   // second line
2545   //   int i;
2546   // } while (true);
2547   //
2548   // and:
2549   //
2550   // enum {
2551   //   a, // first line
2552   //   // second line
2553   // };
2554   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2555 
2556   // Scan for '{//'. If found, use the column of '{' as a min column for line
2557   // comment section continuation.
2558   const FormatToken *PreviousToken = nullptr;
2559   for (const UnwrappedLineNode &Node : Line.Tokens) {
2560     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2561         isLineComment(*Node.Tok)) {
2562       MinColumnToken = PreviousToken;
2563       break;
2564     }
2565     PreviousToken = Node.Tok;
2566 
2567     // Grab the last newline preceding a token in this unwrapped line.
2568     if (Node.Tok->NewlinesBefore > 0) {
2569       MinColumnToken = Node.Tok;
2570     }
2571   }
2572   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2573     MinColumnToken = PreviousToken;
2574   }
2575 
2576   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2577                               MinColumnToken);
2578 }
2579 
2580 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2581   bool JustComments = Line->Tokens.empty();
2582   for (SmallVectorImpl<FormatToken *>::const_iterator
2583            I = CommentsBeforeNextToken.begin(),
2584            E = CommentsBeforeNextToken.end();
2585        I != E; ++I) {
2586     // Line comments that belong to the same line comment section are put on the
2587     // same line since later we might want to reflow content between them.
2588     // Additional fine-grained breaking of line comment sections is controlled
2589     // by the class BreakableLineCommentSection in case it is desirable to keep
2590     // several line comment sections in the same unwrapped line.
2591     //
2592     // FIXME: Consider putting separate line comment sections as children to the
2593     // unwrapped line instead.
2594     (*I)->ContinuesLineCommentSection =
2595         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2596     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2597       addUnwrappedLine();
2598     pushToken(*I);
2599   }
2600   if (NewlineBeforeNext && JustComments)
2601     addUnwrappedLine();
2602   CommentsBeforeNextToken.clear();
2603 }
2604 
2605 void UnwrappedLineParser::nextToken(int LevelDifference) {
2606   if (eof())
2607     return;
2608   flushComments(isOnNewLine(*FormatTok));
2609   pushToken(FormatTok);
2610   FormatToken *Previous = FormatTok;
2611   if (Style.Language != FormatStyle::LK_JavaScript)
2612     readToken(LevelDifference);
2613   else
2614     readTokenWithJavaScriptASI();
2615   FormatTok->Previous = Previous;
2616 }
2617 
2618 void UnwrappedLineParser::distributeComments(
2619     const SmallVectorImpl<FormatToken *> &Comments,
2620     const FormatToken *NextTok) {
2621   // Whether or not a line comment token continues a line is controlled by
2622   // the method continuesLineCommentSection, with the following caveat:
2623   //
2624   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2625   // that each comment line from the trail is aligned with the next token, if
2626   // the next token exists. If a trail exists, the beginning of the maximal
2627   // trail is marked as a start of a new comment section.
2628   //
2629   // For example in this code:
2630   //
2631   // int a; // line about a
2632   //   // line 1 about b
2633   //   // line 2 about b
2634   //   int b;
2635   //
2636   // the two lines about b form a maximal trail, so there are two sections, the
2637   // first one consisting of the single comment "// line about a" and the
2638   // second one consisting of the next two comments.
2639   if (Comments.empty())
2640     return;
2641   bool ShouldPushCommentsInCurrentLine = true;
2642   bool HasTrailAlignedWithNextToken = false;
2643   unsigned StartOfTrailAlignedWithNextToken = 0;
2644   if (NextTok) {
2645     // We are skipping the first element intentionally.
2646     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2647       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2648         HasTrailAlignedWithNextToken = true;
2649         StartOfTrailAlignedWithNextToken = i;
2650       }
2651     }
2652   }
2653   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2654     FormatToken *FormatTok = Comments[i];
2655     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2656       FormatTok->ContinuesLineCommentSection = false;
2657     } else {
2658       FormatTok->ContinuesLineCommentSection =
2659           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2660     }
2661     if (!FormatTok->ContinuesLineCommentSection &&
2662         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2663       ShouldPushCommentsInCurrentLine = false;
2664     }
2665     if (ShouldPushCommentsInCurrentLine) {
2666       pushToken(FormatTok);
2667     } else {
2668       CommentsBeforeNextToken.push_back(FormatTok);
2669     }
2670   }
2671 }
2672 
2673 void UnwrappedLineParser::readToken(int LevelDifference) {
2674   SmallVector<FormatToken *, 1> Comments;
2675   do {
2676     FormatTok = Tokens->getNextToken();
2677     assert(FormatTok);
2678     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2679            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2680       distributeComments(Comments, FormatTok);
2681       Comments.clear();
2682       // If there is an unfinished unwrapped line, we flush the preprocessor
2683       // directives only after that unwrapped line was finished later.
2684       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2685       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2686       assert((LevelDifference >= 0 ||
2687               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2688              "LevelDifference makes Line->Level negative");
2689       Line->Level += LevelDifference;
2690       // Comments stored before the preprocessor directive need to be output
2691       // before the preprocessor directive, at the same level as the
2692       // preprocessor directive, as we consider them to apply to the directive.
2693       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
2694           PPBranchLevel > 0)
2695         Line->Level += PPBranchLevel;
2696       flushComments(isOnNewLine(*FormatTok));
2697       parsePPDirective();
2698     }
2699     while (FormatTok->Type == TT_ConflictStart ||
2700            FormatTok->Type == TT_ConflictEnd ||
2701            FormatTok->Type == TT_ConflictAlternative) {
2702       if (FormatTok->Type == TT_ConflictStart) {
2703         conditionalCompilationStart(/*Unreachable=*/false);
2704       } else if (FormatTok->Type == TT_ConflictAlternative) {
2705         conditionalCompilationAlternative();
2706       } else if (FormatTok->Type == TT_ConflictEnd) {
2707         conditionalCompilationEnd();
2708       }
2709       FormatTok = Tokens->getNextToken();
2710       FormatTok->MustBreakBefore = true;
2711     }
2712 
2713     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2714         !Line->InPPDirective) {
2715       continue;
2716     }
2717 
2718     if (!FormatTok->Tok.is(tok::comment)) {
2719       distributeComments(Comments, FormatTok);
2720       Comments.clear();
2721       return;
2722     }
2723 
2724     Comments.push_back(FormatTok);
2725   } while (!eof());
2726 
2727   distributeComments(Comments, nullptr);
2728   Comments.clear();
2729 }
2730 
2731 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2732   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2733   if (MustBreakBeforeNextToken) {
2734     Line->Tokens.back().Tok->MustBreakBefore = true;
2735     MustBreakBeforeNextToken = false;
2736   }
2737 }
2738 
2739 } // end namespace format
2740 } // end namespace clang
2741