1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <algorithm>
21 
22 #define DEBUG_TYPE "format-parser"
23 
24 namespace clang {
25 namespace format {
26 
27 class FormatTokenSource {
28 public:
29   virtual ~FormatTokenSource() {}
30   virtual FormatToken *getNextToken() = 0;
31 
32   virtual unsigned getPosition() = 0;
33   virtual FormatToken *setPosition(unsigned Position) = 0;
34 };
35 
36 namespace {
37 
38 class ScopedDeclarationState {
39 public:
40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
41                          bool MustBeDeclaration)
42       : Line(Line), Stack(Stack) {
43     Line.MustBeDeclaration = MustBeDeclaration;
44     Stack.push_back(MustBeDeclaration);
45   }
46   ~ScopedDeclarationState() {
47     Stack.pop_back();
48     if (!Stack.empty())
49       Line.MustBeDeclaration = Stack.back();
50     else
51       Line.MustBeDeclaration = true;
52   }
53 
54 private:
55   UnwrappedLine &Line;
56   std::vector<bool> &Stack;
57 };
58 
59 static bool isLineComment(const FormatToken &FormatTok) {
60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     FakeEOF.Tok.startToken();
86     FakeEOF.Tok.setKind(tok::eof);
87     TokenSource = this;
88     Line.Level = 0;
89     Line.InPPDirective = true;
90   }
91 
92   ~ScopedMacroState() override {
93     TokenSource = PreviousTokenSource;
94     ResetToken = Token;
95     Line.InPPDirective = false;
96     Line.Level = PreviousLineLevel;
97   }
98 
99   FormatToken *getNextToken() override {
100     // The \c UnwrappedLineParser guards against this by never calling
101     // \c getNextToken() after it has encountered the first eof token.
102     assert(!eof());
103     PreviousToken = Token;
104     Token = PreviousTokenSource->getNextToken();
105     if (eof())
106       return &FakeEOF;
107     return Token;
108   }
109 
110   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
111 
112   FormatToken *setPosition(unsigned Position) override {
113     PreviousToken = nullptr;
114     Token = PreviousTokenSource->setPosition(Position);
115     return Token;
116   }
117 
118 private:
119   bool eof() {
120     return Token && Token->HasUnescapedNewline &&
121            !continuesLineComment(*Token, PreviousToken,
122                                  /*MinColumnToken=*/PreviousToken);
123   }
124 
125   FormatToken FakeEOF;
126   UnwrappedLine &Line;
127   FormatTokenSource *&TokenSource;
128   FormatToken *&ResetToken;
129   unsigned PreviousLineLevel;
130   FormatTokenSource *PreviousTokenSource;
131 
132   FormatToken *Token;
133   FormatToken *PreviousToken;
134 };
135 
136 } // end anonymous namespace
137 
138 class ScopedLineState {
139 public:
140   ScopedLineState(UnwrappedLineParser &Parser,
141                   bool SwitchToPreprocessorLines = false)
142       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
143     if (SwitchToPreprocessorLines)
144       Parser.CurrentLines = &Parser.PreprocessorDirectives;
145     else if (!Parser.Line->Tokens.empty())
146       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
147     PreBlockLine = std::move(Parser.Line);
148     Parser.Line = std::make_unique<UnwrappedLine>();
149     Parser.Line->Level = PreBlockLine->Level;
150     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
151   }
152 
153   ~ScopedLineState() {
154     if (!Parser.Line->Tokens.empty()) {
155       Parser.addUnwrappedLine();
156     }
157     assert(Parser.Line->Tokens.empty());
158     Parser.Line = std::move(PreBlockLine);
159     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
160       Parser.MustBreakBeforeNextToken = true;
161     Parser.CurrentLines = OriginalLines;
162   }
163 
164 private:
165   UnwrappedLineParser &Parser;
166 
167   std::unique_ptr<UnwrappedLine> PreBlockLine;
168   SmallVectorImpl<UnwrappedLine> *OriginalLines;
169 };
170 
171 class CompoundStatementIndenter {
172 public:
173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
174                             const FormatStyle &Style, unsigned &LineLevel)
175       : CompoundStatementIndenter(Parser, LineLevel,
176                                   Style.BraceWrapping.AfterControlStatement,
177                                   Style.BraceWrapping.IndentBraces) {}
178   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
179                             bool WrapBrace, bool IndentBrace)
180       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
181     if (WrapBrace)
182       Parser->addUnwrappedLine();
183     if (IndentBrace)
184       ++LineLevel;
185   }
186   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
187 
188 private:
189   unsigned &LineLevel;
190   unsigned OldLineLevel;
191 };
192 
193 namespace {
194 
195 class IndexedTokenSource : public FormatTokenSource {
196 public:
197   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
198       : Tokens(Tokens), Position(-1) {}
199 
200   FormatToken *getNextToken() override {
201     ++Position;
202     return Tokens[Position];
203   }
204 
205   unsigned getPosition() override {
206     assert(Position >= 0);
207     return Position;
208   }
209 
210   FormatToken *setPosition(unsigned P) override {
211     Position = P;
212     return Tokens[Position];
213   }
214 
215   void reset() { Position = -1; }
216 
217 private:
218   ArrayRef<FormatToken *> Tokens;
219   int Position;
220 };
221 
222 } // end anonymous namespace
223 
224 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
225                                          const AdditionalKeywords &Keywords,
226                                          unsigned FirstStartColumn,
227                                          ArrayRef<FormatToken *> Tokens,
228                                          UnwrappedLineConsumer &Callback)
229     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
230       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
231       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
232       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
233       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
234                        ? IG_Rejected
235                        : IG_Inited),
236       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
241                      ? IG_Rejected
242                      : IG_Inited;
243   IncludeGuardToken = nullptr;
244   Line.reset(new UnwrappedLine);
245   CommentsBeforeNextToken.clear();
246   FormatTok = nullptr;
247   MustBreakBeforeNextToken = false;
248   PreprocessorDirectives.clear();
249   CurrentLines = &Lines;
250   DeclarationScopeStack.clear();
251   PPStack.clear();
252   Line->FirstStartColumn = FirstStartColumn;
253 }
254 
255 void UnwrappedLineParser::parse() {
256   IndexedTokenSource TokenSource(AllTokens);
257   Line->FirstStartColumn = FirstStartColumn;
258   do {
259     LLVM_DEBUG(llvm::dbgs() << "----\n");
260     reset();
261     Tokens = &TokenSource;
262     TokenSource.reset();
263 
264     readToken();
265     parseFile();
266 
267     // If we found an include guard then all preprocessor directives (other than
268     // the guard) are over-indented by one.
269     if (IncludeGuard == IG_Found)
270       for (auto &Line : Lines)
271         if (Line.InPPDirective && Line.Level > 0)
272           --Line.Level;
273 
274     // Create line with eof token.
275     pushToken(FormatTok);
276     addUnwrappedLine();
277 
278     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
279                                                   E = Lines.end();
280          I != E; ++I) {
281       Callback.consumeUnwrappedLine(*I);
282     }
283     Callback.finishRun();
284     Lines.clear();
285     while (!PPLevelBranchIndex.empty() &&
286            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
287       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
288       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
289     }
290     if (!PPLevelBranchIndex.empty()) {
291       ++PPLevelBranchIndex.back();
292       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
293       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
294     }
295   } while (!PPLevelBranchIndex.empty());
296 }
297 
298 void UnwrappedLineParser::parseFile() {
299   // The top-level context in a file always has declarations, except for pre-
300   // processor directives and JavaScript files.
301   bool MustBeDeclaration =
302       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
303   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
304                                           MustBeDeclaration);
305   if (Style.Language == FormatStyle::LK_TextProto)
306     parseBracedList();
307   else
308     parseLevel(/*HasOpeningBrace=*/false);
309   // Make sure to format the remaining tokens.
310   //
311   // LK_TextProto is special since its top-level is parsed as the body of a
312   // braced list, which does not necessarily have natural line separators such
313   // as a semicolon. Comments after the last entry that have been determined to
314   // not belong to that line, as in:
315   //   key: value
316   //   // endfile comment
317   // do not have a chance to be put on a line of their own until this point.
318   // Here we add this newline before end-of-file comments.
319   if (Style.Language == FormatStyle::LK_TextProto &&
320       !CommentsBeforeNextToken.empty())
321     addUnwrappedLine();
322   flushComments(true);
323   addUnwrappedLine();
324 }
325 
326 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
327   bool SwitchLabelEncountered = false;
328   do {
329     tok::TokenKind kind = FormatTok->Tok.getKind();
330     if (FormatTok->Type == TT_MacroBlockBegin) {
331       kind = tok::l_brace;
332     } else if (FormatTok->Type == TT_MacroBlockEnd) {
333       kind = tok::r_brace;
334     }
335 
336     switch (kind) {
337     case tok::comment:
338       nextToken();
339       addUnwrappedLine();
340       break;
341     case tok::l_brace:
342       // FIXME: Add parameter whether this can happen - if this happens, we must
343       // be in a non-declaration context.
344       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
345         continue;
346       parseBlock(/*MustBeDeclaration=*/false);
347       addUnwrappedLine();
348       break;
349     case tok::r_brace:
350       if (HasOpeningBrace)
351         return;
352       nextToken();
353       addUnwrappedLine();
354       break;
355     case tok::kw_default: {
356       unsigned StoredPosition = Tokens->getPosition();
357       FormatToken *Next;
358       do {
359         Next = Tokens->getNextToken();
360       } while (Next && Next->is(tok::comment));
361       FormatTok = Tokens->setPosition(StoredPosition);
362       if (Next && Next->isNot(tok::colon)) {
363         // default not followed by ':' is not a case label; treat it like
364         // an identifier.
365         parseStructuralElement();
366         break;
367       }
368       // Else, if it is 'default:', fall through to the case handling.
369       LLVM_FALLTHROUGH;
370     }
371     case tok::kw_case:
372       if (Style.Language == FormatStyle::LK_JavaScript &&
373           Line->MustBeDeclaration) {
374         // A 'case: string' style field declaration.
375         parseStructuralElement();
376         break;
377       }
378       if (!SwitchLabelEncountered &&
379           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
380         ++Line->Level;
381       SwitchLabelEncountered = true;
382       parseStructuralElement();
383       break;
384     default:
385       parseStructuralElement();
386       break;
387     }
388   } while (!eof());
389 }
390 
391 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
392   // We'll parse forward through the tokens until we hit
393   // a closing brace or eof - note that getNextToken() will
394   // parse macros, so this will magically work inside macro
395   // definitions, too.
396   unsigned StoredPosition = Tokens->getPosition();
397   FormatToken *Tok = FormatTok;
398   const FormatToken *PrevTok = Tok->Previous;
399   // Keep a stack of positions of lbrace tokens. We will
400   // update information about whether an lbrace starts a
401   // braced init list or a different block during the loop.
402   SmallVector<FormatToken *, 8> LBraceStack;
403   assert(Tok->Tok.is(tok::l_brace));
404   do {
405     // Get next non-comment token.
406     FormatToken *NextTok;
407     unsigned ReadTokens = 0;
408     do {
409       NextTok = Tokens->getNextToken();
410       ++ReadTokens;
411     } while (NextTok->is(tok::comment));
412 
413     switch (Tok->Tok.getKind()) {
414     case tok::l_brace:
415       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
416         if (PrevTok->isOneOf(tok::colon, tok::less))
417           // A ':' indicates this code is in a type, or a braced list
418           // following a label in an object literal ({a: {b: 1}}).
419           // A '<' could be an object used in a comparison, but that is nonsense
420           // code (can never return true), so more likely it is a generic type
421           // argument (`X<{a: string; b: number}>`).
422           // The code below could be confused by semicolons between the
423           // individual members in a type member list, which would normally
424           // trigger BK_Block. In both cases, this must be parsed as an inline
425           // braced init.
426           Tok->BlockKind = BK_BracedInit;
427         else if (PrevTok->is(tok::r_paren))
428           // `) { }` can only occur in function or method declarations in JS.
429           Tok->BlockKind = BK_Block;
430       } else {
431         Tok->BlockKind = BK_Unknown;
432       }
433       LBraceStack.push_back(Tok);
434       break;
435     case tok::r_brace:
436       if (LBraceStack.empty())
437         break;
438       if (LBraceStack.back()->BlockKind == BK_Unknown) {
439         bool ProbablyBracedList = false;
440         if (Style.Language == FormatStyle::LK_Proto) {
441           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
442         } else {
443           // Using OriginalColumn to distinguish between ObjC methods and
444           // binary operators is a bit hacky.
445           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
446                                   NextTok->OriginalColumn == 0;
447 
448           // If there is a comma, semicolon or right paren after the closing
449           // brace, we assume this is a braced initializer list.  Note that
450           // regardless how we mark inner braces here, we will overwrite the
451           // BlockKind later if we parse a braced list (where all blocks
452           // inside are by default braced lists), or when we explicitly detect
453           // blocks (for example while parsing lambdas).
454           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
455           // braced list in JS.
456           ProbablyBracedList =
457               (Style.Language == FormatStyle::LK_JavaScript &&
458                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
459                                 Keywords.kw_as)) ||
460               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
461               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
462                                tok::r_paren, tok::r_square, tok::l_brace,
463                                tok::ellipsis) ||
464               (NextTok->is(tok::identifier) &&
465                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
466               (NextTok->is(tok::semi) &&
467                (!ExpectClassBody || LBraceStack.size() != 1)) ||
468               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
469           if (NextTok->is(tok::l_square)) {
470             // We can have an array subscript after a braced init
471             // list, but C++11 attributes are expected after blocks.
472             NextTok = Tokens->getNextToken();
473             ++ReadTokens;
474             ProbablyBracedList = NextTok->isNot(tok::l_square);
475           }
476         }
477         if (ProbablyBracedList) {
478           Tok->BlockKind = BK_BracedInit;
479           LBraceStack.back()->BlockKind = BK_BracedInit;
480         } else {
481           Tok->BlockKind = BK_Block;
482           LBraceStack.back()->BlockKind = BK_Block;
483         }
484       }
485       LBraceStack.pop_back();
486       break;
487     case tok::identifier:
488       if (!Tok->is(TT_StatementMacro))
489         break;
490       LLVM_FALLTHROUGH;
491     case tok::at:
492     case tok::semi:
493     case tok::kw_if:
494     case tok::kw_while:
495     case tok::kw_for:
496     case tok::kw_switch:
497     case tok::kw_try:
498     case tok::kw___try:
499       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
500         LBraceStack.back()->BlockKind = BK_Block;
501       break;
502     default:
503       break;
504     }
505     PrevTok = Tok;
506     Tok = NextTok;
507   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
508 
509   // Assume other blocks for all unclosed opening braces.
510   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
511     if (LBraceStack[i]->BlockKind == BK_Unknown)
512       LBraceStack[i]->BlockKind = BK_Block;
513   }
514 
515   FormatTok = Tokens->setPosition(StoredPosition);
516 }
517 
/// Mixes the \c std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
523 
524 size_t UnwrappedLineParser::computePPHash() const {
525   size_t h = 0;
526   for (const auto &i : PPStack) {
527     hash_combine(h, size_t(i.Kind));
528     hash_combine(h, i.Line);
529   }
530   return h;
531 }
532 
533 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
534                                      bool MunchSemi) {
535   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
536          "'{' or macro block token expected");
537   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
538   FormatTok->BlockKind = BK_Block;
539 
540   size_t PPStartHash = computePPHash();
541 
542   unsigned InitialLevel = Line->Level;
543   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
544 
545   if (MacroBlock && FormatTok->is(tok::l_paren))
546     parseParens();
547 
548   size_t NbPreprocessorDirectives =
549       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
550   addUnwrappedLine();
551   size_t OpeningLineIndex =
552       CurrentLines->empty()
553           ? (UnwrappedLine::kInvalidIndex)
554           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
555 
556   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
557                                           MustBeDeclaration);
558   if (AddLevel)
559     ++Line->Level;
560   parseLevel(/*HasOpeningBrace=*/true);
561 
562   if (eof())
563     return;
564 
565   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
566                  : !FormatTok->is(tok::r_brace)) {
567     Line->Level = InitialLevel;
568     FormatTok->BlockKind = BK_Block;
569     return;
570   }
571 
572   size_t PPEndHash = computePPHash();
573 
574   // Munch the closing brace.
575   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
576 
577   if (MacroBlock && FormatTok->is(tok::l_paren))
578     parseParens();
579 
580   if (MunchSemi && FormatTok->Tok.is(tok::semi))
581     nextToken();
582   Line->Level = InitialLevel;
583 
584   if (PPStartHash == PPEndHash) {
585     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
586     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
587       // Update the opening line to add the forward reference as well
588       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
589           CurrentLines->size() - 1;
590     }
591   }
592 }
593 
594 static bool isGoogScope(const UnwrappedLine &Line) {
595   // FIXME: Closure-library specific stuff should not be hard-coded but be
596   // configurable.
597   if (Line.Tokens.size() < 4)
598     return false;
599   auto I = Line.Tokens.begin();
600   if (I->Tok->TokenText != "goog")
601     return false;
602   ++I;
603   if (I->Tok->isNot(tok::period))
604     return false;
605   ++I;
606   if (I->Tok->TokenText != "scope")
607     return false;
608   ++I;
609   return I->Tok->is(tok::l_paren);
610 }
611 
612 static bool isIIFE(const UnwrappedLine &Line,
613                    const AdditionalKeywords &Keywords) {
614   // Look for the start of an immediately invoked anonymous function.
615   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
616   // This is commonly done in JavaScript to create a new, anonymous scope.
617   // Example: (function() { ... })()
618   if (Line.Tokens.size() < 3)
619     return false;
620   auto I = Line.Tokens.begin();
621   if (I->Tok->isNot(tok::l_paren))
622     return false;
623   ++I;
624   if (I->Tok->isNot(Keywords.kw_function))
625     return false;
626   ++I;
627   return I->Tok->is(tok::l_paren);
628 }
629 
630 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
631                                    const FormatToken &InitialToken) {
632   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
633     return Style.BraceWrapping.AfterNamespace;
634   if (InitialToken.is(tok::kw_class))
635     return Style.BraceWrapping.AfterClass;
636   if (InitialToken.is(tok::kw_union))
637     return Style.BraceWrapping.AfterUnion;
638   if (InitialToken.is(tok::kw_struct))
639     return Style.BraceWrapping.AfterStruct;
640   return false;
641 }
642 
643 void UnwrappedLineParser::parseChildBlock() {
644   FormatTok->BlockKind = BK_Block;
645   nextToken();
646   {
647     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
648                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
649     ScopedLineState LineState(*this);
650     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
651                                             /*MustBeDeclaration=*/false);
652     Line->Level += SkipIndent ? 0 : 1;
653     parseLevel(/*HasOpeningBrace=*/true);
654     flushComments(isOnNewLine(*FormatTok));
655     Line->Level -= SkipIndent ? 0 : 1;
656   }
657   nextToken();
658 }
659 
660 void UnwrappedLineParser::parsePPDirective() {
661   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
662   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
663 
664   nextToken();
665 
666   if (!FormatTok->Tok.getIdentifierInfo()) {
667     parsePPUnknown();
668     return;
669   }
670 
671   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
672   case tok::pp_define:
673     parsePPDefine();
674     return;
675   case tok::pp_if:
676     parsePPIf(/*IfDef=*/false);
677     break;
678   case tok::pp_ifdef:
679   case tok::pp_ifndef:
680     parsePPIf(/*IfDef=*/true);
681     break;
682   case tok::pp_else:
683     parsePPElse();
684     break;
685   case tok::pp_elif:
686     parsePPElIf();
687     break;
688   case tok::pp_endif:
689     parsePPEndIf();
690     break;
691   default:
692     parsePPUnknown();
693     break;
694   }
695 }
696 
697 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
698   size_t Line = CurrentLines->size();
699   if (CurrentLines == &PreprocessorDirectives)
700     Line += Lines.size();
701 
702   if (Unreachable ||
703       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
704     PPStack.push_back({PP_Unreachable, Line});
705   else
706     PPStack.push_back({PP_Conditional, Line});
707 }
708 
709 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
710   ++PPBranchLevel;
711   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
712   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
713     PPLevelBranchIndex.push_back(0);
714     PPLevelBranchCount.push_back(0);
715   }
716   PPChainBranchIndex.push(0);
717   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
718   conditionalCompilationCondition(Unreachable || Skip);
719 }
720 
721 void UnwrappedLineParser::conditionalCompilationAlternative() {
722   if (!PPStack.empty())
723     PPStack.pop_back();
724   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
725   if (!PPChainBranchIndex.empty())
726     ++PPChainBranchIndex.top();
727   conditionalCompilationCondition(
728       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
729       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
730 }
731 
732 void UnwrappedLineParser::conditionalCompilationEnd() {
733   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
734   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
735     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
736       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
737     }
738   }
739   // Guard against #endif's without #if.
740   if (PPBranchLevel > -1)
741     --PPBranchLevel;
742   if (!PPChainBranchIndex.empty())
743     PPChainBranchIndex.pop();
744   if (!PPStack.empty())
745     PPStack.pop_back();
746 }
747 
748 void UnwrappedLineParser::parsePPIf(bool IfDef) {
749   bool IfNDef = FormatTok->is(tok::pp_ifndef);
750   nextToken();
751   bool Unreachable = false;
752   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
753     Unreachable = true;
754   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
755     Unreachable = true;
756   conditionalCompilationStart(Unreachable);
757   FormatToken *IfCondition = FormatTok;
758   // If there's a #ifndef on the first line, and the only lines before it are
759   // comments, it could be an include guard.
760   bool MaybeIncludeGuard = IfNDef;
761   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
762     for (auto &Line : Lines) {
763       if (!Line.Tokens.front().Tok->is(tok::comment)) {
764         MaybeIncludeGuard = false;
765         IncludeGuard = IG_Rejected;
766         break;
767       }
768     }
769   --PPBranchLevel;
770   parsePPUnknown();
771   ++PPBranchLevel;
772   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
773     IncludeGuard = IG_IfNdefed;
774     IncludeGuardToken = IfCondition;
775   }
776 }
777 
778 void UnwrappedLineParser::parsePPElse() {
779   // If a potential include guard has an #else, it's not an include guard.
780   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
781     IncludeGuard = IG_Rejected;
782   conditionalCompilationAlternative();
783   if (PPBranchLevel > -1)
784     --PPBranchLevel;
785   parsePPUnknown();
786   ++PPBranchLevel;
787 }
788 
789 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
790 
791 void UnwrappedLineParser::parsePPEndIf() {
792   conditionalCompilationEnd();
793   parsePPUnknown();
794   // If the #endif of a potential include guard is the last thing in the file,
795   // then we found an include guard.
796   unsigned TokenPosition = Tokens->getPosition();
797   FormatToken *PeekNext = AllTokens[TokenPosition];
798   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
799       PeekNext->is(tok::eof) &&
800       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
801     IncludeGuard = IG_Found;
802 }
803 
804 void UnwrappedLineParser::parsePPDefine() {
805   nextToken();
806 
807   if (!FormatTok->Tok.getIdentifierInfo()) {
808     IncludeGuard = IG_Rejected;
809     IncludeGuardToken = nullptr;
810     parsePPUnknown();
811     return;
812   }
813 
814   if (IncludeGuard == IG_IfNdefed &&
815       IncludeGuardToken->TokenText == FormatTok->TokenText) {
816     IncludeGuard = IG_Defined;
817     IncludeGuardToken = nullptr;
818     for (auto &Line : Lines) {
819       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
820         IncludeGuard = IG_Rejected;
821         break;
822       }
823     }
824   }
825 
826   nextToken();
827   if (FormatTok->Tok.getKind() == tok::l_paren &&
828       FormatTok->WhitespaceRange.getBegin() ==
829           FormatTok->WhitespaceRange.getEnd()) {
830     parseParens();
831   }
832   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
833     Line->Level += PPBranchLevel + 1;
834   addUnwrappedLine();
835   ++Line->Level;
836 
837   // Errors during a preprocessor directive can only affect the layout of the
838   // preprocessor directive, and thus we ignore them. An alternative approach
839   // would be to use the same approach we use on the file level (no
840   // re-indentation if there was a structural error) within the macro
841   // definition.
842   parseFile();
843 }
844 
845 void UnwrappedLineParser::parsePPUnknown() {
846   do {
847     nextToken();
848   } while (!eof());
849   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
850     Line->Level += PPBranchLevel + 1;
851   addUnwrappedLine();
852 }
853 
854 // Here we blacklist certain tokens that are not usually the first token in an
855 // unwrapped line. This is used in attempt to distinguish macro calls without
856 // trailing semicolons from other constructs split to several lines.
857 static bool tokenCanStartNewLine(const clang::Token &Tok) {
858   // Semicolon can be a null-statement, l_square can be a start of a macro or
859   // a C++11 attribute, but this doesn't seem to be common.
860   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
861          Tok.isNot(tok::l_square) &&
862          // Tokens that can only be used as binary operators and a part of
863          // overloaded operator names.
864          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
865          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
866          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
867          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
868          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
869          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
870          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
871          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
872          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
873          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
874          Tok.isNot(tok::lesslessequal) &&
875          // Colon is used in labels, base class lists, initializer lists,
876          // range-based for loops, ternary operator, but should never be the
877          // first token in an unwrapped line.
878          Tok.isNot(tok::colon) &&
879          // 'noexcept' is a trailing annotation.
880          Tok.isNot(tok::kw_noexcept);
881 }
882 
883 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
884                           const FormatToken *FormatTok) {
885   // FIXME: This returns true for C/C++ keywords like 'struct'.
886   return FormatTok->is(tok::identifier) &&
887          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
888           !FormatTok->isOneOf(
889               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
890               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
891               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
892               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
893               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
894               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
895               Keywords.kw_from));
896 }
897 
898 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
899                                  const FormatToken *FormatTok) {
900   return FormatTok->Tok.isLiteral() ||
901          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
902          mustBeJSIdent(Keywords, FormatTok);
903 }
904 
905 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
906 // when encountered after a value (see mustBeJSIdentOrValue).
907 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
908                            const FormatToken *FormatTok) {
909   return FormatTok->isOneOf(
910       tok::kw_return, Keywords.kw_yield,
911       // conditionals
912       tok::kw_if, tok::kw_else,
913       // loops
914       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
915       // switch/case
916       tok::kw_switch, tok::kw_case,
917       // exceptions
918       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
919       // declaration
920       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
921       Keywords.kw_async, Keywords.kw_function,
922       // import/export
923       Keywords.kw_import, tok::kw_export);
924 }
925 
926 // readTokenWithJavaScriptASI reads the next token and terminates the current
927 // line if JavaScript Automatic Semicolon Insertion must
928 // happen between the current token and the next token.
929 //
930 // This method is conservative - it cannot cover all edge cases of JavaScript,
931 // but only aims to correctly handle certain well known cases. It *must not*
932 // return true in speculative cases.
933 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
934   FormatToken *Previous = FormatTok;
935   readToken();
936   FormatToken *Next = FormatTok;
937 
938   bool IsOnSameLine =
939       CommentsBeforeNextToken.empty()
940           ? Next->NewlinesBefore == 0
941           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
942   if (IsOnSameLine)
943     return;
944 
945   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
946   bool PreviousStartsTemplateExpr =
947       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
948   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
949     // If the line contains an '@' sign, the previous token might be an
950     // annotation, which can precede another identifier/value.
951     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
952                               [](UnwrappedLineNode &LineNode) {
953                                 return LineNode.Tok->is(tok::at);
954                               }) != Line->Tokens.end();
955     if (HasAt)
956       return;
957   }
958   if (Next->is(tok::exclaim) && PreviousMustBeValue)
959     return addUnwrappedLine();
960   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
961   bool NextEndsTemplateExpr =
962       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
963   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
964       (PreviousMustBeValue ||
965        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
966                          tok::minusminus)))
967     return addUnwrappedLine();
968   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
969       isJSDeclOrStmt(Keywords, Next))
970     return addUnwrappedLine();
971 }
972 
// Parses one structural element (a statement, declaration, label, ...) that
// starts at the current token.
//
// The first switch dispatches on the leading token: asm blocks, namespaces,
// access specifiers, control flow keywords, case/default labels, extern "..."
// blocks, export/inline namespaces and a few identifier-driven special cases
// (for-each macros, macro blocks, import, Qt signals/slots, statement and
// namespace macros). Cases that fully handle the construct return directly;
// the others 'break' into the token loop that follows.
//
// The token loop consumes tokens until a construct ends the element (for
// example ';', '}', a block or a macro-like line), in which case it returns,
// or until eof() holds. Most returning paths call addUnwrappedLine() first.
//
// Must not be called while the current token is '{' (see the assert).
973 void UnwrappedLineParser::parseStructuralElement() {
974   assert(!FormatTok->is(tok::l_brace));
975   if (Style.Language == FormatStyle::LK_TableGen &&
976       FormatTok->is(tok::pp_include)) { // TableGen include directive.
977     nextToken();
978     if (FormatTok->is(tok::string_literal))
979       nextToken();
980     addUnwrappedLine();
981     return;
982   }
983   switch (FormatTok->Tok.getKind()) { // Dispatch on the leading token.
984   case tok::kw_asm:
985     nextToken();
986     if (FormatTok->is(tok::l_brace)) {
987       FormatTok->Type = TT_InlineASMBrace;
988       nextToken();
989       while (FormatTok && FormatTok->isNot(tok::eof)) {
990         if (FormatTok->is(tok::r_brace)) {
991           FormatTok->Type = TT_InlineASMBrace;
992           nextToken();
993           addUnwrappedLine();
994           break;
995         }
996         FormatTok->Finalized = true; // Applies to every token in the braces.
997         nextToken();
998       }
999     }
1000     break;
1001   case tok::kw_namespace:
1002     parseNamespace();
1003     return;
1004   case tok::kw_public:
1005   case tok::kw_protected:
1006   case tok::kw_private:
1007     if (Style.Language == FormatStyle::LK_Java ||
1008         Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
1009       nextToken(); // Only consume the keyword for Java, JavaScript and C#.
1010     else
1011       parseAccessSpecifier();
1012     return;
1013   case tok::kw_if:
1014     parseIfThenElse();
1015     return;
1016   case tok::kw_for:
1017   case tok::kw_while:
1018     parseForOrWhileLoop();
1019     return;
1020   case tok::kw_do:
1021     parseDoWhile();
1022     return;
1023   case tok::kw_switch:
1024     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1025       // 'switch: string' field declaration.
1026       break;
1027     parseSwitch();
1028     return;
1029   case tok::kw_default:
1030     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1031       // 'default: string' field declaration.
1032       break;
1033     nextToken();
1034     if (FormatTok->is(tok::colon)) {
1035       parseLabel();
1036       return;
1037     }
1038     // e.g. "default void f() {}" in a Java interface.
1039     break;
1040   case tok::kw_case:
1041     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1042       // 'case: string' field declaration.
1043       break;
1044     parseCaseLabel();
1045     return;
1046   case tok::kw_try:
1047   case tok::kw___try:
1048     parseTryCatch();
1049     return;
1050   case tok::kw_extern:
1051     nextToken();
1052     if (FormatTok->Tok.is(tok::string_literal)) {
1053       nextToken();
1054       if (FormatTok->Tok.is(tok::l_brace)) {
1055         if (Style.BraceWrapping.AfterExternBlock) {
1056           addUnwrappedLine();
1057           parseBlock(/*MustBeDeclaration=*/true);
1058         } else {
1059           parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
1060         }
1061         addUnwrappedLine();
1062         return;
1063       }
1064     }
1065     break;
1066   case tok::kw_export:
1067     if (Style.Language == FormatStyle::LK_JavaScript) {
1068       parseJavaScriptEs6ImportExport();
1069       return;
1070     }
1071     if (!Style.isCpp())
1072       break;
1073     // Handle C++ "(inline|export) namespace".
1074     LLVM_FALLTHROUGH;
1075   case tok::kw_inline:
1076     nextToken();
1077     if (FormatTok->Tok.is(tok::kw_namespace)) {
1078       parseNamespace();
1079       return;
1080     }
1081     break;
1082   case tok::identifier:
1083     if (FormatTok->is(TT_ForEachMacro)) {
1084       parseForOrWhileLoop();
1085       return;
1086     }
1087     if (FormatTok->is(TT_MacroBlockBegin)) {
1088       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
1089                  /*MunchSemi=*/false);
1090       return;
1091     }
1092     if (FormatTok->is(Keywords.kw_import)) {
1093       if (Style.Language == FormatStyle::LK_JavaScript) {
1094         parseJavaScriptEs6ImportExport();
1095         return;
1096       }
1097       if (Style.Language == FormatStyle::LK_Proto) {
1098         nextToken();
1099         if (FormatTok->is(tok::kw_public))
1100           nextToken();
1101         if (!FormatTok->is(tok::string_literal))
1102           return;
1103         nextToken();
1104         if (FormatTok->is(tok::semi))
1105           nextToken();
1106         addUnwrappedLine();
1107         return;
1108       }
1109     }
1110     if (Style.isCpp() &&
1111         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1112                            Keywords.kw_slots, Keywords.kw_qslots)) {
1113       nextToken();
1114       if (FormatTok->is(tok::colon)) {
1115         nextToken();
1116         addUnwrappedLine();
1117         return;
1118       }
1119     }
1120     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1121       parseStatementMacro();
1122       return;
1123     }
1124     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1125       parseNamespace();
1126       return;
1127     }
1128     // In all other cases, parse the declaration.
1129     break;
1130   default:
1131     break;
1132   }
1133   do { // Token loop for whatever the switch above did not finish.
1134     const FormatToken *Previous = FormatTok->Previous;
1135     switch (FormatTok->Tok.getKind()) {
1136     case tok::at:
1137       nextToken();
1138       if (FormatTok->Tok.is(tok::l_brace)) {
1139         nextToken();
1140         parseBracedList();
1141         break;
1142       } else if (Style.Language == FormatStyle::LK_Java &&
1143                  FormatTok->is(Keywords.kw_interface)) {
1144         nextToken();
1145         break;
1146       }
1147       switch (FormatTok->Tok.getObjCKeywordID()) {
1148       case tok::objc_public:
1149       case tok::objc_protected:
1150       case tok::objc_package:
1151       case tok::objc_private:
1152         return parseAccessSpecifier();
1153       case tok::objc_interface:
1154       case tok::objc_implementation:
1155         return parseObjCInterfaceOrImplementation();
1156       case tok::objc_protocol:
1157         if (parseObjCProtocol())
1158           return;
1159         break;
1160       case tok::objc_end:
1161         return; // Handled by the caller.
1162       case tok::objc_optional:
1163       case tok::objc_required:
1164         nextToken();
1165         addUnwrappedLine();
1166         return;
1167       case tok::objc_autoreleasepool:
1168         nextToken();
1169         if (FormatTok->Tok.is(tok::l_brace)) {
1170           if (Style.BraceWrapping.AfterControlStatement)
1171             addUnwrappedLine();
1172           parseBlock(/*MustBeDeclaration=*/false);
1173         }
1174         addUnwrappedLine();
1175         return;
1176       case tok::objc_synchronized:
1177         nextToken();
1178         if (FormatTok->Tok.is(tok::l_paren))
1179           // Skip synchronization object
1180           parseParens();
1181         if (FormatTok->Tok.is(tok::l_brace)) {
1182           if (Style.BraceWrapping.AfterControlStatement)
1183             addUnwrappedLine();
1184           parseBlock(/*MustBeDeclaration=*/false);
1185         }
1186         addUnwrappedLine();
1187         return;
1188       case tok::objc_try:
1189         // This branch isn't strictly necessary (the kw_try case below would
1190         // do this too after the tok::at is parsed above).  But be explicit.
1191         parseTryCatch();
1192         return;
1193       default:
1194         break;
1195       }
1196       break;
1197     case tok::kw_enum:
1198       // Ignore if this is part of "template <enum ...".
1199       if (Previous && Previous->is(tok::less)) {
1200         nextToken();
1201         break;
1202       }
1203 
1204       // parseEnum falls through and does not yet add an unwrapped line as an
1205       // enum definition can start a structural element.
1206       if (!parseEnum())
1207         break;
1208       // This only applies for C++.
1209       if (!Style.isCpp()) {
1210         addUnwrappedLine();
1211         return;
1212       }
1213       break;
1214     case tok::kw_typedef:
1215       nextToken();
1216       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1217                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1218                              Keywords.kw_CF_CLOSED_ENUM,
1219                              Keywords.kw_NS_CLOSED_ENUM))
1220         parseEnum();
1221       break;
1222     case tok::kw_struct:
1223     case tok::kw_union:
1224     case tok::kw_class:
1225       // parseRecord falls through and does not yet add an unwrapped line as a
1226       // record declaration or definition can start a structural element.
1227       parseRecord();
1228       // This does not apply for Java, JavaScript and C#.
1229       if (Style.Language == FormatStyle::LK_Java ||
1230           Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
1231         if (FormatTok->is(tok::semi))
1232           nextToken();
1233         addUnwrappedLine();
1234         return;
1235       }
1236       break;
1237     case tok::period:
1238       nextToken();
1239       // In Java, classes have an implicit static member "class".
1240       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1241           FormatTok->is(tok::kw_class))
1242         nextToken();
1243       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1244           FormatTok->Tok.getIdentifierInfo())
1245         // JavaScript only has pseudo keywords, all keywords are allowed to
1246         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1247         nextToken();
1248       break;
1249     case tok::semi:
1250       nextToken();
1251       addUnwrappedLine();
1252       return;
1253     case tok::r_brace:
1254       addUnwrappedLine(); // The '}' itself is not consumed here.
1255       return;
1256     case tok::l_paren:
1257       parseParens();
1258       break;
1259     case tok::kw_operator:
1260       nextToken();
1261       if (FormatTok->isBinaryOperator())
1262         nextToken();
1263       break;
1264     case tok::caret:
1265       nextToken();
1266       if (FormatTok->Tok.isAnyIdentifier() ||
1267           FormatTok->isSimpleTypeSpecifier())
1268         nextToken();
1269       if (FormatTok->is(tok::l_paren))
1270         parseParens();
1271       if (FormatTok->is(tok::l_brace))
1272         parseChildBlock();
1273       break;
1274     case tok::l_brace:
1275       if (!tryToParseBracedList()) {
1276         // A block outside of parentheses must be the last part of a
1277         // structural element.
1278         // FIXME: Figure out cases where this is not true, and add projections
1279         // for them (the one we know is missing are lambdas).
1280         if (Style.BraceWrapping.AfterFunction)
1281           addUnwrappedLine();
1282         FormatTok->Type = TT_FunctionLBrace;
1283         parseBlock(/*MustBeDeclaration=*/false);
1284         addUnwrappedLine();
1285         return;
1286       }
1287       // Otherwise this was a braced init list, and the structural
1288       // element continues.
1289       break;
1290     case tok::kw_try:
1291       // We arrive here when parsing function-try blocks.
1292       if (Style.BraceWrapping.AfterFunction)
1293         addUnwrappedLine();
1294       parseTryCatch();
1295       return;
1296     case tok::identifier: {
1297       if (FormatTok->is(TT_MacroBlockEnd)) {
1298         addUnwrappedLine();
1299         return;
1300       }
1301 
1302       // Function declarations (as opposed to function expressions) are parsed
1303       // on their own unwrapped line by continuing this loop. Function
1304       // expressions (functions that are not on their own line) must not create
1305       // a new unwrapped line, so they are special cased below.
1306       size_t TokenCount = Line->Tokens.size();
1307       if (Style.Language == FormatStyle::LK_JavaScript &&
1308           FormatTok->is(Keywords.kw_function) &&
1309           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1310                                                      Keywords.kw_async)))) {
1311         tryToParseJSFunction();
1312         break;
1313       }
1314       if ((Style.Language == FormatStyle::LK_JavaScript ||
1315            Style.Language == FormatStyle::LK_Java) &&
1316           FormatTok->is(Keywords.kw_interface)) {
1317         if (Style.Language == FormatStyle::LK_JavaScript) {
1318           // In JavaScript/TypeScript, "interface" can be used as a standalone
1319           // identifier, e.g. in `var interface = 1;`. If "interface" is
1320           // followed by another identifier, it is very likely to be an actual
1321           // interface declaration.
1322           unsigned StoredPosition = Tokens->getPosition();
1323           FormatToken *Next = Tokens->getNextToken(); // Peek one token ahead.
1324           FormatTok = Tokens->setPosition(StoredPosition); // Undo the peek.
1325           if (Next && !mustBeJSIdent(Keywords, Next)) {
1326             nextToken();
1327             break;
1328           }
1329         }
1330         parseRecord();
1331         addUnwrappedLine();
1332         return;
1333       }
1334 
1335       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1336         parseStatementMacro();
1337         return;
1338       }
1339 
1340       // See if the following token should start a new unwrapped line.
1341       StringRef Text = FormatTok->TokenText; // Used by the macro check below.
1342       nextToken();
1343 
1344       // JS doesn't have macros, and within classes colons indicate fields, not
1345       // labels.
1346       if (Style.Language == FormatStyle::LK_JavaScript)
1347         break;
1348 
1349       TokenCount = Line->Tokens.size(); // Re-read after nextToken().
1350       if (TokenCount == 1 ||
1351           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1352         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1353           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1354           parseLabel(!Style.IndentGotoLabels);
1355           return;
1356         }
1357         // Recognize function-like macro usages without trailing semicolon as
1358         // well as free-standing macros like Q_OBJECT.
1359         bool FunctionLike = FormatTok->is(tok::l_paren);
1360         if (FunctionLike)
1361           parseParens();
1362 
1363         bool FollowedByNewline =
1364             CommentsBeforeNextToken.empty()
1365                 ? FormatTok->NewlinesBefore > 0
1366                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1367 
1368         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1369             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1370           addUnwrappedLine();
1371           return;
1372         }
1373       }
1374       break;
1375     }
1376     case tok::equal:
1377       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1378       // TT_JsFatArrow. They always start an expression or a child block if
1379       // followed by a curly.
1380       if (FormatTok->is(TT_JsFatArrow)) {
1381         nextToken();
1382         if (FormatTok->is(tok::l_brace))
1383           parseChildBlock();
1384         break;
1385       }
1386 
1387       nextToken();
1388       if (FormatTok->Tok.is(tok::l_brace)) {
1389         nextToken();
1390         parseBracedList();
1391       } else if (Style.Language == FormatStyle::LK_Proto &&
1392                  FormatTok->Tok.is(tok::less)) {
1393         nextToken();
1394         parseBracedList(/*ContinueOnSemicolons=*/false,
1395                         /*ClosingBraceKind=*/tok::greater);
1396       }
1397       break;
1398     case tok::l_square:
1399       parseSquare();
1400       break;
1401     case tok::kw_new:
1402       parseNew();
1403       break;
1404     default:
1405       nextToken();
1406       break;
1407     }
1408   } while (!eof());
1409 }
1410 
1411 bool UnwrappedLineParser::tryToParseLambda() {
1412   if (!Style.isCpp()) {
1413     nextToken();
1414     return false;
1415   }
1416   assert(FormatTok->is(tok::l_square));
1417   FormatToken &LSquare = *FormatTok;
1418   if (!tryToParseLambdaIntroducer())
1419     return false;
1420 
1421   bool SeenArrow = false;
1422 
1423   while (FormatTok->isNot(tok::l_brace)) {
1424     if (FormatTok->isSimpleTypeSpecifier()) {
1425       nextToken();
1426       continue;
1427     }
1428     switch (FormatTok->Tok.getKind()) {
1429     case tok::l_brace:
1430       break;
1431     case tok::l_paren:
1432       parseParens();
1433       break;
1434     case tok::amp:
1435     case tok::star:
1436     case tok::kw_const:
1437     case tok::comma:
1438     case tok::less:
1439     case tok::greater:
1440     case tok::identifier:
1441     case tok::numeric_constant:
1442     case tok::coloncolon:
1443     case tok::kw_class:
1444     case tok::kw_mutable:
1445     case tok::kw_noexcept:
1446     case tok::kw_template:
1447     case tok::kw_typename:
1448       nextToken();
1449       break;
1450     // Specialization of a template with an integer parameter can contain
1451     // arithmetic, logical, comparison and ternary operators.
1452     //
1453     // FIXME: This also accepts sequences of operators that are not in the scope
1454     // of a template argument list.
1455     //
1456     // In a C++ lambda a template type can only occur after an arrow. We use
1457     // this as an heuristic to distinguish between Objective-C expressions
1458     // followed by an `a->b` expression, such as:
1459     // ([obj func:arg] + a->b)
1460     // Otherwise the code below would parse as a lambda.
1461     //
1462     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1463     // explicit template lists: []<bool b = true && false>(U &&u){}
1464     case tok::plus:
1465     case tok::minus:
1466     case tok::exclaim:
1467     case tok::tilde:
1468     case tok::slash:
1469     case tok::percent:
1470     case tok::lessless:
1471     case tok::pipe:
1472     case tok::pipepipe:
1473     case tok::ampamp:
1474     case tok::caret:
1475     case tok::equalequal:
1476     case tok::exclaimequal:
1477     case tok::greaterequal:
1478     case tok::lessequal:
1479     case tok::question:
1480     case tok::colon:
1481     case tok::kw_true:
1482     case tok::kw_false:
1483       if (SeenArrow) {
1484         nextToken();
1485         break;
1486       }
1487       return true;
1488     case tok::arrow:
1489       // This might or might not actually be a lambda arrow (this could be an
1490       // ObjC method invocation followed by a dereferencing arrow). We might
1491       // reset this back to TT_Unknown in TokenAnnotator.
1492       FormatTok->Type = TT_LambdaArrow;
1493       SeenArrow = true;
1494       nextToken();
1495       break;
1496     default:
1497       return true;
1498     }
1499   }
1500   FormatTok->Type = TT_LambdaLBrace;
1501   LSquare.Type = TT_LambdaLSquare;
1502   parseChildBlock();
1503   return true;
1504 }
1505 
1506 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1507   const FormatToken *Previous = FormatTok->Previous;
1508   if (Previous &&
1509       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1510                          tok::kw_delete, tok::l_square) ||
1511        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1512        Previous->isSimpleTypeSpecifier())) {
1513     nextToken();
1514     return false;
1515   }
1516   nextToken();
1517   if (FormatTok->is(tok::l_square)) {
1518     return false;
1519   }
1520   parseSquare(/*LambdaIntroducer=*/true);
1521   return true;
1522 }
1523 
1524 void UnwrappedLineParser::tryToParseJSFunction() {
1525   assert(FormatTok->is(Keywords.kw_function) ||
1526          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1527   if (FormatTok->is(Keywords.kw_async))
1528     nextToken();
1529   // Consume "function".
1530   nextToken();
1531 
1532   // Consume * (generator function). Treat it like C++'s overloaded operators.
1533   if (FormatTok->is(tok::star)) {
1534     FormatTok->Type = TT_OverloadedOperator;
1535     nextToken();
1536   }
1537 
1538   // Consume function name.
1539   if (FormatTok->is(tok::identifier))
1540     nextToken();
1541 
1542   if (FormatTok->isNot(tok::l_paren))
1543     return;
1544 
1545   // Parse formal parameter list.
1546   parseParens();
1547 
1548   if (FormatTok->is(tok::colon)) {
1549     // Parse a type definition.
1550     nextToken();
1551 
1552     // Eat the type declaration. For braced inline object types, balance braces,
1553     // otherwise just parse until finding an l_brace for the function body.
1554     if (FormatTok->is(tok::l_brace))
1555       tryToParseBracedList();
1556     else
1557       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1558         nextToken();
1559   }
1560 
1561   if (FormatTok->is(tok::semi))
1562     return;
1563 
1564   parseChildBlock();
1565 }
1566 
1567 bool UnwrappedLineParser::tryToParseBracedList() {
1568   if (FormatTok->BlockKind == BK_Unknown)
1569     calculateBraceTypes();
1570   assert(FormatTok->BlockKind != BK_Unknown);
1571   if (FormatTok->BlockKind == BK_Block)
1572     return false;
1573   nextToken();
1574   parseBracedList();
1575   return true;
1576 }
1577 
1578 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1579                                           tok::TokenKind ClosingBraceKind) {
1580   bool HasError = false;
1581 
1582   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1583   // replace this by using parseAssigmentExpression() inside.
1584   do {
1585     if (Style.Language == FormatStyle::LK_JavaScript) {
1586       if (FormatTok->is(Keywords.kw_function) ||
1587           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1588         tryToParseJSFunction();
1589         continue;
1590       }
1591       if (FormatTok->is(TT_JsFatArrow)) {
1592         nextToken();
1593         // Fat arrows can be followed by simple expressions or by child blocks
1594         // in curly braces.
1595         if (FormatTok->is(tok::l_brace)) {
1596           parseChildBlock();
1597           continue;
1598         }
1599       }
1600       if (FormatTok->is(tok::l_brace)) {
1601         // Could be a method inside of a braced list `{a() { return 1; }}`.
1602         if (tryToParseBracedList())
1603           continue;
1604         parseChildBlock();
1605       }
1606     }
1607     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1608       nextToken();
1609       return !HasError;
1610     }
1611     switch (FormatTok->Tok.getKind()) {
1612     case tok::caret:
1613       nextToken();
1614       if (FormatTok->is(tok::l_brace)) {
1615         parseChildBlock();
1616       }
1617       break;
1618     case tok::l_square:
1619       tryToParseLambda();
1620       break;
1621     case tok::l_paren:
1622       parseParens();
1623       // JavaScript can just have free standing methods and getters/setters in
1624       // object literals. Detect them by a "{" following ")".
1625       if (Style.Language == FormatStyle::LK_JavaScript) {
1626         if (FormatTok->is(tok::l_brace))
1627           parseChildBlock();
1628         break;
1629       }
1630       break;
1631     case tok::l_brace:
1632       // Assume there are no blocks inside a braced init list apart
1633       // from the ones we explicitly parse out (like lambdas).
1634       FormatTok->BlockKind = BK_BracedInit;
1635       nextToken();
1636       parseBracedList();
1637       break;
1638     case tok::less:
1639       if (Style.Language == FormatStyle::LK_Proto) {
1640         nextToken();
1641         parseBracedList(/*ContinueOnSemicolons=*/false,
1642                         /*ClosingBraceKind=*/tok::greater);
1643       } else {
1644         nextToken();
1645       }
1646       break;
1647     case tok::semi:
1648       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1649       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1650       // used for error recovery if we have otherwise determined that this is
1651       // a braced list.
1652       if (Style.Language == FormatStyle::LK_JavaScript) {
1653         nextToken();
1654         break;
1655       }
1656       HasError = true;
1657       if (!ContinueOnSemicolons)
1658         return !HasError;
1659       nextToken();
1660       break;
1661     case tok::comma:
1662       nextToken();
1663       break;
1664     default:
1665       nextToken();
1666       break;
1667     }
1668   } while (!eof());
1669   return false;
1670 }
1671 
1672 void UnwrappedLineParser::parseParens() {
1673   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1674   nextToken();
1675   do {
1676     switch (FormatTok->Tok.getKind()) {
1677     case tok::l_paren:
1678       parseParens();
1679       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1680         parseChildBlock();
1681       break;
1682     case tok::r_paren:
1683       nextToken();
1684       return;
1685     case tok::r_brace:
1686       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1687       return;
1688     case tok::l_square:
1689       tryToParseLambda();
1690       break;
1691     case tok::l_brace:
1692       if (!tryToParseBracedList())
1693         parseChildBlock();
1694       break;
1695     case tok::at:
1696       nextToken();
1697       if (FormatTok->Tok.is(tok::l_brace)) {
1698         nextToken();
1699         parseBracedList();
1700       }
1701       break;
1702     case tok::kw_class:
1703       if (Style.Language == FormatStyle::LK_JavaScript)
1704         parseRecord(/*ParseAsExpr=*/true);
1705       else
1706         nextToken();
1707       break;
1708     case tok::identifier:
1709       if (Style.Language == FormatStyle::LK_JavaScript &&
1710           (FormatTok->is(Keywords.kw_function) ||
1711            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1712         tryToParseJSFunction();
1713       else
1714         nextToken();
1715       break;
1716     default:
1717       nextToken();
1718       break;
1719     }
1720   } while (!eof());
1721 }
1722 
1723 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1724   if (!LambdaIntroducer) {
1725     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1726     if (tryToParseLambda())
1727       return;
1728   }
1729   do {
1730     switch (FormatTok->Tok.getKind()) {
1731     case tok::l_paren:
1732       parseParens();
1733       break;
1734     case tok::r_square:
1735       nextToken();
1736       return;
1737     case tok::r_brace:
1738       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1739       return;
1740     case tok::l_square:
1741       parseSquare();
1742       break;
1743     case tok::l_brace: {
1744       if (!tryToParseBracedList())
1745         parseChildBlock();
1746       break;
1747     }
1748     case tok::at:
1749       nextToken();
1750       if (FormatTok->Tok.is(tok::l_brace)) {
1751         nextToken();
1752         parseBracedList();
1753       }
1754       break;
1755     default:
1756       nextToken();
1757       break;
1758     }
1759   } while (!eof());
1760 }
1761 
1762 void UnwrappedLineParser::parseIfThenElse() {
1763   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1764   nextToken();
1765   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1766     nextToken();
1767   if (FormatTok->Tok.is(tok::l_paren))
1768     parseParens();
1769   bool NeedsUnwrappedLine = false;
1770   if (FormatTok->Tok.is(tok::l_brace)) {
1771     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1772     parseBlock(/*MustBeDeclaration=*/false);
1773     if (Style.BraceWrapping.BeforeElse)
1774       addUnwrappedLine();
1775     else
1776       NeedsUnwrappedLine = true;
1777   } else {
1778     addUnwrappedLine();
1779     ++Line->Level;
1780     parseStructuralElement();
1781     --Line->Level;
1782   }
1783   if (FormatTok->Tok.is(tok::kw_else)) {
1784     nextToken();
1785     if (FormatTok->Tok.is(tok::l_brace)) {
1786       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1787       parseBlock(/*MustBeDeclaration=*/false);
1788       addUnwrappedLine();
1789     } else if (FormatTok->Tok.is(tok::kw_if)) {
1790       parseIfThenElse();
1791     } else {
1792       addUnwrappedLine();
1793       ++Line->Level;
1794       parseStructuralElement();
1795       if (FormatTok->is(tok::eof))
1796         addUnwrappedLine();
1797       --Line->Level;
1798     }
1799   } else if (NeedsUnwrappedLine) {
1800     addUnwrappedLine();
1801   }
1802 }
1803 
1804 void UnwrappedLineParser::parseTryCatch() {
1805   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1806   nextToken();
1807   bool NeedsUnwrappedLine = false;
1808   if (FormatTok->is(tok::colon)) {
1809     // We are in a function try block, what comes is an initializer list.
1810     nextToken();
1811     while (FormatTok->is(tok::identifier)) {
1812       nextToken();
1813       if (FormatTok->is(tok::l_paren))
1814         parseParens();
1815       if (FormatTok->is(tok::comma))
1816         nextToken();
1817     }
1818   }
1819   // Parse try with resource.
1820   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1821     parseParens();
1822   }
1823   if (FormatTok->is(tok::l_brace)) {
1824     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1825     parseBlock(/*MustBeDeclaration=*/false);
1826     if (Style.BraceWrapping.BeforeCatch) {
1827       addUnwrappedLine();
1828     } else {
1829       NeedsUnwrappedLine = true;
1830     }
1831   } else if (!FormatTok->is(tok::kw_catch)) {
1832     // The C++ standard requires a compound-statement after a try.
1833     // If there's none, we try to assume there's a structuralElement
1834     // and try to continue.
1835     addUnwrappedLine();
1836     ++Line->Level;
1837     parseStructuralElement();
1838     --Line->Level;
1839   }
1840   while (1) {
1841     if (FormatTok->is(tok::at))
1842       nextToken();
1843     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1844                              tok::kw___finally) ||
1845           ((Style.Language == FormatStyle::LK_Java ||
1846             Style.Language == FormatStyle::LK_JavaScript) &&
1847            FormatTok->is(Keywords.kw_finally)) ||
1848           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1849            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1850       break;
1851     nextToken();
1852     while (FormatTok->isNot(tok::l_brace)) {
1853       if (FormatTok->is(tok::l_paren)) {
1854         parseParens();
1855         continue;
1856       }
1857       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1858         return;
1859       nextToken();
1860     }
1861     NeedsUnwrappedLine = false;
1862     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1863     parseBlock(/*MustBeDeclaration=*/false);
1864     if (Style.BraceWrapping.BeforeCatch)
1865       addUnwrappedLine();
1866     else
1867       NeedsUnwrappedLine = true;
1868   }
1869   if (NeedsUnwrappedLine)
1870     addUnwrappedLine();
1871 }
1872 
1873 void UnwrappedLineParser::parseNamespace() {
1874   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
1875          "'namespace' expected");
1876 
1877   const FormatToken &InitialToken = *FormatTok;
1878   nextToken();
1879   if (InitialToken.is(TT_NamespaceMacro)) {
1880     parseParens();
1881   } else {
1882     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
1883                               tok::l_square)) {
1884       if (FormatTok->is(tok::l_square))
1885         parseSquare();
1886       else
1887         nextToken();
1888     }
1889   }
1890   if (FormatTok->Tok.is(tok::l_brace)) {
1891     if (ShouldBreakBeforeBrace(Style, InitialToken))
1892       addUnwrappedLine();
1893 
1894     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1895                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1896                      DeclarationScopeStack.size() > 1);
1897     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1898     // Munch the semicolon after a namespace. This is more common than one would
1899     // think. Puttin the semicolon into its own line is very ugly.
1900     if (FormatTok->Tok.is(tok::semi))
1901       nextToken();
1902     addUnwrappedLine();
1903   }
1904   // FIXME: Add error handling.
1905 }
1906 
1907 void UnwrappedLineParser::parseNew() {
1908   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1909   nextToken();
1910   if (Style.Language != FormatStyle::LK_Java)
1911     return;
1912 
1913   // In Java, we can parse everything up to the parens, which aren't optional.
1914   do {
1915     // There should not be a ;, { or } before the new's open paren.
1916     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1917       return;
1918 
1919     // Consume the parens.
1920     if (FormatTok->is(tok::l_paren)) {
1921       parseParens();
1922 
1923       // If there is a class body of an anonymous class, consume that as child.
1924       if (FormatTok->is(tok::l_brace))
1925         parseChildBlock();
1926       return;
1927     }
1928     nextToken();
1929   } while (!eof());
1930 }
1931 
1932 void UnwrappedLineParser::parseForOrWhileLoop() {
1933   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1934          "'for', 'while' or foreach macro expected");
1935   nextToken();
1936   // JS' for await ( ...
1937   if (Style.Language == FormatStyle::LK_JavaScript &&
1938       FormatTok->is(Keywords.kw_await))
1939     nextToken();
1940   if (FormatTok->Tok.is(tok::l_paren))
1941     parseParens();
1942   if (FormatTok->Tok.is(tok::l_brace)) {
1943     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1944     parseBlock(/*MustBeDeclaration=*/false);
1945     addUnwrappedLine();
1946   } else {
1947     addUnwrappedLine();
1948     ++Line->Level;
1949     parseStructuralElement();
1950     --Line->Level;
1951   }
1952 }
1953 
1954 void UnwrappedLineParser::parseDoWhile() {
1955   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1956   nextToken();
1957   if (FormatTok->Tok.is(tok::l_brace)) {
1958     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1959     parseBlock(/*MustBeDeclaration=*/false);
1960     if (Style.BraceWrapping.IndentBraces)
1961       addUnwrappedLine();
1962   } else {
1963     addUnwrappedLine();
1964     ++Line->Level;
1965     parseStructuralElement();
1966     --Line->Level;
1967   }
1968 
1969   // FIXME: Add error handling.
1970   if (!FormatTok->Tok.is(tok::kw_while)) {
1971     addUnwrappedLine();
1972     return;
1973   }
1974 
1975   nextToken();
1976   parseStructuralElement();
1977 }
1978 
1979 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
1980   nextToken();
1981   unsigned OldLineLevel = Line->Level;
1982   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1983     --Line->Level;
1984   if (LeftAlignLabel)
1985     Line->Level = 0;
1986   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1987     CompoundStatementIndenter Indenter(this, Line->Level,
1988                                        Style.BraceWrapping.AfterCaseLabel,
1989                                        Style.BraceWrapping.IndentBraces);
1990     parseBlock(/*MustBeDeclaration=*/false);
1991     if (FormatTok->Tok.is(tok::kw_break)) {
1992       if (Style.BraceWrapping.AfterControlStatement)
1993         addUnwrappedLine();
1994       parseStructuralElement();
1995     }
1996     addUnwrappedLine();
1997   } else {
1998     if (FormatTok->is(tok::semi))
1999       nextToken();
2000     addUnwrappedLine();
2001   }
2002   Line->Level = OldLineLevel;
2003   if (FormatTok->isNot(tok::l_brace)) {
2004     parseStructuralElement();
2005     addUnwrappedLine();
2006   }
2007 }
2008 
2009 void UnwrappedLineParser::parseCaseLabel() {
2010   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2011   // FIXME: fix handling of complex expressions here.
2012   do {
2013     nextToken();
2014   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2015   parseLabel();
2016 }
2017 
2018 void UnwrappedLineParser::parseSwitch() {
2019   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2020   nextToken();
2021   if (FormatTok->Tok.is(tok::l_paren))
2022     parseParens();
2023   if (FormatTok->Tok.is(tok::l_brace)) {
2024     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2025     parseBlock(/*MustBeDeclaration=*/false);
2026     addUnwrappedLine();
2027   } else {
2028     addUnwrappedLine();
2029     ++Line->Level;
2030     parseStructuralElement();
2031     --Line->Level;
2032   }
2033 }
2034 
2035 void UnwrappedLineParser::parseAccessSpecifier() {
2036   nextToken();
2037   // Understand Qt's slots.
2038   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2039     nextToken();
2040   // Otherwise, we don't know what it is, and we'd better keep the next token.
2041   if (FormatTok->Tok.is(tok::colon))
2042     nextToken();
2043   addUnwrappedLine();
2044 }
2045 
2046 bool UnwrappedLineParser::parseEnum() {
2047   // Won't be 'enum' for NS_ENUMs.
2048   if (FormatTok->Tok.is(tok::kw_enum))
2049     nextToken();
2050 
2051   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2052   // declarations. An "enum" keyword followed by a colon would be a syntax
2053   // error and thus assume it is just an identifier.
2054   if (Style.Language == FormatStyle::LK_JavaScript &&
2055       FormatTok->isOneOf(tok::colon, tok::question))
2056     return false;
2057 
2058   // In protobuf, "enum" can be used as a field name.
2059   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2060     return false;
2061 
2062   // Eat up enum class ...
2063   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2064     nextToken();
2065 
2066   while (FormatTok->Tok.getIdentifierInfo() ||
2067          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2068                             tok::greater, tok::comma, tok::question)) {
2069     nextToken();
2070     // We can have macros or attributes in between 'enum' and the enum name.
2071     if (FormatTok->is(tok::l_paren))
2072       parseParens();
2073     if (FormatTok->is(tok::identifier)) {
2074       nextToken();
2075       // If there are two identifiers in a row, this is likely an elaborate
2076       // return type. In Java, this can be "implements", etc.
2077       if (Style.isCpp() && FormatTok->is(tok::identifier))
2078         return false;
2079     }
2080   }
2081 
2082   // Just a declaration or something is wrong.
2083   if (FormatTok->isNot(tok::l_brace))
2084     return true;
2085   FormatTok->BlockKind = BK_Block;
2086 
2087   if (Style.Language == FormatStyle::LK_Java) {
2088     // Java enums are different.
2089     parseJavaEnumBody();
2090     return true;
2091   }
2092   if (Style.Language == FormatStyle::LK_Proto) {
2093     parseBlock(/*MustBeDeclaration=*/true);
2094     return true;
2095   }
2096 
2097   // Parse enum body.
2098   nextToken();
2099   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2100   if (HasError) {
2101     if (FormatTok->is(tok::semi))
2102       nextToken();
2103     addUnwrappedLine();
2104   }
2105   return true;
2106 
2107   // There is no addUnwrappedLine() here so that we fall through to parsing a
2108   // structural element afterwards. Thus, in "enum A {} n, m;",
2109   // "} n, m;" will end up in one unwrapped line.
2110 }
2111 
2112 void UnwrappedLineParser::parseJavaEnumBody() {
2113   // Determine whether the enum is simple, i.e. does not have a semicolon or
2114   // constants with class bodies. Simple enums can be formatted like braced
2115   // lists, contracted to a single line, etc.
2116   unsigned StoredPosition = Tokens->getPosition();
2117   bool IsSimple = true;
2118   FormatToken *Tok = Tokens->getNextToken();
2119   while (Tok) {
2120     if (Tok->is(tok::r_brace))
2121       break;
2122     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2123       IsSimple = false;
2124       break;
2125     }
2126     // FIXME: This will also mark enums with braces in the arguments to enum
2127     // constants as "not simple". This is probably fine in practice, though.
2128     Tok = Tokens->getNextToken();
2129   }
2130   FormatTok = Tokens->setPosition(StoredPosition);
2131 
2132   if (IsSimple) {
2133     nextToken();
2134     parseBracedList();
2135     addUnwrappedLine();
2136     return;
2137   }
2138 
2139   // Parse the body of a more complex enum.
2140   // First add a line for everything up to the "{".
2141   nextToken();
2142   addUnwrappedLine();
2143   ++Line->Level;
2144 
2145   // Parse the enum constants.
2146   while (FormatTok) {
2147     if (FormatTok->is(tok::l_brace)) {
2148       // Parse the constant's class body.
2149       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2150                  /*MunchSemi=*/false);
2151     } else if (FormatTok->is(tok::l_paren)) {
2152       parseParens();
2153     } else if (FormatTok->is(tok::comma)) {
2154       nextToken();
2155       addUnwrappedLine();
2156     } else if (FormatTok->is(tok::semi)) {
2157       nextToken();
2158       addUnwrappedLine();
2159       break;
2160     } else if (FormatTok->is(tok::r_brace)) {
2161       addUnwrappedLine();
2162       break;
2163     } else {
2164       nextToken();
2165     }
2166   }
2167 
2168   // Parse the class body after the enum's ";" if any.
2169   parseLevel(/*HasOpeningBrace=*/true);
2170   nextToken();
2171   --Line->Level;
2172   addUnwrappedLine();
2173 }
2174 
2175 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2176   const FormatToken &InitialToken = *FormatTok;
2177   nextToken();
2178 
2179   // The actual identifier can be a nested name specifier, and in macros
2180   // it is often token-pasted.
2181   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2182                             tok::kw___attribute, tok::kw___declspec,
2183                             tok::kw_alignas) ||
2184          ((Style.Language == FormatStyle::LK_Java ||
2185            Style.Language == FormatStyle::LK_JavaScript) &&
2186           FormatTok->isOneOf(tok::period, tok::comma))) {
2187     if (Style.Language == FormatStyle::LK_JavaScript &&
2188         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2189       // JavaScript/TypeScript supports inline object types in
2190       // extends/implements positions:
2191       //     class Foo implements {bar: number} { }
2192       nextToken();
2193       if (FormatTok->is(tok::l_brace)) {
2194         tryToParseBracedList();
2195         continue;
2196       }
2197     }
2198     bool IsNonMacroIdentifier =
2199         FormatTok->is(tok::identifier) &&
2200         FormatTok->TokenText != FormatTok->TokenText.upper();
2201     nextToken();
2202     // We can have macros or attributes in between 'class' and the class name.
2203     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2204       parseParens();
2205   }
2206 
2207   // Note that parsing away template declarations here leads to incorrectly
2208   // accepting function declarations as record declarations.
2209   // In general, we cannot solve this problem. Consider:
2210   // class A<int> B() {}
2211   // which can be a function definition or a class definition when B() is a
2212   // macro. If we find enough real-world cases where this is a problem, we
2213   // can parse for the 'template' keyword in the beginning of the statement,
2214   // and thus rule out the record production in case there is no template
2215   // (this would still leave us with an ambiguity between template function
2216   // and class declarations).
2217   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2218     while (!eof()) {
2219       if (FormatTok->is(tok::l_brace)) {
2220         calculateBraceTypes(/*ExpectClassBody=*/true);
2221         if (!tryToParseBracedList())
2222           break;
2223       }
2224       if (FormatTok->Tok.is(tok::semi))
2225         return;
2226       nextToken();
2227     }
2228   }
2229   if (FormatTok->Tok.is(tok::l_brace)) {
2230     if (ParseAsExpr) {
2231       parseChildBlock();
2232     } else {
2233       if (ShouldBreakBeforeBrace(Style, InitialToken))
2234         addUnwrappedLine();
2235 
2236       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2237                  /*MunchSemi=*/false);
2238     }
2239   }
2240   // There is no addUnwrappedLine() here so that we fall through to parsing a
2241   // structural element afterwards. Thus, in "class A {} n, m;",
2242   // "} n, m;" will end up in one unwrapped line.
2243 }
2244 
2245 void UnwrappedLineParser::parseObjCMethod() {
2246   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2247          "'(' or identifier expected.");
2248   do {
2249     if (FormatTok->Tok.is(tok::semi)) {
2250       nextToken();
2251       addUnwrappedLine();
2252       return;
2253     } else if (FormatTok->Tok.is(tok::l_brace)) {
2254       if (Style.BraceWrapping.AfterFunction)
2255         addUnwrappedLine();
2256       parseBlock(/*MustBeDeclaration=*/false);
2257       addUnwrappedLine();
2258       return;
2259     } else {
2260       nextToken();
2261     }
2262   } while (!eof());
2263 }
2264 
2265 void UnwrappedLineParser::parseObjCProtocolList() {
2266   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2267   do {
2268     nextToken();
2269     // Early exit in case someone forgot a close angle.
2270     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2271         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2272       return;
2273   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2274   nextToken(); // Skip '>'.
2275 }
2276 
2277 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2278   do {
2279     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2280       nextToken();
2281       addUnwrappedLine();
2282       break;
2283     }
2284     if (FormatTok->is(tok::l_brace)) {
2285       parseBlock(/*MustBeDeclaration=*/false);
2286       // In ObjC interfaces, nothing should be following the "}".
2287       addUnwrappedLine();
2288     } else if (FormatTok->is(tok::r_brace)) {
2289       // Ignore stray "}". parseStructuralElement doesn't consume them.
2290       nextToken();
2291       addUnwrappedLine();
2292     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2293       nextToken();
2294       parseObjCMethod();
2295     } else {
2296       parseStructuralElement();
2297     }
2298   } while (!eof());
2299 }
2300 
2301 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2302   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2303          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2304   nextToken();
2305   nextToken(); // interface name
2306 
2307   // @interface can be followed by a lightweight generic
2308   // specialization list, then either a base class or a category.
2309   if (FormatTok->Tok.is(tok::less)) {
2310     // Unlike protocol lists, generic parameterizations support
2311     // nested angles:
2312     //
2313     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2314     //     NSObject <NSCopying, NSSecureCoding>
2315     //
2316     // so we need to count how many open angles we have left.
2317     unsigned NumOpenAngles = 1;
2318     do {
2319       nextToken();
2320       // Early exit in case someone forgot a close angle.
2321       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2322           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2323         break;
2324       if (FormatTok->Tok.is(tok::less))
2325         ++NumOpenAngles;
2326       else if (FormatTok->Tok.is(tok::greater)) {
2327         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2328         --NumOpenAngles;
2329       }
2330     } while (!eof() && NumOpenAngles != 0);
2331     nextToken(); // Skip '>'.
2332   }
2333   if (FormatTok->Tok.is(tok::colon)) {
2334     nextToken();
2335     nextToken(); // base class name
2336   } else if (FormatTok->Tok.is(tok::l_paren))
2337     // Skip category, if present.
2338     parseParens();
2339 
2340   if (FormatTok->Tok.is(tok::less))
2341     parseObjCProtocolList();
2342 
2343   if (FormatTok->Tok.is(tok::l_brace)) {
2344     if (Style.BraceWrapping.AfterObjCDeclaration)
2345       addUnwrappedLine();
2346     parseBlock(/*MustBeDeclaration=*/true);
2347   }
2348 
2349   // With instance variables, this puts '}' on its own line.  Without instance
2350   // variables, this ends the @interface line.
2351   addUnwrappedLine();
2352 
2353   parseObjCUntilAtEnd();
2354 }
2355 
2356 // Returns true for the declaration/definition form of @protocol,
2357 // false for the expression form.
2358 bool UnwrappedLineParser::parseObjCProtocol() {
2359   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2360   nextToken();
2361 
2362   if (FormatTok->is(tok::l_paren))
2363     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2364     return false;
2365 
2366   // The definition/declaration form,
2367   // @protocol Foo
2368   // - (int)someMethod;
2369   // @end
2370 
2371   nextToken(); // protocol name
2372 
2373   if (FormatTok->Tok.is(tok::less))
2374     parseObjCProtocolList();
2375 
2376   // Check for protocol declaration.
2377   if (FormatTok->Tok.is(tok::semi)) {
2378     nextToken();
2379     addUnwrappedLine();
2380     return true;
2381   }
2382 
2383   addUnwrappedLine();
2384   parseObjCUntilAtEnd();
2385   return true;
2386 }
2387 
2388 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2389   bool IsImport = FormatTok->is(Keywords.kw_import);
2390   assert(IsImport || FormatTok->is(tok::kw_export));
2391   nextToken();
2392 
2393   // Consume the "default" in "export default class/function".
2394   if (FormatTok->is(tok::kw_default))
2395     nextToken();
2396 
2397   // Consume "async function", "function" and "default function", so that these
2398   // get parsed as free-standing JS functions, i.e. do not require a trailing
2399   // semicolon.
2400   if (FormatTok->is(Keywords.kw_async))
2401     nextToken();
2402   if (FormatTok->is(Keywords.kw_function)) {
2403     nextToken();
2404     return;
2405   }
2406 
2407   // For imports, `export *`, `export {...}`, consume the rest of the line up
2408   // to the terminating `;`. For everything else, just return and continue
2409   // parsing the structural element, i.e. the declaration or expression for
2410   // `export default`.
2411   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2412       !FormatTok->isStringLiteral())
2413     return;
2414 
2415   while (!eof()) {
2416     if (FormatTok->is(tok::semi))
2417       return;
2418     if (Line->Tokens.empty()) {
2419       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2420       // import statement should terminate.
2421       return;
2422     }
2423     if (FormatTok->is(tok::l_brace)) {
2424       FormatTok->BlockKind = BK_Block;
2425       nextToken();
2426       parseBracedList();
2427     } else {
2428       nextToken();
2429     }
2430   }
2431 }
2432 
2433 void UnwrappedLineParser::parseStatementMacro() {
2434   nextToken();
2435   if (FormatTok->is(tok::l_paren))
2436     parseParens();
2437   if (FormatTok->is(tok::semi))
2438     nextToken();
2439   addUnwrappedLine();
2440 }
2441 
2442 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2443                                                  StringRef Prefix = "") {
2444   llvm::dbgs() << Prefix << "Line(" << Line.Level
2445                << ", FSC=" << Line.FirstStartColumn << ")"
2446                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2447   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2448                                                     E = Line.Tokens.end();
2449        I != E; ++I) {
2450     llvm::dbgs() << I->Tok->Tok.getName() << "["
2451                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2452                  << "] ";
2453   }
2454   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2455                                                     E = Line.Tokens.end();
2456        I != E; ++I) {
2457     const UnwrappedLineNode &Node = *I;
2458     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2459              I = Node.Children.begin(),
2460              E = Node.Children.end();
2461          I != E; ++I) {
2462       printDebugInfo(*I, "\nChild: ");
2463     }
2464   }
2465   llvm::dbgs() << "\n";
2466 }
2467 
2468 void UnwrappedLineParser::addUnwrappedLine() {
2469   if (Line->Tokens.empty())
2470     return;
2471   LLVM_DEBUG({
2472     if (CurrentLines == &Lines)
2473       printDebugInfo(*Line);
2474   });
2475   CurrentLines->push_back(std::move(*Line));
2476   Line->Tokens.clear();
2477   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2478   Line->FirstStartColumn = 0;
2479   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2480     CurrentLines->append(
2481         std::make_move_iterator(PreprocessorDirectives.begin()),
2482         std::make_move_iterator(PreprocessorDirectives.end()));
2483     PreprocessorDirectives.clear();
2484   }
2485   // Disconnect the current token from the last token on the previous line.
2486   FormatTok->Previous = nullptr;
2487 }
2488 
2489 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2490 
2491 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2492   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2493          FormatTok.NewlinesBefore > 0;
2494 }
2495 
2496 // Checks if \p FormatTok is a line comment that continues the line comment
2497 // section on \p Line.
2498 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2499                                         const UnwrappedLine &Line,
2500                                         llvm::Regex &CommentPragmasRegex) {
2501   if (Line.Tokens.empty())
2502     return false;
2503 
2504   StringRef IndentContent = FormatTok.TokenText;
2505   if (FormatTok.TokenText.startswith("//") ||
2506       FormatTok.TokenText.startswith("/*"))
2507     IndentContent = FormatTok.TokenText.substr(2);
2508   if (CommentPragmasRegex.match(IndentContent))
2509     return false;
2510 
2511   // If Line starts with a line comment, then FormatTok continues the comment
2512   // section if its original column is greater or equal to the original start
2513   // column of the line.
2514   //
2515   // Define the min column token of a line as follows: if a line ends in '{' or
2516   // contains a '{' followed by a line comment, then the min column token is
2517   // that '{'. Otherwise, the min column token of the line is the first token of
2518   // the line.
2519   //
2520   // If Line starts with a token other than a line comment, then FormatTok
2521   // continues the comment section if its original column is greater than the
2522   // original start column of the min column token of the line.
2523   //
2524   // For example, the second line comment continues the first in these cases:
2525   //
2526   // // first line
2527   // // second line
2528   //
2529   // and:
2530   //
2531   // // first line
2532   //  // second line
2533   //
2534   // and:
2535   //
2536   // int i; // first line
2537   //  // second line
2538   //
2539   // and:
2540   //
2541   // do { // first line
2542   //      // second line
2543   //   int i;
2544   // } while (true);
2545   //
2546   // and:
2547   //
2548   // enum {
2549   //   a, // first line
2550   //    // second line
2551   //   b
2552   // };
2553   //
2554   // The second line comment doesn't continue the first in these cases:
2555   //
2556   //   // first line
2557   //  // second line
2558   //
2559   // and:
2560   //
2561   // int i; // first line
2562   // // second line
2563   //
2564   // and:
2565   //
2566   // do { // first line
2567   //   // second line
2568   //   int i;
2569   // } while (true);
2570   //
2571   // and:
2572   //
2573   // enum {
2574   //   a, // first line
2575   //   // second line
2576   // };
2577   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2578 
2579   // Scan for '{//'. If found, use the column of '{' as a min column for line
2580   // comment section continuation.
2581   const FormatToken *PreviousToken = nullptr;
2582   for (const UnwrappedLineNode &Node : Line.Tokens) {
2583     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2584         isLineComment(*Node.Tok)) {
2585       MinColumnToken = PreviousToken;
2586       break;
2587     }
2588     PreviousToken = Node.Tok;
2589 
2590     // Grab the last newline preceding a token in this unwrapped line.
2591     if (Node.Tok->NewlinesBefore > 0) {
2592       MinColumnToken = Node.Tok;
2593     }
2594   }
2595   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2596     MinColumnToken = PreviousToken;
2597   }
2598 
2599   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2600                               MinColumnToken);
2601 }
2602 
2603 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2604   bool JustComments = Line->Tokens.empty();
2605   for (SmallVectorImpl<FormatToken *>::const_iterator
2606            I = CommentsBeforeNextToken.begin(),
2607            E = CommentsBeforeNextToken.end();
2608        I != E; ++I) {
2609     // Line comments that belong to the same line comment section are put on the
2610     // same line since later we might want to reflow content between them.
2611     // Additional fine-grained breaking of line comment sections is controlled
2612     // by the class BreakableLineCommentSection in case it is desirable to keep
2613     // several line comment sections in the same unwrapped line.
2614     //
2615     // FIXME: Consider putting separate line comment sections as children to the
2616     // unwrapped line instead.
2617     (*I)->ContinuesLineCommentSection =
2618         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2619     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2620       addUnwrappedLine();
2621     pushToken(*I);
2622   }
2623   if (NewlineBeforeNext && JustComments)
2624     addUnwrappedLine();
2625   CommentsBeforeNextToken.clear();
2626 }
2627 
2628 void UnwrappedLineParser::nextToken(int LevelDifference) {
2629   if (eof())
2630     return;
2631   flushComments(isOnNewLine(*FormatTok));
2632   pushToken(FormatTok);
2633   FormatToken *Previous = FormatTok;
2634   if (Style.Language != FormatStyle::LK_JavaScript)
2635     readToken(LevelDifference);
2636   else
2637     readTokenWithJavaScriptASI();
2638   FormatTok->Previous = Previous;
2639 }
2640 
2641 void UnwrappedLineParser::distributeComments(
2642     const SmallVectorImpl<FormatToken *> &Comments,
2643     const FormatToken *NextTok) {
2644   // Whether or not a line comment token continues a line is controlled by
2645   // the method continuesLineCommentSection, with the following caveat:
2646   //
2647   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2648   // that each comment line from the trail is aligned with the next token, if
2649   // the next token exists. If a trail exists, the beginning of the maximal
2650   // trail is marked as a start of a new comment section.
2651   //
2652   // For example in this code:
2653   //
2654   // int a; // line about a
2655   //   // line 1 about b
2656   //   // line 2 about b
2657   //   int b;
2658   //
2659   // the two lines about b form a maximal trail, so there are two sections, the
2660   // first one consisting of the single comment "// line about a" and the
2661   // second one consisting of the next two comments.
2662   if (Comments.empty())
2663     return;
2664   bool ShouldPushCommentsInCurrentLine = true;
2665   bool HasTrailAlignedWithNextToken = false;
2666   unsigned StartOfTrailAlignedWithNextToken = 0;
2667   if (NextTok) {
2668     // We are skipping the first element intentionally.
2669     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2670       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2671         HasTrailAlignedWithNextToken = true;
2672         StartOfTrailAlignedWithNextToken = i;
2673       }
2674     }
2675   }
2676   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2677     FormatToken *FormatTok = Comments[i];
2678     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2679       FormatTok->ContinuesLineCommentSection = false;
2680     } else {
2681       FormatTok->ContinuesLineCommentSection =
2682           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2683     }
2684     if (!FormatTok->ContinuesLineCommentSection &&
2685         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2686       ShouldPushCommentsInCurrentLine = false;
2687     }
2688     if (ShouldPushCommentsInCurrentLine) {
2689       pushToken(FormatTok);
2690     } else {
2691       CommentsBeforeNextToken.push_back(FormatTok);
2692     }
2693   }
2694 }
2695 
2696 void UnwrappedLineParser::readToken(int LevelDifference) {
2697   SmallVector<FormatToken *, 1> Comments;
2698   do {
2699     FormatTok = Tokens->getNextToken();
2700     assert(FormatTok);
2701     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2702            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2703       distributeComments(Comments, FormatTok);
2704       Comments.clear();
2705       // If there is an unfinished unwrapped line, we flush the preprocessor
2706       // directives only after that unwrapped line was finished later.
2707       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2708       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2709       assert((LevelDifference >= 0 ||
2710               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2711              "LevelDifference makes Line->Level negative");
2712       Line->Level += LevelDifference;
2713       // Comments stored before the preprocessor directive need to be output
2714       // before the preprocessor directive, at the same level as the
2715       // preprocessor directive, as we consider them to apply to the directive.
2716       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
2717           PPBranchLevel > 0)
2718         Line->Level += PPBranchLevel;
2719       flushComments(isOnNewLine(*FormatTok));
2720       parsePPDirective();
2721     }
2722     while (FormatTok->Type == TT_ConflictStart ||
2723            FormatTok->Type == TT_ConflictEnd ||
2724            FormatTok->Type == TT_ConflictAlternative) {
2725       if (FormatTok->Type == TT_ConflictStart) {
2726         conditionalCompilationStart(/*Unreachable=*/false);
2727       } else if (FormatTok->Type == TT_ConflictAlternative) {
2728         conditionalCompilationAlternative();
2729       } else if (FormatTok->Type == TT_ConflictEnd) {
2730         conditionalCompilationEnd();
2731       }
2732       FormatTok = Tokens->getNextToken();
2733       FormatTok->MustBreakBefore = true;
2734     }
2735 
2736     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2737         !Line->InPPDirective) {
2738       continue;
2739     }
2740 
2741     if (!FormatTok->Tok.is(tok::comment)) {
2742       distributeComments(Comments, FormatTok);
2743       Comments.clear();
2744       return;
2745     }
2746 
2747     Comments.push_back(FormatTok);
2748   } while (!eof());
2749 
2750   distributeComments(Comments, nullptr);
2751   Comments.clear();
2752 }
2753 
2754 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2755   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2756   if (MustBreakBeforeNextToken) {
2757     Line->Tokens.back().Tok->MustBreakBefore = true;
2758     MustBreakBeforeNextToken = false;
2759   }
2760 }
2761 
2762 } // end namespace format
2763 } // end namespace clang
2764