1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <algorithm>
21 
22 #define DEBUG_TYPE "format-parser"
23 
24 namespace clang {
25 namespace format {
26 
27 class FormatTokenSource {
28 public:
29   virtual ~FormatTokenSource() {}
30   virtual FormatToken *getNextToken() = 0;
31 
32   virtual unsigned getPosition() = 0;
33   virtual FormatToken *setPosition(unsigned Position) = 0;
34 };
35 
36 namespace {
37 
38 class ScopedDeclarationState {
39 public:
40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
41                          bool MustBeDeclaration)
42       : Line(Line), Stack(Stack) {
43     Line.MustBeDeclaration = MustBeDeclaration;
44     Stack.push_back(MustBeDeclaration);
45   }
46   ~ScopedDeclarationState() {
47     Stack.pop_back();
48     if (!Stack.empty())
49       Line.MustBeDeclaration = Stack.back();
50     else
51       Line.MustBeDeclaration = true;
52   }
53 
54 private:
55   UnwrappedLine &Line;
56   std::vector<bool> &Stack;
57 };
58 
59 static bool isLineComment(const FormatToken &FormatTok) {
60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
// Token source that is active while a single preprocessor directive is parsed.
//
// On construction it installs itself in place of the caller's token source,
// resets the line level to 0 and marks the line as being inside a
// preprocessor directive. On destruction it reinstalls the previous token
// source, restores the saved line level, clears the directive flag and writes
// the last token it fetched to \c ResetToken.
//
// Tokens are forwarded from the previous source until eof() becomes true, at
// which point a synthetic eof token is returned instead.
class ScopedMacroState : public FormatTokenSource {
public:
  ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
                   FormatToken *&ResetToken)
      : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
        PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
        Token(nullptr), PreviousToken(nullptr) {
    // Set up the synthetic eof token handed out by getNextToken().
    FakeEOF.Tok.startToken();
    FakeEOF.Tok.setKind(tok::eof);
    TokenSource = this;
    Line.Level = 0;
    Line.InPPDirective = true;
  }

  ~ScopedMacroState() override {
    TokenSource = PreviousTokenSource;
    // Hand the last token read from the underlying source back to the caller.
    ResetToken = Token;
    Line.InPPDirective = false;
    Line.Level = PreviousLineLevel;
  }

  // Fetches the next token from the previous source. Returns the synthetic
  // eof token instead when the fetched token satisfies eof().
  FormatToken *getNextToken() override {
    // The \c UnwrappedLineParser guards against this by never calling
    // \c getNextToken() after it has encountered the first eof token.
    assert(!eof());
    PreviousToken = Token;
    Token = PreviousTokenSource->getNextToken();
    if (eof())
      return &FakeEOF;
    return Token;
  }

  // Positions are those of the previous token source.
  unsigned getPosition() override { return PreviousTokenSource->getPosition(); }

  // Repositions the previous source and forgets the previously seen token.
  FormatToken *setPosition(unsigned Position) override {
    PreviousToken = nullptr;
    Token = PreviousTokenSource->setPosition(Position);
    return Token;
  }

private:
  // True once the current token has an unescaped newline before it and does
  // not continue a line comment started by the previous token.
  bool eof() {
    return Token && Token->HasUnescapedNewline &&
           !continuesLineComment(*Token, PreviousToken,
                                 /*MinColumnToken=*/PreviousToken);
  }

  FormatToken FakeEOF;
  UnwrappedLine &Line;
  FormatTokenSource *&TokenSource;
  FormatToken *&ResetToken;
  unsigned PreviousLineLevel;
  FormatTokenSource *PreviousTokenSource;

  // Most recent token fetched from the previous source, and the one before it.
  FormatToken *Token;
  FormatToken *PreviousToken;
};
135 
136 } // end anonymous namespace
137 
// Scope guard that temporarily replaces the parser's current line and the
// container that finished lines are added to.
//
// The constructor stashes \c Parser.Line and installs a fresh line that
// inherits the stashed line's Level and InPPDirective. The destructor emits
// any tokens still pending on the temporary line and restores both the
// stashed line and the original line container.
class ScopedLineState {
public:
  ScopedLineState(UnwrappedLineParser &Parser,
                  bool SwitchToPreprocessorLines = false)
      : Parser(Parser), OriginalLines(Parser.CurrentLines) {
    // Redirect new lines either to the preprocessor directive list or to the
    // children of the last token of the current line (if there is one).
    if (SwitchToPreprocessorLines)
      Parser.CurrentLines = &Parser.PreprocessorDirectives;
    else if (!Parser.Line->Tokens.empty())
      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
    PreBlockLine = std::move(Parser.Line);
    Parser.Line = llvm::make_unique<UnwrappedLine>();
    Parser.Line->Level = PreBlockLine->Level;
    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
  }

  ~ScopedLineState() {
    // Flush whatever is left on the temporary line before discarding it.
    if (!Parser.Line->Tokens.empty()) {
      Parser.addUnwrappedLine();
    }
    assert(Parser.Line->Tokens.empty());
    Parser.Line = std::move(PreBlockLine);
    // After collecting preprocessor lines, request a break before the next
    // token.
    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
      Parser.MustBreakBeforeNextToken = true;
    Parser.CurrentLines = OriginalLines;
  }

private:
  UnwrappedLineParser &Parser;

  // The line that was current when this guard was created.
  std::unique_ptr<UnwrappedLine> PreBlockLine;
  // The line container that was current when this guard was created.
  SmallVectorImpl<UnwrappedLine> *OriginalLines;
};
170 
171 class CompoundStatementIndenter {
172 public:
173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
174                             const FormatStyle &Style, unsigned &LineLevel)
175       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
176     if (Style.BraceWrapping.AfterControlStatement)
177       Parser->addUnwrappedLine();
178     if (Style.BraceWrapping.IndentBraces)
179       ++LineLevel;
180   }
181   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
182 
183 private:
184   unsigned &LineLevel;
185   unsigned OldLineLevel;
186 };
187 
188 namespace {
189 
190 class IndexedTokenSource : public FormatTokenSource {
191 public:
192   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
193       : Tokens(Tokens), Position(-1) {}
194 
195   FormatToken *getNextToken() override {
196     ++Position;
197     return Tokens[Position];
198   }
199 
200   unsigned getPosition() override {
201     assert(Position >= 0);
202     return Position;
203   }
204 
205   FormatToken *setPosition(unsigned P) override {
206     Position = P;
207     return Tokens[Position];
208   }
209 
210   void reset() { Position = -1; }
211 
212 private:
213   ArrayRef<FormatToken *> Tokens;
214   int Position;
215 };
216 
217 } // end anonymous namespace
218 
/// Creates a parser over \p Tokens that reports to \p Callback.
///
/// The parser starts with an empty current line and collects finished lines
/// in \c Lines. No token source is attached yet (\c Tokens member is null);
/// parse() installs one. Include guard detection starts out rejected when
/// \c Style.IndentPPDirectives is \c PPDIS_None and initialized otherwise.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
232 
233 void UnwrappedLineParser::reset() {
234   PPBranchLevel = -1;
235   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
236                      ? IG_Rejected
237                      : IG_Inited;
238   IncludeGuardToken = nullptr;
239   Line.reset(new UnwrappedLine);
240   CommentsBeforeNextToken.clear();
241   FormatTok = nullptr;
242   MustBreakBeforeNextToken = false;
243   PreprocessorDirectives.clear();
244   CurrentLines = &Lines;
245   DeclarationScopeStack.clear();
246   PPStack.clear();
247   Line->FirstStartColumn = FirstStartColumn;
248 }
249 
250 void UnwrappedLineParser::parse() {
251   IndexedTokenSource TokenSource(AllTokens);
252   Line->FirstStartColumn = FirstStartColumn;
253   do {
254     LLVM_DEBUG(llvm::dbgs() << "----\n");
255     reset();
256     Tokens = &TokenSource;
257     TokenSource.reset();
258 
259     readToken();
260     parseFile();
261 
262     // If we found an include guard then all preprocessor directives (other than
263     // the guard) are over-indented by one.
264     if (IncludeGuard == IG_Found)
265       for (auto &Line : Lines)
266         if (Line.InPPDirective && Line.Level > 0)
267           --Line.Level;
268 
269     // Create line with eof token.
270     pushToken(FormatTok);
271     addUnwrappedLine();
272 
273     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
274                                                   E = Lines.end();
275          I != E; ++I) {
276       Callback.consumeUnwrappedLine(*I);
277     }
278     Callback.finishRun();
279     Lines.clear();
280     while (!PPLevelBranchIndex.empty() &&
281            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
282       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
283       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
284     }
285     if (!PPLevelBranchIndex.empty()) {
286       ++PPLevelBranchIndex.back();
287       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
288       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
289     }
290   } while (!PPLevelBranchIndex.empty());
291 }
292 
293 void UnwrappedLineParser::parseFile() {
294   // The top-level context in a file always has declarations, except for pre-
295   // processor directives and JavaScript files.
296   bool MustBeDeclaration =
297       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
298   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
299                                           MustBeDeclaration);
300   if (Style.Language == FormatStyle::LK_TextProto)
301     parseBracedList();
302   else
303     parseLevel(/*HasOpeningBrace=*/false);
304   // Make sure to format the remaining tokens.
305   //
306   // LK_TextProto is special since its top-level is parsed as the body of a
307   // braced list, which does not necessarily have natural line separators such
308   // as a semicolon. Comments after the last entry that have been determined to
309   // not belong to that line, as in:
310   //   key: value
311   //   // endfile comment
312   // do not have a chance to be put on a line of their own until this point.
313   // Here we add this newline before end-of-file comments.
314   if (Style.Language == FormatStyle::LK_TextProto &&
315       !CommentsBeforeNextToken.empty())
316     addUnwrappedLine();
317   flushComments(true);
318   addUnwrappedLine();
319 }
320 
/// Parses a sequence of structural elements until eof or, if
/// \p HasOpeningBrace is set, until the closing brace of the current level.
///
/// When returning because of a closing brace, that brace is left unconsumed
/// for the caller. Macro block begin/end tokens are treated like '{' and '}'.
void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  // Case labels raise the line level at most once per level.
  bool SwitchLabelEncountered = false;
  do {
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->Type == TT_MacroBlockBegin) {
      kind = tok::l_brace;
    } else if (FormatTok->Type == TT_MacroBlockEnd) {
      kind = tok::r_brace;
    }

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      // Note: 'continue' re-evaluates the loop condition of the enclosing
      // do/while.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace)
        return;
      // Unmatched closing brace: consume it and carry on.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Peek past any comments to see what follows 'default', then rewind the
      // token source to where it was.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
      } while (Next && Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next && Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Indent on the first label only, if the style asks for indented case
      // labels or we are at level 1 inside a preprocessor directive.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    default:
      parseStructuralElement();
      break;
    }
  } while (!eof());
}
385 
/// Looks ahead from the current '{' and assigns a BlockKind (block or braced
/// init list) to it and to the braces nested inside it.
///
/// The scan stops at eof or once the brace that started it has been closed.
/// The token source is rewound to its starting position before returning.
/// \p ExpectClassBody changes how a ';' directly after the closing brace of
/// the outermost brace is interpreted.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->Tok.is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    unsigned ReadTokens = 0;
    do {
      NextTok = Tokens->getNextToken();
      ++ReadTokens;
    } while (NextTok->is(tok::comment));

    // Tok is the token being classified; NextTok is the lookahead after it.
    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->BlockKind = BK_BracedInit;
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->BlockKind = BK_Block;
      } else {
        Tok->BlockKind = BK_Unknown;
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      // A closing brace without a recorded opening brace is ignored.
      if (LBraceStack.empty())
        break;
      if (LBraceStack.back()->BlockKind == BK_Unknown) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.  Note that
          // regardless how we mark inner braces here, we will overwrite the
          // BlockKind later if we parse a braced list (where all blocks
          // inside are by default braced lists), or when we explicitly detect
          // blocks (for example while parsing lambdas).
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              (Style.Language == FormatStyle::LK_JavaScript &&
               NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                Keywords.kw_as)) ||
              (Style.isCpp() && NextTok->is(tok::l_paren)) ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis) ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
              (NextTok->is(tok::semi) &&
               (!ExpectClassBody || LBraceStack.size() != 1)) ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);
          if (NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ++ReadTokens;
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the decision on both the closing and the matching opening
        // brace.
        if (ProbablyBracedList) {
          Tok->BlockKind = BK_BracedInit;
          LBraceStack.back()->BlockKind = BK_BracedInit;
        } else {
          Tok->BlockKind = BK_Block;
          LBraceStack.back()->BlockKind = BK_Block;
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only identifiers marked as statement macros are handled like the
      // tokens below.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Seeing one of these inside a still-undecided brace marks it as a
      // block.
      if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
        LBraceStack.back()->BlockKind = BK_Block;
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
    if (LBraceStack[i]->BlockKind == BK_Unknown)
      LBraceStack[i]->BlockKind = BK_Block;
  }

  // Rewind the token source to where the lookahead started.
  FormatTok = Tokens->setPosition(StoredPosition);
}
512 
/// Mixes the \c std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>()(v);
  const std::size_t Mixed = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
518 
519 size_t UnwrappedLineParser::computePPHash() const {
520   size_t h = 0;
521   for (const auto &i : PPStack) {
522     hash_combine(h, size_t(i.Kind));
523     hash_combine(h, i.Line);
524   }
525   return h;
526 }
527 
/// Parses a block that starts at the current '{' or macro block begin token.
///
/// \param MustBeDeclaration declaration state used for the block's contents.
/// \param AddLevel whether the contents are parsed one level deeper.
/// \param MunchSemi whether a ';' directly after the closing brace is
///        consumed as part of the block.
///
/// If the preprocessor stack hash is the same at the opening and the closing
/// brace, the opening and closing lines are linked to each other through
/// their matching-block line indices.
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
                                     bool MunchSemi) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->BlockKind = BK_Block;

  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);

  // A macro block begin may be followed by a parenthesized argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Sample the number of pending preprocessor directives before the opening
  // line is added; it is subtracted when computing the opening line's index.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevel)
    ++Line->Level;
  parseLevel(/*HasOpeningBrace=*/true);

  // Unterminated block: nothing more to do.
  if (eof())
    return;

  // parseLevel() stopped on something other than the expected closing token.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->BlockKind = BK_Block;
    return;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();
  Line->Level = InitialLevel;

  // Only link opening and closing lines when the preprocessor stack is
  // unchanged between the two braces.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }
}
588 
589 static bool isGoogScope(const UnwrappedLine &Line) {
590   // FIXME: Closure-library specific stuff should not be hard-coded but be
591   // configurable.
592   if (Line.Tokens.size() < 4)
593     return false;
594   auto I = Line.Tokens.begin();
595   if (I->Tok->TokenText != "goog")
596     return false;
597   ++I;
598   if (I->Tok->isNot(tok::period))
599     return false;
600   ++I;
601   if (I->Tok->TokenText != "scope")
602     return false;
603   ++I;
604   return I->Tok->is(tok::l_paren);
605 }
606 
607 static bool isIIFE(const UnwrappedLine &Line,
608                    const AdditionalKeywords &Keywords) {
609   // Look for the start of an immediately invoked anonymous function.
610   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
611   // This is commonly done in JavaScript to create a new, anonymous scope.
612   // Example: (function() { ... })()
613   if (Line.Tokens.size() < 3)
614     return false;
615   auto I = Line.Tokens.begin();
616   if (I->Tok->isNot(tok::l_paren))
617     return false;
618   ++I;
619   if (I->Tok->isNot(Keywords.kw_function))
620     return false;
621   ++I;
622   return I->Tok->is(tok::l_paren);
623 }
624 
625 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
626                                    const FormatToken &InitialToken) {
627   if (InitialToken.is(tok::kw_namespace))
628     return Style.BraceWrapping.AfterNamespace;
629   if (InitialToken.is(tok::kw_class))
630     return Style.BraceWrapping.AfterClass;
631   if (InitialToken.is(tok::kw_union))
632     return Style.BraceWrapping.AfterUnion;
633   if (InitialToken.is(tok::kw_struct))
634     return Style.BraceWrapping.AfterStruct;
635   return false;
636 }
637 
638 void UnwrappedLineParser::parseChildBlock() {
639   FormatTok->BlockKind = BK_Block;
640   nextToken();
641   {
642     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
643                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
644     ScopedLineState LineState(*this);
645     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
646                                             /*MustBeDeclaration=*/false);
647     Line->Level += SkipIndent ? 0 : 1;
648     parseLevel(/*HasOpeningBrace=*/true);
649     flushComments(isOnNewLine(*FormatTok));
650     Line->Level -= SkipIndent ? 0 : 1;
651   }
652   nextToken();
653 }
654 
655 void UnwrappedLineParser::parsePPDirective() {
656   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
657   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
658 
659   nextToken();
660 
661   if (!FormatTok->Tok.getIdentifierInfo()) {
662     parsePPUnknown();
663     return;
664   }
665 
666   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
667   case tok::pp_define:
668     parsePPDefine();
669     return;
670   case tok::pp_if:
671     parsePPIf(/*IfDef=*/false);
672     break;
673   case tok::pp_ifdef:
674   case tok::pp_ifndef:
675     parsePPIf(/*IfDef=*/true);
676     break;
677   case tok::pp_else:
678     parsePPElse();
679     break;
680   case tok::pp_elif:
681     parsePPElIf();
682     break;
683   case tok::pp_endif:
684     parsePPEndIf();
685     break;
686   default:
687     parsePPUnknown();
688     break;
689   }
690 }
691 
692 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
693   size_t Line = CurrentLines->size();
694   if (CurrentLines == &PreprocessorDirectives)
695     Line += Lines.size();
696 
697   if (Unreachable ||
698       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
699     PPStack.push_back({PP_Unreachable, Line});
700   else
701     PPStack.push_back({PP_Conditional, Line});
702 }
703 
704 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
705   ++PPBranchLevel;
706   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
707   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
708     PPLevelBranchIndex.push_back(0);
709     PPLevelBranchCount.push_back(0);
710   }
711   PPChainBranchIndex.push(0);
712   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
713   conditionalCompilationCondition(Unreachable || Skip);
714 }
715 
716 void UnwrappedLineParser::conditionalCompilationAlternative() {
717   if (!PPStack.empty())
718     PPStack.pop_back();
719   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
720   if (!PPChainBranchIndex.empty())
721     ++PPChainBranchIndex.top();
722   conditionalCompilationCondition(
723       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
724       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
725 }
726 
727 void UnwrappedLineParser::conditionalCompilationEnd() {
728   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
729   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
730     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
731       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
732     }
733   }
734   // Guard against #endif's without #if.
735   if (PPBranchLevel > -1)
736     --PPBranchLevel;
737   if (!PPChainBranchIndex.empty())
738     PPChainBranchIndex.pop();
739   if (!PPStack.empty())
740     PPStack.pop_back();
741 }
742 
743 void UnwrappedLineParser::parsePPIf(bool IfDef) {
744   bool IfNDef = FormatTok->is(tok::pp_ifndef);
745   nextToken();
746   bool Unreachable = false;
747   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
748     Unreachable = true;
749   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
750     Unreachable = true;
751   conditionalCompilationStart(Unreachable);
752   FormatToken *IfCondition = FormatTok;
753   // If there's a #ifndef on the first line, and the only lines before it are
754   // comments, it could be an include guard.
755   bool MaybeIncludeGuard = IfNDef;
756   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
757     for (auto &Line : Lines) {
758       if (!Line.Tokens.front().Tok->is(tok::comment)) {
759         MaybeIncludeGuard = false;
760         IncludeGuard = IG_Rejected;
761         break;
762       }
763     }
764   --PPBranchLevel;
765   parsePPUnknown();
766   ++PPBranchLevel;
767   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
768     IncludeGuard = IG_IfNdefed;
769     IncludeGuardToken = IfCondition;
770   }
771 }
772 
773 void UnwrappedLineParser::parsePPElse() {
774   // If a potential include guard has an #else, it's not an include guard.
775   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
776     IncludeGuard = IG_Rejected;
777   conditionalCompilationAlternative();
778   if (PPBranchLevel > -1)
779     --PPBranchLevel;
780   parsePPUnknown();
781   ++PPBranchLevel;
782 }
783 
784 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
785 
786 void UnwrappedLineParser::parsePPEndIf() {
787   conditionalCompilationEnd();
788   parsePPUnknown();
789   // If the #endif of a potential include guard is the last thing in the file,
790   // then we found an include guard.
791   unsigned TokenPosition = Tokens->getPosition();
792   FormatToken *PeekNext = AllTokens[TokenPosition];
793   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
794       PeekNext->is(tok::eof) &&
795       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
796     IncludeGuard = IG_Found;
797 }
798 
/// Parses a \c #define directive: the macro name, an optional parameter list
/// and then the macro body, which is parsed like a file.
///
/// Also advances include guard detection: a \c #define of the name used by
/// the preceding candidate \c #ifndef moves the state to IG_Defined, unless
/// some earlier line starts with something other than a comment or '#'.
void UnwrappedLineParser::parsePPDefine() {
  nextToken();

  // A #define that is not followed by an identifier cannot be part of an
  // include guard; treat it as an unknown directive.
  if (FormatTok->Tok.getKind() != tok::identifier) {
    IncludeGuard = IG_Rejected;
    IncludeGuardToken = nullptr;
    parsePPUnknown();
    return;
  }

  if (IncludeGuard == IG_IfNdefed &&
      IncludeGuardToken->TokenText == FormatTok->TokenText) {
    IncludeGuard = IG_Defined;
    IncludeGuardToken = nullptr;
    for (auto &Line : Lines) {
      if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
        IncludeGuard = IG_Rejected;
        break;
      }
    }
  }

  nextToken();
  // Only a '(' with no whitespace before it starts a parameter list.
  if (FormatTok->Tok.getKind() == tok::l_paren &&
      FormatTok->WhitespaceRange.getBegin() ==
          FormatTok->WhitespaceRange.getEnd()) {
    parseParens();
  }
  if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
    Line->Level += PPBranchLevel + 1;
  // Emit the "#define NAME(args)" part, then parse the body one level deeper.
  addUnwrappedLine();
  ++Line->Level;

  // Errors during a preprocessor directive can only affect the layout of the
  // preprocessor directive, and thus we ignore them. An alternative approach
  // would be to use the same approach we use on the file level (no
  // re-indentation if there was a structural error) within the macro
  // definition.
  parseFile();
}
839 
840 void UnwrappedLineParser::parsePPUnknown() {
841   do {
842     nextToken();
843   } while (!eof());
844   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
845     Line->Level += PPBranchLevel + 1;
846   addUnwrappedLine();
847 }
848 
849 // Here we blacklist certain tokens that are not usually the first token in an
850 // unwrapped line. This is used in attempt to distinguish macro calls without
851 // trailing semicolons from other constructs split to several lines.
852 static bool tokenCanStartNewLine(const clang::Token &Tok) {
853   // Semicolon can be a null-statement, l_square can be a start of a macro or
854   // a C++11 attribute, but this doesn't seem to be common.
855   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
856          Tok.isNot(tok::l_square) &&
857          // Tokens that can only be used as binary operators and a part of
858          // overloaded operator names.
859          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
860          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
861          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
862          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
863          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
864          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
865          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
866          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
867          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
868          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
869          Tok.isNot(tok::lesslessequal) &&
870          // Colon is used in labels, base class lists, initializer lists,
871          // range-based for loops, ternary operator, but should never be the
872          // first token in an unwrapped line.
873          Tok.isNot(tok::colon) &&
874          // 'noexcept' is a trailing annotation.
875          Tok.isNot(tok::kw_noexcept);
876 }
877 
878 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
879                           const FormatToken *FormatTok) {
880   // FIXME: This returns true for C/C++ keywords like 'struct'.
881   return FormatTok->is(tok::identifier) &&
882          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
883           !FormatTok->isOneOf(
884               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
885               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
886               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
887               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
888               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
889               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
890               Keywords.kw_from));
891 }
892 
893 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
894                                  const FormatToken *FormatTok) {
895   return FormatTok->Tok.isLiteral() ||
896          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
897          mustBeJSIdent(Keywords, FormatTok);
898 }
899 
900 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
901 // when encountered after a value (see mustBeJSIdentOrValue).
902 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
903                            const FormatToken *FormatTok) {
904   return FormatTok->isOneOf(
905       tok::kw_return, Keywords.kw_yield,
906       // conditionals
907       tok::kw_if, tok::kw_else,
908       // loops
909       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
910       // switch/case
911       tok::kw_switch, tok::kw_case,
912       // exceptions
913       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
914       // declaration
915       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
916       Keywords.kw_async, Keywords.kw_function,
917       // import/export
918       Keywords.kw_import, tok::kw_export);
919 }
920 
921 // readTokenWithJavaScriptASI reads the next token and terminates the current
922 // line if JavaScript Automatic Semicolon Insertion must
923 // happen between the current token and the next token.
924 //
925 // This method is conservative - it cannot cover all edge cases of JavaScript,
926 // but only aims to correctly handle certain well known cases. It *must not*
927 // return true in speculative cases.
928 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
929   FormatToken *Previous = FormatTok;
930   readToken();
931   FormatToken *Next = FormatTok;
932 
933   bool IsOnSameLine =
934       CommentsBeforeNextToken.empty()
935           ? Next->NewlinesBefore == 0
936           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
937   if (IsOnSameLine)
938     return;
939 
940   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
941   bool PreviousStartsTemplateExpr =
942       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
943   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
944     // If the line contains an '@' sign, the previous token might be an
945     // annotation, which can precede another identifier/value.
946     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
947                               [](UnwrappedLineNode &LineNode) {
948                                 return LineNode.Tok->is(tok::at);
949                               }) != Line->Tokens.end();
950     if (HasAt)
951       return;
952   }
953   if (Next->is(tok::exclaim) && PreviousMustBeValue)
954     return addUnwrappedLine();
955   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
956   bool NextEndsTemplateExpr =
957       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
958   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
959       (PreviousMustBeValue ||
960        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
961                          tok::minusminus)))
962     return addUnwrappedLine();
963   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
964       isJSDeclOrStmt(Keywords, Next))
965     return addUnwrappedLine();
966 }
967 
/// Parses one structural element starting at the current token.
///
/// The function works in two phases:
///  1. A switch on the kind of the first token hands constructs that have a
///     dedicated parser (namespaces, access specifiers, control statements,
///     labels, extern blocks, JS import/export, macro blocks, ...) to that
///     parser and returns. Cases that `break` instead fall through to phase 2.
///  2. A loop consumes tokens one construct at a time until one of the cases
///     decides the element is complete (usually calling addUnwrappedLine()
///     and returning) or eof() is reached. If the loop ends because of eof(),
///     this function does not add an unwrapped line itself.
///
/// The current token must not be an l_brace when this function is entered.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include directive, optionally followed by a string literal,
  // forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Mark every token up to the matching r_brace (or eof) as finalized;
      // the closing brace ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these keywords are simply consumed; in all
    // other languages they are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." {' opens a block; whether its contents get an additional
    // indentation level depends on BraceWrapping.AfterExternBlock.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import' ['public'] <string literal> [';'].
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals:', 'Q_SIGNALS:', 'slots:' and 'Q_SLOTS:' form their own line
    // when directly followed by a colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume the remaining tokens of the element until one of the
  // cases below finishes it or eof() is reached.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef NS_ENUM(...)' and the related macros are parsed as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', optionally followed by an identifier or simple type specifier,
      // a parenthesized list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier is all upper case, is at least five
        // characters long or function-like, is followed by a newline, and the
        // next token is one that can start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list that is
      // closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1394 
1395 bool UnwrappedLineParser::tryToParseLambda() {
1396   if (!Style.isCpp()) {
1397     nextToken();
1398     return false;
1399   }
1400   assert(FormatTok->is(tok::l_square));
1401   FormatToken &LSquare = *FormatTok;
1402   if (!tryToParseLambdaIntroducer())
1403     return false;
1404 
1405   bool SeenArrow = false;
1406 
1407   while (FormatTok->isNot(tok::l_brace)) {
1408     if (FormatTok->isSimpleTypeSpecifier()) {
1409       nextToken();
1410       continue;
1411     }
1412     switch (FormatTok->Tok.getKind()) {
1413     case tok::l_brace:
1414       break;
1415     case tok::l_paren:
1416       parseParens();
1417       break;
1418     case tok::amp:
1419     case tok::star:
1420     case tok::kw_const:
1421     case tok::comma:
1422     case tok::less:
1423     case tok::greater:
1424     case tok::identifier:
1425     case tok::numeric_constant:
1426     case tok::coloncolon:
1427     case tok::kw_mutable:
1428     case tok::kw_noexcept:
1429       nextToken();
1430       break;
1431     // Specialization of a template with an integer parameter can contain
1432     // arithmetic, logical, comparison and ternary operators.
1433     //
1434     // FIXME: This also accepts sequences of operators that are not in the scope
1435     // of a template argument list.
1436     //
1437     // In a C++ lambda a template type can only occur after an arrow. We use
1438     // this as an heuristic to distinguish between Objective-C expressions
1439     // followed by an `a->b` expression, such as:
1440     // ([obj func:arg] + a->b)
1441     // Otherwise the code below would parse as a lambda.
1442     case tok::plus:
1443     case tok::minus:
1444     case tok::exclaim:
1445     case tok::tilde:
1446     case tok::slash:
1447     case tok::percent:
1448     case tok::lessless:
1449     case tok::pipe:
1450     case tok::pipepipe:
1451     case tok::ampamp:
1452     case tok::caret:
1453     case tok::equalequal:
1454     case tok::exclaimequal:
1455     case tok::greaterequal:
1456     case tok::lessequal:
1457     case tok::question:
1458     case tok::colon:
1459     case tok::kw_true:
1460     case tok::kw_false:
1461       if (SeenArrow) {
1462         nextToken();
1463         break;
1464       }
1465       return true;
1466     case tok::arrow:
1467       // This might or might not actually be a lambda arrow (this could be an
1468       // ObjC method invocation followed by a dereferencing arrow). We might
1469       // reset this back to TT_Unknown in TokenAnnotator.
1470       FormatTok->Type = TT_LambdaArrow;
1471       SeenArrow = true;
1472       nextToken();
1473       break;
1474     default:
1475       return true;
1476     }
1477   }
1478   FormatTok->Type = TT_LambdaLBrace;
1479   LSquare.Type = TT_LambdaLSquare;
1480   parseChildBlock();
1481   return true;
1482 }
1483 
1484 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1485   const FormatToken *Previous = FormatTok->Previous;
1486   if (Previous &&
1487       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1488                          tok::kw_delete, tok::l_square) ||
1489        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1490        Previous->isSimpleTypeSpecifier())) {
1491     nextToken();
1492     return false;
1493   }
1494   nextToken();
1495   if (FormatTok->is(tok::l_square)) {
1496     return false;
1497   }
1498   parseSquare(/*LambdaIntroducer=*/true);
1499   return true;
1500 }
1501 
1502 void UnwrappedLineParser::tryToParseJSFunction() {
1503   assert(FormatTok->is(Keywords.kw_function) ||
1504          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1505   if (FormatTok->is(Keywords.kw_async))
1506     nextToken();
1507   // Consume "function".
1508   nextToken();
1509 
1510   // Consume * (generator function). Treat it like C++'s overloaded operators.
1511   if (FormatTok->is(tok::star)) {
1512     FormatTok->Type = TT_OverloadedOperator;
1513     nextToken();
1514   }
1515 
1516   // Consume function name.
1517   if (FormatTok->is(tok::identifier))
1518     nextToken();
1519 
1520   if (FormatTok->isNot(tok::l_paren))
1521     return;
1522 
1523   // Parse formal parameter list.
1524   parseParens();
1525 
1526   if (FormatTok->is(tok::colon)) {
1527     // Parse a type definition.
1528     nextToken();
1529 
1530     // Eat the type declaration. For braced inline object types, balance braces,
1531     // otherwise just parse until finding an l_brace for the function body.
1532     if (FormatTok->is(tok::l_brace))
1533       tryToParseBracedList();
1534     else
1535       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1536         nextToken();
1537   }
1538 
1539   if (FormatTok->is(tok::semi))
1540     return;
1541 
1542   parseChildBlock();
1543 }
1544 
1545 bool UnwrappedLineParser::tryToParseBracedList() {
1546   if (FormatTok->BlockKind == BK_Unknown)
1547     calculateBraceTypes();
1548   assert(FormatTok->BlockKind != BK_Unknown);
1549   if (FormatTok->BlockKind == BK_Block)
1550     return false;
1551   nextToken();
1552   parseBracedList();
1553   return true;
1554 }
1555 
1556 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1557                                           tok::TokenKind ClosingBraceKind) {
1558   bool HasError = false;
1559 
1560   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1561   // replace this by using parseAssigmentExpression() inside.
1562   do {
1563     if (Style.Language == FormatStyle::LK_JavaScript) {
1564       if (FormatTok->is(Keywords.kw_function) ||
1565           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1566         tryToParseJSFunction();
1567         continue;
1568       }
1569       if (FormatTok->is(TT_JsFatArrow)) {
1570         nextToken();
1571         // Fat arrows can be followed by simple expressions or by child blocks
1572         // in curly braces.
1573         if (FormatTok->is(tok::l_brace)) {
1574           parseChildBlock();
1575           continue;
1576         }
1577       }
1578       if (FormatTok->is(tok::l_brace)) {
1579         // Could be a method inside of a braced list `{a() { return 1; }}`.
1580         if (tryToParseBracedList())
1581           continue;
1582         parseChildBlock();
1583       }
1584     }
1585     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1586       nextToken();
1587       return !HasError;
1588     }
1589     switch (FormatTok->Tok.getKind()) {
1590     case tok::caret:
1591       nextToken();
1592       if (FormatTok->is(tok::l_brace)) {
1593         parseChildBlock();
1594       }
1595       break;
1596     case tok::l_square:
1597       tryToParseLambda();
1598       break;
1599     case tok::l_paren:
1600       parseParens();
1601       // JavaScript can just have free standing methods and getters/setters in
1602       // object literals. Detect them by a "{" following ")".
1603       if (Style.Language == FormatStyle::LK_JavaScript) {
1604         if (FormatTok->is(tok::l_brace))
1605           parseChildBlock();
1606         break;
1607       }
1608       break;
1609     case tok::l_brace:
1610       // Assume there are no blocks inside a braced init list apart
1611       // from the ones we explicitly parse out (like lambdas).
1612       FormatTok->BlockKind = BK_BracedInit;
1613       nextToken();
1614       parseBracedList();
1615       break;
1616     case tok::less:
1617       if (Style.Language == FormatStyle::LK_Proto) {
1618         nextToken();
1619         parseBracedList(/*ContinueOnSemicolons=*/false,
1620                         /*ClosingBraceKind=*/tok::greater);
1621       } else {
1622         nextToken();
1623       }
1624       break;
1625     case tok::semi:
1626       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1627       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1628       // used for error recovery if we have otherwise determined that this is
1629       // a braced list.
1630       if (Style.Language == FormatStyle::LK_JavaScript) {
1631         nextToken();
1632         break;
1633       }
1634       HasError = true;
1635       if (!ContinueOnSemicolons)
1636         return !HasError;
1637       nextToken();
1638       break;
1639     case tok::comma:
1640       nextToken();
1641       break;
1642     default:
1643       nextToken();
1644       break;
1645     }
1646   } while (!eof());
1647   return false;
1648 }
1649 
1650 void UnwrappedLineParser::parseParens() {
1651   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1652   nextToken();
1653   do {
1654     switch (FormatTok->Tok.getKind()) {
1655     case tok::l_paren:
1656       parseParens();
1657       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1658         parseChildBlock();
1659       break;
1660     case tok::r_paren:
1661       nextToken();
1662       return;
1663     case tok::r_brace:
1664       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1665       return;
1666     case tok::l_square:
1667       tryToParseLambda();
1668       break;
1669     case tok::l_brace:
1670       if (!tryToParseBracedList())
1671         parseChildBlock();
1672       break;
1673     case tok::at:
1674       nextToken();
1675       if (FormatTok->Tok.is(tok::l_brace)) {
1676         nextToken();
1677         parseBracedList();
1678       }
1679       break;
1680     case tok::kw_class:
1681       if (Style.Language == FormatStyle::LK_JavaScript)
1682         parseRecord(/*ParseAsExpr=*/true);
1683       else
1684         nextToken();
1685       break;
1686     case tok::identifier:
1687       if (Style.Language == FormatStyle::LK_JavaScript &&
1688           (FormatTok->is(Keywords.kw_function) ||
1689            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1690         tryToParseJSFunction();
1691       else
1692         nextToken();
1693       break;
1694     default:
1695       nextToken();
1696       break;
1697     }
1698   } while (!eof());
1699 }
1700 
1701 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1702   if (!LambdaIntroducer) {
1703     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1704     if (tryToParseLambda())
1705       return;
1706   }
1707   do {
1708     switch (FormatTok->Tok.getKind()) {
1709     case tok::l_paren:
1710       parseParens();
1711       break;
1712     case tok::r_square:
1713       nextToken();
1714       return;
1715     case tok::r_brace:
1716       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1717       return;
1718     case tok::l_square:
1719       parseSquare();
1720       break;
1721     case tok::l_brace: {
1722       if (!tryToParseBracedList())
1723         parseChildBlock();
1724       break;
1725     }
1726     case tok::at:
1727       nextToken();
1728       if (FormatTok->Tok.is(tok::l_brace)) {
1729         nextToken();
1730         parseBracedList();
1731       }
1732       break;
1733     default:
1734       nextToken();
1735       break;
1736     }
1737   } while (!eof());
1738 }
1739 
1740 void UnwrappedLineParser::parseIfThenElse() {
1741   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1742   nextToken();
1743   if (FormatTok->Tok.is(tok::kw_constexpr))
1744     nextToken();
1745   if (FormatTok->Tok.is(tok::l_paren))
1746     parseParens();
1747   bool NeedsUnwrappedLine = false;
1748   if (FormatTok->Tok.is(tok::l_brace)) {
1749     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1750     parseBlock(/*MustBeDeclaration=*/false);
1751     if (Style.BraceWrapping.BeforeElse)
1752       addUnwrappedLine();
1753     else
1754       NeedsUnwrappedLine = true;
1755   } else {
1756     addUnwrappedLine();
1757     ++Line->Level;
1758     parseStructuralElement();
1759     --Line->Level;
1760   }
1761   if (FormatTok->Tok.is(tok::kw_else)) {
1762     nextToken();
1763     if (FormatTok->Tok.is(tok::l_brace)) {
1764       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1765       parseBlock(/*MustBeDeclaration=*/false);
1766       addUnwrappedLine();
1767     } else if (FormatTok->Tok.is(tok::kw_if)) {
1768       parseIfThenElse();
1769     } else {
1770       addUnwrappedLine();
1771       ++Line->Level;
1772       parseStructuralElement();
1773       if (FormatTok->is(tok::eof))
1774         addUnwrappedLine();
1775       --Line->Level;
1776     }
1777   } else if (NeedsUnwrappedLine) {
1778     addUnwrappedLine();
1779   }
1780 }
1781 
1782 void UnwrappedLineParser::parseTryCatch() {
1783   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1784   nextToken();
1785   bool NeedsUnwrappedLine = false;
1786   if (FormatTok->is(tok::colon)) {
1787     // We are in a function try block, what comes is an initializer list.
1788     nextToken();
1789     while (FormatTok->is(tok::identifier)) {
1790       nextToken();
1791       if (FormatTok->is(tok::l_paren))
1792         parseParens();
1793       if (FormatTok->is(tok::comma))
1794         nextToken();
1795     }
1796   }
1797   // Parse try with resource.
1798   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1799     parseParens();
1800   }
1801   if (FormatTok->is(tok::l_brace)) {
1802     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1803     parseBlock(/*MustBeDeclaration=*/false);
1804     if (Style.BraceWrapping.BeforeCatch) {
1805       addUnwrappedLine();
1806     } else {
1807       NeedsUnwrappedLine = true;
1808     }
1809   } else if (!FormatTok->is(tok::kw_catch)) {
1810     // The C++ standard requires a compound-statement after a try.
1811     // If there's none, we try to assume there's a structuralElement
1812     // and try to continue.
1813     addUnwrappedLine();
1814     ++Line->Level;
1815     parseStructuralElement();
1816     --Line->Level;
1817   }
1818   while (1) {
1819     if (FormatTok->is(tok::at))
1820       nextToken();
1821     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1822                              tok::kw___finally) ||
1823           ((Style.Language == FormatStyle::LK_Java ||
1824             Style.Language == FormatStyle::LK_JavaScript) &&
1825            FormatTok->is(Keywords.kw_finally)) ||
1826           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1827            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1828       break;
1829     nextToken();
1830     while (FormatTok->isNot(tok::l_brace)) {
1831       if (FormatTok->is(tok::l_paren)) {
1832         parseParens();
1833         continue;
1834       }
1835       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1836         return;
1837       nextToken();
1838     }
1839     NeedsUnwrappedLine = false;
1840     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1841     parseBlock(/*MustBeDeclaration=*/false);
1842     if (Style.BraceWrapping.BeforeCatch)
1843       addUnwrappedLine();
1844     else
1845       NeedsUnwrappedLine = true;
1846   }
1847   if (NeedsUnwrappedLine)
1848     addUnwrappedLine();
1849 }
1850 
1851 void UnwrappedLineParser::parseNamespace() {
1852   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1853 
1854   const FormatToken &InitialToken = *FormatTok;
1855   nextToken();
1856   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1857     nextToken();
1858   if (FormatTok->Tok.is(tok::l_brace)) {
1859     if (ShouldBreakBeforeBrace(Style, InitialToken))
1860       addUnwrappedLine();
1861 
1862     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1863                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1864                      DeclarationScopeStack.size() > 1);
1865     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1866     // Munch the semicolon after a namespace. This is more common than one would
1867     // think. Puttin the semicolon into its own line is very ugly.
1868     if (FormatTok->Tok.is(tok::semi))
1869       nextToken();
1870     addUnwrappedLine();
1871   }
1872   // FIXME: Add error handling.
1873 }
1874 
1875 void UnwrappedLineParser::parseNew() {
1876   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1877   nextToken();
1878   if (Style.Language != FormatStyle::LK_Java)
1879     return;
1880 
1881   // In Java, we can parse everything up to the parens, which aren't optional.
1882   do {
1883     // There should not be a ;, { or } before the new's open paren.
1884     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1885       return;
1886 
1887     // Consume the parens.
1888     if (FormatTok->is(tok::l_paren)) {
1889       parseParens();
1890 
1891       // If there is a class body of an anonymous class, consume that as child.
1892       if (FormatTok->is(tok::l_brace))
1893         parseChildBlock();
1894       return;
1895     }
1896     nextToken();
1897   } while (!eof());
1898 }
1899 
1900 void UnwrappedLineParser::parseForOrWhileLoop() {
1901   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1902          "'for', 'while' or foreach macro expected");
1903   nextToken();
1904   // JS' for await ( ...
1905   if (Style.Language == FormatStyle::LK_JavaScript &&
1906       FormatTok->is(Keywords.kw_await))
1907     nextToken();
1908   if (FormatTok->Tok.is(tok::l_paren))
1909     parseParens();
1910   if (FormatTok->Tok.is(tok::l_brace)) {
1911     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1912     parseBlock(/*MustBeDeclaration=*/false);
1913     addUnwrappedLine();
1914   } else {
1915     addUnwrappedLine();
1916     ++Line->Level;
1917     parseStructuralElement();
1918     --Line->Level;
1919   }
1920 }
1921 
1922 void UnwrappedLineParser::parseDoWhile() {
1923   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1924   nextToken();
1925   if (FormatTok->Tok.is(tok::l_brace)) {
1926     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1927     parseBlock(/*MustBeDeclaration=*/false);
1928     if (Style.BraceWrapping.IndentBraces)
1929       addUnwrappedLine();
1930   } else {
1931     addUnwrappedLine();
1932     ++Line->Level;
1933     parseStructuralElement();
1934     --Line->Level;
1935   }
1936 
1937   // FIXME: Add error handling.
1938   if (!FormatTok->Tok.is(tok::kw_while)) {
1939     addUnwrappedLine();
1940     return;
1941   }
1942 
1943   nextToken();
1944   parseStructuralElement();
1945 }
1946 
1947 void UnwrappedLineParser::parseLabel() {
1948   nextToken();
1949   unsigned OldLineLevel = Line->Level;
1950   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1951     --Line->Level;
1952   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1953     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1954     parseBlock(/*MustBeDeclaration=*/false);
1955     if (FormatTok->Tok.is(tok::kw_break)) {
1956       if (Style.BraceWrapping.AfterControlStatement)
1957         addUnwrappedLine();
1958       parseStructuralElement();
1959     }
1960     addUnwrappedLine();
1961   } else {
1962     if (FormatTok->is(tok::semi))
1963       nextToken();
1964     addUnwrappedLine();
1965   }
1966   Line->Level = OldLineLevel;
1967   if (FormatTok->isNot(tok::l_brace)) {
1968     parseStructuralElement();
1969     addUnwrappedLine();
1970   }
1971 }
1972 
1973 void UnwrappedLineParser::parseCaseLabel() {
1974   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1975   // FIXME: fix handling of complex expressions here.
1976   do {
1977     nextToken();
1978   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1979   parseLabel();
1980 }
1981 
1982 void UnwrappedLineParser::parseSwitch() {
1983   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1984   nextToken();
1985   if (FormatTok->Tok.is(tok::l_paren))
1986     parseParens();
1987   if (FormatTok->Tok.is(tok::l_brace)) {
1988     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1989     parseBlock(/*MustBeDeclaration=*/false);
1990     addUnwrappedLine();
1991   } else {
1992     addUnwrappedLine();
1993     ++Line->Level;
1994     parseStructuralElement();
1995     --Line->Level;
1996   }
1997 }
1998 
1999 void UnwrappedLineParser::parseAccessSpecifier() {
2000   nextToken();
2001   // Understand Qt's slots.
2002   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2003     nextToken();
2004   // Otherwise, we don't know what it is, and we'd better keep the next token.
2005   if (FormatTok->Tok.is(tok::colon))
2006     nextToken();
2007   addUnwrappedLine();
2008 }
2009 
2010 bool UnwrappedLineParser::parseEnum() {
2011   // Won't be 'enum' for NS_ENUMs.
2012   if (FormatTok->Tok.is(tok::kw_enum))
2013     nextToken();
2014 
2015   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2016   // declarations. An "enum" keyword followed by a colon would be a syntax
2017   // error and thus assume it is just an identifier.
2018   if (Style.Language == FormatStyle::LK_JavaScript &&
2019       FormatTok->isOneOf(tok::colon, tok::question))
2020     return false;
2021 
2022   // In protobuf, "enum" can be used as a field name.
2023   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2024     return false;
2025 
2026   // Eat up enum class ...
2027   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2028     nextToken();
2029 
2030   while (FormatTok->Tok.getIdentifierInfo() ||
2031          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2032                             tok::greater, tok::comma, tok::question)) {
2033     nextToken();
2034     // We can have macros or attributes in between 'enum' and the enum name.
2035     if (FormatTok->is(tok::l_paren))
2036       parseParens();
2037     if (FormatTok->is(tok::identifier)) {
2038       nextToken();
2039       // If there are two identifiers in a row, this is likely an elaborate
2040       // return type. In Java, this can be "implements", etc.
2041       if (Style.isCpp() && FormatTok->is(tok::identifier))
2042         return false;
2043     }
2044   }
2045 
2046   // Just a declaration or something is wrong.
2047   if (FormatTok->isNot(tok::l_brace))
2048     return true;
2049   FormatTok->BlockKind = BK_Block;
2050 
2051   if (Style.Language == FormatStyle::LK_Java) {
2052     // Java enums are different.
2053     parseJavaEnumBody();
2054     return true;
2055   }
2056   if (Style.Language == FormatStyle::LK_Proto) {
2057     parseBlock(/*MustBeDeclaration=*/true);
2058     return true;
2059   }
2060 
2061   // Parse enum body.
2062   nextToken();
2063   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2064   if (HasError) {
2065     if (FormatTok->is(tok::semi))
2066       nextToken();
2067     addUnwrappedLine();
2068   }
2069   return true;
2070 
2071   // There is no addUnwrappedLine() here so that we fall through to parsing a
2072   // structural element afterwards. Thus, in "enum A {} n, m;",
2073   // "} n, m;" will end up in one unwrapped line.
2074 }
2075 
2076 void UnwrappedLineParser::parseJavaEnumBody() {
2077   // Determine whether the enum is simple, i.e. does not have a semicolon or
2078   // constants with class bodies. Simple enums can be formatted like braced
2079   // lists, contracted to a single line, etc.
2080   unsigned StoredPosition = Tokens->getPosition();
2081   bool IsSimple = true;
2082   FormatToken *Tok = Tokens->getNextToken();
2083   while (Tok) {
2084     if (Tok->is(tok::r_brace))
2085       break;
2086     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2087       IsSimple = false;
2088       break;
2089     }
2090     // FIXME: This will also mark enums with braces in the arguments to enum
2091     // constants as "not simple". This is probably fine in practice, though.
2092     Tok = Tokens->getNextToken();
2093   }
2094   FormatTok = Tokens->setPosition(StoredPosition);
2095 
2096   if (IsSimple) {
2097     nextToken();
2098     parseBracedList();
2099     addUnwrappedLine();
2100     return;
2101   }
2102 
2103   // Parse the body of a more complex enum.
2104   // First add a line for everything up to the "{".
2105   nextToken();
2106   addUnwrappedLine();
2107   ++Line->Level;
2108 
2109   // Parse the enum constants.
2110   while (FormatTok) {
2111     if (FormatTok->is(tok::l_brace)) {
2112       // Parse the constant's class body.
2113       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2114                  /*MunchSemi=*/false);
2115     } else if (FormatTok->is(tok::l_paren)) {
2116       parseParens();
2117     } else if (FormatTok->is(tok::comma)) {
2118       nextToken();
2119       addUnwrappedLine();
2120     } else if (FormatTok->is(tok::semi)) {
2121       nextToken();
2122       addUnwrappedLine();
2123       break;
2124     } else if (FormatTok->is(tok::r_brace)) {
2125       addUnwrappedLine();
2126       break;
2127     } else {
2128       nextToken();
2129     }
2130   }
2131 
2132   // Parse the class body after the enum's ";" if any.
2133   parseLevel(/*HasOpeningBrace=*/true);
2134   nextToken();
2135   --Line->Level;
2136   addUnwrappedLine();
2137 }
2138 
2139 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2140   const FormatToken &InitialToken = *FormatTok;
2141   nextToken();
2142 
2143   // The actual identifier can be a nested name specifier, and in macros
2144   // it is often token-pasted.
2145   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2146                             tok::kw___attribute, tok::kw___declspec,
2147                             tok::kw_alignas) ||
2148          ((Style.Language == FormatStyle::LK_Java ||
2149            Style.Language == FormatStyle::LK_JavaScript) &&
2150           FormatTok->isOneOf(tok::period, tok::comma))) {
2151     if (Style.Language == FormatStyle::LK_JavaScript &&
2152         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2153       // JavaScript/TypeScript supports inline object types in
2154       // extends/implements positions:
2155       //     class Foo implements {bar: number} { }
2156       nextToken();
2157       if (FormatTok->is(tok::l_brace)) {
2158         tryToParseBracedList();
2159         continue;
2160       }
2161     }
2162     bool IsNonMacroIdentifier =
2163         FormatTok->is(tok::identifier) &&
2164         FormatTok->TokenText != FormatTok->TokenText.upper();
2165     nextToken();
2166     // We can have macros or attributes in between 'class' and the class name.
2167     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2168       parseParens();
2169   }
2170 
2171   // Note that parsing away template declarations here leads to incorrectly
2172   // accepting function declarations as record declarations.
2173   // In general, we cannot solve this problem. Consider:
2174   // class A<int> B() {}
2175   // which can be a function definition or a class definition when B() is a
2176   // macro. If we find enough real-world cases where this is a problem, we
2177   // can parse for the 'template' keyword in the beginning of the statement,
2178   // and thus rule out the record production in case there is no template
2179   // (this would still leave us with an ambiguity between template function
2180   // and class declarations).
2181   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2182     while (!eof()) {
2183       if (FormatTok->is(tok::l_brace)) {
2184         calculateBraceTypes(/*ExpectClassBody=*/true);
2185         if (!tryToParseBracedList())
2186           break;
2187       }
2188       if (FormatTok->Tok.is(tok::semi))
2189         return;
2190       nextToken();
2191     }
2192   }
2193   if (FormatTok->Tok.is(tok::l_brace)) {
2194     if (ParseAsExpr) {
2195       parseChildBlock();
2196     } else {
2197       if (ShouldBreakBeforeBrace(Style, InitialToken))
2198         addUnwrappedLine();
2199 
2200       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2201                  /*MunchSemi=*/false);
2202     }
2203   }
2204   // There is no addUnwrappedLine() here so that we fall through to parsing a
2205   // structural element afterwards. Thus, in "class A {} n, m;",
2206   // "} n, m;" will end up in one unwrapped line.
2207 }
2208 
2209 void UnwrappedLineParser::parseObjCMethod() {
2210   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2211          "'(' or identifier expected.");
2212   do {
2213     if (FormatTok->Tok.is(tok::semi)) {
2214       nextToken();
2215       addUnwrappedLine();
2216       return;
2217     } else if (FormatTok->Tok.is(tok::l_brace)) {
2218       if (Style.BraceWrapping.AfterFunction)
2219         addUnwrappedLine();
2220       parseBlock(/*MustBeDeclaration=*/false);
2221       addUnwrappedLine();
2222       return;
2223     } else {
2224       nextToken();
2225     }
2226   } while (!eof());
2227 }
2228 
2229 void UnwrappedLineParser::parseObjCProtocolList() {
2230   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2231   do {
2232     nextToken();
2233     // Early exit in case someone forgot a close angle.
2234     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2235         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2236       return;
2237   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2238   nextToken(); // Skip '>'.
2239 }
2240 
2241 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2242   do {
2243     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2244       nextToken();
2245       addUnwrappedLine();
2246       break;
2247     }
2248     if (FormatTok->is(tok::l_brace)) {
2249       parseBlock(/*MustBeDeclaration=*/false);
2250       // In ObjC interfaces, nothing should be following the "}".
2251       addUnwrappedLine();
2252     } else if (FormatTok->is(tok::r_brace)) {
2253       // Ignore stray "}". parseStructuralElement doesn't consume them.
2254       nextToken();
2255       addUnwrappedLine();
2256     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2257       nextToken();
2258       parseObjCMethod();
2259     } else {
2260       parseStructuralElement();
2261     }
2262   } while (!eof());
2263 }
2264 
2265 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2266   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2267          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2268   nextToken();
2269   nextToken(); // interface name
2270 
2271   // @interface can be followed by a lightweight generic
2272   // specialization list, then either a base class or a category.
2273   if (FormatTok->Tok.is(tok::less)) {
2274     // Unlike protocol lists, generic parameterizations support
2275     // nested angles:
2276     //
2277     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2278     //     NSObject <NSCopying, NSSecureCoding>
2279     //
2280     // so we need to count how many open angles we have left.
2281     unsigned NumOpenAngles = 1;
2282     do {
2283       nextToken();
2284       // Early exit in case someone forgot a close angle.
2285       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2286           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2287         break;
2288       if (FormatTok->Tok.is(tok::less))
2289         ++NumOpenAngles;
2290       else if (FormatTok->Tok.is(tok::greater)) {
2291         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2292         --NumOpenAngles;
2293       }
2294     } while (!eof() && NumOpenAngles != 0);
2295     nextToken(); // Skip '>'.
2296   }
2297   if (FormatTok->Tok.is(tok::colon)) {
2298     nextToken();
2299     nextToken(); // base class name
2300   } else if (FormatTok->Tok.is(tok::l_paren))
2301     // Skip category, if present.
2302     parseParens();
2303 
2304   if (FormatTok->Tok.is(tok::less))
2305     parseObjCProtocolList();
2306 
2307   if (FormatTok->Tok.is(tok::l_brace)) {
2308     if (Style.BraceWrapping.AfterObjCDeclaration)
2309       addUnwrappedLine();
2310     parseBlock(/*MustBeDeclaration=*/true);
2311   }
2312 
2313   // With instance variables, this puts '}' on its own line.  Without instance
2314   // variables, this ends the @interface line.
2315   addUnwrappedLine();
2316 
2317   parseObjCUntilAtEnd();
2318 }
2319 
2320 // Returns true for the declaration/definition form of @protocol,
2321 // false for the expression form.
2322 bool UnwrappedLineParser::parseObjCProtocol() {
2323   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2324   nextToken();
2325 
2326   if (FormatTok->is(tok::l_paren))
2327     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2328     return false;
2329 
2330   // The definition/declaration form,
2331   // @protocol Foo
2332   // - (int)someMethod;
2333   // @end
2334 
2335   nextToken(); // protocol name
2336 
2337   if (FormatTok->Tok.is(tok::less))
2338     parseObjCProtocolList();
2339 
2340   // Check for protocol declaration.
2341   if (FormatTok->Tok.is(tok::semi)) {
2342     nextToken();
2343     addUnwrappedLine();
2344     return true;
2345   }
2346 
2347   addUnwrappedLine();
2348   parseObjCUntilAtEnd();
2349   return true;
2350 }
2351 
2352 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2353   bool IsImport = FormatTok->is(Keywords.kw_import);
2354   assert(IsImport || FormatTok->is(tok::kw_export));
2355   nextToken();
2356 
2357   // Consume the "default" in "export default class/function".
2358   if (FormatTok->is(tok::kw_default))
2359     nextToken();
2360 
2361   // Consume "async function", "function" and "default function", so that these
2362   // get parsed as free-standing JS functions, i.e. do not require a trailing
2363   // semicolon.
2364   if (FormatTok->is(Keywords.kw_async))
2365     nextToken();
2366   if (FormatTok->is(Keywords.kw_function)) {
2367     nextToken();
2368     return;
2369   }
2370 
2371   // For imports, `export *`, `export {...}`, consume the rest of the line up
2372   // to the terminating `;`. For everything else, just return and continue
2373   // parsing the structural element, i.e. the declaration or expression for
2374   // `export default`.
2375   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2376       !FormatTok->isStringLiteral())
2377     return;
2378 
2379   while (!eof()) {
2380     if (FormatTok->is(tok::semi))
2381       return;
2382     if (Line->Tokens.empty()) {
2383       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2384       // import statement should terminate.
2385       return;
2386     }
2387     if (FormatTok->is(tok::l_brace)) {
2388       FormatTok->BlockKind = BK_Block;
2389       nextToken();
2390       parseBracedList();
2391     } else {
2392       nextToken();
2393     }
2394   }
2395 }
2396 
2397 void UnwrappedLineParser::parseStatementMacro() {
2398   nextToken();
2399   if (FormatTok->is(tok::l_paren))
2400     parseParens();
2401   if (FormatTok->is(tok::semi))
2402     nextToken();
2403   addUnwrappedLine();
2404 }
2405 
2406 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2407                                                  StringRef Prefix = "") {
2408   llvm::dbgs() << Prefix << "Line(" << Line.Level
2409                << ", FSC=" << Line.FirstStartColumn << ")"
2410                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2411   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2412                                                     E = Line.Tokens.end();
2413        I != E; ++I) {
2414     llvm::dbgs() << I->Tok->Tok.getName() << "["
2415                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2416                  << "] ";
2417   }
2418   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2419                                                     E = Line.Tokens.end();
2420        I != E; ++I) {
2421     const UnwrappedLineNode &Node = *I;
2422     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2423              I = Node.Children.begin(),
2424              E = Node.Children.end();
2425          I != E; ++I) {
2426       printDebugInfo(*I, "\nChild: ");
2427     }
2428   }
2429   llvm::dbgs() << "\n";
2430 }
2431 
2432 void UnwrappedLineParser::addUnwrappedLine() {
2433   if (Line->Tokens.empty())
2434     return;
2435   LLVM_DEBUG({
2436     if (CurrentLines == &Lines)
2437       printDebugInfo(*Line);
2438   });
2439   CurrentLines->push_back(std::move(*Line));
2440   Line->Tokens.clear();
2441   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2442   Line->FirstStartColumn = 0;
2443   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2444     CurrentLines->append(
2445         std::make_move_iterator(PreprocessorDirectives.begin()),
2446         std::make_move_iterator(PreprocessorDirectives.end()));
2447     PreprocessorDirectives.clear();
2448   }
2449   // Disconnect the current token from the last token on the previous line.
2450   FormatTok->Previous = nullptr;
2451 }
2452 
2453 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2454 
2455 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2456   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2457          FormatTok.NewlinesBefore > 0;
2458 }
2459 
2460 // Checks if \p FormatTok is a line comment that continues the line comment
2461 // section on \p Line.
2462 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2463                                         const UnwrappedLine &Line,
2464                                         llvm::Regex &CommentPragmasRegex) {
2465   if (Line.Tokens.empty())
2466     return false;
2467 
2468   StringRef IndentContent = FormatTok.TokenText;
2469   if (FormatTok.TokenText.startswith("//") ||
2470       FormatTok.TokenText.startswith("/*"))
2471     IndentContent = FormatTok.TokenText.substr(2);
2472   if (CommentPragmasRegex.match(IndentContent))
2473     return false;
2474 
2475   // If Line starts with a line comment, then FormatTok continues the comment
2476   // section if its original column is greater or equal to the original start
2477   // column of the line.
2478   //
2479   // Define the min column token of a line as follows: if a line ends in '{' or
2480   // contains a '{' followed by a line comment, then the min column token is
2481   // that '{'. Otherwise, the min column token of the line is the first token of
2482   // the line.
2483   //
2484   // If Line starts with a token other than a line comment, then FormatTok
2485   // continues the comment section if its original column is greater than the
2486   // original start column of the min column token of the line.
2487   //
2488   // For example, the second line comment continues the first in these cases:
2489   //
2490   // // first line
2491   // // second line
2492   //
2493   // and:
2494   //
2495   // // first line
2496   //  // second line
2497   //
2498   // and:
2499   //
2500   // int i; // first line
2501   //  // second line
2502   //
2503   // and:
2504   //
2505   // do { // first line
2506   //      // second line
2507   //   int i;
2508   // } while (true);
2509   //
2510   // and:
2511   //
2512   // enum {
2513   //   a, // first line
2514   //    // second line
2515   //   b
2516   // };
2517   //
2518   // The second line comment doesn't continue the first in these cases:
2519   //
2520   //   // first line
2521   //  // second line
2522   //
2523   // and:
2524   //
2525   // int i; // first line
2526   // // second line
2527   //
2528   // and:
2529   //
2530   // do { // first line
2531   //   // second line
2532   //   int i;
2533   // } while (true);
2534   //
2535   // and:
2536   //
2537   // enum {
2538   //   a, // first line
2539   //   // second line
2540   // };
2541   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2542 
2543   // Scan for '{//'. If found, use the column of '{' as a min column for line
2544   // comment section continuation.
2545   const FormatToken *PreviousToken = nullptr;
2546   for (const UnwrappedLineNode &Node : Line.Tokens) {
2547     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2548         isLineComment(*Node.Tok)) {
2549       MinColumnToken = PreviousToken;
2550       break;
2551     }
2552     PreviousToken = Node.Tok;
2553 
2554     // Grab the last newline preceding a token in this unwrapped line.
2555     if (Node.Tok->NewlinesBefore > 0) {
2556       MinColumnToken = Node.Tok;
2557     }
2558   }
2559   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2560     MinColumnToken = PreviousToken;
2561   }
2562 
2563   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2564                               MinColumnToken);
2565 }
2566 
2567 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2568   bool JustComments = Line->Tokens.empty();
2569   for (SmallVectorImpl<FormatToken *>::const_iterator
2570            I = CommentsBeforeNextToken.begin(),
2571            E = CommentsBeforeNextToken.end();
2572        I != E; ++I) {
2573     // Line comments that belong to the same line comment section are put on the
2574     // same line since later we might want to reflow content between them.
2575     // Additional fine-grained breaking of line comment sections is controlled
2576     // by the class BreakableLineCommentSection in case it is desirable to keep
2577     // several line comment sections in the same unwrapped line.
2578     //
2579     // FIXME: Consider putting separate line comment sections as children to the
2580     // unwrapped line instead.
2581     (*I)->ContinuesLineCommentSection =
2582         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2583     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2584       addUnwrappedLine();
2585     pushToken(*I);
2586   }
2587   if (NewlineBeforeNext && JustComments)
2588     addUnwrappedLine();
2589   CommentsBeforeNextToken.clear();
2590 }
2591 
2592 void UnwrappedLineParser::nextToken(int LevelDifference) {
2593   if (eof())
2594     return;
2595   flushComments(isOnNewLine(*FormatTok));
2596   pushToken(FormatTok);
2597   FormatToken *Previous = FormatTok;
2598   if (Style.Language != FormatStyle::LK_JavaScript)
2599     readToken(LevelDifference);
2600   else
2601     readTokenWithJavaScriptASI();
2602   FormatTok->Previous = Previous;
2603 }
2604 
2605 void UnwrappedLineParser::distributeComments(
2606     const SmallVectorImpl<FormatToken *> &Comments,
2607     const FormatToken *NextTok) {
2608   // Whether or not a line comment token continues a line is controlled by
2609   // the method continuesLineCommentSection, with the following caveat:
2610   //
2611   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2612   // that each comment line from the trail is aligned with the next token, if
2613   // the next token exists. If a trail exists, the beginning of the maximal
2614   // trail is marked as a start of a new comment section.
2615   //
2616   // For example in this code:
2617   //
2618   // int a; // line about a
2619   //   // line 1 about b
2620   //   // line 2 about b
2621   //   int b;
2622   //
2623   // the two lines about b form a maximal trail, so there are two sections, the
2624   // first one consisting of the single comment "// line about a" and the
2625   // second one consisting of the next two comments.
2626   if (Comments.empty())
2627     return;
2628   bool ShouldPushCommentsInCurrentLine = true;
2629   bool HasTrailAlignedWithNextToken = false;
2630   unsigned StartOfTrailAlignedWithNextToken = 0;
2631   if (NextTok) {
2632     // We are skipping the first element intentionally.
2633     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2634       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2635         HasTrailAlignedWithNextToken = true;
2636         StartOfTrailAlignedWithNextToken = i;
2637       }
2638     }
2639   }
2640   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2641     FormatToken *FormatTok = Comments[i];
2642     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2643       FormatTok->ContinuesLineCommentSection = false;
2644     } else {
2645       FormatTok->ContinuesLineCommentSection =
2646           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2647     }
2648     if (!FormatTok->ContinuesLineCommentSection &&
2649         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2650       ShouldPushCommentsInCurrentLine = false;
2651     }
2652     if (ShouldPushCommentsInCurrentLine) {
2653       pushToken(FormatTok);
2654     } else {
2655       CommentsBeforeNextToken.push_back(FormatTok);
2656     }
2657   }
2658 }
2659 
2660 void UnwrappedLineParser::readToken(int LevelDifference) {
2661   SmallVector<FormatToken *, 1> Comments;
2662   do {
2663     FormatTok = Tokens->getNextToken();
2664     assert(FormatTok);
2665     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2666            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2667       distributeComments(Comments, FormatTok);
2668       Comments.clear();
2669       // If there is an unfinished unwrapped line, we flush the preprocessor
2670       // directives only after that unwrapped line was finished later.
2671       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2672       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2673       assert((LevelDifference >= 0 ||
2674               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2675              "LevelDifference makes Line->Level negative");
2676       Line->Level += LevelDifference;
2677       // Comments stored before the preprocessor directive need to be output
2678       // before the preprocessor directive, at the same level as the
2679       // preprocessor directive, as we consider them to apply to the directive.
2680       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
2681           PPBranchLevel > 0)
2682         Line->Level += PPBranchLevel;
2683       flushComments(isOnNewLine(*FormatTok));
2684       parsePPDirective();
2685     }
2686     while (FormatTok->Type == TT_ConflictStart ||
2687            FormatTok->Type == TT_ConflictEnd ||
2688            FormatTok->Type == TT_ConflictAlternative) {
2689       if (FormatTok->Type == TT_ConflictStart) {
2690         conditionalCompilationStart(/*Unreachable=*/false);
2691       } else if (FormatTok->Type == TT_ConflictAlternative) {
2692         conditionalCompilationAlternative();
2693       } else if (FormatTok->Type == TT_ConflictEnd) {
2694         conditionalCompilationEnd();
2695       }
2696       FormatTok = Tokens->getNextToken();
2697       FormatTok->MustBreakBefore = true;
2698     }
2699 
2700     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2701         !Line->InPPDirective) {
2702       continue;
2703     }
2704 
2705     if (!FormatTok->Tok.is(tok::comment)) {
2706       distributeComments(Comments, FormatTok);
2707       Comments.clear();
2708       return;
2709     }
2710 
2711     Comments.push_back(FormatTok);
2712   } while (!eof());
2713 
2714   distributeComments(Comments, nullptr);
2715   Comments.clear();
2716 }
2717 
2718 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2719   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2720   if (MustBreakBeforeNextToken) {
2721     Line->Tokens.back().Tok->MustBreakBefore = true;
2722     MustBreakBeforeNextToken = false;
2723   }
2724 }
2725 
2726 } // end namespace format
2727 } // end namespace clang
2728