1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <algorithm>
21 
22 #define DEBUG_TYPE "format-parser"
23 
24 namespace clang {
25 namespace format {
26 
27 class FormatTokenSource {
28 public:
29   virtual ~FormatTokenSource() {}
30   virtual FormatToken *getNextToken() = 0;
31 
32   virtual unsigned getPosition() = 0;
33   virtual FormatToken *setPosition(unsigned Position) = 0;
34 };
35 
36 namespace {
37 
38 class ScopedDeclarationState {
39 public:
40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
41                          bool MustBeDeclaration)
42       : Line(Line), Stack(Stack) {
43     Line.MustBeDeclaration = MustBeDeclaration;
44     Stack.push_back(MustBeDeclaration);
45   }
46   ~ScopedDeclarationState() {
47     Stack.pop_back();
48     if (!Stack.empty())
49       Line.MustBeDeclaration = Stack.back();
50     else
51       Line.MustBeDeclaration = true;
52   }
53 
54 private:
55   UnwrappedLine &Line;
56   std::vector<bool> &Stack;
57 };
58 
59 static bool isLineComment(const FormatToken &FormatTok) {
60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     FakeEOF.Tok.startToken();
86     FakeEOF.Tok.setKind(tok::eof);
87     TokenSource = this;
88     Line.Level = 0;
89     Line.InPPDirective = true;
90   }
91 
92   ~ScopedMacroState() override {
93     TokenSource = PreviousTokenSource;
94     ResetToken = Token;
95     Line.InPPDirective = false;
96     Line.Level = PreviousLineLevel;
97   }
98 
99   FormatToken *getNextToken() override {
100     // The \c UnwrappedLineParser guards against this by never calling
101     // \c getNextToken() after it has encountered the first eof token.
102     assert(!eof());
103     PreviousToken = Token;
104     Token = PreviousTokenSource->getNextToken();
105     if (eof())
106       return &FakeEOF;
107     return Token;
108   }
109 
110   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
111 
112   FormatToken *setPosition(unsigned Position) override {
113     PreviousToken = nullptr;
114     Token = PreviousTokenSource->setPosition(Position);
115     return Token;
116   }
117 
118 private:
119   bool eof() {
120     return Token && Token->HasUnescapedNewline &&
121            !continuesLineComment(*Token, PreviousToken,
122                                  /*MinColumnToken=*/PreviousToken);
123   }
124 
125   FormatToken FakeEOF;
126   UnwrappedLine &Line;
127   FormatTokenSource *&TokenSource;
128   FormatToken *&ResetToken;
129   unsigned PreviousLineLevel;
130   FormatTokenSource *PreviousTokenSource;
131 
132   FormatToken *Token;
133   FormatToken *PreviousToken;
134 };
135 
136 } // end anonymous namespace
137 
138 class ScopedLineState {
139 public:
140   ScopedLineState(UnwrappedLineParser &Parser,
141                   bool SwitchToPreprocessorLines = false)
142       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
143     if (SwitchToPreprocessorLines)
144       Parser.CurrentLines = &Parser.PreprocessorDirectives;
145     else if (!Parser.Line->Tokens.empty())
146       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
147     PreBlockLine = std::move(Parser.Line);
148     Parser.Line = llvm::make_unique<UnwrappedLine>();
149     Parser.Line->Level = PreBlockLine->Level;
150     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
151   }
152 
153   ~ScopedLineState() {
154     if (!Parser.Line->Tokens.empty()) {
155       Parser.addUnwrappedLine();
156     }
157     assert(Parser.Line->Tokens.empty());
158     Parser.Line = std::move(PreBlockLine);
159     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
160       Parser.MustBreakBeforeNextToken = true;
161     Parser.CurrentLines = OriginalLines;
162   }
163 
164 private:
165   UnwrappedLineParser &Parser;
166 
167   std::unique_ptr<UnwrappedLine> PreBlockLine;
168   SmallVectorImpl<UnwrappedLine> *OriginalLines;
169 };
170 
171 class CompoundStatementIndenter {
172 public:
173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
174                             const FormatStyle &Style, unsigned &LineLevel)
175       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
176     if (Style.BraceWrapping.AfterControlStatement)
177       Parser->addUnwrappedLine();
178     if (Style.BraceWrapping.IndentBraces)
179       ++LineLevel;
180   }
181   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
182 
183 private:
184   unsigned &LineLevel;
185   unsigned OldLineLevel;
186 };
187 
188 namespace {
189 
190 class IndexedTokenSource : public FormatTokenSource {
191 public:
192   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
193       : Tokens(Tokens), Position(-1) {}
194 
195   FormatToken *getNextToken() override {
196     ++Position;
197     return Tokens[Position];
198   }
199 
200   unsigned getPosition() override {
201     assert(Position >= 0);
202     return Position;
203   }
204 
205   FormatToken *setPosition(unsigned P) override {
206     Position = P;
207     return Tokens[Position];
208   }
209 
210   void reset() { Position = -1; }
211 
212 private:
213   ArrayRef<FormatToken *> Tokens;
214   int Position;
215 };
216 
217 } // end anonymous namespace
218 
219 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
220                                          const AdditionalKeywords &Keywords,
221                                          unsigned FirstStartColumn,
222                                          ArrayRef<FormatToken *> Tokens,
223                                          UnwrappedLineConsumer &Callback)
224     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
225       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
226       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
227       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
228       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
229                        ? IG_Rejected
230                        : IG_Inited),
231       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
232 
233 void UnwrappedLineParser::reset() {
234   PPBranchLevel = -1;
235   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
236                      ? IG_Rejected
237                      : IG_Inited;
238   IncludeGuardToken = nullptr;
239   Line.reset(new UnwrappedLine);
240   CommentsBeforeNextToken.clear();
241   FormatTok = nullptr;
242   MustBreakBeforeNextToken = false;
243   PreprocessorDirectives.clear();
244   CurrentLines = &Lines;
245   DeclarationScopeStack.clear();
246   PPStack.clear();
247   Line->FirstStartColumn = FirstStartColumn;
248 }
249 
250 void UnwrappedLineParser::parse() {
251   IndexedTokenSource TokenSource(AllTokens);
252   Line->FirstStartColumn = FirstStartColumn;
253   do {
254     LLVM_DEBUG(llvm::dbgs() << "----\n");
255     reset();
256     Tokens = &TokenSource;
257     TokenSource.reset();
258 
259     readToken();
260     parseFile();
261 
262     // If we found an include guard then all preprocessor directives (other than
263     // the guard) are over-indented by one.
264     if (IncludeGuard == IG_Found)
265       for (auto &Line : Lines)
266         if (Line.InPPDirective && Line.Level > 0)
267           --Line.Level;
268 
269     // Create line with eof token.
270     pushToken(FormatTok);
271     addUnwrappedLine();
272 
273     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
274                                                   E = Lines.end();
275          I != E; ++I) {
276       Callback.consumeUnwrappedLine(*I);
277     }
278     Callback.finishRun();
279     Lines.clear();
280     while (!PPLevelBranchIndex.empty() &&
281            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
282       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
283       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
284     }
285     if (!PPLevelBranchIndex.empty()) {
286       ++PPLevelBranchIndex.back();
287       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
288       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
289     }
290   } while (!PPLevelBranchIndex.empty());
291 }
292 
293 void UnwrappedLineParser::parseFile() {
294   // The top-level context in a file always has declarations, except for pre-
295   // processor directives and JavaScript files.
296   bool MustBeDeclaration =
297       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
298   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
299                                           MustBeDeclaration);
300   if (Style.Language == FormatStyle::LK_TextProto)
301     parseBracedList();
302   else
303     parseLevel(/*HasOpeningBrace=*/false);
304   // Make sure to format the remaining tokens.
305   //
306   // LK_TextProto is special since its top-level is parsed as the body of a
307   // braced list, which does not necessarily have natural line separators such
308   // as a semicolon. Comments after the last entry that have been determined to
309   // not belong to that line, as in:
310   //   key: value
311   //   // endfile comment
312   // do not have a chance to be put on a line of their own until this point.
313   // Here we add this newline before end-of-file comments.
314   if (Style.Language == FormatStyle::LK_TextProto &&
315       !CommentsBeforeNextToken.empty())
316     addUnwrappedLine();
317   flushComments(true);
318   addUnwrappedLine();
319 }
320 
321 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
322   bool SwitchLabelEncountered = false;
323   do {
324     tok::TokenKind kind = FormatTok->Tok.getKind();
325     if (FormatTok->Type == TT_MacroBlockBegin) {
326       kind = tok::l_brace;
327     } else if (FormatTok->Type == TT_MacroBlockEnd) {
328       kind = tok::r_brace;
329     }
330 
331     switch (kind) {
332     case tok::comment:
333       nextToken();
334       addUnwrappedLine();
335       break;
336     case tok::l_brace:
337       // FIXME: Add parameter whether this can happen - if this happens, we must
338       // be in a non-declaration context.
339       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
340         continue;
341       parseBlock(/*MustBeDeclaration=*/false);
342       addUnwrappedLine();
343       break;
344     case tok::r_brace:
345       if (HasOpeningBrace)
346         return;
347       nextToken();
348       addUnwrappedLine();
349       break;
350     case tok::kw_default: {
351       unsigned StoredPosition = Tokens->getPosition();
352       FormatToken *Next;
353       do {
354         Next = Tokens->getNextToken();
355       } while (Next && Next->is(tok::comment));
356       FormatTok = Tokens->setPosition(StoredPosition);
357       if (Next && Next->isNot(tok::colon)) {
358         // default not followed by ':' is not a case label; treat it like
359         // an identifier.
360         parseStructuralElement();
361         break;
362       }
363       // Else, if it is 'default:', fall through to the case handling.
364       LLVM_FALLTHROUGH;
365     }
366     case tok::kw_case:
367       if (Style.Language == FormatStyle::LK_JavaScript &&
368           Line->MustBeDeclaration) {
369         // A 'case: string' style field declaration.
370         parseStructuralElement();
371         break;
372       }
373       if (!SwitchLabelEncountered &&
374           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
375         ++Line->Level;
376       SwitchLabelEncountered = true;
377       parseStructuralElement();
378       break;
379     default:
380       parseStructuralElement();
381       break;
382     }
383   } while (!eof());
384 }
385 
386 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
387   // We'll parse forward through the tokens until we hit
388   // a closing brace or eof - note that getNextToken() will
389   // parse macros, so this will magically work inside macro
390   // definitions, too.
391   unsigned StoredPosition = Tokens->getPosition();
392   FormatToken *Tok = FormatTok;
393   const FormatToken *PrevTok = Tok->Previous;
394   // Keep a stack of positions of lbrace tokens. We will
395   // update information about whether an lbrace starts a
396   // braced init list or a different block during the loop.
397   SmallVector<FormatToken *, 8> LBraceStack;
398   assert(Tok->Tok.is(tok::l_brace));
399   do {
400     // Get next non-comment token.
401     FormatToken *NextTok;
402     unsigned ReadTokens = 0;
403     do {
404       NextTok = Tokens->getNextToken();
405       ++ReadTokens;
406     } while (NextTok->is(tok::comment));
407 
408     switch (Tok->Tok.getKind()) {
409     case tok::l_brace:
410       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
411         if (PrevTok->isOneOf(tok::colon, tok::less))
412           // A ':' indicates this code is in a type, or a braced list
413           // following a label in an object literal ({a: {b: 1}}).
414           // A '<' could be an object used in a comparison, but that is nonsense
415           // code (can never return true), so more likely it is a generic type
416           // argument (`X<{a: string; b: number}>`).
417           // The code below could be confused by semicolons between the
418           // individual members in a type member list, which would normally
419           // trigger BK_Block. In both cases, this must be parsed as an inline
420           // braced init.
421           Tok->BlockKind = BK_BracedInit;
422         else if (PrevTok->is(tok::r_paren))
423           // `) { }` can only occur in function or method declarations in JS.
424           Tok->BlockKind = BK_Block;
425       } else {
426         Tok->BlockKind = BK_Unknown;
427       }
428       LBraceStack.push_back(Tok);
429       break;
430     case tok::r_brace:
431       if (LBraceStack.empty())
432         break;
433       if (LBraceStack.back()->BlockKind == BK_Unknown) {
434         bool ProbablyBracedList = false;
435         if (Style.Language == FormatStyle::LK_Proto) {
436           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
437         } else {
438           // Using OriginalColumn to distinguish between ObjC methods and
439           // binary operators is a bit hacky.
440           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
441                                   NextTok->OriginalColumn == 0;
442 
443           // If there is a comma, semicolon or right paren after the closing
444           // brace, we assume this is a braced initializer list.  Note that
445           // regardless how we mark inner braces here, we will overwrite the
446           // BlockKind later if we parse a braced list (where all blocks
447           // inside are by default braced lists), or when we explicitly detect
448           // blocks (for example while parsing lambdas).
449           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
450           // braced list in JS.
451           ProbablyBracedList =
452               (Style.Language == FormatStyle::LK_JavaScript &&
453                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
454                                 Keywords.kw_as)) ||
455               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
456               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
457                                tok::r_paren, tok::r_square, tok::l_brace,
458                                tok::ellipsis) ||
459               (NextTok->is(tok::identifier) &&
460                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
461               (NextTok->is(tok::semi) &&
462                (!ExpectClassBody || LBraceStack.size() != 1)) ||
463               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
464           if (NextTok->is(tok::l_square)) {
465             // We can have an array subscript after a braced init
466             // list, but C++11 attributes are expected after blocks.
467             NextTok = Tokens->getNextToken();
468             ++ReadTokens;
469             ProbablyBracedList = NextTok->isNot(tok::l_square);
470           }
471         }
472         if (ProbablyBracedList) {
473           Tok->BlockKind = BK_BracedInit;
474           LBraceStack.back()->BlockKind = BK_BracedInit;
475         } else {
476           Tok->BlockKind = BK_Block;
477           LBraceStack.back()->BlockKind = BK_Block;
478         }
479       }
480       LBraceStack.pop_back();
481       break;
482     case tok::identifier:
483       if (!Tok->is(TT_StatementMacro))
484         break;
485       LLVM_FALLTHROUGH;
486     case tok::at:
487     case tok::semi:
488     case tok::kw_if:
489     case tok::kw_while:
490     case tok::kw_for:
491     case tok::kw_switch:
492     case tok::kw_try:
493     case tok::kw___try:
494       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
495         LBraceStack.back()->BlockKind = BK_Block;
496       break;
497     default:
498       break;
499     }
500     PrevTok = Tok;
501     Tok = NextTok;
502   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
503 
504   // Assume other blocks for all unclosed opening braces.
505   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
506     if (LBraceStack[i]->BlockKind == BK_Unknown)
507       LBraceStack[i]->BlockKind = BK_Block;
508   }
509 
510   FormatTok = Tokens->setPosition(StoredPosition);
511 }
512 
/// Folds the std::hash of \p v into the running hash value \p seed.
///
/// \param seed accumulated hash; updated in place.
/// \param v    value whose hash is mixed in.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
518 
519 size_t UnwrappedLineParser::computePPHash() const {
520   size_t h = 0;
521   for (const auto &i : PPStack) {
522     hash_combine(h, size_t(i.Kind));
523     hash_combine(h, i.Line);
524   }
525   return h;
526 }
527 
528 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
529                                      bool MunchSemi) {
530   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
531          "'{' or macro block token expected");
532   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
533   FormatTok->BlockKind = BK_Block;
534 
535   size_t PPStartHash = computePPHash();
536 
537   unsigned InitialLevel = Line->Level;
538   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
539 
540   if (MacroBlock && FormatTok->is(tok::l_paren))
541     parseParens();
542 
543   size_t NbPreprocessorDirectives =
544       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
545   addUnwrappedLine();
546   size_t OpeningLineIndex =
547       CurrentLines->empty()
548           ? (UnwrappedLine::kInvalidIndex)
549           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
550 
551   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
552                                           MustBeDeclaration);
553   if (AddLevel)
554     ++Line->Level;
555   parseLevel(/*HasOpeningBrace=*/true);
556 
557   if (eof())
558     return;
559 
560   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
561                  : !FormatTok->is(tok::r_brace)) {
562     Line->Level = InitialLevel;
563     FormatTok->BlockKind = BK_Block;
564     return;
565   }
566 
567   size_t PPEndHash = computePPHash();
568 
569   // Munch the closing brace.
570   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
571 
572   if (MacroBlock && FormatTok->is(tok::l_paren))
573     parseParens();
574 
575   if (MunchSemi && FormatTok->Tok.is(tok::semi))
576     nextToken();
577   Line->Level = InitialLevel;
578 
579   if (PPStartHash == PPEndHash) {
580     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
581     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
582       // Update the opening line to add the forward reference as well
583       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
584           CurrentLines->size() - 1;
585     }
586   }
587 }
588 
589 static bool isGoogScope(const UnwrappedLine &Line) {
590   // FIXME: Closure-library specific stuff should not be hard-coded but be
591   // configurable.
592   if (Line.Tokens.size() < 4)
593     return false;
594   auto I = Line.Tokens.begin();
595   if (I->Tok->TokenText != "goog")
596     return false;
597   ++I;
598   if (I->Tok->isNot(tok::period))
599     return false;
600   ++I;
601   if (I->Tok->TokenText != "scope")
602     return false;
603   ++I;
604   return I->Tok->is(tok::l_paren);
605 }
606 
607 static bool isIIFE(const UnwrappedLine &Line,
608                    const AdditionalKeywords &Keywords) {
609   // Look for the start of an immediately invoked anonymous function.
610   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
611   // This is commonly done in JavaScript to create a new, anonymous scope.
612   // Example: (function() { ... })()
613   if (Line.Tokens.size() < 3)
614     return false;
615   auto I = Line.Tokens.begin();
616   if (I->Tok->isNot(tok::l_paren))
617     return false;
618   ++I;
619   if (I->Tok->isNot(Keywords.kw_function))
620     return false;
621   ++I;
622   return I->Tok->is(tok::l_paren);
623 }
624 
625 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
626                                    const FormatToken &InitialToken) {
627   if (InitialToken.is(tok::kw_namespace))
628     return Style.BraceWrapping.AfterNamespace;
629   if (InitialToken.is(tok::kw_class))
630     return Style.BraceWrapping.AfterClass;
631   if (InitialToken.is(tok::kw_union))
632     return Style.BraceWrapping.AfterUnion;
633   if (InitialToken.is(tok::kw_struct))
634     return Style.BraceWrapping.AfterStruct;
635   return false;
636 }
637 
638 void UnwrappedLineParser::parseChildBlock() {
639   FormatTok->BlockKind = BK_Block;
640   nextToken();
641   {
642     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
643                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
644     ScopedLineState LineState(*this);
645     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
646                                             /*MustBeDeclaration=*/false);
647     Line->Level += SkipIndent ? 0 : 1;
648     parseLevel(/*HasOpeningBrace=*/true);
649     flushComments(isOnNewLine(*FormatTok));
650     Line->Level -= SkipIndent ? 0 : 1;
651   }
652   nextToken();
653 }
654 
655 void UnwrappedLineParser::parsePPDirective() {
656   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
657   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
658   nextToken();
659 
660   if (!FormatTok->Tok.getIdentifierInfo()) {
661     parsePPUnknown();
662     return;
663   }
664 
665   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
666   case tok::pp_define:
667     parsePPDefine();
668     return;
669   case tok::pp_if:
670     parsePPIf(/*IfDef=*/false);
671     break;
672   case tok::pp_ifdef:
673   case tok::pp_ifndef:
674     parsePPIf(/*IfDef=*/true);
675     break;
676   case tok::pp_else:
677     parsePPElse();
678     break;
679   case tok::pp_elif:
680     parsePPElIf();
681     break;
682   case tok::pp_endif:
683     parsePPEndIf();
684     break;
685   default:
686     parsePPUnknown();
687     break;
688   }
689 }
690 
691 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
692   size_t Line = CurrentLines->size();
693   if (CurrentLines == &PreprocessorDirectives)
694     Line += Lines.size();
695 
696   if (Unreachable ||
697       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
698     PPStack.push_back({PP_Unreachable, Line});
699   else
700     PPStack.push_back({PP_Conditional, Line});
701 }
702 
703 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
704   ++PPBranchLevel;
705   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
706   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
707     PPLevelBranchIndex.push_back(0);
708     PPLevelBranchCount.push_back(0);
709   }
710   PPChainBranchIndex.push(0);
711   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
712   conditionalCompilationCondition(Unreachable || Skip);
713 }
714 
715 void UnwrappedLineParser::conditionalCompilationAlternative() {
716   if (!PPStack.empty())
717     PPStack.pop_back();
718   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
719   if (!PPChainBranchIndex.empty())
720     ++PPChainBranchIndex.top();
721   conditionalCompilationCondition(
722       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
723       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
724 }
725 
726 void UnwrappedLineParser::conditionalCompilationEnd() {
727   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
728   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
729     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
730       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
731     }
732   }
733   // Guard against #endif's without #if.
734   if (PPBranchLevel > -1)
735     --PPBranchLevel;
736   if (!PPChainBranchIndex.empty())
737     PPChainBranchIndex.pop();
738   if (!PPStack.empty())
739     PPStack.pop_back();
740 }
741 
742 void UnwrappedLineParser::parsePPIf(bool IfDef) {
743   bool IfNDef = FormatTok->is(tok::pp_ifndef);
744   nextToken();
745   bool Unreachable = false;
746   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
747     Unreachable = true;
748   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
749     Unreachable = true;
750   conditionalCompilationStart(Unreachable);
751   FormatToken *IfCondition = FormatTok;
752   // If there's a #ifndef on the first line, and the only lines before it are
753   // comments, it could be an include guard.
754   bool MaybeIncludeGuard = IfNDef;
755   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
756     for (auto &Line : Lines) {
757       if (!Line.Tokens.front().Tok->is(tok::comment)) {
758         MaybeIncludeGuard = false;
759         IncludeGuard = IG_Rejected;
760         break;
761       }
762     }
763   --PPBranchLevel;
764   parsePPUnknown();
765   ++PPBranchLevel;
766   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
767     IncludeGuard = IG_IfNdefed;
768     IncludeGuardToken = IfCondition;
769   }
770 }
771 
772 void UnwrappedLineParser::parsePPElse() {
773   // If a potential include guard has an #else, it's not an include guard.
774   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
775     IncludeGuard = IG_Rejected;
776   conditionalCompilationAlternative();
777   if (PPBranchLevel > -1)
778     --PPBranchLevel;
779   parsePPUnknown();
780   ++PPBranchLevel;
781 }
782 
783 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
784 
785 void UnwrappedLineParser::parsePPEndIf() {
786   conditionalCompilationEnd();
787   parsePPUnknown();
788   // If the #endif of a potential include guard is the last thing in the file,
789   // then we found an include guard.
790   unsigned TokenPosition = Tokens->getPosition();
791   FormatToken *PeekNext = AllTokens[TokenPosition];
792   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
793       PeekNext->is(tok::eof) &&
794       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
795     IncludeGuard = IG_Found;
796 }
797 
798 void UnwrappedLineParser::parsePPDefine() {
799   nextToken();
800 
801   if (FormatTok->Tok.getKind() != tok::identifier) {
802     IncludeGuard = IG_Rejected;
803     IncludeGuardToken = nullptr;
804     parsePPUnknown();
805     return;
806   }
807 
808   if (IncludeGuard == IG_IfNdefed &&
809       IncludeGuardToken->TokenText == FormatTok->TokenText) {
810     IncludeGuard = IG_Defined;
811     IncludeGuardToken = nullptr;
812     for (auto &Line : Lines) {
813       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
814         IncludeGuard = IG_Rejected;
815         break;
816       }
817     }
818   }
819 
820   nextToken();
821   if (FormatTok->Tok.getKind() == tok::l_paren &&
822       FormatTok->WhitespaceRange.getBegin() ==
823           FormatTok->WhitespaceRange.getEnd()) {
824     parseParens();
825   }
826   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
827     Line->Level += PPBranchLevel + 1;
828   addUnwrappedLine();
829   ++Line->Level;
830 
831   // Errors during a preprocessor directive can only affect the layout of the
832   // preprocessor directive, and thus we ignore them. An alternative approach
833   // would be to use the same approach we use on the file level (no
834   // re-indentation if there was a structural error) within the macro
835   // definition.
836   parseFile();
837 }
838 
839 void UnwrappedLineParser::parsePPUnknown() {
840   do {
841     nextToken();
842   } while (!eof());
843   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
844     Line->Level += PPBranchLevel + 1;
845   addUnwrappedLine();
846 }
847 
848 // Here we blacklist certain tokens that are not usually the first token in an
849 // unwrapped line. This is used in attempt to distinguish macro calls without
850 // trailing semicolons from other constructs split to several lines.
851 static bool tokenCanStartNewLine(const clang::Token &Tok) {
852   // Semicolon can be a null-statement, l_square can be a start of a macro or
853   // a C++11 attribute, but this doesn't seem to be common.
854   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
855          Tok.isNot(tok::l_square) &&
856          // Tokens that can only be used as binary operators and a part of
857          // overloaded operator names.
858          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
859          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
860          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
861          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
862          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
863          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
864          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
865          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
866          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
867          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
868          Tok.isNot(tok::lesslessequal) &&
869          // Colon is used in labels, base class lists, initializer lists,
870          // range-based for loops, ternary operator, but should never be the
871          // first token in an unwrapped line.
872          Tok.isNot(tok::colon) &&
873          // 'noexcept' is a trailing annotation.
874          Tok.isNot(tok::kw_noexcept);
875 }
876 
877 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
878                           const FormatToken *FormatTok) {
879   // FIXME: This returns true for C/C++ keywords like 'struct'.
880   return FormatTok->is(tok::identifier) &&
881          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
882           !FormatTok->isOneOf(
883               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
884               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
885               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
886               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
887               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
888               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
889               Keywords.kw_from));
890 }
891 
892 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
893                                  const FormatToken *FormatTok) {
894   return FormatTok->Tok.isLiteral() ||
895          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
896          mustBeJSIdent(Keywords, FormatTok);
897 }
898 
899 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
900 // when encountered after a value (see mustBeJSIdentOrValue).
901 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
902                            const FormatToken *FormatTok) {
903   return FormatTok->isOneOf(
904       tok::kw_return, Keywords.kw_yield,
905       // conditionals
906       tok::kw_if, tok::kw_else,
907       // loops
908       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
909       // switch/case
910       tok::kw_switch, tok::kw_case,
911       // exceptions
912       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
913       // declaration
914       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
915       Keywords.kw_async, Keywords.kw_function,
916       // import/export
917       Keywords.kw_import, tok::kw_export);
918 }
919 
920 // readTokenWithJavaScriptASI reads the next token and terminates the current
921 // line if JavaScript Automatic Semicolon Insertion must
922 // happen between the current token and the next token.
923 //
924 // This method is conservative - it cannot cover all edge cases of JavaScript,
925 // but only aims to correctly handle certain well known cases. It *must not*
926 // return true in speculative cases.
927 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
928   FormatToken *Previous = FormatTok;
929   readToken();
930   FormatToken *Next = FormatTok;
931 
932   bool IsOnSameLine =
933       CommentsBeforeNextToken.empty()
934           ? Next->NewlinesBefore == 0
935           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
936   if (IsOnSameLine)
937     return;
938 
939   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
940   bool PreviousStartsTemplateExpr =
941       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
942   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
943     // If the line contains an '@' sign, the previous token might be an
944     // annotation, which can precede another identifier/value.
945     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
946                               [](UnwrappedLineNode &LineNode) {
947                                 return LineNode.Tok->is(tok::at);
948                               }) != Line->Tokens.end();
949     if (HasAt)
950       return;
951   }
952   if (Next->is(tok::exclaim) && PreviousMustBeValue)
953     return addUnwrappedLine();
954   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
955   bool NextEndsTemplateExpr =
956       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
957   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
958       (PreviousMustBeValue ||
959        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
960                          tok::minusminus)))
961     return addUnwrappedLine();
962   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
963       isJSDeclOrStmt(Keywords, Next))
964     return addUnwrappedLine();
965 }
966 
// Parses a single structural element beginning at the current token.
//
// A first switch dispatches on the leading token: constructs that are fully
// handled by a dedicated parse routine return from there. Cases that `break`
// out of that switch continue in the token loop below, which consumes tokens
// one construct at a time until something terminates the element (e.g. ';',
// '}', a block body) or eof() is reached.
//
// The current token must not be '{' on entry (asserted).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include directive, optionally followed by a string literal,
  // forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Walk to the matching '}' (or eof). Every token in between is marked
      // Finalized; both braces are typed TT_InlineASMBrace.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript the keyword is simply consumed; otherwise it is
    // parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern' + string literal + '{': parse the braces as a declaration
    // block. Without AfterExternBlock the block is parsed with AddLevel=false.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import' ['public'] string-literal [';'] forms one line. If no
      // string literal follows, return without emitting a line here.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // C++: one of the signals/slots keywords followed by ':' ends the line
    // right after the colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic token loop: each iteration handles the construct that starts at
  // the current token and either returns (element finished) or breaks out of
  // the inner switch to look at the next token.
  do {
    // Captured before any token is consumed in this iteration.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // A typedef directly followed by one of the NS_/CF_ ENUM/OPTIONS macros
      // is parsed as an enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the element; it is consumed as part of this line.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // A closing brace ends the element but is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      // Consume the operator token itself so that it is not dispatched on by
      // the next iteration.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', optionally followed by an identifier or simple type specifier,
      // a parenthesized list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then restore the token position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      // Text holds the identifier's spelling, saved before it is consumed.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        // If comments precede the next token, the first comment's newline
        // count decides whether the identifier is followed by a line break.
        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Treat the identifier as a macro only if it is all upper case, is
        // function-like or at least five characters long, is followed by a
        // line break, and the next token can plausibly start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list that is
      // closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1393 
1394 bool UnwrappedLineParser::tryToParseLambda() {
1395   if (!Style.isCpp()) {
1396     nextToken();
1397     return false;
1398   }
1399   assert(FormatTok->is(tok::l_square));
1400   FormatToken &LSquare = *FormatTok;
1401   if (!tryToParseLambdaIntroducer())
1402     return false;
1403 
1404   bool SeenArrow = false;
1405 
1406   while (FormatTok->isNot(tok::l_brace)) {
1407     if (FormatTok->isSimpleTypeSpecifier()) {
1408       nextToken();
1409       continue;
1410     }
1411     switch (FormatTok->Tok.getKind()) {
1412     case tok::l_brace:
1413       break;
1414     case tok::l_paren:
1415       parseParens();
1416       break;
1417     case tok::amp:
1418     case tok::star:
1419     case tok::kw_const:
1420     case tok::comma:
1421     case tok::less:
1422     case tok::greater:
1423     case tok::identifier:
1424     case tok::numeric_constant:
1425     case tok::coloncolon:
1426     case tok::kw_mutable:
1427     case tok::kw_noexcept:
1428       nextToken();
1429       break;
1430     // Specialization of a template with an integer parameter can contain
1431     // arithmetic, logical, comparison and ternary operators.
1432     //
1433     // FIXME: This also accepts sequences of operators that are not in the scope
1434     // of a template argument list.
1435     //
1436     // In a C++ lambda a template type can only occur after an arrow. We use
1437     // this as an heuristic to distinguish between Objective-C expressions
1438     // followed by an `a->b` expression, such as:
1439     // ([obj func:arg] + a->b)
1440     // Otherwise the code below would parse as a lambda.
1441     case tok::plus:
1442     case tok::minus:
1443     case tok::exclaim:
1444     case tok::tilde:
1445     case tok::slash:
1446     case tok::percent:
1447     case tok::lessless:
1448     case tok::pipe:
1449     case tok::pipepipe:
1450     case tok::ampamp:
1451     case tok::caret:
1452     case tok::equalequal:
1453     case tok::exclaimequal:
1454     case tok::greaterequal:
1455     case tok::lessequal:
1456     case tok::question:
1457     case tok::colon:
1458     case tok::kw_true:
1459     case tok::kw_false:
1460       if (SeenArrow) {
1461         nextToken();
1462         break;
1463       }
1464       return true;
1465     case tok::arrow:
1466       // This might or might not actually be a lambda arrow (this could be an
1467       // ObjC method invocation followed by a dereferencing arrow). We might
1468       // reset this back to TT_Unknown in TokenAnnotator.
1469       FormatTok->Type = TT_LambdaArrow;
1470       SeenArrow = true;
1471       nextToken();
1472       break;
1473     default:
1474       return true;
1475     }
1476   }
1477   LSquare.Type = TT_LambdaLSquare;
1478   parseChildBlock();
1479   return true;
1480 }
1481 
1482 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1483   const FormatToken *Previous = FormatTok->Previous;
1484   if (Previous &&
1485       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1486                          tok::kw_delete, tok::l_square) ||
1487        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1488        Previous->isSimpleTypeSpecifier())) {
1489     nextToken();
1490     return false;
1491   }
1492   nextToken();
1493   if (FormatTok->is(tok::l_square)) {
1494     return false;
1495   }
1496   parseSquare(/*LambdaIntroducer=*/true);
1497   return true;
1498 }
1499 
1500 void UnwrappedLineParser::tryToParseJSFunction() {
1501   assert(FormatTok->is(Keywords.kw_function) ||
1502          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1503   if (FormatTok->is(Keywords.kw_async))
1504     nextToken();
1505   // Consume "function".
1506   nextToken();
1507 
1508   // Consume * (generator function). Treat it like C++'s overloaded operators.
1509   if (FormatTok->is(tok::star)) {
1510     FormatTok->Type = TT_OverloadedOperator;
1511     nextToken();
1512   }
1513 
1514   // Consume function name.
1515   if (FormatTok->is(tok::identifier))
1516     nextToken();
1517 
1518   if (FormatTok->isNot(tok::l_paren))
1519     return;
1520 
1521   // Parse formal parameter list.
1522   parseParens();
1523 
1524   if (FormatTok->is(tok::colon)) {
1525     // Parse a type definition.
1526     nextToken();
1527 
1528     // Eat the type declaration. For braced inline object types, balance braces,
1529     // otherwise just parse until finding an l_brace for the function body.
1530     if (FormatTok->is(tok::l_brace))
1531       tryToParseBracedList();
1532     else
1533       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1534         nextToken();
1535   }
1536 
1537   if (FormatTok->is(tok::semi))
1538     return;
1539 
1540   parseChildBlock();
1541 }
1542 
1543 bool UnwrappedLineParser::tryToParseBracedList() {
1544   if (FormatTok->BlockKind == BK_Unknown)
1545     calculateBraceTypes();
1546   assert(FormatTok->BlockKind != BK_Unknown);
1547   if (FormatTok->BlockKind == BK_Block)
1548     return false;
1549   nextToken();
1550   parseBracedList();
1551   return true;
1552 }
1553 
1554 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1555                                           tok::TokenKind ClosingBraceKind) {
1556   bool HasError = false;
1557 
1558   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1559   // replace this by using parseAssigmentExpression() inside.
1560   do {
1561     if (Style.Language == FormatStyle::LK_JavaScript) {
1562       if (FormatTok->is(Keywords.kw_function) ||
1563           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1564         tryToParseJSFunction();
1565         continue;
1566       }
1567       if (FormatTok->is(TT_JsFatArrow)) {
1568         nextToken();
1569         // Fat arrows can be followed by simple expressions or by child blocks
1570         // in curly braces.
1571         if (FormatTok->is(tok::l_brace)) {
1572           parseChildBlock();
1573           continue;
1574         }
1575       }
1576       if (FormatTok->is(tok::l_brace)) {
1577         // Could be a method inside of a braced list `{a() { return 1; }}`.
1578         if (tryToParseBracedList())
1579           continue;
1580         parseChildBlock();
1581       }
1582     }
1583     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1584       nextToken();
1585       return !HasError;
1586     }
1587     switch (FormatTok->Tok.getKind()) {
1588     case tok::caret:
1589       nextToken();
1590       if (FormatTok->is(tok::l_brace)) {
1591         parseChildBlock();
1592       }
1593       break;
1594     case tok::l_square:
1595       tryToParseLambda();
1596       break;
1597     case tok::l_paren:
1598       parseParens();
1599       // JavaScript can just have free standing methods and getters/setters in
1600       // object literals. Detect them by a "{" following ")".
1601       if (Style.Language == FormatStyle::LK_JavaScript) {
1602         if (FormatTok->is(tok::l_brace))
1603           parseChildBlock();
1604         break;
1605       }
1606       break;
1607     case tok::l_brace:
1608       // Assume there are no blocks inside a braced init list apart
1609       // from the ones we explicitly parse out (like lambdas).
1610       FormatTok->BlockKind = BK_BracedInit;
1611       nextToken();
1612       parseBracedList();
1613       break;
1614     case tok::less:
1615       if (Style.Language == FormatStyle::LK_Proto) {
1616         nextToken();
1617         parseBracedList(/*ContinueOnSemicolons=*/false,
1618                         /*ClosingBraceKind=*/tok::greater);
1619       } else {
1620         nextToken();
1621       }
1622       break;
1623     case tok::semi:
1624       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1625       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1626       // used for error recovery if we have otherwise determined that this is
1627       // a braced list.
1628       if (Style.Language == FormatStyle::LK_JavaScript) {
1629         nextToken();
1630         break;
1631       }
1632       HasError = true;
1633       if (!ContinueOnSemicolons)
1634         return !HasError;
1635       nextToken();
1636       break;
1637     case tok::comma:
1638       nextToken();
1639       break;
1640     default:
1641       nextToken();
1642       break;
1643     }
1644   } while (!eof());
1645   return false;
1646 }
1647 
1648 void UnwrappedLineParser::parseParens() {
1649   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1650   nextToken();
1651   do {
1652     switch (FormatTok->Tok.getKind()) {
1653     case tok::l_paren:
1654       parseParens();
1655       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1656         parseChildBlock();
1657       break;
1658     case tok::r_paren:
1659       nextToken();
1660       return;
1661     case tok::r_brace:
1662       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1663       return;
1664     case tok::l_square:
1665       tryToParseLambda();
1666       break;
1667     case tok::l_brace:
1668       if (!tryToParseBracedList())
1669         parseChildBlock();
1670       break;
1671     case tok::at:
1672       nextToken();
1673       if (FormatTok->Tok.is(tok::l_brace)) {
1674         nextToken();
1675         parseBracedList();
1676       }
1677       break;
1678     case tok::kw_class:
1679       if (Style.Language == FormatStyle::LK_JavaScript)
1680         parseRecord(/*ParseAsExpr=*/true);
1681       else
1682         nextToken();
1683       break;
1684     case tok::identifier:
1685       if (Style.Language == FormatStyle::LK_JavaScript &&
1686           (FormatTok->is(Keywords.kw_function) ||
1687            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1688         tryToParseJSFunction();
1689       else
1690         nextToken();
1691       break;
1692     default:
1693       nextToken();
1694       break;
1695     }
1696   } while (!eof());
1697 }
1698 
1699 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1700   if (!LambdaIntroducer) {
1701     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1702     if (tryToParseLambda())
1703       return;
1704   }
1705   do {
1706     switch (FormatTok->Tok.getKind()) {
1707     case tok::l_paren:
1708       parseParens();
1709       break;
1710     case tok::r_square:
1711       nextToken();
1712       return;
1713     case tok::r_brace:
1714       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1715       return;
1716     case tok::l_square:
1717       parseSquare();
1718       break;
1719     case tok::l_brace: {
1720       if (!tryToParseBracedList())
1721         parseChildBlock();
1722       break;
1723     }
1724     case tok::at:
1725       nextToken();
1726       if (FormatTok->Tok.is(tok::l_brace)) {
1727         nextToken();
1728         parseBracedList();
1729       }
1730       break;
1731     default:
1732       nextToken();
1733       break;
1734     }
1735   } while (!eof());
1736 }
1737 
1738 void UnwrappedLineParser::parseIfThenElse() {
1739   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1740   nextToken();
1741   if (FormatTok->Tok.is(tok::kw_constexpr))
1742     nextToken();
1743   if (FormatTok->Tok.is(tok::l_paren))
1744     parseParens();
1745   bool NeedsUnwrappedLine = false;
1746   if (FormatTok->Tok.is(tok::l_brace)) {
1747     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1748     parseBlock(/*MustBeDeclaration=*/false);
1749     if (Style.BraceWrapping.BeforeElse)
1750       addUnwrappedLine();
1751     else
1752       NeedsUnwrappedLine = true;
1753   } else {
1754     addUnwrappedLine();
1755     ++Line->Level;
1756     parseStructuralElement();
1757     --Line->Level;
1758   }
1759   if (FormatTok->Tok.is(tok::kw_else)) {
1760     nextToken();
1761     if (FormatTok->Tok.is(tok::l_brace)) {
1762       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1763       parseBlock(/*MustBeDeclaration=*/false);
1764       addUnwrappedLine();
1765     } else if (FormatTok->Tok.is(tok::kw_if)) {
1766       parseIfThenElse();
1767     } else {
1768       addUnwrappedLine();
1769       ++Line->Level;
1770       parseStructuralElement();
1771       if (FormatTok->is(tok::eof))
1772         addUnwrappedLine();
1773       --Line->Level;
1774     }
1775   } else if (NeedsUnwrappedLine) {
1776     addUnwrappedLine();
1777   }
1778 }
1779 
1780 void UnwrappedLineParser::parseTryCatch() {
1781   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1782   nextToken();
1783   bool NeedsUnwrappedLine = false;
1784   if (FormatTok->is(tok::colon)) {
1785     // We are in a function try block, what comes is an initializer list.
1786     nextToken();
1787     while (FormatTok->is(tok::identifier)) {
1788       nextToken();
1789       if (FormatTok->is(tok::l_paren))
1790         parseParens();
1791       if (FormatTok->is(tok::comma))
1792         nextToken();
1793     }
1794   }
1795   // Parse try with resource.
1796   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1797     parseParens();
1798   }
1799   if (FormatTok->is(tok::l_brace)) {
1800     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1801     parseBlock(/*MustBeDeclaration=*/false);
1802     if (Style.BraceWrapping.BeforeCatch) {
1803       addUnwrappedLine();
1804     } else {
1805       NeedsUnwrappedLine = true;
1806     }
1807   } else if (!FormatTok->is(tok::kw_catch)) {
1808     // The C++ standard requires a compound-statement after a try.
1809     // If there's none, we try to assume there's a structuralElement
1810     // and try to continue.
1811     addUnwrappedLine();
1812     ++Line->Level;
1813     parseStructuralElement();
1814     --Line->Level;
1815   }
1816   while (1) {
1817     if (FormatTok->is(tok::at))
1818       nextToken();
1819     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1820                              tok::kw___finally) ||
1821           ((Style.Language == FormatStyle::LK_Java ||
1822             Style.Language == FormatStyle::LK_JavaScript) &&
1823            FormatTok->is(Keywords.kw_finally)) ||
1824           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1825            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1826       break;
1827     nextToken();
1828     while (FormatTok->isNot(tok::l_brace)) {
1829       if (FormatTok->is(tok::l_paren)) {
1830         parseParens();
1831         continue;
1832       }
1833       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1834         return;
1835       nextToken();
1836     }
1837     NeedsUnwrappedLine = false;
1838     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1839     parseBlock(/*MustBeDeclaration=*/false);
1840     if (Style.BraceWrapping.BeforeCatch)
1841       addUnwrappedLine();
1842     else
1843       NeedsUnwrappedLine = true;
1844   }
1845   if (NeedsUnwrappedLine)
1846     addUnwrappedLine();
1847 }
1848 
1849 void UnwrappedLineParser::parseNamespace() {
1850   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1851 
1852   const FormatToken &InitialToken = *FormatTok;
1853   nextToken();
1854   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1855     nextToken();
1856   if (FormatTok->Tok.is(tok::l_brace)) {
1857     if (ShouldBreakBeforeBrace(Style, InitialToken))
1858       addUnwrappedLine();
1859 
1860     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1861                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1862                      DeclarationScopeStack.size() > 1);
1863     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1864     // Munch the semicolon after a namespace. This is more common than one would
1865     // think. Puttin the semicolon into its own line is very ugly.
1866     if (FormatTok->Tok.is(tok::semi))
1867       nextToken();
1868     addUnwrappedLine();
1869   }
1870   // FIXME: Add error handling.
1871 }
1872 
1873 void UnwrappedLineParser::parseNew() {
1874   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1875   nextToken();
1876   if (Style.Language != FormatStyle::LK_Java)
1877     return;
1878 
1879   // In Java, we can parse everything up to the parens, which aren't optional.
1880   do {
1881     // There should not be a ;, { or } before the new's open paren.
1882     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1883       return;
1884 
1885     // Consume the parens.
1886     if (FormatTok->is(tok::l_paren)) {
1887       parseParens();
1888 
1889       // If there is a class body of an anonymous class, consume that as child.
1890       if (FormatTok->is(tok::l_brace))
1891         parseChildBlock();
1892       return;
1893     }
1894     nextToken();
1895   } while (!eof());
1896 }
1897 
1898 void UnwrappedLineParser::parseForOrWhileLoop() {
1899   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1900          "'for', 'while' or foreach macro expected");
1901   nextToken();
1902   // JS' for await ( ...
1903   if (Style.Language == FormatStyle::LK_JavaScript &&
1904       FormatTok->is(Keywords.kw_await))
1905     nextToken();
1906   if (FormatTok->Tok.is(tok::l_paren))
1907     parseParens();
1908   if (FormatTok->Tok.is(tok::l_brace)) {
1909     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1910     parseBlock(/*MustBeDeclaration=*/false);
1911     addUnwrappedLine();
1912   } else {
1913     addUnwrappedLine();
1914     ++Line->Level;
1915     parseStructuralElement();
1916     --Line->Level;
1917   }
1918 }
1919 
1920 void UnwrappedLineParser::parseDoWhile() {
1921   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1922   nextToken();
1923   if (FormatTok->Tok.is(tok::l_brace)) {
1924     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1925     parseBlock(/*MustBeDeclaration=*/false);
1926     if (Style.BraceWrapping.IndentBraces)
1927       addUnwrappedLine();
1928   } else {
1929     addUnwrappedLine();
1930     ++Line->Level;
1931     parseStructuralElement();
1932     --Line->Level;
1933   }
1934 
1935   // FIXME: Add error handling.
1936   if (!FormatTok->Tok.is(tok::kw_while)) {
1937     addUnwrappedLine();
1938     return;
1939   }
1940 
1941   nextToken();
1942   parseStructuralElement();
1943 }
1944 
1945 void UnwrappedLineParser::parseLabel() {
1946   nextToken();
1947   unsigned OldLineLevel = Line->Level;
1948   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1949     --Line->Level;
1950   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1951     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1952     parseBlock(/*MustBeDeclaration=*/false);
1953     if (FormatTok->Tok.is(tok::kw_break)) {
1954       if (Style.BraceWrapping.AfterControlStatement)
1955         addUnwrappedLine();
1956       parseStructuralElement();
1957     }
1958     addUnwrappedLine();
1959   } else {
1960     if (FormatTok->is(tok::semi))
1961       nextToken();
1962     addUnwrappedLine();
1963   }
1964   Line->Level = OldLineLevel;
1965   if (FormatTok->isNot(tok::l_brace)) {
1966     parseStructuralElement();
1967     addUnwrappedLine();
1968   }
1969 }
1970 
1971 void UnwrappedLineParser::parseCaseLabel() {
1972   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1973   // FIXME: fix handling of complex expressions here.
1974   do {
1975     nextToken();
1976   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1977   parseLabel();
1978 }
1979 
1980 void UnwrappedLineParser::parseSwitch() {
1981   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1982   nextToken();
1983   if (FormatTok->Tok.is(tok::l_paren))
1984     parseParens();
1985   if (FormatTok->Tok.is(tok::l_brace)) {
1986     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1987     parseBlock(/*MustBeDeclaration=*/false);
1988     addUnwrappedLine();
1989   } else {
1990     addUnwrappedLine();
1991     ++Line->Level;
1992     parseStructuralElement();
1993     --Line->Level;
1994   }
1995 }
1996 
1997 void UnwrappedLineParser::parseAccessSpecifier() {
1998   nextToken();
1999   // Understand Qt's slots.
2000   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2001     nextToken();
2002   // Otherwise, we don't know what it is, and we'd better keep the next token.
2003   if (FormatTok->Tok.is(tok::colon))
2004     nextToken();
2005   addUnwrappedLine();
2006 }
2007 
2008 bool UnwrappedLineParser::parseEnum() {
2009   // Won't be 'enum' for NS_ENUMs.
2010   if (FormatTok->Tok.is(tok::kw_enum))
2011     nextToken();
2012 
2013   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2014   // declarations. An "enum" keyword followed by a colon would be a syntax
2015   // error and thus assume it is just an identifier.
2016   if (Style.Language == FormatStyle::LK_JavaScript &&
2017       FormatTok->isOneOf(tok::colon, tok::question))
2018     return false;
2019 
2020   // Eat up enum class ...
2021   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2022     nextToken();
2023 
2024   while (FormatTok->Tok.getIdentifierInfo() ||
2025          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2026                             tok::greater, tok::comma, tok::question)) {
2027     nextToken();
2028     // We can have macros or attributes in between 'enum' and the enum name.
2029     if (FormatTok->is(tok::l_paren))
2030       parseParens();
2031     if (FormatTok->is(tok::identifier)) {
2032       nextToken();
2033       // If there are two identifiers in a row, this is likely an elaborate
2034       // return type. In Java, this can be "implements", etc.
2035       if (Style.isCpp() && FormatTok->is(tok::identifier))
2036         return false;
2037     }
2038   }
2039 
2040   // Just a declaration or something is wrong.
2041   if (FormatTok->isNot(tok::l_brace))
2042     return true;
2043   FormatTok->BlockKind = BK_Block;
2044 
2045   if (Style.Language == FormatStyle::LK_Java) {
2046     // Java enums are different.
2047     parseJavaEnumBody();
2048     return true;
2049   }
2050   if (Style.Language == FormatStyle::LK_Proto) {
2051     parseBlock(/*MustBeDeclaration=*/true);
2052     return true;
2053   }
2054 
2055   // Parse enum body.
2056   nextToken();
2057   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2058   if (HasError) {
2059     if (FormatTok->is(tok::semi))
2060       nextToken();
2061     addUnwrappedLine();
2062   }
2063   return true;
2064 
2065   // There is no addUnwrappedLine() here so that we fall through to parsing a
2066   // structural element afterwards. Thus, in "enum A {} n, m;",
2067   // "} n, m;" will end up in one unwrapped line.
2068 }
2069 
2070 void UnwrappedLineParser::parseJavaEnumBody() {
2071   // Determine whether the enum is simple, i.e. does not have a semicolon or
2072   // constants with class bodies. Simple enums can be formatted like braced
2073   // lists, contracted to a single line, etc.
2074   unsigned StoredPosition = Tokens->getPosition();
2075   bool IsSimple = true;
2076   FormatToken *Tok = Tokens->getNextToken();
2077   while (Tok) {
2078     if (Tok->is(tok::r_brace))
2079       break;
2080     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2081       IsSimple = false;
2082       break;
2083     }
2084     // FIXME: This will also mark enums with braces in the arguments to enum
2085     // constants as "not simple". This is probably fine in practice, though.
2086     Tok = Tokens->getNextToken();
2087   }
2088   FormatTok = Tokens->setPosition(StoredPosition);
2089 
2090   if (IsSimple) {
2091     nextToken();
2092     parseBracedList();
2093     addUnwrappedLine();
2094     return;
2095   }
2096 
2097   // Parse the body of a more complex enum.
2098   // First add a line for everything up to the "{".
2099   nextToken();
2100   addUnwrappedLine();
2101   ++Line->Level;
2102 
2103   // Parse the enum constants.
2104   while (FormatTok) {
2105     if (FormatTok->is(tok::l_brace)) {
2106       // Parse the constant's class body.
2107       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2108                  /*MunchSemi=*/false);
2109     } else if (FormatTok->is(tok::l_paren)) {
2110       parseParens();
2111     } else if (FormatTok->is(tok::comma)) {
2112       nextToken();
2113       addUnwrappedLine();
2114     } else if (FormatTok->is(tok::semi)) {
2115       nextToken();
2116       addUnwrappedLine();
2117       break;
2118     } else if (FormatTok->is(tok::r_brace)) {
2119       addUnwrappedLine();
2120       break;
2121     } else {
2122       nextToken();
2123     }
2124   }
2125 
2126   // Parse the class body after the enum's ";" if any.
2127   parseLevel(/*HasOpeningBrace=*/true);
2128   nextToken();
2129   --Line->Level;
2130   addUnwrappedLine();
2131 }
2132 
2133 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2134   const FormatToken &InitialToken = *FormatTok;
2135   nextToken();
2136 
2137   // The actual identifier can be a nested name specifier, and in macros
2138   // it is often token-pasted.
2139   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2140                             tok::kw___attribute, tok::kw___declspec,
2141                             tok::kw_alignas) ||
2142          ((Style.Language == FormatStyle::LK_Java ||
2143            Style.Language == FormatStyle::LK_JavaScript) &&
2144           FormatTok->isOneOf(tok::period, tok::comma))) {
2145     if (Style.Language == FormatStyle::LK_JavaScript &&
2146         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2147       // JavaScript/TypeScript supports inline object types in
2148       // extends/implements positions:
2149       //     class Foo implements {bar: number} { }
2150       nextToken();
2151       if (FormatTok->is(tok::l_brace)) {
2152         tryToParseBracedList();
2153         continue;
2154       }
2155     }
2156     bool IsNonMacroIdentifier =
2157         FormatTok->is(tok::identifier) &&
2158         FormatTok->TokenText != FormatTok->TokenText.upper();
2159     nextToken();
2160     // We can have macros or attributes in between 'class' and the class name.
2161     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2162       parseParens();
2163   }
2164 
2165   // Note that parsing away template declarations here leads to incorrectly
2166   // accepting function declarations as record declarations.
2167   // In general, we cannot solve this problem. Consider:
2168   // class A<int> B() {}
2169   // which can be a function definition or a class definition when B() is a
2170   // macro. If we find enough real-world cases where this is a problem, we
2171   // can parse for the 'template' keyword in the beginning of the statement,
2172   // and thus rule out the record production in case there is no template
2173   // (this would still leave us with an ambiguity between template function
2174   // and class declarations).
2175   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2176     while (!eof()) {
2177       if (FormatTok->is(tok::l_brace)) {
2178         calculateBraceTypes(/*ExpectClassBody=*/true);
2179         if (!tryToParseBracedList())
2180           break;
2181       }
2182       if (FormatTok->Tok.is(tok::semi))
2183         return;
2184       nextToken();
2185     }
2186   }
2187   if (FormatTok->Tok.is(tok::l_brace)) {
2188     if (ParseAsExpr) {
2189       parseChildBlock();
2190     } else {
2191       if (ShouldBreakBeforeBrace(Style, InitialToken))
2192         addUnwrappedLine();
2193 
2194       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2195                  /*MunchSemi=*/false);
2196     }
2197   }
2198   // There is no addUnwrappedLine() here so that we fall through to parsing a
2199   // structural element afterwards. Thus, in "class A {} n, m;",
2200   // "} n, m;" will end up in one unwrapped line.
2201 }
2202 
2203 void UnwrappedLineParser::parseObjCMethod() {
2204   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2205          "'(' or identifier expected.");
2206   do {
2207     if (FormatTok->Tok.is(tok::semi)) {
2208       nextToken();
2209       addUnwrappedLine();
2210       return;
2211     } else if (FormatTok->Tok.is(tok::l_brace)) {
2212       if (Style.BraceWrapping.AfterFunction)
2213         addUnwrappedLine();
2214       parseBlock(/*MustBeDeclaration=*/false);
2215       addUnwrappedLine();
2216       return;
2217     } else {
2218       nextToken();
2219     }
2220   } while (!eof());
2221 }
2222 
2223 void UnwrappedLineParser::parseObjCProtocolList() {
2224   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2225   do {
2226     nextToken();
2227     // Early exit in case someone forgot a close angle.
2228     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2229         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2230       return;
2231   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2232   nextToken(); // Skip '>'.
2233 }
2234 
2235 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2236   do {
2237     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2238       nextToken();
2239       addUnwrappedLine();
2240       break;
2241     }
2242     if (FormatTok->is(tok::l_brace)) {
2243       parseBlock(/*MustBeDeclaration=*/false);
2244       // In ObjC interfaces, nothing should be following the "}".
2245       addUnwrappedLine();
2246     } else if (FormatTok->is(tok::r_brace)) {
2247       // Ignore stray "}". parseStructuralElement doesn't consume them.
2248       nextToken();
2249       addUnwrappedLine();
2250     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2251       nextToken();
2252       parseObjCMethod();
2253     } else {
2254       parseStructuralElement();
2255     }
2256   } while (!eof());
2257 }
2258 
2259 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2260   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2261          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2262   nextToken();
2263   nextToken(); // interface name
2264 
2265   // @interface can be followed by a lightweight generic
2266   // specialization list, then either a base class or a category.
2267   if (FormatTok->Tok.is(tok::less)) {
2268     // Unlike protocol lists, generic parameterizations support
2269     // nested angles:
2270     //
2271     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2272     //     NSObject <NSCopying, NSSecureCoding>
2273     //
2274     // so we need to count how many open angles we have left.
2275     unsigned NumOpenAngles = 1;
2276     do {
2277       nextToken();
2278       // Early exit in case someone forgot a close angle.
2279       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2280           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2281         break;
2282       if (FormatTok->Tok.is(tok::less))
2283         ++NumOpenAngles;
2284       else if (FormatTok->Tok.is(tok::greater)) {
2285         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2286         --NumOpenAngles;
2287       }
2288     } while (!eof() && NumOpenAngles != 0);
2289     nextToken(); // Skip '>'.
2290   }
2291   if (FormatTok->Tok.is(tok::colon)) {
2292     nextToken();
2293     nextToken(); // base class name
2294   } else if (FormatTok->Tok.is(tok::l_paren))
2295     // Skip category, if present.
2296     parseParens();
2297 
2298   if (FormatTok->Tok.is(tok::less))
2299     parseObjCProtocolList();
2300 
2301   if (FormatTok->Tok.is(tok::l_brace)) {
2302     if (Style.BraceWrapping.AfterObjCDeclaration)
2303       addUnwrappedLine();
2304     parseBlock(/*MustBeDeclaration=*/true);
2305   }
2306 
2307   // With instance variables, this puts '}' on its own line.  Without instance
2308   // variables, this ends the @interface line.
2309   addUnwrappedLine();
2310 
2311   parseObjCUntilAtEnd();
2312 }
2313 
2314 // Returns true for the declaration/definition form of @protocol,
2315 // false for the expression form.
2316 bool UnwrappedLineParser::parseObjCProtocol() {
2317   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2318   nextToken();
2319 
2320   if (FormatTok->is(tok::l_paren))
2321     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2322     return false;
2323 
2324   // The definition/declaration form,
2325   // @protocol Foo
2326   // - (int)someMethod;
2327   // @end
2328 
2329   nextToken(); // protocol name
2330 
2331   if (FormatTok->Tok.is(tok::less))
2332     parseObjCProtocolList();
2333 
2334   // Check for protocol declaration.
2335   if (FormatTok->Tok.is(tok::semi)) {
2336     nextToken();
2337     addUnwrappedLine();
2338     return true;
2339   }
2340 
2341   addUnwrappedLine();
2342   parseObjCUntilAtEnd();
2343   return true;
2344 }
2345 
2346 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2347   bool IsImport = FormatTok->is(Keywords.kw_import);
2348   assert(IsImport || FormatTok->is(tok::kw_export));
2349   nextToken();
2350 
2351   // Consume the "default" in "export default class/function".
2352   if (FormatTok->is(tok::kw_default))
2353     nextToken();
2354 
2355   // Consume "async function", "function" and "default function", so that these
2356   // get parsed as free-standing JS functions, i.e. do not require a trailing
2357   // semicolon.
2358   if (FormatTok->is(Keywords.kw_async))
2359     nextToken();
2360   if (FormatTok->is(Keywords.kw_function)) {
2361     nextToken();
2362     return;
2363   }
2364 
2365   // For imports, `export *`, `export {...}`, consume the rest of the line up
2366   // to the terminating `;`. For everything else, just return and continue
2367   // parsing the structural element, i.e. the declaration or expression for
2368   // `export default`.
2369   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2370       !FormatTok->isStringLiteral())
2371     return;
2372 
2373   while (!eof()) {
2374     if (FormatTok->is(tok::semi))
2375       return;
2376     if (Line->Tokens.empty()) {
2377       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2378       // import statement should terminate.
2379       return;
2380     }
2381     if (FormatTok->is(tok::l_brace)) {
2382       FormatTok->BlockKind = BK_Block;
2383       nextToken();
2384       parseBracedList();
2385     } else {
2386       nextToken();
2387     }
2388   }
2389 }
2390 
2391 void UnwrappedLineParser::parseStatementMacro() {
2392   nextToken();
2393   if (FormatTok->is(tok::l_paren))
2394     parseParens();
2395   if (FormatTok->is(tok::semi))
2396     nextToken();
2397   addUnwrappedLine();
2398 }
2399 
2400 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2401                                                  StringRef Prefix = "") {
2402   llvm::dbgs() << Prefix << "Line(" << Line.Level
2403                << ", FSC=" << Line.FirstStartColumn << ")"
2404                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2405   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2406                                                     E = Line.Tokens.end();
2407        I != E; ++I) {
2408     llvm::dbgs() << I->Tok->Tok.getName() << "["
2409                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2410                  << "] ";
2411   }
2412   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2413                                                     E = Line.Tokens.end();
2414        I != E; ++I) {
2415     const UnwrappedLineNode &Node = *I;
2416     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2417              I = Node.Children.begin(),
2418              E = Node.Children.end();
2419          I != E; ++I) {
2420       printDebugInfo(*I, "\nChild: ");
2421     }
2422   }
2423   llvm::dbgs() << "\n";
2424 }
2425 
2426 void UnwrappedLineParser::addUnwrappedLine() {
2427   if (Line->Tokens.empty())
2428     return;
2429   LLVM_DEBUG({
2430     if (CurrentLines == &Lines)
2431       printDebugInfo(*Line);
2432   });
2433   CurrentLines->push_back(std::move(*Line));
2434   Line->Tokens.clear();
2435   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2436   Line->FirstStartColumn = 0;
2437   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2438     CurrentLines->append(
2439         std::make_move_iterator(PreprocessorDirectives.begin()),
2440         std::make_move_iterator(PreprocessorDirectives.end()));
2441     PreprocessorDirectives.clear();
2442   }
2443   // Disconnect the current token from the last token on the previous line.
2444   FormatTok->Previous = nullptr;
2445 }
2446 
2447 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2448 
2449 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2450   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2451          FormatTok.NewlinesBefore > 0;
2452 }
2453 
2454 // Checks if \p FormatTok is a line comment that continues the line comment
2455 // section on \p Line.
2456 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2457                                         const UnwrappedLine &Line,
2458                                         llvm::Regex &CommentPragmasRegex) {
2459   if (Line.Tokens.empty())
2460     return false;
2461 
2462   StringRef IndentContent = FormatTok.TokenText;
2463   if (FormatTok.TokenText.startswith("//") ||
2464       FormatTok.TokenText.startswith("/*"))
2465     IndentContent = FormatTok.TokenText.substr(2);
2466   if (CommentPragmasRegex.match(IndentContent))
2467     return false;
2468 
2469   // If Line starts with a line comment, then FormatTok continues the comment
2470   // section if its original column is greater or equal to the original start
2471   // column of the line.
2472   //
2473   // Define the min column token of a line as follows: if a line ends in '{' or
2474   // contains a '{' followed by a line comment, then the min column token is
2475   // that '{'. Otherwise, the min column token of the line is the first token of
2476   // the line.
2477   //
2478   // If Line starts with a token other than a line comment, then FormatTok
2479   // continues the comment section if its original column is greater than the
2480   // original start column of the min column token of the line.
2481   //
2482   // For example, the second line comment continues the first in these cases:
2483   //
2484   // // first line
2485   // // second line
2486   //
2487   // and:
2488   //
2489   // // first line
2490   //  // second line
2491   //
2492   // and:
2493   //
2494   // int i; // first line
2495   //  // second line
2496   //
2497   // and:
2498   //
2499   // do { // first line
2500   //      // second line
2501   //   int i;
2502   // } while (true);
2503   //
2504   // and:
2505   //
2506   // enum {
2507   //   a, // first line
2508   //    // second line
2509   //   b
2510   // };
2511   //
2512   // The second line comment doesn't continue the first in these cases:
2513   //
2514   //   // first line
2515   //  // second line
2516   //
2517   // and:
2518   //
2519   // int i; // first line
2520   // // second line
2521   //
2522   // and:
2523   //
2524   // do { // first line
2525   //   // second line
2526   //   int i;
2527   // } while (true);
2528   //
2529   // and:
2530   //
2531   // enum {
2532   //   a, // first line
2533   //   // second line
2534   // };
2535   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2536 
2537   // Scan for '{//'. If found, use the column of '{' as a min column for line
2538   // comment section continuation.
2539   const FormatToken *PreviousToken = nullptr;
2540   for (const UnwrappedLineNode &Node : Line.Tokens) {
2541     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2542         isLineComment(*Node.Tok)) {
2543       MinColumnToken = PreviousToken;
2544       break;
2545     }
2546     PreviousToken = Node.Tok;
2547 
2548     // Grab the last newline preceding a token in this unwrapped line.
2549     if (Node.Tok->NewlinesBefore > 0) {
2550       MinColumnToken = Node.Tok;
2551     }
2552   }
2553   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2554     MinColumnToken = PreviousToken;
2555   }
2556 
2557   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2558                               MinColumnToken);
2559 }
2560 
2561 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2562   bool JustComments = Line->Tokens.empty();
2563   for (SmallVectorImpl<FormatToken *>::const_iterator
2564            I = CommentsBeforeNextToken.begin(),
2565            E = CommentsBeforeNextToken.end();
2566        I != E; ++I) {
2567     // Line comments that belong to the same line comment section are put on the
2568     // same line since later we might want to reflow content between them.
2569     // Additional fine-grained breaking of line comment sections is controlled
2570     // by the class BreakableLineCommentSection in case it is desirable to keep
2571     // several line comment sections in the same unwrapped line.
2572     //
2573     // FIXME: Consider putting separate line comment sections as children to the
2574     // unwrapped line instead.
2575     (*I)->ContinuesLineCommentSection =
2576         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2577     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2578       addUnwrappedLine();
2579     pushToken(*I);
2580   }
2581   if (NewlineBeforeNext && JustComments)
2582     addUnwrappedLine();
2583   CommentsBeforeNextToken.clear();
2584 }
2585 
2586 void UnwrappedLineParser::nextToken(int LevelDifference) {
2587   if (eof())
2588     return;
2589   flushComments(isOnNewLine(*FormatTok));
2590   pushToken(FormatTok);
2591   FormatToken *Previous = FormatTok;
2592   if (Style.Language != FormatStyle::LK_JavaScript)
2593     readToken(LevelDifference);
2594   else
2595     readTokenWithJavaScriptASI();
2596   FormatTok->Previous = Previous;
2597 }
2598 
2599 void UnwrappedLineParser::distributeComments(
2600     const SmallVectorImpl<FormatToken *> &Comments,
2601     const FormatToken *NextTok) {
2602   // Whether or not a line comment token continues a line is controlled by
2603   // the method continuesLineCommentSection, with the following caveat:
2604   //
2605   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2606   // that each comment line from the trail is aligned with the next token, if
2607   // the next token exists. If a trail exists, the beginning of the maximal
2608   // trail is marked as a start of a new comment section.
2609   //
2610   // For example in this code:
2611   //
2612   // int a; // line about a
2613   //   // line 1 about b
2614   //   // line 2 about b
2615   //   int b;
2616   //
2617   // the two lines about b form a maximal trail, so there are two sections, the
2618   // first one consisting of the single comment "// line about a" and the
2619   // second one consisting of the next two comments.
2620   if (Comments.empty())
2621     return;
2622   bool ShouldPushCommentsInCurrentLine = true;
2623   bool HasTrailAlignedWithNextToken = false;
2624   unsigned StartOfTrailAlignedWithNextToken = 0;
2625   if (NextTok) {
2626     // We are skipping the first element intentionally.
2627     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2628       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2629         HasTrailAlignedWithNextToken = true;
2630         StartOfTrailAlignedWithNextToken = i;
2631       }
2632     }
2633   }
2634   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2635     FormatToken *FormatTok = Comments[i];
2636     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2637       FormatTok->ContinuesLineCommentSection = false;
2638     } else {
2639       FormatTok->ContinuesLineCommentSection =
2640           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2641     }
2642     if (!FormatTok->ContinuesLineCommentSection &&
2643         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2644       ShouldPushCommentsInCurrentLine = false;
2645     }
2646     if (ShouldPushCommentsInCurrentLine) {
2647       pushToken(FormatTok);
2648     } else {
2649       CommentsBeforeNextToken.push_back(FormatTok);
2650     }
2651   }
2652 }
2653 
2654 void UnwrappedLineParser::readToken(int LevelDifference) {
2655   SmallVector<FormatToken *, 1> Comments;
2656   do {
2657     FormatTok = Tokens->getNextToken();
2658     assert(FormatTok);
2659     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2660            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2661       distributeComments(Comments, FormatTok);
2662       Comments.clear();
2663       // If there is an unfinished unwrapped line, we flush the preprocessor
2664       // directives only after that unwrapped line was finished later.
2665       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2666       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2667       assert((LevelDifference >= 0 ||
2668               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2669              "LevelDifference makes Line->Level negative");
2670       Line->Level += LevelDifference;
2671       // Comments stored before the preprocessor directive need to be output
2672       // before the preprocessor directive, at the same level as the
2673       // preprocessor directive, as we consider them to apply to the directive.
2674       flushComments(isOnNewLine(*FormatTok));
2675       parsePPDirective();
2676     }
2677     while (FormatTok->Type == TT_ConflictStart ||
2678            FormatTok->Type == TT_ConflictEnd ||
2679            FormatTok->Type == TT_ConflictAlternative) {
2680       if (FormatTok->Type == TT_ConflictStart) {
2681         conditionalCompilationStart(/*Unreachable=*/false);
2682       } else if (FormatTok->Type == TT_ConflictAlternative) {
2683         conditionalCompilationAlternative();
2684       } else if (FormatTok->Type == TT_ConflictEnd) {
2685         conditionalCompilationEnd();
2686       }
2687       FormatTok = Tokens->getNextToken();
2688       FormatTok->MustBreakBefore = true;
2689     }
2690 
2691     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2692         !Line->InPPDirective) {
2693       continue;
2694     }
2695 
2696     if (!FormatTok->Tok.is(tok::comment)) {
2697       distributeComments(Comments, FormatTok);
2698       Comments.clear();
2699       return;
2700     }
2701 
2702     Comments.push_back(FormatTok);
2703   } while (!eof());
2704 
2705   distributeComments(Comments, nullptr);
2706   Comments.clear();
2707 }
2708 
2709 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2710   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2711   if (MustBreakBeforeNextToken) {
2712     Line->Tokens.back().Tok->MustBreakBefore = true;
2713     MustBreakBeforeNextToken = false;
2714   }
2715 }
2716 
2717 } // end namespace format
2718 } // end namespace clang
2719