1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <algorithm>
21 
22 #define DEBUG_TYPE "format-parser"
23 
24 namespace clang {
25 namespace format {
26 
27 class FormatTokenSource {
28 public:
29   virtual ~FormatTokenSource() {}
30   virtual FormatToken *getNextToken() = 0;
31 
32   virtual unsigned getPosition() = 0;
33   virtual FormatToken *setPosition(unsigned Position) = 0;
34 };
35 
36 namespace {
37 
38 class ScopedDeclarationState {
39 public:
40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
41                          bool MustBeDeclaration)
42       : Line(Line), Stack(Stack) {
43     Line.MustBeDeclaration = MustBeDeclaration;
44     Stack.push_back(MustBeDeclaration);
45   }
46   ~ScopedDeclarationState() {
47     Stack.pop_back();
48     if (!Stack.empty())
49       Line.MustBeDeclaration = Stack.back();
50     else
51       Line.MustBeDeclaration = true;
52   }
53 
54 private:
55   UnwrappedLine &Line;
56   std::vector<bool> &Stack;
57 };
58 
59 static bool isLineComment(const FormatToken &FormatTok) {
60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     FakeEOF.Tok.startToken();
86     FakeEOF.Tok.setKind(tok::eof);
87     TokenSource = this;
88     Line.Level = 0;
89     Line.InPPDirective = true;
90   }
91 
92   ~ScopedMacroState() override {
93     TokenSource = PreviousTokenSource;
94     ResetToken = Token;
95     Line.InPPDirective = false;
96     Line.Level = PreviousLineLevel;
97   }
98 
99   FormatToken *getNextToken() override {
100     // The \c UnwrappedLineParser guards against this by never calling
101     // \c getNextToken() after it has encountered the first eof token.
102     assert(!eof());
103     PreviousToken = Token;
104     Token = PreviousTokenSource->getNextToken();
105     if (eof())
106       return &FakeEOF;
107     return Token;
108   }
109 
110   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
111 
112   FormatToken *setPosition(unsigned Position) override {
113     PreviousToken = nullptr;
114     Token = PreviousTokenSource->setPosition(Position);
115     return Token;
116   }
117 
118 private:
119   bool eof() {
120     return Token && Token->HasUnescapedNewline &&
121            !continuesLineComment(*Token, PreviousToken,
122                                  /*MinColumnToken=*/PreviousToken);
123   }
124 
125   FormatToken FakeEOF;
126   UnwrappedLine &Line;
127   FormatTokenSource *&TokenSource;
128   FormatToken *&ResetToken;
129   unsigned PreviousLineLevel;
130   FormatTokenSource *PreviousTokenSource;
131 
132   FormatToken *Token;
133   FormatToken *PreviousToken;
134 };
135 
136 } // end anonymous namespace
137 
138 class ScopedLineState {
139 public:
140   ScopedLineState(UnwrappedLineParser &Parser,
141                   bool SwitchToPreprocessorLines = false)
142       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
143     if (SwitchToPreprocessorLines)
144       Parser.CurrentLines = &Parser.PreprocessorDirectives;
145     else if (!Parser.Line->Tokens.empty())
146       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
147     PreBlockLine = std::move(Parser.Line);
148     Parser.Line = std::make_unique<UnwrappedLine>();
149     Parser.Line->Level = PreBlockLine->Level;
150     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
151   }
152 
153   ~ScopedLineState() {
154     if (!Parser.Line->Tokens.empty()) {
155       Parser.addUnwrappedLine();
156     }
157     assert(Parser.Line->Tokens.empty());
158     Parser.Line = std::move(PreBlockLine);
159     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
160       Parser.MustBreakBeforeNextToken = true;
161     Parser.CurrentLines = OriginalLines;
162   }
163 
164 private:
165   UnwrappedLineParser &Parser;
166 
167   std::unique_ptr<UnwrappedLine> PreBlockLine;
168   SmallVectorImpl<UnwrappedLine> *OriginalLines;
169 };
170 
171 class CompoundStatementIndenter {
172 public:
173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
174                             const FormatStyle &Style, unsigned &LineLevel)
175       : CompoundStatementIndenter(Parser, LineLevel,
176                                   Style.BraceWrapping.AfterControlStatement,
177                                   Style.BraceWrapping.IndentBraces) {}
178   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
179                             bool WrapBrace, bool IndentBrace)
180       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
181     if (WrapBrace)
182       Parser->addUnwrappedLine();
183     if (IndentBrace)
184       ++LineLevel;
185   }
186   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
187 
188 private:
189   unsigned &LineLevel;
190   unsigned OldLineLevel;
191 };
192 
193 namespace {
194 
195 class IndexedTokenSource : public FormatTokenSource {
196 public:
197   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
198       : Tokens(Tokens), Position(-1) {}
199 
200   FormatToken *getNextToken() override {
201     ++Position;
202     return Tokens[Position];
203   }
204 
205   unsigned getPosition() override {
206     assert(Position >= 0);
207     return Position;
208   }
209 
210   FormatToken *setPosition(unsigned P) override {
211     Position = P;
212     return Tokens[Position];
213   }
214 
215   void reset() { Position = -1; }
216 
217 private:
218   ArrayRef<FormatToken *> Tokens;
219   int Position;
220 };
221 
222 } // end anonymous namespace
223 
224 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
225                                          const AdditionalKeywords &Keywords,
226                                          unsigned FirstStartColumn,
227                                          ArrayRef<FormatToken *> Tokens,
228                                          UnwrappedLineConsumer &Callback)
229     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
230       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
231       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
232       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
233       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
234                        ? IG_Rejected
235                        : IG_Inited),
236       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
241                      ? IG_Rejected
242                      : IG_Inited;
243   IncludeGuardToken = nullptr;
244   Line.reset(new UnwrappedLine);
245   CommentsBeforeNextToken.clear();
246   FormatTok = nullptr;
247   MustBreakBeforeNextToken = false;
248   PreprocessorDirectives.clear();
249   CurrentLines = &Lines;
250   DeclarationScopeStack.clear();
251   PPStack.clear();
252   Line->FirstStartColumn = FirstStartColumn;
253 }
254 
255 void UnwrappedLineParser::parse() {
256   IndexedTokenSource TokenSource(AllTokens);
257   Line->FirstStartColumn = FirstStartColumn;
258   do {
259     LLVM_DEBUG(llvm::dbgs() << "----\n");
260     reset();
261     Tokens = &TokenSource;
262     TokenSource.reset();
263 
264     readToken();
265     parseFile();
266 
267     // If we found an include guard then all preprocessor directives (other than
268     // the guard) are over-indented by one.
269     if (IncludeGuard == IG_Found)
270       for (auto &Line : Lines)
271         if (Line.InPPDirective && Line.Level > 0)
272           --Line.Level;
273 
274     // Create line with eof token.
275     pushToken(FormatTok);
276     addUnwrappedLine();
277 
278     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
279                                                   E = Lines.end();
280          I != E; ++I) {
281       Callback.consumeUnwrappedLine(*I);
282     }
283     Callback.finishRun();
284     Lines.clear();
285     while (!PPLevelBranchIndex.empty() &&
286            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
287       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
288       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
289     }
290     if (!PPLevelBranchIndex.empty()) {
291       ++PPLevelBranchIndex.back();
292       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
293       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
294     }
295   } while (!PPLevelBranchIndex.empty());
296 }
297 
298 void UnwrappedLineParser::parseFile() {
299   // The top-level context in a file always has declarations, except for pre-
300   // processor directives and JavaScript files.
301   bool MustBeDeclaration =
302       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
303   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
304                                           MustBeDeclaration);
305   if (Style.Language == FormatStyle::LK_TextProto)
306     parseBracedList();
307   else
308     parseLevel(/*HasOpeningBrace=*/false);
309   // Make sure to format the remaining tokens.
310   //
311   // LK_TextProto is special since its top-level is parsed as the body of a
312   // braced list, which does not necessarily have natural line separators such
313   // as a semicolon. Comments after the last entry that have been determined to
314   // not belong to that line, as in:
315   //   key: value
316   //   // endfile comment
317   // do not have a chance to be put on a line of their own until this point.
318   // Here we add this newline before end-of-file comments.
319   if (Style.Language == FormatStyle::LK_TextProto &&
320       !CommentsBeforeNextToken.empty())
321     addUnwrappedLine();
322   flushComments(true);
323   addUnwrappedLine();
324 }
325 
326 void UnwrappedLineParser::parseCSharpAttribute() {
327   do {
328     switch (FormatTok->Tok.getKind()) {
329     case tok::r_square:
330       nextToken();
331       addUnwrappedLine();
332       return;
333     default:
334       nextToken();
335       break;
336     }
337   } while (!eof());
338 }
339 
340 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
341   bool SwitchLabelEncountered = false;
342   do {
343     tok::TokenKind kind = FormatTok->Tok.getKind();
344     if (FormatTok->Type == TT_MacroBlockBegin) {
345       kind = tok::l_brace;
346     } else if (FormatTok->Type == TT_MacroBlockEnd) {
347       kind = tok::r_brace;
348     }
349 
350     switch (kind) {
351     case tok::comment:
352       nextToken();
353       addUnwrappedLine();
354       break;
355     case tok::l_brace:
356       // FIXME: Add parameter whether this can happen - if this happens, we must
357       // be in a non-declaration context.
358       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
359         continue;
360       parseBlock(/*MustBeDeclaration=*/false);
361       addUnwrappedLine();
362       break;
363     case tok::r_brace:
364       if (HasOpeningBrace)
365         return;
366       nextToken();
367       addUnwrappedLine();
368       break;
369     case tok::kw_default: {
370       unsigned StoredPosition = Tokens->getPosition();
371       FormatToken *Next;
372       do {
373         Next = Tokens->getNextToken();
374       } while (Next && Next->is(tok::comment));
375       FormatTok = Tokens->setPosition(StoredPosition);
376       if (Next && Next->isNot(tok::colon)) {
377         // default not followed by ':' is not a case label; treat it like
378         // an identifier.
379         parseStructuralElement();
380         break;
381       }
382       // Else, if it is 'default:', fall through to the case handling.
383       LLVM_FALLTHROUGH;
384     }
385     case tok::kw_case:
386       if (Style.Language == FormatStyle::LK_JavaScript &&
387           Line->MustBeDeclaration) {
388         // A 'case: string' style field declaration.
389         parseStructuralElement();
390         break;
391       }
392       if (!SwitchLabelEncountered &&
393           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
394         ++Line->Level;
395       SwitchLabelEncountered = true;
396       parseStructuralElement();
397       break;
398     case tok::l_square:
399       if (Style.isCSharp()) {
400         nextToken();
401         parseCSharpAttribute();
402         break;
403       }
404       LLVM_FALLTHROUGH;
405     default:
406       parseStructuralElement();
407       break;
408     }
409   } while (!eof());
410 }
411 
412 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
413   // We'll parse forward through the tokens until we hit
414   // a closing brace or eof - note that getNextToken() will
415   // parse macros, so this will magically work inside macro
416   // definitions, too.
417   unsigned StoredPosition = Tokens->getPosition();
418   FormatToken *Tok = FormatTok;
419   const FormatToken *PrevTok = Tok->Previous;
420   // Keep a stack of positions of lbrace tokens. We will
421   // update information about whether an lbrace starts a
422   // braced init list or a different block during the loop.
423   SmallVector<FormatToken *, 8> LBraceStack;
424   assert(Tok->Tok.is(tok::l_brace));
425   do {
426     // Get next non-comment token.
427     FormatToken *NextTok;
428     unsigned ReadTokens = 0;
429     do {
430       NextTok = Tokens->getNextToken();
431       ++ReadTokens;
432     } while (NextTok->is(tok::comment));
433 
434     switch (Tok->Tok.getKind()) {
435     case tok::l_brace:
436       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
437         if (PrevTok->isOneOf(tok::colon, tok::less))
438           // A ':' indicates this code is in a type, or a braced list
439           // following a label in an object literal ({a: {b: 1}}).
440           // A '<' could be an object used in a comparison, but that is nonsense
441           // code (can never return true), so more likely it is a generic type
442           // argument (`X<{a: string; b: number}>`).
443           // The code below could be confused by semicolons between the
444           // individual members in a type member list, which would normally
445           // trigger BK_Block. In both cases, this must be parsed as an inline
446           // braced init.
447           Tok->BlockKind = BK_BracedInit;
448         else if (PrevTok->is(tok::r_paren))
449           // `) { }` can only occur in function or method declarations in JS.
450           Tok->BlockKind = BK_Block;
451       } else {
452         Tok->BlockKind = BK_Unknown;
453       }
454       LBraceStack.push_back(Tok);
455       break;
456     case tok::r_brace:
457       if (LBraceStack.empty())
458         break;
459       if (LBraceStack.back()->BlockKind == BK_Unknown) {
460         bool ProbablyBracedList = false;
461         if (Style.Language == FormatStyle::LK_Proto) {
462           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
463         } else {
464           // Using OriginalColumn to distinguish between ObjC methods and
465           // binary operators is a bit hacky.
466           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
467                                   NextTok->OriginalColumn == 0;
468 
469           // If there is a comma, semicolon or right paren after the closing
470           // brace, we assume this is a braced initializer list.  Note that
471           // regardless how we mark inner braces here, we will overwrite the
472           // BlockKind later if we parse a braced list (where all blocks
473           // inside are by default braced lists), or when we explicitly detect
474           // blocks (for example while parsing lambdas).
475           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
476           // braced list in JS.
477           ProbablyBracedList =
478               (Style.Language == FormatStyle::LK_JavaScript &&
479                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
480                                 Keywords.kw_as)) ||
481               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
482               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
483                                tok::r_paren, tok::r_square, tok::l_brace,
484                                tok::ellipsis) ||
485               (NextTok->is(tok::identifier) &&
486                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
487               (NextTok->is(tok::semi) &&
488                (!ExpectClassBody || LBraceStack.size() != 1)) ||
489               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
490           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
491             // We can have an array subscript after a braced init
492             // list, but C++11 attributes are expected after blocks.
493             NextTok = Tokens->getNextToken();
494             ++ReadTokens;
495             ProbablyBracedList = NextTok->isNot(tok::l_square);
496           }
497         }
498         if (ProbablyBracedList) {
499           Tok->BlockKind = BK_BracedInit;
500           LBraceStack.back()->BlockKind = BK_BracedInit;
501         } else {
502           Tok->BlockKind = BK_Block;
503           LBraceStack.back()->BlockKind = BK_Block;
504         }
505       }
506       LBraceStack.pop_back();
507       break;
508     case tok::identifier:
509       if (!Tok->is(TT_StatementMacro))
510         break;
511       LLVM_FALLTHROUGH;
512     case tok::at:
513     case tok::semi:
514     case tok::kw_if:
515     case tok::kw_while:
516     case tok::kw_for:
517     case tok::kw_switch:
518     case tok::kw_try:
519     case tok::kw___try:
520       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
521         LBraceStack.back()->BlockKind = BK_Block;
522       break;
523     default:
524       break;
525     }
526     PrevTok = Tok;
527     Tok = NextTok;
528   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
529 
530   // Assume other blocks for all unclosed opening braces.
531   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
532     if (LBraceStack[i]->BlockKind == BK_Unknown)
533       LBraceStack[i]->BlockKind = BK_Block;
534   }
535 
536   FormatTok = Tokens->setPosition(StoredPosition);
537 }
538 
/// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
544 
545 size_t UnwrappedLineParser::computePPHash() const {
546   size_t h = 0;
547   for (const auto &i : PPStack) {
548     hash_combine(h, size_t(i.Kind));
549     hash_combine(h, i.Line);
550   }
551   return h;
552 }
553 
554 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
555                                      bool MunchSemi) {
556   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
557          "'{' or macro block token expected");
558   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
559   FormatTok->BlockKind = BK_Block;
560 
561   size_t PPStartHash = computePPHash();
562 
563   unsigned InitialLevel = Line->Level;
564   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
565 
566   if (MacroBlock && FormatTok->is(tok::l_paren))
567     parseParens();
568 
569   size_t NbPreprocessorDirectives =
570       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
571   addUnwrappedLine();
572   size_t OpeningLineIndex =
573       CurrentLines->empty()
574           ? (UnwrappedLine::kInvalidIndex)
575           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
576 
577   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
578                                           MustBeDeclaration);
579   if (AddLevel)
580     ++Line->Level;
581   parseLevel(/*HasOpeningBrace=*/true);
582 
583   if (eof())
584     return;
585 
586   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
587                  : !FormatTok->is(tok::r_brace)) {
588     Line->Level = InitialLevel;
589     FormatTok->BlockKind = BK_Block;
590     return;
591   }
592 
593   size_t PPEndHash = computePPHash();
594 
595   // Munch the closing brace.
596   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
597 
598   if (MacroBlock && FormatTok->is(tok::l_paren))
599     parseParens();
600 
601   if (MunchSemi && FormatTok->Tok.is(tok::semi))
602     nextToken();
603   Line->Level = InitialLevel;
604 
605   if (PPStartHash == PPEndHash) {
606     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
607     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
608       // Update the opening line to add the forward reference as well
609       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
610           CurrentLines->size() - 1;
611     }
612   }
613 }
614 
615 static bool isGoogScope(const UnwrappedLine &Line) {
616   // FIXME: Closure-library specific stuff should not be hard-coded but be
617   // configurable.
618   if (Line.Tokens.size() < 4)
619     return false;
620   auto I = Line.Tokens.begin();
621   if (I->Tok->TokenText != "goog")
622     return false;
623   ++I;
624   if (I->Tok->isNot(tok::period))
625     return false;
626   ++I;
627   if (I->Tok->TokenText != "scope")
628     return false;
629   ++I;
630   return I->Tok->is(tok::l_paren);
631 }
632 
633 static bool isIIFE(const UnwrappedLine &Line,
634                    const AdditionalKeywords &Keywords) {
635   // Look for the start of an immediately invoked anonymous function.
636   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
637   // This is commonly done in JavaScript to create a new, anonymous scope.
638   // Example: (function() { ... })()
639   if (Line.Tokens.size() < 3)
640     return false;
641   auto I = Line.Tokens.begin();
642   if (I->Tok->isNot(tok::l_paren))
643     return false;
644   ++I;
645   if (I->Tok->isNot(Keywords.kw_function))
646     return false;
647   ++I;
648   return I->Tok->is(tok::l_paren);
649 }
650 
651 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
652                                    const FormatToken &InitialToken) {
653   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
654     return Style.BraceWrapping.AfterNamespace;
655   if (InitialToken.is(tok::kw_class))
656     return Style.BraceWrapping.AfterClass;
657   if (InitialToken.is(tok::kw_union))
658     return Style.BraceWrapping.AfterUnion;
659   if (InitialToken.is(tok::kw_struct))
660     return Style.BraceWrapping.AfterStruct;
661   return false;
662 }
663 
664 void UnwrappedLineParser::parseChildBlock() {
665   FormatTok->BlockKind = BK_Block;
666   nextToken();
667   {
668     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
669                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
670     ScopedLineState LineState(*this);
671     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
672                                             /*MustBeDeclaration=*/false);
673     Line->Level += SkipIndent ? 0 : 1;
674     parseLevel(/*HasOpeningBrace=*/true);
675     flushComments(isOnNewLine(*FormatTok));
676     Line->Level -= SkipIndent ? 0 : 1;
677   }
678   nextToken();
679 }
680 
681 void UnwrappedLineParser::parsePPDirective() {
682   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
683   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
684 
685   nextToken();
686 
687   if (!FormatTok->Tok.getIdentifierInfo()) {
688     parsePPUnknown();
689     return;
690   }
691 
692   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
693   case tok::pp_define:
694     parsePPDefine();
695     return;
696   case tok::pp_if:
697     parsePPIf(/*IfDef=*/false);
698     break;
699   case tok::pp_ifdef:
700   case tok::pp_ifndef:
701     parsePPIf(/*IfDef=*/true);
702     break;
703   case tok::pp_else:
704     parsePPElse();
705     break;
706   case tok::pp_elif:
707     parsePPElIf();
708     break;
709   case tok::pp_endif:
710     parsePPEndIf();
711     break;
712   default:
713     parsePPUnknown();
714     break;
715   }
716 }
717 
718 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
719   size_t Line = CurrentLines->size();
720   if (CurrentLines == &PreprocessorDirectives)
721     Line += Lines.size();
722 
723   if (Unreachable ||
724       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
725     PPStack.push_back({PP_Unreachable, Line});
726   else
727     PPStack.push_back({PP_Conditional, Line});
728 }
729 
730 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
731   ++PPBranchLevel;
732   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
733   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
734     PPLevelBranchIndex.push_back(0);
735     PPLevelBranchCount.push_back(0);
736   }
737   PPChainBranchIndex.push(0);
738   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
739   conditionalCompilationCondition(Unreachable || Skip);
740 }
741 
742 void UnwrappedLineParser::conditionalCompilationAlternative() {
743   if (!PPStack.empty())
744     PPStack.pop_back();
745   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
746   if (!PPChainBranchIndex.empty())
747     ++PPChainBranchIndex.top();
748   conditionalCompilationCondition(
749       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
750       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
751 }
752 
753 void UnwrappedLineParser::conditionalCompilationEnd() {
754   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
755   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
756     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
757       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
758     }
759   }
760   // Guard against #endif's without #if.
761   if (PPBranchLevel > -1)
762     --PPBranchLevel;
763   if (!PPChainBranchIndex.empty())
764     PPChainBranchIndex.pop();
765   if (!PPStack.empty())
766     PPStack.pop_back();
767 }
768 
769 void UnwrappedLineParser::parsePPIf(bool IfDef) {
770   bool IfNDef = FormatTok->is(tok::pp_ifndef);
771   nextToken();
772   bool Unreachable = false;
773   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
774     Unreachable = true;
775   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
776     Unreachable = true;
777   conditionalCompilationStart(Unreachable);
778   FormatToken *IfCondition = FormatTok;
779   // If there's a #ifndef on the first line, and the only lines before it are
780   // comments, it could be an include guard.
781   bool MaybeIncludeGuard = IfNDef;
782   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
783     for (auto &Line : Lines) {
784       if (!Line.Tokens.front().Tok->is(tok::comment)) {
785         MaybeIncludeGuard = false;
786         IncludeGuard = IG_Rejected;
787         break;
788       }
789     }
790   --PPBranchLevel;
791   parsePPUnknown();
792   ++PPBranchLevel;
793   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
794     IncludeGuard = IG_IfNdefed;
795     IncludeGuardToken = IfCondition;
796   }
797 }
798 
799 void UnwrappedLineParser::parsePPElse() {
800   // If a potential include guard has an #else, it's not an include guard.
801   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
802     IncludeGuard = IG_Rejected;
803   conditionalCompilationAlternative();
804   if (PPBranchLevel > -1)
805     --PPBranchLevel;
806   parsePPUnknown();
807   ++PPBranchLevel;
808 }
809 
810 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
811 
812 void UnwrappedLineParser::parsePPEndIf() {
813   conditionalCompilationEnd();
814   parsePPUnknown();
815   // If the #endif of a potential include guard is the last thing in the file,
816   // then we found an include guard.
817   unsigned TokenPosition = Tokens->getPosition();
818   FormatToken *PeekNext = AllTokens[TokenPosition];
819   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
820       PeekNext->is(tok::eof) &&
821       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
822     IncludeGuard = IG_Found;
823 }
824 
825 void UnwrappedLineParser::parsePPDefine() {
826   nextToken();
827 
828   if (!FormatTok->Tok.getIdentifierInfo()) {
829     IncludeGuard = IG_Rejected;
830     IncludeGuardToken = nullptr;
831     parsePPUnknown();
832     return;
833   }
834 
835   if (IncludeGuard == IG_IfNdefed &&
836       IncludeGuardToken->TokenText == FormatTok->TokenText) {
837     IncludeGuard = IG_Defined;
838     IncludeGuardToken = nullptr;
839     for (auto &Line : Lines) {
840       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
841         IncludeGuard = IG_Rejected;
842         break;
843       }
844     }
845   }
846 
847   nextToken();
848   if (FormatTok->Tok.getKind() == tok::l_paren &&
849       FormatTok->WhitespaceRange.getBegin() ==
850           FormatTok->WhitespaceRange.getEnd()) {
851     parseParens();
852   }
853   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
854     Line->Level += PPBranchLevel + 1;
855   addUnwrappedLine();
856   ++Line->Level;
857 
858   // Errors during a preprocessor directive can only affect the layout of the
859   // preprocessor directive, and thus we ignore them. An alternative approach
860   // would be to use the same approach we use on the file level (no
861   // re-indentation if there was a structural error) within the macro
862   // definition.
863   parseFile();
864 }
865 
866 void UnwrappedLineParser::parsePPUnknown() {
867   do {
868     nextToken();
869   } while (!eof());
870   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
871     Line->Level += PPBranchLevel + 1;
872   addUnwrappedLine();
873 }
874 
875 // Here we blacklist certain tokens that are not usually the first token in an
876 // unwrapped line. This is used in attempt to distinguish macro calls without
877 // trailing semicolons from other constructs split to several lines.
878 static bool tokenCanStartNewLine(const clang::Token &Tok) {
879   // Semicolon can be a null-statement, l_square can be a start of a macro or
880   // a C++11 attribute, but this doesn't seem to be common.
881   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
882          Tok.isNot(tok::l_square) &&
883          // Tokens that can only be used as binary operators and a part of
884          // overloaded operator names.
885          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
886          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
887          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
888          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
889          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
890          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
891          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
892          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
893          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
894          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
895          Tok.isNot(tok::lesslessequal) &&
896          // Colon is used in labels, base class lists, initializer lists,
897          // range-based for loops, ternary operator, but should never be the
898          // first token in an unwrapped line.
899          Tok.isNot(tok::colon) &&
900          // 'noexcept' is a trailing annotation.
901          Tok.isNot(tok::kw_noexcept);
902 }
903 
904 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
905                           const FormatToken *FormatTok) {
906   // FIXME: This returns true for C/C++ keywords like 'struct'.
907   return FormatTok->is(tok::identifier) &&
908          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
909           !FormatTok->isOneOf(
910               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
911               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
912               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
913               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
914               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
915               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
916               Keywords.kw_from));
917 }
918 
919 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
920                                  const FormatToken *FormatTok) {
921   return FormatTok->Tok.isLiteral() ||
922          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
923          mustBeJSIdent(Keywords, FormatTok);
924 }
925 
926 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
927 // when encountered after a value (see mustBeJSIdentOrValue).
928 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
929                            const FormatToken *FormatTok) {
930   return FormatTok->isOneOf(
931       tok::kw_return, Keywords.kw_yield,
932       // conditionals
933       tok::kw_if, tok::kw_else,
934       // loops
935       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
936       // switch/case
937       tok::kw_switch, tok::kw_case,
938       // exceptions
939       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
940       // declaration
941       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
942       Keywords.kw_async, Keywords.kw_function,
943       // import/export
944       Keywords.kw_import, tok::kw_export);
945 }
946 
947 // readTokenWithJavaScriptASI reads the next token and terminates the current
948 // line if JavaScript Automatic Semicolon Insertion must
949 // happen between the current token and the next token.
950 //
951 // This method is conservative - it cannot cover all edge cases of JavaScript,
952 // but only aims to correctly handle certain well known cases. It *must not*
953 // return true in speculative cases.
954 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
955   FormatToken *Previous = FormatTok;
956   readToken();
957   FormatToken *Next = FormatTok;
958 
959   bool IsOnSameLine =
960       CommentsBeforeNextToken.empty()
961           ? Next->NewlinesBefore == 0
962           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
963   if (IsOnSameLine)
964     return;
965 
966   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
967   bool PreviousStartsTemplateExpr =
968       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
969   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
970     // If the line contains an '@' sign, the previous token might be an
971     // annotation, which can precede another identifier/value.
972     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
973                               [](UnwrappedLineNode &LineNode) {
974                                 return LineNode.Tok->is(tok::at);
975                               }) != Line->Tokens.end();
976     if (HasAt)
977       return;
978   }
979   if (Next->is(tok::exclaim) && PreviousMustBeValue)
980     return addUnwrappedLine();
981   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
982   bool NextEndsTemplateExpr =
983       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
984   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
985       (PreviousMustBeValue ||
986        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
987                          tok::minusminus)))
988     return addUnwrappedLine();
989   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
990       isJSDeclOrStmt(Keywords, Next))
991     return addUnwrappedLine();
992 }
993 
// Parses one structural element starting at the current token.
//
// The function works in two phases:
//  1. A switch on the first token hands constructs that have a dedicated
//     parser (namespaces, control flow statements, labels, extern blocks,
//     macros, ...) to that parser and usually returns right away.
//  2. Everything that "break"s out of the first switch is consumed token by
//     token in the loop below until a case returns (for example on a
//     semicolon, a closing brace or after a parsed block) or eof() is reached.
//
// The current token must not be an opening brace (see the assert).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include directive optionally followed by a string literal
  // forms its own line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the leading token.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Every token up to the closing brace is marked as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // For Java, JavaScript and C# the keyword is simply consumed; otherwise it
    // is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." {' opens a block; BraceWrapping.AfterExternBlock decides
    // whether the brace goes on its own line and whether the block adds an
    // indentation level.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import' ['public'] "file" [';'].
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals:' / 'slots:' style keywords followed by a colon form their own
    // line in C++.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume the element token by token. A "break" inside the switch
  // continues with the next token; a "return" ends the structural element.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef NS_ENUM(...)' and the related macros are parsed like enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the structural element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here; only the current line is
      // finished.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a name or simple type, a parenthesized
      // list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token and restore the token position
          // afterwards.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // The checks below only apply when the identifier was the first token of
      // the line, optionally preceded by a single comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier is treated as a macro if its text equals
        // its upper-cased form, it is either function-like or at least five
        // characters long, it is followed by a newline, and the next token may
        // start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list that is
      // closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1451 
1452 bool UnwrappedLineParser::tryToParseLambda() {
1453   if (!Style.isCpp()) {
1454     nextToken();
1455     return false;
1456   }
1457   assert(FormatTok->is(tok::l_square));
1458   FormatToken &LSquare = *FormatTok;
1459   if (!tryToParseLambdaIntroducer())
1460     return false;
1461 
1462   bool SeenArrow = false;
1463 
1464   while (FormatTok->isNot(tok::l_brace)) {
1465     if (FormatTok->isSimpleTypeSpecifier()) {
1466       nextToken();
1467       continue;
1468     }
1469     switch (FormatTok->Tok.getKind()) {
1470     case tok::l_brace:
1471       break;
1472     case tok::l_paren:
1473       parseParens();
1474       break;
1475     case tok::amp:
1476     case tok::star:
1477     case tok::kw_const:
1478     case tok::comma:
1479     case tok::less:
1480     case tok::greater:
1481     case tok::identifier:
1482     case tok::numeric_constant:
1483     case tok::coloncolon:
1484     case tok::kw_class:
1485     case tok::kw_mutable:
1486     case tok::kw_noexcept:
1487     case tok::kw_template:
1488     case tok::kw_typename:
1489       nextToken();
1490       break;
1491     // Specialization of a template with an integer parameter can contain
1492     // arithmetic, logical, comparison and ternary operators.
1493     //
1494     // FIXME: This also accepts sequences of operators that are not in the scope
1495     // of a template argument list.
1496     //
1497     // In a C++ lambda a template type can only occur after an arrow. We use
1498     // this as an heuristic to distinguish between Objective-C expressions
1499     // followed by an `a->b` expression, such as:
1500     // ([obj func:arg] + a->b)
1501     // Otherwise the code below would parse as a lambda.
1502     //
1503     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1504     // explicit template lists: []<bool b = true && false>(U &&u){}
1505     case tok::plus:
1506     case tok::minus:
1507     case tok::exclaim:
1508     case tok::tilde:
1509     case tok::slash:
1510     case tok::percent:
1511     case tok::lessless:
1512     case tok::pipe:
1513     case tok::pipepipe:
1514     case tok::ampamp:
1515     case tok::caret:
1516     case tok::equalequal:
1517     case tok::exclaimequal:
1518     case tok::greaterequal:
1519     case tok::lessequal:
1520     case tok::question:
1521     case tok::colon:
1522     case tok::kw_true:
1523     case tok::kw_false:
1524       if (SeenArrow) {
1525         nextToken();
1526         break;
1527       }
1528       return true;
1529     case tok::arrow:
1530       // This might or might not actually be a lambda arrow (this could be an
1531       // ObjC method invocation followed by a dereferencing arrow). We might
1532       // reset this back to TT_Unknown in TokenAnnotator.
1533       FormatTok->Type = TT_LambdaArrow;
1534       SeenArrow = true;
1535       nextToken();
1536       break;
1537     default:
1538       return true;
1539     }
1540   }
1541   FormatTok->Type = TT_LambdaLBrace;
1542   LSquare.Type = TT_LambdaLSquare;
1543   parseChildBlock();
1544   return true;
1545 }
1546 
1547 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1548   const FormatToken *Previous = FormatTok->Previous;
1549   if (Previous &&
1550       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1551                          tok::kw_delete, tok::l_square) ||
1552        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1553        Previous->isSimpleTypeSpecifier())) {
1554     nextToken();
1555     return false;
1556   }
1557   nextToken();
1558   if (FormatTok->is(tok::l_square)) {
1559     return false;
1560   }
1561   parseSquare(/*LambdaIntroducer=*/true);
1562   return true;
1563 }
1564 
1565 void UnwrappedLineParser::tryToParseJSFunction() {
1566   assert(FormatTok->is(Keywords.kw_function) ||
1567          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1568   if (FormatTok->is(Keywords.kw_async))
1569     nextToken();
1570   // Consume "function".
1571   nextToken();
1572 
1573   // Consume * (generator function). Treat it like C++'s overloaded operators.
1574   if (FormatTok->is(tok::star)) {
1575     FormatTok->Type = TT_OverloadedOperator;
1576     nextToken();
1577   }
1578 
1579   // Consume function name.
1580   if (FormatTok->is(tok::identifier))
1581     nextToken();
1582 
1583   if (FormatTok->isNot(tok::l_paren))
1584     return;
1585 
1586   // Parse formal parameter list.
1587   parseParens();
1588 
1589   if (FormatTok->is(tok::colon)) {
1590     // Parse a type definition.
1591     nextToken();
1592 
1593     // Eat the type declaration. For braced inline object types, balance braces,
1594     // otherwise just parse until finding an l_brace for the function body.
1595     if (FormatTok->is(tok::l_brace))
1596       tryToParseBracedList();
1597     else
1598       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1599         nextToken();
1600   }
1601 
1602   if (FormatTok->is(tok::semi))
1603     return;
1604 
1605   parseChildBlock();
1606 }
1607 
1608 bool UnwrappedLineParser::tryToParseBracedList() {
1609   if (FormatTok->BlockKind == BK_Unknown)
1610     calculateBraceTypes();
1611   assert(FormatTok->BlockKind != BK_Unknown);
1612   if (FormatTok->BlockKind == BK_Block)
1613     return false;
1614   nextToken();
1615   parseBracedList();
1616   return true;
1617 }
1618 
1619 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1620                                           tok::TokenKind ClosingBraceKind) {
1621   bool HasError = false;
1622 
1623   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1624   // replace this by using parseAssigmentExpression() inside.
1625   do {
1626     if (Style.Language == FormatStyle::LK_JavaScript) {
1627       if (FormatTok->is(Keywords.kw_function) ||
1628           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1629         tryToParseJSFunction();
1630         continue;
1631       }
1632       if (FormatTok->is(TT_JsFatArrow)) {
1633         nextToken();
1634         // Fat arrows can be followed by simple expressions or by child blocks
1635         // in curly braces.
1636         if (FormatTok->is(tok::l_brace)) {
1637           parseChildBlock();
1638           continue;
1639         }
1640       }
1641       if (FormatTok->is(tok::l_brace)) {
1642         // Could be a method inside of a braced list `{a() { return 1; }}`.
1643         if (tryToParseBracedList())
1644           continue;
1645         parseChildBlock();
1646       }
1647     }
1648     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1649       nextToken();
1650       return !HasError;
1651     }
1652     switch (FormatTok->Tok.getKind()) {
1653     case tok::caret:
1654       nextToken();
1655       if (FormatTok->is(tok::l_brace)) {
1656         parseChildBlock();
1657       }
1658       break;
1659     case tok::l_square:
1660       tryToParseLambda();
1661       break;
1662     case tok::l_paren:
1663       parseParens();
1664       // JavaScript can just have free standing methods and getters/setters in
1665       // object literals. Detect them by a "{" following ")".
1666       if (Style.Language == FormatStyle::LK_JavaScript) {
1667         if (FormatTok->is(tok::l_brace))
1668           parseChildBlock();
1669         break;
1670       }
1671       break;
1672     case tok::l_brace:
1673       // Assume there are no blocks inside a braced init list apart
1674       // from the ones we explicitly parse out (like lambdas).
1675       FormatTok->BlockKind = BK_BracedInit;
1676       nextToken();
1677       parseBracedList();
1678       break;
1679     case tok::less:
1680       if (Style.Language == FormatStyle::LK_Proto) {
1681         nextToken();
1682         parseBracedList(/*ContinueOnSemicolons=*/false,
1683                         /*ClosingBraceKind=*/tok::greater);
1684       } else {
1685         nextToken();
1686       }
1687       break;
1688     case tok::semi:
1689       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1690       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1691       // used for error recovery if we have otherwise determined that this is
1692       // a braced list.
1693       if (Style.Language == FormatStyle::LK_JavaScript) {
1694         nextToken();
1695         break;
1696       }
1697       HasError = true;
1698       if (!ContinueOnSemicolons)
1699         return !HasError;
1700       nextToken();
1701       break;
1702     case tok::comma:
1703       nextToken();
1704       break;
1705     default:
1706       nextToken();
1707       break;
1708     }
1709   } while (!eof());
1710   return false;
1711 }
1712 
1713 void UnwrappedLineParser::parseParens() {
1714   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1715   nextToken();
1716   do {
1717     switch (FormatTok->Tok.getKind()) {
1718     case tok::l_paren:
1719       parseParens();
1720       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1721         parseChildBlock();
1722       break;
1723     case tok::r_paren:
1724       nextToken();
1725       return;
1726     case tok::r_brace:
1727       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1728       return;
1729     case tok::l_square:
1730       tryToParseLambda();
1731       break;
1732     case tok::l_brace:
1733       if (!tryToParseBracedList())
1734         parseChildBlock();
1735       break;
1736     case tok::at:
1737       nextToken();
1738       if (FormatTok->Tok.is(tok::l_brace)) {
1739         nextToken();
1740         parseBracedList();
1741       }
1742       break;
1743     case tok::kw_class:
1744       if (Style.Language == FormatStyle::LK_JavaScript)
1745         parseRecord(/*ParseAsExpr=*/true);
1746       else
1747         nextToken();
1748       break;
1749     case tok::identifier:
1750       if (Style.Language == FormatStyle::LK_JavaScript &&
1751           (FormatTok->is(Keywords.kw_function) ||
1752            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1753         tryToParseJSFunction();
1754       else
1755         nextToken();
1756       break;
1757     default:
1758       nextToken();
1759       break;
1760     }
1761   } while (!eof());
1762 }
1763 
1764 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1765   if (!LambdaIntroducer) {
1766     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1767     if (tryToParseLambda())
1768       return;
1769   }
1770   do {
1771     switch (FormatTok->Tok.getKind()) {
1772     case tok::l_paren:
1773       parseParens();
1774       break;
1775     case tok::r_square:
1776       nextToken();
1777       return;
1778     case tok::r_brace:
1779       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1780       return;
1781     case tok::l_square:
1782       parseSquare();
1783       break;
1784     case tok::l_brace: {
1785       if (!tryToParseBracedList())
1786         parseChildBlock();
1787       break;
1788     }
1789     case tok::at:
1790       nextToken();
1791       if (FormatTok->Tok.is(tok::l_brace)) {
1792         nextToken();
1793         parseBracedList();
1794       }
1795       break;
1796     default:
1797       nextToken();
1798       break;
1799     }
1800   } while (!eof());
1801 }
1802 
1803 void UnwrappedLineParser::parseIfThenElse() {
1804   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1805   nextToken();
1806   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1807     nextToken();
1808   if (FormatTok->Tok.is(tok::l_paren))
1809     parseParens();
1810   bool NeedsUnwrappedLine = false;
1811   if (FormatTok->Tok.is(tok::l_brace)) {
1812     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1813     parseBlock(/*MustBeDeclaration=*/false);
1814     if (Style.BraceWrapping.BeforeElse)
1815       addUnwrappedLine();
1816     else
1817       NeedsUnwrappedLine = true;
1818   } else {
1819     addUnwrappedLine();
1820     ++Line->Level;
1821     parseStructuralElement();
1822     --Line->Level;
1823   }
1824   if (FormatTok->Tok.is(tok::kw_else)) {
1825     nextToken();
1826     if (FormatTok->Tok.is(tok::l_brace)) {
1827       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1828       parseBlock(/*MustBeDeclaration=*/false);
1829       addUnwrappedLine();
1830     } else if (FormatTok->Tok.is(tok::kw_if)) {
1831       parseIfThenElse();
1832     } else {
1833       addUnwrappedLine();
1834       ++Line->Level;
1835       parseStructuralElement();
1836       if (FormatTok->is(tok::eof))
1837         addUnwrappedLine();
1838       --Line->Level;
1839     }
1840   } else if (NeedsUnwrappedLine) {
1841     addUnwrappedLine();
1842   }
1843 }
1844 
1845 void UnwrappedLineParser::parseTryCatch() {
1846   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1847   nextToken();
1848   bool NeedsUnwrappedLine = false;
1849   if (FormatTok->is(tok::colon)) {
1850     // We are in a function try block, what comes is an initializer list.
1851     nextToken();
1852     while (FormatTok->is(tok::identifier)) {
1853       nextToken();
1854       if (FormatTok->is(tok::l_paren))
1855         parseParens();
1856       if (FormatTok->is(tok::comma))
1857         nextToken();
1858     }
1859   }
1860   // Parse try with resource.
1861   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1862     parseParens();
1863   }
1864   if (FormatTok->is(tok::l_brace)) {
1865     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1866     parseBlock(/*MustBeDeclaration=*/false);
1867     if (Style.BraceWrapping.BeforeCatch) {
1868       addUnwrappedLine();
1869     } else {
1870       NeedsUnwrappedLine = true;
1871     }
1872   } else if (!FormatTok->is(tok::kw_catch)) {
1873     // The C++ standard requires a compound-statement after a try.
1874     // If there's none, we try to assume there's a structuralElement
1875     // and try to continue.
1876     addUnwrappedLine();
1877     ++Line->Level;
1878     parseStructuralElement();
1879     --Line->Level;
1880   }
1881   while (1) {
1882     if (FormatTok->is(tok::at))
1883       nextToken();
1884     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1885                              tok::kw___finally) ||
1886           ((Style.Language == FormatStyle::LK_Java ||
1887             Style.Language == FormatStyle::LK_JavaScript) &&
1888            FormatTok->is(Keywords.kw_finally)) ||
1889           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1890            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1891       break;
1892     nextToken();
1893     while (FormatTok->isNot(tok::l_brace)) {
1894       if (FormatTok->is(tok::l_paren)) {
1895         parseParens();
1896         continue;
1897       }
1898       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1899         return;
1900       nextToken();
1901     }
1902     NeedsUnwrappedLine = false;
1903     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1904     parseBlock(/*MustBeDeclaration=*/false);
1905     if (Style.BraceWrapping.BeforeCatch)
1906       addUnwrappedLine();
1907     else
1908       NeedsUnwrappedLine = true;
1909   }
1910   if (NeedsUnwrappedLine)
1911     addUnwrappedLine();
1912 }
1913 
1914 void UnwrappedLineParser::parseNamespace() {
1915   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
1916          "'namespace' expected");
1917 
1918   const FormatToken &InitialToken = *FormatTok;
1919   nextToken();
1920   if (InitialToken.is(TT_NamespaceMacro)) {
1921     parseParens();
1922   } else {
1923     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
1924                               tok::l_square)) {
1925       if (FormatTok->is(tok::l_square))
1926         parseSquare();
1927       else
1928         nextToken();
1929     }
1930   }
1931   if (FormatTok->Tok.is(tok::l_brace)) {
1932     if (ShouldBreakBeforeBrace(Style, InitialToken))
1933       addUnwrappedLine();
1934 
1935     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1936                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1937                      DeclarationScopeStack.size() > 1);
1938     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1939     // Munch the semicolon after a namespace. This is more common than one would
1940     // think. Puttin the semicolon into its own line is very ugly.
1941     if (FormatTok->Tok.is(tok::semi))
1942       nextToken();
1943     addUnwrappedLine();
1944   }
1945   // FIXME: Add error handling.
1946 }
1947 
1948 void UnwrappedLineParser::parseNew() {
1949   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1950   nextToken();
1951   if (Style.Language != FormatStyle::LK_Java)
1952     return;
1953 
1954   // In Java, we can parse everything up to the parens, which aren't optional.
1955   do {
1956     // There should not be a ;, { or } before the new's open paren.
1957     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1958       return;
1959 
1960     // Consume the parens.
1961     if (FormatTok->is(tok::l_paren)) {
1962       parseParens();
1963 
1964       // If there is a class body of an anonymous class, consume that as child.
1965       if (FormatTok->is(tok::l_brace))
1966         parseChildBlock();
1967       return;
1968     }
1969     nextToken();
1970   } while (!eof());
1971 }
1972 
1973 void UnwrappedLineParser::parseForOrWhileLoop() {
1974   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1975          "'for', 'while' or foreach macro expected");
1976   nextToken();
1977   // JS' for await ( ...
1978   if (Style.Language == FormatStyle::LK_JavaScript &&
1979       FormatTok->is(Keywords.kw_await))
1980     nextToken();
1981   if (FormatTok->Tok.is(tok::l_paren))
1982     parseParens();
1983   if (FormatTok->Tok.is(tok::l_brace)) {
1984     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1985     parseBlock(/*MustBeDeclaration=*/false);
1986     addUnwrappedLine();
1987   } else {
1988     addUnwrappedLine();
1989     ++Line->Level;
1990     parseStructuralElement();
1991     --Line->Level;
1992   }
1993 }
1994 
1995 void UnwrappedLineParser::parseDoWhile() {
1996   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1997   nextToken();
1998   if (FormatTok->Tok.is(tok::l_brace)) {
1999     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2000     parseBlock(/*MustBeDeclaration=*/false);
2001     if (Style.BraceWrapping.IndentBraces)
2002       addUnwrappedLine();
2003   } else {
2004     addUnwrappedLine();
2005     ++Line->Level;
2006     parseStructuralElement();
2007     --Line->Level;
2008   }
2009 
2010   // FIXME: Add error handling.
2011   if (!FormatTok->Tok.is(tok::kw_while)) {
2012     addUnwrappedLine();
2013     return;
2014   }
2015 
2016   nextToken();
2017   parseStructuralElement();
2018 }
2019 
2020 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2021   nextToken();
2022   unsigned OldLineLevel = Line->Level;
2023   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2024     --Line->Level;
2025   if (LeftAlignLabel)
2026     Line->Level = 0;
2027   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2028       FormatTok->Tok.is(tok::l_brace)) {
2029     CompoundStatementIndenter Indenter(this, Line->Level,
2030                                        Style.BraceWrapping.AfterCaseLabel,
2031                                        Style.BraceWrapping.IndentBraces);
2032     parseBlock(/*MustBeDeclaration=*/false);
2033     if (FormatTok->Tok.is(tok::kw_break)) {
2034       if (Style.BraceWrapping.AfterControlStatement ==
2035           FormatStyle::BWACS_Always)
2036         addUnwrappedLine();
2037       parseStructuralElement();
2038     }
2039     addUnwrappedLine();
2040   } else {
2041     if (FormatTok->is(tok::semi))
2042       nextToken();
2043     addUnwrappedLine();
2044   }
2045   Line->Level = OldLineLevel;
2046   if (FormatTok->isNot(tok::l_brace)) {
2047     parseStructuralElement();
2048     addUnwrappedLine();
2049   }
2050 }
2051 
2052 void UnwrappedLineParser::parseCaseLabel() {
2053   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2054   // FIXME: fix handling of complex expressions here.
2055   do {
2056     nextToken();
2057   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2058   parseLabel();
2059 }
2060 
2061 void UnwrappedLineParser::parseSwitch() {
2062   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2063   nextToken();
2064   if (FormatTok->Tok.is(tok::l_paren))
2065     parseParens();
2066   if (FormatTok->Tok.is(tok::l_brace)) {
2067     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2068     parseBlock(/*MustBeDeclaration=*/false);
2069     addUnwrappedLine();
2070   } else {
2071     addUnwrappedLine();
2072     ++Line->Level;
2073     parseStructuralElement();
2074     --Line->Level;
2075   }
2076 }
2077 
2078 void UnwrappedLineParser::parseAccessSpecifier() {
2079   nextToken();
2080   // Understand Qt's slots.
2081   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2082     nextToken();
2083   // Otherwise, we don't know what it is, and we'd better keep the next token.
2084   if (FormatTok->Tok.is(tok::colon))
2085     nextToken();
2086   addUnwrappedLine();
2087 }
2088 
2089 bool UnwrappedLineParser::parseEnum() {
2090   // Won't be 'enum' for NS_ENUMs.
2091   if (FormatTok->Tok.is(tok::kw_enum))
2092     nextToken();
2093 
2094   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2095   // declarations. An "enum" keyword followed by a colon would be a syntax
2096   // error and thus assume it is just an identifier.
2097   if (Style.Language == FormatStyle::LK_JavaScript &&
2098       FormatTok->isOneOf(tok::colon, tok::question))
2099     return false;
2100 
2101   // In protobuf, "enum" can be used as a field name.
2102   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2103     return false;
2104 
2105   // Eat up enum class ...
2106   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2107     nextToken();
2108 
2109   while (FormatTok->Tok.getIdentifierInfo() ||
2110          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2111                             tok::greater, tok::comma, tok::question)) {
2112     nextToken();
2113     // We can have macros or attributes in between 'enum' and the enum name.
2114     if (FormatTok->is(tok::l_paren))
2115       parseParens();
2116     if (FormatTok->is(tok::identifier)) {
2117       nextToken();
2118       // If there are two identifiers in a row, this is likely an elaborate
2119       // return type. In Java, this can be "implements", etc.
2120       if (Style.isCpp() && FormatTok->is(tok::identifier))
2121         return false;
2122     }
2123   }
2124 
2125   // Just a declaration or something is wrong.
2126   if (FormatTok->isNot(tok::l_brace))
2127     return true;
2128   FormatTok->BlockKind = BK_Block;
2129 
2130   if (Style.Language == FormatStyle::LK_Java) {
2131     // Java enums are different.
2132     parseJavaEnumBody();
2133     return true;
2134   }
2135   if (Style.Language == FormatStyle::LK_Proto) {
2136     parseBlock(/*MustBeDeclaration=*/true);
2137     return true;
2138   }
2139 
2140   // Parse enum body.
2141   nextToken();
2142   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2143   if (HasError) {
2144     if (FormatTok->is(tok::semi))
2145       nextToken();
2146     addUnwrappedLine();
2147   }
2148   return true;
2149 
2150   // There is no addUnwrappedLine() here so that we fall through to parsing a
2151   // structural element afterwards. Thus, in "enum A {} n, m;",
2152   // "} n, m;" will end up in one unwrapped line.
2153 }
2154 
2155 void UnwrappedLineParser::parseJavaEnumBody() {
2156   // Determine whether the enum is simple, i.e. does not have a semicolon or
2157   // constants with class bodies. Simple enums can be formatted like braced
2158   // lists, contracted to a single line, etc.
2159   unsigned StoredPosition = Tokens->getPosition();
2160   bool IsSimple = true;
2161   FormatToken *Tok = Tokens->getNextToken();
2162   while (Tok) {
2163     if (Tok->is(tok::r_brace))
2164       break;
2165     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2166       IsSimple = false;
2167       break;
2168     }
2169     // FIXME: This will also mark enums with braces in the arguments to enum
2170     // constants as "not simple". This is probably fine in practice, though.
2171     Tok = Tokens->getNextToken();
2172   }
2173   FormatTok = Tokens->setPosition(StoredPosition);
2174 
2175   if (IsSimple) {
2176     nextToken();
2177     parseBracedList();
2178     addUnwrappedLine();
2179     return;
2180   }
2181 
2182   // Parse the body of a more complex enum.
2183   // First add a line for everything up to the "{".
2184   nextToken();
2185   addUnwrappedLine();
2186   ++Line->Level;
2187 
2188   // Parse the enum constants.
2189   while (FormatTok) {
2190     if (FormatTok->is(tok::l_brace)) {
2191       // Parse the constant's class body.
2192       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2193                  /*MunchSemi=*/false);
2194     } else if (FormatTok->is(tok::l_paren)) {
2195       parseParens();
2196     } else if (FormatTok->is(tok::comma)) {
2197       nextToken();
2198       addUnwrappedLine();
2199     } else if (FormatTok->is(tok::semi)) {
2200       nextToken();
2201       addUnwrappedLine();
2202       break;
2203     } else if (FormatTok->is(tok::r_brace)) {
2204       addUnwrappedLine();
2205       break;
2206     } else {
2207       nextToken();
2208     }
2209   }
2210 
2211   // Parse the class body after the enum's ";" if any.
2212   parseLevel(/*HasOpeningBrace=*/true);
2213   nextToken();
2214   --Line->Level;
2215   addUnwrappedLine();
2216 }
2217 
2218 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2219   const FormatToken &InitialToken = *FormatTok;
2220   nextToken();
2221 
2222   // The actual identifier can be a nested name specifier, and in macros
2223   // it is often token-pasted.
2224   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2225                             tok::kw___attribute, tok::kw___declspec,
2226                             tok::kw_alignas) ||
2227          ((Style.Language == FormatStyle::LK_Java ||
2228            Style.Language == FormatStyle::LK_JavaScript) &&
2229           FormatTok->isOneOf(tok::period, tok::comma))) {
2230     if (Style.Language == FormatStyle::LK_JavaScript &&
2231         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2232       // JavaScript/TypeScript supports inline object types in
2233       // extends/implements positions:
2234       //     class Foo implements {bar: number} { }
2235       nextToken();
2236       if (FormatTok->is(tok::l_brace)) {
2237         tryToParseBracedList();
2238         continue;
2239       }
2240     }
2241     bool IsNonMacroIdentifier =
2242         FormatTok->is(tok::identifier) &&
2243         FormatTok->TokenText != FormatTok->TokenText.upper();
2244     nextToken();
2245     // We can have macros or attributes in between 'class' and the class name.
2246     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2247       parseParens();
2248   }
2249 
2250   // Note that parsing away template declarations here leads to incorrectly
2251   // accepting function declarations as record declarations.
2252   // In general, we cannot solve this problem. Consider:
2253   // class A<int> B() {}
2254   // which can be a function definition or a class definition when B() is a
2255   // macro. If we find enough real-world cases where this is a problem, we
2256   // can parse for the 'template' keyword in the beginning of the statement,
2257   // and thus rule out the record production in case there is no template
2258   // (this would still leave us with an ambiguity between template function
2259   // and class declarations).
2260   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2261     while (!eof()) {
2262       if (FormatTok->is(tok::l_brace)) {
2263         calculateBraceTypes(/*ExpectClassBody=*/true);
2264         if (!tryToParseBracedList())
2265           break;
2266       }
2267       if (FormatTok->Tok.is(tok::semi))
2268         return;
2269       nextToken();
2270     }
2271   }
2272   if (FormatTok->Tok.is(tok::l_brace)) {
2273     if (ParseAsExpr) {
2274       parseChildBlock();
2275     } else {
2276       if (ShouldBreakBeforeBrace(Style, InitialToken))
2277         addUnwrappedLine();
2278 
2279       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2280                  /*MunchSemi=*/false);
2281     }
2282   }
2283   // There is no addUnwrappedLine() here so that we fall through to parsing a
2284   // structural element afterwards. Thus, in "class A {} n, m;",
2285   // "} n, m;" will end up in one unwrapped line.
2286 }
2287 
2288 void UnwrappedLineParser::parseObjCMethod() {
2289   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2290          "'(' or identifier expected.");
2291   do {
2292     if (FormatTok->Tok.is(tok::semi)) {
2293       nextToken();
2294       addUnwrappedLine();
2295       return;
2296     } else if (FormatTok->Tok.is(tok::l_brace)) {
2297       if (Style.BraceWrapping.AfterFunction)
2298         addUnwrappedLine();
2299       parseBlock(/*MustBeDeclaration=*/false);
2300       addUnwrappedLine();
2301       return;
2302     } else {
2303       nextToken();
2304     }
2305   } while (!eof());
2306 }
2307 
2308 void UnwrappedLineParser::parseObjCProtocolList() {
2309   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2310   do {
2311     nextToken();
2312     // Early exit in case someone forgot a close angle.
2313     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2314         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2315       return;
2316   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2317   nextToken(); // Skip '>'.
2318 }
2319 
2320 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2321   do {
2322     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2323       nextToken();
2324       addUnwrappedLine();
2325       break;
2326     }
2327     if (FormatTok->is(tok::l_brace)) {
2328       parseBlock(/*MustBeDeclaration=*/false);
2329       // In ObjC interfaces, nothing should be following the "}".
2330       addUnwrappedLine();
2331     } else if (FormatTok->is(tok::r_brace)) {
2332       // Ignore stray "}". parseStructuralElement doesn't consume them.
2333       nextToken();
2334       addUnwrappedLine();
2335     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2336       nextToken();
2337       parseObjCMethod();
2338     } else {
2339       parseStructuralElement();
2340     }
2341   } while (!eof());
2342 }
2343 
2344 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2345   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2346          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2347   nextToken();
2348   nextToken(); // interface name
2349 
2350   // @interface can be followed by a lightweight generic
2351   // specialization list, then either a base class or a category.
2352   if (FormatTok->Tok.is(tok::less)) {
2353     // Unlike protocol lists, generic parameterizations support
2354     // nested angles:
2355     //
2356     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2357     //     NSObject <NSCopying, NSSecureCoding>
2358     //
2359     // so we need to count how many open angles we have left.
2360     unsigned NumOpenAngles = 1;
2361     do {
2362       nextToken();
2363       // Early exit in case someone forgot a close angle.
2364       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2365           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2366         break;
2367       if (FormatTok->Tok.is(tok::less))
2368         ++NumOpenAngles;
2369       else if (FormatTok->Tok.is(tok::greater)) {
2370         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2371         --NumOpenAngles;
2372       }
2373     } while (!eof() && NumOpenAngles != 0);
2374     nextToken(); // Skip '>'.
2375   }
2376   if (FormatTok->Tok.is(tok::colon)) {
2377     nextToken();
2378     nextToken(); // base class name
2379   } else if (FormatTok->Tok.is(tok::l_paren))
2380     // Skip category, if present.
2381     parseParens();
2382 
2383   if (FormatTok->Tok.is(tok::less))
2384     parseObjCProtocolList();
2385 
2386   if (FormatTok->Tok.is(tok::l_brace)) {
2387     if (Style.BraceWrapping.AfterObjCDeclaration)
2388       addUnwrappedLine();
2389     parseBlock(/*MustBeDeclaration=*/true);
2390   }
2391 
2392   // With instance variables, this puts '}' on its own line.  Without instance
2393   // variables, this ends the @interface line.
2394   addUnwrappedLine();
2395 
2396   parseObjCUntilAtEnd();
2397 }
2398 
2399 // Returns true for the declaration/definition form of @protocol,
2400 // false for the expression form.
2401 bool UnwrappedLineParser::parseObjCProtocol() {
2402   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2403   nextToken();
2404 
2405   if (FormatTok->is(tok::l_paren))
2406     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2407     return false;
2408 
2409   // The definition/declaration form,
2410   // @protocol Foo
2411   // - (int)someMethod;
2412   // @end
2413 
2414   nextToken(); // protocol name
2415 
2416   if (FormatTok->Tok.is(tok::less))
2417     parseObjCProtocolList();
2418 
2419   // Check for protocol declaration.
2420   if (FormatTok->Tok.is(tok::semi)) {
2421     nextToken();
2422     addUnwrappedLine();
2423     return true;
2424   }
2425 
2426   addUnwrappedLine();
2427   parseObjCUntilAtEnd();
2428   return true;
2429 }
2430 
2431 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2432   bool IsImport = FormatTok->is(Keywords.kw_import);
2433   assert(IsImport || FormatTok->is(tok::kw_export));
2434   nextToken();
2435 
2436   // Consume the "default" in "export default class/function".
2437   if (FormatTok->is(tok::kw_default))
2438     nextToken();
2439 
2440   // Consume "async function", "function" and "default function", so that these
2441   // get parsed as free-standing JS functions, i.e. do not require a trailing
2442   // semicolon.
2443   if (FormatTok->is(Keywords.kw_async))
2444     nextToken();
2445   if (FormatTok->is(Keywords.kw_function)) {
2446     nextToken();
2447     return;
2448   }
2449 
2450   // For imports, `export *`, `export {...}`, consume the rest of the line up
2451   // to the terminating `;`. For everything else, just return and continue
2452   // parsing the structural element, i.e. the declaration or expression for
2453   // `export default`.
2454   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2455       !FormatTok->isStringLiteral())
2456     return;
2457 
2458   while (!eof()) {
2459     if (FormatTok->is(tok::semi))
2460       return;
2461     if (Line->Tokens.empty()) {
2462       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2463       // import statement should terminate.
2464       return;
2465     }
2466     if (FormatTok->is(tok::l_brace)) {
2467       FormatTok->BlockKind = BK_Block;
2468       nextToken();
2469       parseBracedList();
2470     } else {
2471       nextToken();
2472     }
2473   }
2474 }
2475 
2476 void UnwrappedLineParser::parseStatementMacro() {
2477   nextToken();
2478   if (FormatTok->is(tok::l_paren))
2479     parseParens();
2480   if (FormatTok->is(tok::semi))
2481     nextToken();
2482   addUnwrappedLine();
2483 }
2484 
2485 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2486                                                  StringRef Prefix = "") {
2487   llvm::dbgs() << Prefix << "Line(" << Line.Level
2488                << ", FSC=" << Line.FirstStartColumn << ")"
2489                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2490   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2491                                                     E = Line.Tokens.end();
2492        I != E; ++I) {
2493     llvm::dbgs() << I->Tok->Tok.getName() << "["
2494                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2495                  << "] ";
2496   }
2497   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2498                                                     E = Line.Tokens.end();
2499        I != E; ++I) {
2500     const UnwrappedLineNode &Node = *I;
2501     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2502              I = Node.Children.begin(),
2503              E = Node.Children.end();
2504          I != E; ++I) {
2505       printDebugInfo(*I, "\nChild: ");
2506     }
2507   }
2508   llvm::dbgs() << "\n";
2509 }
2510 
2511 void UnwrappedLineParser::addUnwrappedLine() {
2512   if (Line->Tokens.empty())
2513     return;
2514   LLVM_DEBUG({
2515     if (CurrentLines == &Lines)
2516       printDebugInfo(*Line);
2517   });
2518   CurrentLines->push_back(std::move(*Line));
2519   Line->Tokens.clear();
2520   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2521   Line->FirstStartColumn = 0;
2522   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2523     CurrentLines->append(
2524         std::make_move_iterator(PreprocessorDirectives.begin()),
2525         std::make_move_iterator(PreprocessorDirectives.end()));
2526     PreprocessorDirectives.clear();
2527   }
2528   // Disconnect the current token from the last token on the previous line.
2529   FormatTok->Previous = nullptr;
2530 }
2531 
2532 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2533 
2534 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2535   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2536          FormatTok.NewlinesBefore > 0;
2537 }
2538 
// Checks if \p FormatTok is a line comment that continues the line comment
// section on \p Line.
//
// Returns false if \p Line has no tokens, or if the text of \p FormatTok
// (without a leading "//" or "/*") matches \p CommentPragmasRegex. Otherwise
// the decision is delegated to continuesLineComment(), using the last token
// of \p Line as the previous token and the "min column token" described
// below.
static bool
continuesLineCommentSection(const FormatToken &FormatTok,
                            const UnwrappedLine &Line,
                            const llvm::Regex &CommentPragmasRegex) {
  if (Line.Tokens.empty())
    return false;

  // Strip the two-character comment introducer before matching the pragma
  // regex.
  StringRef IndentContent = FormatTok.TokenText;
  if (FormatTok.TokenText.startswith("//") ||
      FormatTok.TokenText.startswith("/*"))
    IndentContent = FormatTok.TokenText.substr(2);
  if (CommentPragmasRegex.match(IndentContent))
    return false;

  // If Line starts with a line comment, then FormatTok continues the comment
  // section if its original column is greater or equal to the original start
  // column of the line.
  //
  // Define the min column token of a line as follows: if a line ends in '{' or
  // contains a '{' followed by a line comment, then the min column token is
  // that '{'. Otherwise, the min column token of the line is the first token of
  // the line.
  //
  // If Line starts with a token other than a line comment, then FormatTok
  // continues the comment section if its original column is greater than the
  // original start column of the min column token of the line.
  //
  // For example, the second line comment continues the first in these cases:
  //
  // // first line
  // // second line
  //
  // and:
  //
  // // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  //  // second line
  //
  // and:
  //
  // do { // first line
  //      // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //    // second line
  //   b
  // };
  //
  // The second line comment doesn't continue the first in these cases:
  //
  //   // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  // // second line
  //
  // and:
  //
  // do { // first line
  //   // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //   // second line
  // };
  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;

  // Scan for '{//'. If found, use the column of '{' as a min column for line
  // comment section continuation.
  const FormatToken *PreviousToken = nullptr;
  for (const UnwrappedLineNode &Node : Line.Tokens) {
    if (PreviousToken && PreviousToken->is(tok::l_brace) &&
        isLineComment(*Node.Tok)) {
      MinColumnToken = PreviousToken;
      break;
    }
    PreviousToken = Node.Tok;

    // Grab the last newline preceding a token in this unwrapped line.
    // A later '{//' match or a trailing '{' still overrides this choice.
    if (Node.Tok->NewlinesBefore > 0) {
      MinColumnToken = Node.Tok;
    }
  }
  // The line ends in '{' (or the scan stopped at a '{' followed by a line
  // comment): that '{' is the min column token.
  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
    MinColumnToken = PreviousToken;
  }

  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
                              MinColumnToken);
}
2646 
2647 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2648   bool JustComments = Line->Tokens.empty();
2649   for (SmallVectorImpl<FormatToken *>::const_iterator
2650            I = CommentsBeforeNextToken.begin(),
2651            E = CommentsBeforeNextToken.end();
2652        I != E; ++I) {
2653     // Line comments that belong to the same line comment section are put on the
2654     // same line since later we might want to reflow content between them.
2655     // Additional fine-grained breaking of line comment sections is controlled
2656     // by the class BreakableLineCommentSection in case it is desirable to keep
2657     // several line comment sections in the same unwrapped line.
2658     //
2659     // FIXME: Consider putting separate line comment sections as children to the
2660     // unwrapped line instead.
2661     (*I)->ContinuesLineCommentSection =
2662         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2663     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2664       addUnwrappedLine();
2665     pushToken(*I);
2666   }
2667   if (NewlineBeforeNext && JustComments)
2668     addUnwrappedLine();
2669   CommentsBeforeNextToken.clear();
2670 }
2671 
2672 void UnwrappedLineParser::nextToken(int LevelDifference) {
2673   if (eof())
2674     return;
2675   flushComments(isOnNewLine(*FormatTok));
2676   pushToken(FormatTok);
2677   FormatToken *Previous = FormatTok;
2678   if (Style.Language != FormatStyle::LK_JavaScript)
2679     readToken(LevelDifference);
2680   else
2681     readTokenWithJavaScriptASI();
2682   FormatTok->Previous = Previous;
2683 }
2684 
2685 void UnwrappedLineParser::distributeComments(
2686     const SmallVectorImpl<FormatToken *> &Comments,
2687     const FormatToken *NextTok) {
2688   // Whether or not a line comment token continues a line is controlled by
2689   // the method continuesLineCommentSection, with the following caveat:
2690   //
2691   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2692   // that each comment line from the trail is aligned with the next token, if
2693   // the next token exists. If a trail exists, the beginning of the maximal
2694   // trail is marked as a start of a new comment section.
2695   //
2696   // For example in this code:
2697   //
2698   // int a; // line about a
2699   //   // line 1 about b
2700   //   // line 2 about b
2701   //   int b;
2702   //
2703   // the two lines about b form a maximal trail, so there are two sections, the
2704   // first one consisting of the single comment "// line about a" and the
2705   // second one consisting of the next two comments.
2706   if (Comments.empty())
2707     return;
2708   bool ShouldPushCommentsInCurrentLine = true;
2709   bool HasTrailAlignedWithNextToken = false;
2710   unsigned StartOfTrailAlignedWithNextToken = 0;
2711   if (NextTok) {
2712     // We are skipping the first element intentionally.
2713     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2714       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2715         HasTrailAlignedWithNextToken = true;
2716         StartOfTrailAlignedWithNextToken = i;
2717       }
2718     }
2719   }
2720   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2721     FormatToken *FormatTok = Comments[i];
2722     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2723       FormatTok->ContinuesLineCommentSection = false;
2724     } else {
2725       FormatTok->ContinuesLineCommentSection =
2726           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2727     }
2728     if (!FormatTok->ContinuesLineCommentSection &&
2729         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2730       ShouldPushCommentsInCurrentLine = false;
2731     }
2732     if (ShouldPushCommentsInCurrentLine) {
2733       pushToken(FormatTok);
2734     } else {
2735       CommentsBeforeNextToken.push_back(FormatTok);
2736     }
2737   }
2738 }
2739 
// Reads tokens from the token source into FormatTok until a token that is not
// a comment has been read, or until eof() holds.
//
// Comment tokens read on the way are collected and handed to
// distributeComments() together with the token that follows them, or with
// nullptr when the loop ends without such a token. Preprocessor directives,
// conflict marker tokens and unreachable preprocessor branches met on the way
// are handled here as well.
//
// \p LevelDifference is added to Line->Level before a preprocessor directive
// met here is parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' token flagged HasUnescapedNewline or IsFirst, seen while no
    // preprocessor directive is being parsed, is treated as the start of a
    // directive. The condition is re-evaluated after every directive;
    // presumably parsePPDirective() leaves the token following the directive
    // in FormatTok, so consecutive directives are handled by this loop.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      // Hand over the comments collected so far before dealing with the
      // directive.
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      // Line->Level is unsigned, so a negative LevelDifference must not be
      // larger in magnitude than the current level.
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict marker tokens are routed to the conditional compilation
    // handlers and then skipped. The token read after a marker is flagged
    // MustBreakBefore.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // While the innermost PPStack entry is PP_Unreachable and no preprocessor
    // directive is being parsed, the token is skipped: it is neither returned
    // nor recorded as a comment. Note that `continue` inside a do-while jumps
    // to the loop condition, so eof() is still checked.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // A token that is not a comment ends the search: distribute the comments
    // gathered in front of it and return with FormatTok set to that token.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // The loop ended because eof() holds; there is no next token to pass along
  // with whatever comments are still pending.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
2797 
2798 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2799   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2800   if (MustBreakBeforeNextToken) {
2801     Line->Tokens.back().Tok->MustBreakBefore = true;
2802     MustBreakBeforeNextToken = false;
2803   }
2804 }
2805 
2806 } // end namespace format
2807 } // end namespace clang
2808