1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "llvm/Support/Debug.h"
18 #include "llvm/Support/raw_ostream.h"
19 
20 #include <algorithm>
21 
22 #define DEBUG_TYPE "format-parser"
23 
24 namespace clang {
25 namespace format {
26 
27 class FormatTokenSource {
28 public:
29   virtual ~FormatTokenSource() {}
30   virtual FormatToken *getNextToken() = 0;
31 
32   virtual unsigned getPosition() = 0;
33   virtual FormatToken *setPosition(unsigned Position) = 0;
34 };
35 
36 namespace {
37 
38 class ScopedDeclarationState {
39 public:
40   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
41                          bool MustBeDeclaration)
42       : Line(Line), Stack(Stack) {
43     Line.MustBeDeclaration = MustBeDeclaration;
44     Stack.push_back(MustBeDeclaration);
45   }
46   ~ScopedDeclarationState() {
47     Stack.pop_back();
48     if (!Stack.empty())
49       Line.MustBeDeclaration = Stack.back();
50     else
51       Line.MustBeDeclaration = true;
52   }
53 
54 private:
55   UnwrappedLine &Line;
56   std::vector<bool> &Stack;
57 };
58 
59 static bool isLineComment(const FormatToken &FormatTok) {
60   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     FakeEOF.Tok.startToken();
86     FakeEOF.Tok.setKind(tok::eof);
87     TokenSource = this;
88     Line.Level = 0;
89     Line.InPPDirective = true;
90   }
91 
92   ~ScopedMacroState() override {
93     TokenSource = PreviousTokenSource;
94     ResetToken = Token;
95     Line.InPPDirective = false;
96     Line.Level = PreviousLineLevel;
97   }
98 
99   FormatToken *getNextToken() override {
100     // The \c UnwrappedLineParser guards against this by never calling
101     // \c getNextToken() after it has encountered the first eof token.
102     assert(!eof());
103     PreviousToken = Token;
104     Token = PreviousTokenSource->getNextToken();
105     if (eof())
106       return &FakeEOF;
107     return Token;
108   }
109 
110   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
111 
112   FormatToken *setPosition(unsigned Position) override {
113     PreviousToken = nullptr;
114     Token = PreviousTokenSource->setPosition(Position);
115     return Token;
116   }
117 
118 private:
119   bool eof() {
120     return Token && Token->HasUnescapedNewline &&
121            !continuesLineComment(*Token, PreviousToken,
122                                  /*MinColumnToken=*/PreviousToken);
123   }
124 
125   FormatToken FakeEOF;
126   UnwrappedLine &Line;
127   FormatTokenSource *&TokenSource;
128   FormatToken *&ResetToken;
129   unsigned PreviousLineLevel;
130   FormatTokenSource *PreviousTokenSource;
131 
132   FormatToken *Token;
133   FormatToken *PreviousToken;
134 };
135 
136 } // end anonymous namespace
137 
138 class ScopedLineState {
139 public:
140   ScopedLineState(UnwrappedLineParser &Parser,
141                   bool SwitchToPreprocessorLines = false)
142       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
143     if (SwitchToPreprocessorLines)
144       Parser.CurrentLines = &Parser.PreprocessorDirectives;
145     else if (!Parser.Line->Tokens.empty())
146       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
147     PreBlockLine = std::move(Parser.Line);
148     Parser.Line = std::make_unique<UnwrappedLine>();
149     Parser.Line->Level = PreBlockLine->Level;
150     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
151   }
152 
153   ~ScopedLineState() {
154     if (!Parser.Line->Tokens.empty()) {
155       Parser.addUnwrappedLine();
156     }
157     assert(Parser.Line->Tokens.empty());
158     Parser.Line = std::move(PreBlockLine);
159     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
160       Parser.MustBreakBeforeNextToken = true;
161     Parser.CurrentLines = OriginalLines;
162   }
163 
164 private:
165   UnwrappedLineParser &Parser;
166 
167   std::unique_ptr<UnwrappedLine> PreBlockLine;
168   SmallVectorImpl<UnwrappedLine> *OriginalLines;
169 };
170 
171 class CompoundStatementIndenter {
172 public:
173   CompoundStatementIndenter(UnwrappedLineParser *Parser,
174                             const FormatStyle &Style, unsigned &LineLevel)
175       : CompoundStatementIndenter(Parser, LineLevel,
176                                   Style.BraceWrapping.AfterControlStatement,
177                                   Style.BraceWrapping.IndentBraces) {}
178   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
179                             bool WrapBrace, bool IndentBrace)
180       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
181     if (WrapBrace)
182       Parser->addUnwrappedLine();
183     if (IndentBrace)
184       ++LineLevel;
185   }
186   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
187 
188 private:
189   unsigned &LineLevel;
190   unsigned OldLineLevel;
191 };
192 
193 namespace {
194 
195 class IndexedTokenSource : public FormatTokenSource {
196 public:
197   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
198       : Tokens(Tokens), Position(-1) {}
199 
200   FormatToken *getNextToken() override {
201     ++Position;
202     return Tokens[Position];
203   }
204 
205   unsigned getPosition() override {
206     assert(Position >= 0);
207     return Position;
208   }
209 
210   FormatToken *setPosition(unsigned P) override {
211     Position = P;
212     return Tokens[Position];
213   }
214 
215   void reset() { Position = -1; }
216 
217 private:
218   ArrayRef<FormatToken *> Tokens;
219   int Position;
220 };
221 
222 } // end anonymous namespace
223 
224 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
225                                          const AdditionalKeywords &Keywords,
226                                          unsigned FirstStartColumn,
227                                          ArrayRef<FormatToken *> Tokens,
228                                          UnwrappedLineConsumer &Callback)
229     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
230       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
231       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
232       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
233       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
234                        ? IG_Rejected
235                        : IG_Inited),
236       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
241                      ? IG_Rejected
242                      : IG_Inited;
243   IncludeGuardToken = nullptr;
244   Line.reset(new UnwrappedLine);
245   CommentsBeforeNextToken.clear();
246   FormatTok = nullptr;
247   MustBreakBeforeNextToken = false;
248   PreprocessorDirectives.clear();
249   CurrentLines = &Lines;
250   DeclarationScopeStack.clear();
251   PPStack.clear();
252   Line->FirstStartColumn = FirstStartColumn;
253 }
254 
255 void UnwrappedLineParser::parse() {
256   IndexedTokenSource TokenSource(AllTokens);
257   Line->FirstStartColumn = FirstStartColumn;
258   do {
259     LLVM_DEBUG(llvm::dbgs() << "----\n");
260     reset();
261     Tokens = &TokenSource;
262     TokenSource.reset();
263 
264     readToken();
265     parseFile();
266 
267     // If we found an include guard then all preprocessor directives (other than
268     // the guard) are over-indented by one.
269     if (IncludeGuard == IG_Found)
270       for (auto &Line : Lines)
271         if (Line.InPPDirective && Line.Level > 0)
272           --Line.Level;
273 
274     // Create line with eof token.
275     pushToken(FormatTok);
276     addUnwrappedLine();
277 
278     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
279                                                   E = Lines.end();
280          I != E; ++I) {
281       Callback.consumeUnwrappedLine(*I);
282     }
283     Callback.finishRun();
284     Lines.clear();
285     while (!PPLevelBranchIndex.empty() &&
286            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
287       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
288       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
289     }
290     if (!PPLevelBranchIndex.empty()) {
291       ++PPLevelBranchIndex.back();
292       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
293       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
294     }
295   } while (!PPLevelBranchIndex.empty());
296 }
297 
298 void UnwrappedLineParser::parseFile() {
299   // The top-level context in a file always has declarations, except for pre-
300   // processor directives and JavaScript files.
301   bool MustBeDeclaration =
302       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
303   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
304                                           MustBeDeclaration);
305   if (Style.Language == FormatStyle::LK_TextProto)
306     parseBracedList();
307   else
308     parseLevel(/*HasOpeningBrace=*/false);
309   // Make sure to format the remaining tokens.
310   //
311   // LK_TextProto is special since its top-level is parsed as the body of a
312   // braced list, which does not necessarily have natural line separators such
313   // as a semicolon. Comments after the last entry that have been determined to
314   // not belong to that line, as in:
315   //   key: value
316   //   // endfile comment
317   // do not have a chance to be put on a line of their own until this point.
318   // Here we add this newline before end-of-file comments.
319   if (Style.Language == FormatStyle::LK_TextProto &&
320       !CommentsBeforeNextToken.empty())
321     addUnwrappedLine();
322   flushComments(true);
323   addUnwrappedLine();
324 }
325 
326 void UnwrappedLineParser::parseCSharpAttribute() {
327   do {
328     switch (FormatTok->Tok.getKind()) {
329     case tok::r_square:
330       nextToken();
331       addUnwrappedLine();
332       return;
333     default:
334       nextToken();
335       break;
336     }
337   } while (!eof());
338 }
339 
/// Parses structural elements one after another until eof.
///
/// When \p HasOpeningBrace is true, the function also returns as soon as it
/// sees a closing brace (or a macro block end token) at this level; that token
/// is left unconsumed. When it is false, such a closing token is consumed and
/// put on a line of its own.
void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  // Set once the first case/default label was seen, so that the level is
  // raised at most once per call.
  bool SwitchLabelEncountered = false;
  do {
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->Type == TT_MacroBlockBegin) {
      kind = tok::l_brace;
    } else if (FormatTok->Type == TT_MacroBlockEnd) {
      kind = tok::r_brace;
    }

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      // Note: 'continue' jumps to the loop condition of the enclosing do-while.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace)
        return;
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Peek past any comments to the token following 'default', then rewind
      // the token source to where it was.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
      } while (Next && Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next && Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // The first label raises the level if the style indents case labels, or
      // if we are at level 1 inside a preprocessor directive.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      // In C# a '[' at this position is parsed as an attribute.
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      LLVM_FALLTHROUGH;
    default:
      parseStructuralElement();
      break;
    }
  } while (!eof());
}
411 
412 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
413   // We'll parse forward through the tokens until we hit
414   // a closing brace or eof - note that getNextToken() will
415   // parse macros, so this will magically work inside macro
416   // definitions, too.
417   unsigned StoredPosition = Tokens->getPosition();
418   FormatToken *Tok = FormatTok;
419   const FormatToken *PrevTok = Tok->Previous;
420   // Keep a stack of positions of lbrace tokens. We will
421   // update information about whether an lbrace starts a
422   // braced init list or a different block during the loop.
423   SmallVector<FormatToken *, 8> LBraceStack;
424   assert(Tok->Tok.is(tok::l_brace));
425   do {
426     // Get next non-comment token.
427     FormatToken *NextTok;
428     unsigned ReadTokens = 0;
429     do {
430       NextTok = Tokens->getNextToken();
431       ++ReadTokens;
432     } while (NextTok->is(tok::comment));
433 
434     switch (Tok->Tok.getKind()) {
435     case tok::l_brace:
436       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
437         if (PrevTok->isOneOf(tok::colon, tok::less))
438           // A ':' indicates this code is in a type, or a braced list
439           // following a label in an object literal ({a: {b: 1}}).
440           // A '<' could be an object used in a comparison, but that is nonsense
441           // code (can never return true), so more likely it is a generic type
442           // argument (`X<{a: string; b: number}>`).
443           // The code below could be confused by semicolons between the
444           // individual members in a type member list, which would normally
445           // trigger BK_Block. In both cases, this must be parsed as an inline
446           // braced init.
447           Tok->BlockKind = BK_BracedInit;
448         else if (PrevTok->is(tok::r_paren))
449           // `) { }` can only occur in function or method declarations in JS.
450           Tok->BlockKind = BK_Block;
451       } else {
452         Tok->BlockKind = BK_Unknown;
453       }
454       LBraceStack.push_back(Tok);
455       break;
456     case tok::r_brace:
457       if (LBraceStack.empty())
458         break;
459       if (LBraceStack.back()->BlockKind == BK_Unknown) {
460         bool ProbablyBracedList = false;
461         if (Style.Language == FormatStyle::LK_Proto) {
462           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
463         } else {
464           // Using OriginalColumn to distinguish between ObjC methods and
465           // binary operators is a bit hacky.
466           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
467                                   NextTok->OriginalColumn == 0;
468 
469           // If there is a comma, semicolon or right paren after the closing
470           // brace, we assume this is a braced initializer list.  Note that
471           // regardless how we mark inner braces here, we will overwrite the
472           // BlockKind later if we parse a braced list (where all blocks
473           // inside are by default braced lists), or when we explicitly detect
474           // blocks (for example while parsing lambdas).
475           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
476           // braced list in JS.
477           ProbablyBracedList =
478               (Style.Language == FormatStyle::LK_JavaScript &&
479                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
480                                 Keywords.kw_as)) ||
481               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
482               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
483                                tok::r_paren, tok::r_square, tok::l_brace,
484                                tok::ellipsis) ||
485               (NextTok->is(tok::identifier) &&
486                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
487               (NextTok->is(tok::semi) &&
488                (!ExpectClassBody || LBraceStack.size() != 1)) ||
489               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
490           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
491             // We can have an array subscript after a braced init
492             // list, but C++11 attributes are expected after blocks.
493             NextTok = Tokens->getNextToken();
494             ++ReadTokens;
495             ProbablyBracedList = NextTok->isNot(tok::l_square);
496           }
497         }
498         if (ProbablyBracedList) {
499           Tok->BlockKind = BK_BracedInit;
500           LBraceStack.back()->BlockKind = BK_BracedInit;
501         } else {
502           Tok->BlockKind = BK_Block;
503           LBraceStack.back()->BlockKind = BK_Block;
504         }
505       }
506       LBraceStack.pop_back();
507       break;
508     case tok::identifier:
509       if (!Tok->is(TT_StatementMacro))
510         break;
511       LLVM_FALLTHROUGH;
512     case tok::at:
513     case tok::semi:
514     case tok::kw_if:
515     case tok::kw_while:
516     case tok::kw_for:
517     case tok::kw_switch:
518     case tok::kw_try:
519     case tok::kw___try:
520       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
521         LBraceStack.back()->BlockKind = BK_Block;
522       break;
523     default:
524       break;
525     }
526     PrevTok = Tok;
527     Tok = NextTok;
528   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
529 
530   // Assume other blocks for all unclosed opening braces.
531   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
532     if (LBraceStack[i]->BlockKind == BK_Unknown)
533       LBraceStack[i]->BlockKind = BK_Block;
534   }
535 
536   FormatTok = Tokens->setPosition(StoredPosition);
537 }
538 
/// Folds the \c std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
544 
545 size_t UnwrappedLineParser::computePPHash() const {
546   size_t h = 0;
547   for (const auto &i : PPStack) {
548     hash_combine(h, size_t(i.Kind));
549     hash_combine(h, i.Line);
550   }
551   return h;
552 }
553 
/// Parses a block that starts at the current '{' (or macro block begin token).
///
/// \param MustBeDeclaration declaration state applied to the block's content.
/// \param AddLevel          whether the content is parsed one level deeper.
/// \param MunchSemi         whether a ';' directly after the block is consumed
///                          as well.
///
/// Returns early, without consuming a closing token, if eof is reached or the
/// token after the block content is not the expected closing token.
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
                                     bool MunchSemi) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->BlockKind = BK_Block;

  // Hash of the preprocessor branch stack at the opening token; compared with
  // the hash at the closing token further down.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);

  // A macro block begin token may be followed by a parenthesized part.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  // Index of the line holding the opening token, or kInvalidIndex if the
  // current line container is empty.
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevel)
    ++Line->Level;
  parseLevel(/*HasOpeningBrace=*/true);

  if (eof())
    return;

  // The block content did not end at the matching closing token: restore the
  // level and leave the token for the caller.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->BlockKind = BK_Block;
    return;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();
  Line->Level = InitialLevel;

  // Only link the opening and closing lines if the preprocessor branch stack
  // hashes the same at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }
}
614 
615 static bool isGoogScope(const UnwrappedLine &Line) {
616   // FIXME: Closure-library specific stuff should not be hard-coded but be
617   // configurable.
618   if (Line.Tokens.size() < 4)
619     return false;
620   auto I = Line.Tokens.begin();
621   if (I->Tok->TokenText != "goog")
622     return false;
623   ++I;
624   if (I->Tok->isNot(tok::period))
625     return false;
626   ++I;
627   if (I->Tok->TokenText != "scope")
628     return false;
629   ++I;
630   return I->Tok->is(tok::l_paren);
631 }
632 
633 static bool isIIFE(const UnwrappedLine &Line,
634                    const AdditionalKeywords &Keywords) {
635   // Look for the start of an immediately invoked anonymous function.
636   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
637   // This is commonly done in JavaScript to create a new, anonymous scope.
638   // Example: (function() { ... })()
639   if (Line.Tokens.size() < 3)
640     return false;
641   auto I = Line.Tokens.begin();
642   if (I->Tok->isNot(tok::l_paren))
643     return false;
644   ++I;
645   if (I->Tok->isNot(Keywords.kw_function))
646     return false;
647   ++I;
648   return I->Tok->is(tok::l_paren);
649 }
650 
651 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
652                                    const FormatToken &InitialToken) {
653   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
654     return Style.BraceWrapping.AfterNamespace;
655   if (InitialToken.is(tok::kw_class))
656     return Style.BraceWrapping.AfterClass;
657   if (InitialToken.is(tok::kw_union))
658     return Style.BraceWrapping.AfterUnion;
659   if (InitialToken.is(tok::kw_struct))
660     return Style.BraceWrapping.AfterStruct;
661   return false;
662 }
663 
664 void UnwrappedLineParser::parseChildBlock() {
665   FormatTok->BlockKind = BK_Block;
666   nextToken();
667   {
668     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
669                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
670     ScopedLineState LineState(*this);
671     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
672                                             /*MustBeDeclaration=*/false);
673     Line->Level += SkipIndent ? 0 : 1;
674     parseLevel(/*HasOpeningBrace=*/true);
675     flushComments(isOnNewLine(*FormatTok));
676     Line->Level -= SkipIndent ? 0 : 1;
677   }
678   nextToken();
679 }
680 
681 void UnwrappedLineParser::parsePPDirective() {
682   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
683   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
684 
685   nextToken();
686 
687   if (!FormatTok->Tok.getIdentifierInfo()) {
688     parsePPUnknown();
689     return;
690   }
691 
692   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
693   case tok::pp_define:
694     parsePPDefine();
695     return;
696   case tok::pp_if:
697     parsePPIf(/*IfDef=*/false);
698     break;
699   case tok::pp_ifdef:
700   case tok::pp_ifndef:
701     parsePPIf(/*IfDef=*/true);
702     break;
703   case tok::pp_else:
704     parsePPElse();
705     break;
706   case tok::pp_elif:
707     parsePPElIf();
708     break;
709   case tok::pp_endif:
710     parsePPEndIf();
711     break;
712   default:
713     parsePPUnknown();
714     break;
715   }
716 }
717 
718 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
719   size_t Line = CurrentLines->size();
720   if (CurrentLines == &PreprocessorDirectives)
721     Line += Lines.size();
722 
723   if (Unreachable ||
724       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
725     PPStack.push_back({PP_Unreachable, Line});
726   else
727     PPStack.push_back({PP_Conditional, Line});
728 }
729 
730 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
731   ++PPBranchLevel;
732   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
733   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
734     PPLevelBranchIndex.push_back(0);
735     PPLevelBranchCount.push_back(0);
736   }
737   PPChainBranchIndex.push(0);
738   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
739   conditionalCompilationCondition(Unreachable || Skip);
740 }
741 
742 void UnwrappedLineParser::conditionalCompilationAlternative() {
743   if (!PPStack.empty())
744     PPStack.pop_back();
745   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
746   if (!PPChainBranchIndex.empty())
747     ++PPChainBranchIndex.top();
748   conditionalCompilationCondition(
749       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
750       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
751 }
752 
753 void UnwrappedLineParser::conditionalCompilationEnd() {
754   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
755   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
756     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
757       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
758     }
759   }
760   // Guard against #endif's without #if.
761   if (PPBranchLevel > -1)
762     --PPBranchLevel;
763   if (!PPChainBranchIndex.empty())
764     PPChainBranchIndex.pop();
765   if (!PPStack.empty())
766     PPStack.pop_back();
767 }
768 
769 void UnwrappedLineParser::parsePPIf(bool IfDef) {
770   bool IfNDef = FormatTok->is(tok::pp_ifndef);
771   nextToken();
772   bool Unreachable = false;
773   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
774     Unreachable = true;
775   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
776     Unreachable = true;
777   conditionalCompilationStart(Unreachable);
778   FormatToken *IfCondition = FormatTok;
779   // If there's a #ifndef on the first line, and the only lines before it are
780   // comments, it could be an include guard.
781   bool MaybeIncludeGuard = IfNDef;
782   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
783     for (auto &Line : Lines) {
784       if (!Line.Tokens.front().Tok->is(tok::comment)) {
785         MaybeIncludeGuard = false;
786         IncludeGuard = IG_Rejected;
787         break;
788       }
789     }
790   --PPBranchLevel;
791   parsePPUnknown();
792   ++PPBranchLevel;
793   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
794     IncludeGuard = IG_IfNdefed;
795     IncludeGuardToken = IfCondition;
796   }
797 }
798 
799 void UnwrappedLineParser::parsePPElse() {
800   // If a potential include guard has an #else, it's not an include guard.
801   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
802     IncludeGuard = IG_Rejected;
803   conditionalCompilationAlternative();
804   if (PPBranchLevel > -1)
805     --PPBranchLevel;
806   parsePPUnknown();
807   ++PPBranchLevel;
808 }
809 
810 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
811 
812 void UnwrappedLineParser::parsePPEndIf() {
813   conditionalCompilationEnd();
814   parsePPUnknown();
815   // If the #endif of a potential include guard is the last thing in the file,
816   // then we found an include guard.
817   unsigned TokenPosition = Tokens->getPosition();
818   FormatToken *PeekNext = AllTokens[TokenPosition];
819   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
820       PeekNext->is(tok::eof) &&
821       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
822     IncludeGuard = IG_Found;
823 }
824 
825 void UnwrappedLineParser::parsePPDefine() {
826   nextToken();
827 
828   if (!FormatTok->Tok.getIdentifierInfo()) {
829     IncludeGuard = IG_Rejected;
830     IncludeGuardToken = nullptr;
831     parsePPUnknown();
832     return;
833   }
834 
835   if (IncludeGuard == IG_IfNdefed &&
836       IncludeGuardToken->TokenText == FormatTok->TokenText) {
837     IncludeGuard = IG_Defined;
838     IncludeGuardToken = nullptr;
839     for (auto &Line : Lines) {
840       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
841         IncludeGuard = IG_Rejected;
842         break;
843       }
844     }
845   }
846 
847   nextToken();
848   if (FormatTok->Tok.getKind() == tok::l_paren &&
849       FormatTok->WhitespaceRange.getBegin() ==
850           FormatTok->WhitespaceRange.getEnd()) {
851     parseParens();
852   }
853   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
854     Line->Level += PPBranchLevel + 1;
855   addUnwrappedLine();
856   ++Line->Level;
857 
858   // Errors during a preprocessor directive can only affect the layout of the
859   // preprocessor directive, and thus we ignore them. An alternative approach
860   // would be to use the same approach we use on the file level (no
861   // re-indentation if there was a structural error) within the macro
862   // definition.
863   parseFile();
864 }
865 
866 void UnwrappedLineParser::parsePPUnknown() {
867   do {
868     nextToken();
869   } while (!eof());
870   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
871     Line->Level += PPBranchLevel + 1;
872   addUnwrappedLine();
873 }
874 
875 // Here we blacklist certain tokens that are not usually the first token in an
876 // unwrapped line. This is used in attempt to distinguish macro calls without
877 // trailing semicolons from other constructs split to several lines.
878 static bool tokenCanStartNewLine(const clang::Token &Tok) {
879   // Semicolon can be a null-statement, l_square can be a start of a macro or
880   // a C++11 attribute, but this doesn't seem to be common.
881   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
882          Tok.isNot(tok::l_square) &&
883          // Tokens that can only be used as binary operators and a part of
884          // overloaded operator names.
885          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
886          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
887          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
888          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
889          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
890          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
891          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
892          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
893          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
894          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
895          Tok.isNot(tok::lesslessequal) &&
896          // Colon is used in labels, base class lists, initializer lists,
897          // range-based for loops, ternary operator, but should never be the
898          // first token in an unwrapped line.
899          Tok.isNot(tok::colon) &&
900          // 'noexcept' is a trailing annotation.
901          Tok.isNot(tok::kw_noexcept);
902 }
903 
904 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
905                           const FormatToken *FormatTok) {
906   // FIXME: This returns true for C/C++ keywords like 'struct'.
907   return FormatTok->is(tok::identifier) &&
908          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
909           !FormatTok->isOneOf(
910               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
911               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
912               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
913               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
914               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
915               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
916               Keywords.kw_from));
917 }
918 
919 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
920                                  const FormatToken *FormatTok) {
921   return FormatTok->Tok.isLiteral() ||
922          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
923          mustBeJSIdent(Keywords, FormatTok);
924 }
925 
926 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
927 // when encountered after a value (see mustBeJSIdentOrValue).
928 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
929                            const FormatToken *FormatTok) {
930   return FormatTok->isOneOf(
931       tok::kw_return, Keywords.kw_yield,
932       // conditionals
933       tok::kw_if, tok::kw_else,
934       // loops
935       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
936       // switch/case
937       tok::kw_switch, tok::kw_case,
938       // exceptions
939       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
940       // declaration
941       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
942       Keywords.kw_async, Keywords.kw_function,
943       // import/export
944       Keywords.kw_import, tok::kw_export);
945 }
946 
947 // readTokenWithJavaScriptASI reads the next token and terminates the current
948 // line if JavaScript Automatic Semicolon Insertion must
949 // happen between the current token and the next token.
950 //
951 // This method is conservative - it cannot cover all edge cases of JavaScript,
952 // but only aims to correctly handle certain well known cases. It *must not*
953 // return true in speculative cases.
954 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
955   FormatToken *Previous = FormatTok;
956   readToken();
957   FormatToken *Next = FormatTok;
958 
959   bool IsOnSameLine =
960       CommentsBeforeNextToken.empty()
961           ? Next->NewlinesBefore == 0
962           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
963   if (IsOnSameLine)
964     return;
965 
966   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
967   bool PreviousStartsTemplateExpr =
968       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
969   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
970     // If the line contains an '@' sign, the previous token might be an
971     // annotation, which can precede another identifier/value.
972     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
973                               [](UnwrappedLineNode &LineNode) {
974                                 return LineNode.Tok->is(tok::at);
975                               }) != Line->Tokens.end();
976     if (HasAt)
977       return;
978   }
979   if (Next->is(tok::exclaim) && PreviousMustBeValue)
980     return addUnwrappedLine();
981   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
982   bool NextEndsTemplateExpr =
983       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
984   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
985       (PreviousMustBeValue ||
986        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
987                          tok::minusminus)))
988     return addUnwrappedLine();
989   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
990       isJSDeclOrStmt(Keywords, Next))
991     return addUnwrappedLine();
992 }
993 
/// Parses one structural element starting at the current token.
///
/// The function works in two stages:
///  1. A switch on the kind of the first token dispatches to the dedicated
///     parsers (namespaces, access specifiers, control statements, labels,
///     extern blocks, import/export, macros, ...). Cases that `return` have
///     fully handled the element; cases that `break` continue with stage 2.
///  2. A loop consumes the remaining tokens one construct at a time until it
///     reaches something that ends the element (for example a ';', a block,
///     or a '}'), or until eof().
///
/// Must not be called with the current token being a '{' (see the assert).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include followed by an optional string literal forms a line
  // of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Stage 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Every token up to the matching '}' is marked as Finalized; the
      // closing brace ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the keyword is simply consumed; otherwise it
    // is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern' followed by a string literal and a '{' is treated as an
    // extern block; anything else continues as a regular declaration.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: 'import', an optional 'public', a string literal and an
        // optional ';'.
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // signals/slots style keywords followed by ':' form a line of their own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Stage 2: consume the rest of the element token by token.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef' followed by one of the NS_/CF_ enum macros is parsed as an
      // enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java, JavaScript and C#.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the structural element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' itself is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', optionally followed by an identifier or simple type specifier,
      // a parenthesized group and a '{' child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then restore the token position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // The checks below only apply when the identifier is the first token of
      // the line, or the second one after a leading comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is either function-like or at
        // least five characters long, followed by a line break and a token
        // that can start a new line, is treated as a macro invocation on its
        // own line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: '= <' opens a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1451 
1452 bool UnwrappedLineParser::tryToParseLambda() {
1453   if (!Style.isCpp()) {
1454     nextToken();
1455     return false;
1456   }
1457   assert(FormatTok->is(tok::l_square));
1458   FormatToken &LSquare = *FormatTok;
1459   if (!tryToParseLambdaIntroducer())
1460     return false;
1461 
1462   bool SeenArrow = false;
1463 
1464   while (FormatTok->isNot(tok::l_brace)) {
1465     if (FormatTok->isSimpleTypeSpecifier()) {
1466       nextToken();
1467       continue;
1468     }
1469     switch (FormatTok->Tok.getKind()) {
1470     case tok::l_brace:
1471       break;
1472     case tok::l_paren:
1473       parseParens();
1474       break;
1475     case tok::amp:
1476     case tok::star:
1477     case tok::kw_const:
1478     case tok::comma:
1479     case tok::less:
1480     case tok::greater:
1481     case tok::identifier:
1482     case tok::numeric_constant:
1483     case tok::coloncolon:
1484     case tok::kw_class:
1485     case tok::kw_mutable:
1486     case tok::kw_noexcept:
1487     case tok::kw_template:
1488     case tok::kw_typename:
1489       nextToken();
1490       break;
1491     // Specialization of a template with an integer parameter can contain
1492     // arithmetic, logical, comparison and ternary operators.
1493     //
1494     // FIXME: This also accepts sequences of operators that are not in the scope
1495     // of a template argument list.
1496     //
1497     // In a C++ lambda a template type can only occur after an arrow. We use
1498     // this as an heuristic to distinguish between Objective-C expressions
1499     // followed by an `a->b` expression, such as:
1500     // ([obj func:arg] + a->b)
1501     // Otherwise the code below would parse as a lambda.
1502     //
1503     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1504     // explicit template lists: []<bool b = true && false>(U &&u){}
1505     case tok::plus:
1506     case tok::minus:
1507     case tok::exclaim:
1508     case tok::tilde:
1509     case tok::slash:
1510     case tok::percent:
1511     case tok::lessless:
1512     case tok::pipe:
1513     case tok::pipepipe:
1514     case tok::ampamp:
1515     case tok::caret:
1516     case tok::equalequal:
1517     case tok::exclaimequal:
1518     case tok::greaterequal:
1519     case tok::lessequal:
1520     case tok::question:
1521     case tok::colon:
1522     case tok::kw_true:
1523     case tok::kw_false:
1524       if (SeenArrow) {
1525         nextToken();
1526         break;
1527       }
1528       return true;
1529     case tok::arrow:
1530       // This might or might not actually be a lambda arrow (this could be an
1531       // ObjC method invocation followed by a dereferencing arrow). We might
1532       // reset this back to TT_Unknown in TokenAnnotator.
1533       FormatTok->Type = TT_LambdaArrow;
1534       SeenArrow = true;
1535       nextToken();
1536       break;
1537     default:
1538       return true;
1539     }
1540   }
1541   FormatTok->Type = TT_LambdaLBrace;
1542   LSquare.Type = TT_LambdaLSquare;
1543   parseChildBlock();
1544   return true;
1545 }
1546 
1547 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1548   const FormatToken *Previous = FormatTok->Previous;
1549   if (Previous &&
1550       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1551                          tok::kw_delete, tok::l_square) ||
1552        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1553        Previous->isSimpleTypeSpecifier())) {
1554     nextToken();
1555     return false;
1556   }
1557   nextToken();
1558   if (FormatTok->is(tok::l_square)) {
1559     return false;
1560   }
1561   parseSquare(/*LambdaIntroducer=*/true);
1562   return true;
1563 }
1564 
1565 void UnwrappedLineParser::tryToParseJSFunction() {
1566   assert(FormatTok->is(Keywords.kw_function) ||
1567          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1568   if (FormatTok->is(Keywords.kw_async))
1569     nextToken();
1570   // Consume "function".
1571   nextToken();
1572 
1573   // Consume * (generator function). Treat it like C++'s overloaded operators.
1574   if (FormatTok->is(tok::star)) {
1575     FormatTok->Type = TT_OverloadedOperator;
1576     nextToken();
1577   }
1578 
1579   // Consume function name.
1580   if (FormatTok->is(tok::identifier))
1581     nextToken();
1582 
1583   if (FormatTok->isNot(tok::l_paren))
1584     return;
1585 
1586   // Parse formal parameter list.
1587   parseParens();
1588 
1589   if (FormatTok->is(tok::colon)) {
1590     // Parse a type definition.
1591     nextToken();
1592 
1593     // Eat the type declaration. For braced inline object types, balance braces,
1594     // otherwise just parse until finding an l_brace for the function body.
1595     if (FormatTok->is(tok::l_brace))
1596       tryToParseBracedList();
1597     else
1598       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1599         nextToken();
1600   }
1601 
1602   if (FormatTok->is(tok::semi))
1603     return;
1604 
1605   parseChildBlock();
1606 }
1607 
1608 bool UnwrappedLineParser::tryToParseBracedList() {
1609   if (FormatTok->BlockKind == BK_Unknown)
1610     calculateBraceTypes();
1611   assert(FormatTok->BlockKind != BK_Unknown);
1612   if (FormatTok->BlockKind == BK_Block)
1613     return false;
1614   nextToken();
1615   parseBracedList();
1616   return true;
1617 }
1618 
1619 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1620                                           tok::TokenKind ClosingBraceKind) {
1621   bool HasError = false;
1622 
1623   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1624   // replace this by using parseAssigmentExpression() inside.
1625   do {
1626     if (Style.Language == FormatStyle::LK_JavaScript) {
1627       if (FormatTok->is(Keywords.kw_function) ||
1628           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1629         tryToParseJSFunction();
1630         continue;
1631       }
1632       if (FormatTok->is(TT_JsFatArrow)) {
1633         nextToken();
1634         // Fat arrows can be followed by simple expressions or by child blocks
1635         // in curly braces.
1636         if (FormatTok->is(tok::l_brace)) {
1637           parseChildBlock();
1638           continue;
1639         }
1640       }
1641       if (FormatTok->is(tok::l_brace)) {
1642         // Could be a method inside of a braced list `{a() { return 1; }}`.
1643         if (tryToParseBracedList())
1644           continue;
1645         parseChildBlock();
1646       }
1647     }
1648     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1649       nextToken();
1650       return !HasError;
1651     }
1652     switch (FormatTok->Tok.getKind()) {
1653     case tok::caret:
1654       nextToken();
1655       if (FormatTok->is(tok::l_brace)) {
1656         parseChildBlock();
1657       }
1658       break;
1659     case tok::l_square:
1660       tryToParseLambda();
1661       break;
1662     case tok::l_paren:
1663       parseParens();
1664       // JavaScript can just have free standing methods and getters/setters in
1665       // object literals. Detect them by a "{" following ")".
1666       if (Style.Language == FormatStyle::LK_JavaScript) {
1667         if (FormatTok->is(tok::l_brace))
1668           parseChildBlock();
1669         break;
1670       }
1671       break;
1672     case tok::l_brace:
1673       // Assume there are no blocks inside a braced init list apart
1674       // from the ones we explicitly parse out (like lambdas).
1675       FormatTok->BlockKind = BK_BracedInit;
1676       nextToken();
1677       parseBracedList();
1678       break;
1679     case tok::less:
1680       if (Style.Language == FormatStyle::LK_Proto) {
1681         nextToken();
1682         parseBracedList(/*ContinueOnSemicolons=*/false,
1683                         /*ClosingBraceKind=*/tok::greater);
1684       } else {
1685         nextToken();
1686       }
1687       break;
1688     case tok::semi:
1689       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1690       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1691       // used for error recovery if we have otherwise determined that this is
1692       // a braced list.
1693       if (Style.Language == FormatStyle::LK_JavaScript) {
1694         nextToken();
1695         break;
1696       }
1697       HasError = true;
1698       if (!ContinueOnSemicolons)
1699         return !HasError;
1700       nextToken();
1701       break;
1702     case tok::comma:
1703       nextToken();
1704       break;
1705     default:
1706       nextToken();
1707       break;
1708     }
1709   } while (!eof());
1710   return false;
1711 }
1712 
1713 void UnwrappedLineParser::parseParens() {
1714   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1715   nextToken();
1716   do {
1717     switch (FormatTok->Tok.getKind()) {
1718     case tok::l_paren:
1719       parseParens();
1720       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1721         parseChildBlock();
1722       break;
1723     case tok::r_paren:
1724       nextToken();
1725       return;
1726     case tok::r_brace:
1727       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1728       return;
1729     case tok::l_square:
1730       tryToParseLambda();
1731       break;
1732     case tok::l_brace:
1733       if (!tryToParseBracedList())
1734         parseChildBlock();
1735       break;
1736     case tok::at:
1737       nextToken();
1738       if (FormatTok->Tok.is(tok::l_brace)) {
1739         nextToken();
1740         parseBracedList();
1741       }
1742       break;
1743     case tok::kw_class:
1744       if (Style.Language == FormatStyle::LK_JavaScript)
1745         parseRecord(/*ParseAsExpr=*/true);
1746       else
1747         nextToken();
1748       break;
1749     case tok::identifier:
1750       if (Style.Language == FormatStyle::LK_JavaScript &&
1751           (FormatTok->is(Keywords.kw_function) ||
1752            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1753         tryToParseJSFunction();
1754       else
1755         nextToken();
1756       break;
1757     default:
1758       nextToken();
1759       break;
1760     }
1761   } while (!eof());
1762 }
1763 
1764 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1765   if (!LambdaIntroducer) {
1766     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1767     if (tryToParseLambda())
1768       return;
1769   }
1770   do {
1771     switch (FormatTok->Tok.getKind()) {
1772     case tok::l_paren:
1773       parseParens();
1774       break;
1775     case tok::r_square:
1776       nextToken();
1777       return;
1778     case tok::r_brace:
1779       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1780       return;
1781     case tok::l_square:
1782       parseSquare();
1783       break;
1784     case tok::l_brace: {
1785       if (!tryToParseBracedList())
1786         parseChildBlock();
1787       break;
1788     }
1789     case tok::at:
1790       nextToken();
1791       if (FormatTok->Tok.is(tok::l_brace)) {
1792         nextToken();
1793         parseBracedList();
1794       }
1795       break;
1796     default:
1797       nextToken();
1798       break;
1799     }
1800   } while (!eof());
1801 }
1802 
1803 void UnwrappedLineParser::parseIfThenElse() {
1804   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1805   nextToken();
1806   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
1807     nextToken();
1808   if (FormatTok->Tok.is(tok::l_paren))
1809     parseParens();
1810   bool NeedsUnwrappedLine = false;
1811   if (FormatTok->Tok.is(tok::l_brace)) {
1812     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1813     parseBlock(/*MustBeDeclaration=*/false);
1814     if (Style.BraceWrapping.BeforeElse)
1815       addUnwrappedLine();
1816     else
1817       NeedsUnwrappedLine = true;
1818   } else {
1819     addUnwrappedLine();
1820     ++Line->Level;
1821     parseStructuralElement();
1822     --Line->Level;
1823   }
1824   if (FormatTok->Tok.is(tok::kw_else)) {
1825     nextToken();
1826     if (FormatTok->Tok.is(tok::l_brace)) {
1827       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1828       parseBlock(/*MustBeDeclaration=*/false);
1829       addUnwrappedLine();
1830     } else if (FormatTok->Tok.is(tok::kw_if)) {
1831       parseIfThenElse();
1832     } else {
1833       addUnwrappedLine();
1834       ++Line->Level;
1835       parseStructuralElement();
1836       if (FormatTok->is(tok::eof))
1837         addUnwrappedLine();
1838       --Line->Level;
1839     }
1840   } else if (NeedsUnwrappedLine) {
1841     addUnwrappedLine();
1842   }
1843 }
1844 
1845 void UnwrappedLineParser::parseTryCatch() {
1846   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1847   nextToken();
1848   bool NeedsUnwrappedLine = false;
1849   if (FormatTok->is(tok::colon)) {
1850     // We are in a function try block, what comes is an initializer list.
1851     nextToken();
1852 
1853     // In case identifiers were removed by clang-tidy, what might follow is
1854     // multiple commas in sequence - before the first identifier.
1855     while (FormatTok->is(tok::comma))
1856       nextToken();
1857 
1858     while (FormatTok->is(tok::identifier)) {
1859       nextToken();
1860       if (FormatTok->is(tok::l_paren))
1861         parseParens();
1862 
1863       // In case identifiers were removed by clang-tidy, what might follow is
1864       // multiple commas in sequence - after the first identifier.
1865       while (FormatTok->is(tok::comma))
1866         nextToken();
1867     }
1868   }
1869   // Parse try with resource.
1870   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1871     parseParens();
1872   }
1873   if (FormatTok->is(tok::l_brace)) {
1874     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1875     parseBlock(/*MustBeDeclaration=*/false);
1876     if (Style.BraceWrapping.BeforeCatch) {
1877       addUnwrappedLine();
1878     } else {
1879       NeedsUnwrappedLine = true;
1880     }
1881   } else if (!FormatTok->is(tok::kw_catch)) {
1882     // The C++ standard requires a compound-statement after a try.
1883     // If there's none, we try to assume there's a structuralElement
1884     // and try to continue.
1885     addUnwrappedLine();
1886     ++Line->Level;
1887     parseStructuralElement();
1888     --Line->Level;
1889   }
1890   while (1) {
1891     if (FormatTok->is(tok::at))
1892       nextToken();
1893     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1894                              tok::kw___finally) ||
1895           ((Style.Language == FormatStyle::LK_Java ||
1896             Style.Language == FormatStyle::LK_JavaScript) &&
1897            FormatTok->is(Keywords.kw_finally)) ||
1898           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1899            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1900       break;
1901     nextToken();
1902     while (FormatTok->isNot(tok::l_brace)) {
1903       if (FormatTok->is(tok::l_paren)) {
1904         parseParens();
1905         continue;
1906       }
1907       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1908         return;
1909       nextToken();
1910     }
1911     NeedsUnwrappedLine = false;
1912     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1913     parseBlock(/*MustBeDeclaration=*/false);
1914     if (Style.BraceWrapping.BeforeCatch)
1915       addUnwrappedLine();
1916     else
1917       NeedsUnwrappedLine = true;
1918   }
1919   if (NeedsUnwrappedLine)
1920     addUnwrappedLine();
1921 }
1922 
1923 void UnwrappedLineParser::parseNamespace() {
1924   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
1925          "'namespace' expected");
1926 
1927   const FormatToken &InitialToken = *FormatTok;
1928   nextToken();
1929   if (InitialToken.is(TT_NamespaceMacro)) {
1930     parseParens();
1931   } else {
1932     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
1933                               tok::l_square)) {
1934       if (FormatTok->is(tok::l_square))
1935         parseSquare();
1936       else
1937         nextToken();
1938     }
1939   }
1940   if (FormatTok->Tok.is(tok::l_brace)) {
1941     if (ShouldBreakBeforeBrace(Style, InitialToken))
1942       addUnwrappedLine();
1943 
1944     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1945                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1946                      DeclarationScopeStack.size() > 1);
1947     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1948     // Munch the semicolon after a namespace. This is more common than one would
1949     // think. Puttin the semicolon into its own line is very ugly.
1950     if (FormatTok->Tok.is(tok::semi))
1951       nextToken();
1952     addUnwrappedLine();
1953   }
1954   // FIXME: Add error handling.
1955 }
1956 
1957 void UnwrappedLineParser::parseNew() {
1958   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1959   nextToken();
1960   if (Style.Language != FormatStyle::LK_Java)
1961     return;
1962 
1963   // In Java, we can parse everything up to the parens, which aren't optional.
1964   do {
1965     // There should not be a ;, { or } before the new's open paren.
1966     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1967       return;
1968 
1969     // Consume the parens.
1970     if (FormatTok->is(tok::l_paren)) {
1971       parseParens();
1972 
1973       // If there is a class body of an anonymous class, consume that as child.
1974       if (FormatTok->is(tok::l_brace))
1975         parseChildBlock();
1976       return;
1977     }
1978     nextToken();
1979   } while (!eof());
1980 }
1981 
1982 void UnwrappedLineParser::parseForOrWhileLoop() {
1983   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1984          "'for', 'while' or foreach macro expected");
1985   nextToken();
1986   // JS' for await ( ...
1987   if (Style.Language == FormatStyle::LK_JavaScript &&
1988       FormatTok->is(Keywords.kw_await))
1989     nextToken();
1990   if (FormatTok->Tok.is(tok::l_paren))
1991     parseParens();
1992   if (FormatTok->Tok.is(tok::l_brace)) {
1993     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1994     parseBlock(/*MustBeDeclaration=*/false);
1995     addUnwrappedLine();
1996   } else {
1997     addUnwrappedLine();
1998     ++Line->Level;
1999     parseStructuralElement();
2000     --Line->Level;
2001   }
2002 }
2003 
2004 void UnwrappedLineParser::parseDoWhile() {
2005   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2006   nextToken();
2007   if (FormatTok->Tok.is(tok::l_brace)) {
2008     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2009     parseBlock(/*MustBeDeclaration=*/false);
2010     if (Style.BraceWrapping.IndentBraces)
2011       addUnwrappedLine();
2012   } else {
2013     addUnwrappedLine();
2014     ++Line->Level;
2015     parseStructuralElement();
2016     --Line->Level;
2017   }
2018 
2019   // FIXME: Add error handling.
2020   if (!FormatTok->Tok.is(tok::kw_while)) {
2021     addUnwrappedLine();
2022     return;
2023   }
2024 
2025   nextToken();
2026   parseStructuralElement();
2027 }
2028 
2029 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2030   nextToken();
2031   unsigned OldLineLevel = Line->Level;
2032   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2033     --Line->Level;
2034   if (LeftAlignLabel)
2035     Line->Level = 0;
2036   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2037       FormatTok->Tok.is(tok::l_brace)) {
2038     CompoundStatementIndenter Indenter(this, Line->Level,
2039                                        Style.BraceWrapping.AfterCaseLabel,
2040                                        Style.BraceWrapping.IndentBraces);
2041     parseBlock(/*MustBeDeclaration=*/false);
2042     if (FormatTok->Tok.is(tok::kw_break)) {
2043       if (Style.BraceWrapping.AfterControlStatement ==
2044           FormatStyle::BWACS_Always)
2045         addUnwrappedLine();
2046       parseStructuralElement();
2047     }
2048     addUnwrappedLine();
2049   } else {
2050     if (FormatTok->is(tok::semi))
2051       nextToken();
2052     addUnwrappedLine();
2053   }
2054   Line->Level = OldLineLevel;
2055   if (FormatTok->isNot(tok::l_brace)) {
2056     parseStructuralElement();
2057     addUnwrappedLine();
2058   }
2059 }
2060 
2061 void UnwrappedLineParser::parseCaseLabel() {
2062   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2063   // FIXME: fix handling of complex expressions here.
2064   do {
2065     nextToken();
2066   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2067   parseLabel();
2068 }
2069 
2070 void UnwrappedLineParser::parseSwitch() {
2071   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2072   nextToken();
2073   if (FormatTok->Tok.is(tok::l_paren))
2074     parseParens();
2075   if (FormatTok->Tok.is(tok::l_brace)) {
2076     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2077     parseBlock(/*MustBeDeclaration=*/false);
2078     addUnwrappedLine();
2079   } else {
2080     addUnwrappedLine();
2081     ++Line->Level;
2082     parseStructuralElement();
2083     --Line->Level;
2084   }
2085 }
2086 
2087 void UnwrappedLineParser::parseAccessSpecifier() {
2088   nextToken();
2089   // Understand Qt's slots.
2090   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2091     nextToken();
2092   // Otherwise, we don't know what it is, and we'd better keep the next token.
2093   if (FormatTok->Tok.is(tok::colon))
2094     nextToken();
2095   addUnwrappedLine();
2096 }
2097 
2098 bool UnwrappedLineParser::parseEnum() {
2099   // Won't be 'enum' for NS_ENUMs.
2100   if (FormatTok->Tok.is(tok::kw_enum))
2101     nextToken();
2102 
2103   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2104   // declarations. An "enum" keyword followed by a colon would be a syntax
2105   // error and thus assume it is just an identifier.
2106   if (Style.Language == FormatStyle::LK_JavaScript &&
2107       FormatTok->isOneOf(tok::colon, tok::question))
2108     return false;
2109 
2110   // In protobuf, "enum" can be used as a field name.
2111   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2112     return false;
2113 
2114   // Eat up enum class ...
2115   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2116     nextToken();
2117 
2118   while (FormatTok->Tok.getIdentifierInfo() ||
2119          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2120                             tok::greater, tok::comma, tok::question)) {
2121     nextToken();
2122     // We can have macros or attributes in between 'enum' and the enum name.
2123     if (FormatTok->is(tok::l_paren))
2124       parseParens();
2125     if (FormatTok->is(tok::identifier)) {
2126       nextToken();
2127       // If there are two identifiers in a row, this is likely an elaborate
2128       // return type. In Java, this can be "implements", etc.
2129       if (Style.isCpp() && FormatTok->is(tok::identifier))
2130         return false;
2131     }
2132   }
2133 
2134   // Just a declaration or something is wrong.
2135   if (FormatTok->isNot(tok::l_brace))
2136     return true;
2137   FormatTok->BlockKind = BK_Block;
2138 
2139   if (Style.Language == FormatStyle::LK_Java) {
2140     // Java enums are different.
2141     parseJavaEnumBody();
2142     return true;
2143   }
2144   if (Style.Language == FormatStyle::LK_Proto) {
2145     parseBlock(/*MustBeDeclaration=*/true);
2146     return true;
2147   }
2148 
2149   // Parse enum body.
2150   nextToken();
2151   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
2152   if (HasError) {
2153     if (FormatTok->is(tok::semi))
2154       nextToken();
2155     addUnwrappedLine();
2156   }
2157   return true;
2158 
2159   // There is no addUnwrappedLine() here so that we fall through to parsing a
2160   // structural element afterwards. Thus, in "enum A {} n, m;",
2161   // "} n, m;" will end up in one unwrapped line.
2162 }
2163 
2164 void UnwrappedLineParser::parseJavaEnumBody() {
2165   // Determine whether the enum is simple, i.e. does not have a semicolon or
2166   // constants with class bodies. Simple enums can be formatted like braced
2167   // lists, contracted to a single line, etc.
2168   unsigned StoredPosition = Tokens->getPosition();
2169   bool IsSimple = true;
2170   FormatToken *Tok = Tokens->getNextToken();
2171   while (Tok) {
2172     if (Tok->is(tok::r_brace))
2173       break;
2174     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2175       IsSimple = false;
2176       break;
2177     }
2178     // FIXME: This will also mark enums with braces in the arguments to enum
2179     // constants as "not simple". This is probably fine in practice, though.
2180     Tok = Tokens->getNextToken();
2181   }
2182   FormatTok = Tokens->setPosition(StoredPosition);
2183 
2184   if (IsSimple) {
2185     nextToken();
2186     parseBracedList();
2187     addUnwrappedLine();
2188     return;
2189   }
2190 
2191   // Parse the body of a more complex enum.
2192   // First add a line for everything up to the "{".
2193   nextToken();
2194   addUnwrappedLine();
2195   ++Line->Level;
2196 
2197   // Parse the enum constants.
2198   while (FormatTok) {
2199     if (FormatTok->is(tok::l_brace)) {
2200       // Parse the constant's class body.
2201       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2202                  /*MunchSemi=*/false);
2203     } else if (FormatTok->is(tok::l_paren)) {
2204       parseParens();
2205     } else if (FormatTok->is(tok::comma)) {
2206       nextToken();
2207       addUnwrappedLine();
2208     } else if (FormatTok->is(tok::semi)) {
2209       nextToken();
2210       addUnwrappedLine();
2211       break;
2212     } else if (FormatTok->is(tok::r_brace)) {
2213       addUnwrappedLine();
2214       break;
2215     } else {
2216       nextToken();
2217     }
2218   }
2219 
2220   // Parse the class body after the enum's ";" if any.
2221   parseLevel(/*HasOpeningBrace=*/true);
2222   nextToken();
2223   --Line->Level;
2224   addUnwrappedLine();
2225 }
2226 
2227 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2228   const FormatToken &InitialToken = *FormatTok;
2229   nextToken();
2230 
2231   // The actual identifier can be a nested name specifier, and in macros
2232   // it is often token-pasted.
2233   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2234                             tok::kw___attribute, tok::kw___declspec,
2235                             tok::kw_alignas) ||
2236          ((Style.Language == FormatStyle::LK_Java ||
2237            Style.Language == FormatStyle::LK_JavaScript) &&
2238           FormatTok->isOneOf(tok::period, tok::comma))) {
2239     if (Style.Language == FormatStyle::LK_JavaScript &&
2240         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2241       // JavaScript/TypeScript supports inline object types in
2242       // extends/implements positions:
2243       //     class Foo implements {bar: number} { }
2244       nextToken();
2245       if (FormatTok->is(tok::l_brace)) {
2246         tryToParseBracedList();
2247         continue;
2248       }
2249     }
2250     bool IsNonMacroIdentifier =
2251         FormatTok->is(tok::identifier) &&
2252         FormatTok->TokenText != FormatTok->TokenText.upper();
2253     nextToken();
2254     // We can have macros or attributes in between 'class' and the class name.
2255     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2256       parseParens();
2257   }
2258 
2259   // Note that parsing away template declarations here leads to incorrectly
2260   // accepting function declarations as record declarations.
2261   // In general, we cannot solve this problem. Consider:
2262   // class A<int> B() {}
2263   // which can be a function definition or a class definition when B() is a
2264   // macro. If we find enough real-world cases where this is a problem, we
2265   // can parse for the 'template' keyword in the beginning of the statement,
2266   // and thus rule out the record production in case there is no template
2267   // (this would still leave us with an ambiguity between template function
2268   // and class declarations).
2269   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2270     while (!eof()) {
2271       if (FormatTok->is(tok::l_brace)) {
2272         calculateBraceTypes(/*ExpectClassBody=*/true);
2273         if (!tryToParseBracedList())
2274           break;
2275       }
2276       if (FormatTok->Tok.is(tok::semi))
2277         return;
2278       nextToken();
2279     }
2280   }
2281   if (FormatTok->Tok.is(tok::l_brace)) {
2282     if (ParseAsExpr) {
2283       parseChildBlock();
2284     } else {
2285       if (ShouldBreakBeforeBrace(Style, InitialToken))
2286         addUnwrappedLine();
2287 
2288       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2289                  /*MunchSemi=*/false);
2290     }
2291   }
2292   // There is no addUnwrappedLine() here so that we fall through to parsing a
2293   // structural element afterwards. Thus, in "class A {} n, m;",
2294   // "} n, m;" will end up in one unwrapped line.
2295 }
2296 
2297 void UnwrappedLineParser::parseObjCMethod() {
2298   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2299          "'(' or identifier expected.");
2300   do {
2301     if (FormatTok->Tok.is(tok::semi)) {
2302       nextToken();
2303       addUnwrappedLine();
2304       return;
2305     } else if (FormatTok->Tok.is(tok::l_brace)) {
2306       if (Style.BraceWrapping.AfterFunction)
2307         addUnwrappedLine();
2308       parseBlock(/*MustBeDeclaration=*/false);
2309       addUnwrappedLine();
2310       return;
2311     } else {
2312       nextToken();
2313     }
2314   } while (!eof());
2315 }
2316 
2317 void UnwrappedLineParser::parseObjCProtocolList() {
2318   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2319   do {
2320     nextToken();
2321     // Early exit in case someone forgot a close angle.
2322     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2323         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2324       return;
2325   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2326   nextToken(); // Skip '>'.
2327 }
2328 
2329 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2330   do {
2331     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2332       nextToken();
2333       addUnwrappedLine();
2334       break;
2335     }
2336     if (FormatTok->is(tok::l_brace)) {
2337       parseBlock(/*MustBeDeclaration=*/false);
2338       // In ObjC interfaces, nothing should be following the "}".
2339       addUnwrappedLine();
2340     } else if (FormatTok->is(tok::r_brace)) {
2341       // Ignore stray "}". parseStructuralElement doesn't consume them.
2342       nextToken();
2343       addUnwrappedLine();
2344     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2345       nextToken();
2346       parseObjCMethod();
2347     } else {
2348       parseStructuralElement();
2349     }
2350   } while (!eof());
2351 }
2352 
2353 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2354   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2355          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2356   nextToken();
2357   nextToken(); // interface name
2358 
2359   // @interface can be followed by a lightweight generic
2360   // specialization list, then either a base class or a category.
2361   if (FormatTok->Tok.is(tok::less)) {
2362     // Unlike protocol lists, generic parameterizations support
2363     // nested angles:
2364     //
2365     // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2366     //     NSObject <NSCopying, NSSecureCoding>
2367     //
2368     // so we need to count how many open angles we have left.
2369     unsigned NumOpenAngles = 1;
2370     do {
2371       nextToken();
2372       // Early exit in case someone forgot a close angle.
2373       if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2374           FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2375         break;
2376       if (FormatTok->Tok.is(tok::less))
2377         ++NumOpenAngles;
2378       else if (FormatTok->Tok.is(tok::greater)) {
2379         assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2380         --NumOpenAngles;
2381       }
2382     } while (!eof() && NumOpenAngles != 0);
2383     nextToken(); // Skip '>'.
2384   }
2385   if (FormatTok->Tok.is(tok::colon)) {
2386     nextToken();
2387     nextToken(); // base class name
2388   } else if (FormatTok->Tok.is(tok::l_paren))
2389     // Skip category, if present.
2390     parseParens();
2391 
2392   if (FormatTok->Tok.is(tok::less))
2393     parseObjCProtocolList();
2394 
2395   if (FormatTok->Tok.is(tok::l_brace)) {
2396     if (Style.BraceWrapping.AfterObjCDeclaration)
2397       addUnwrappedLine();
2398     parseBlock(/*MustBeDeclaration=*/true);
2399   }
2400 
2401   // With instance variables, this puts '}' on its own line.  Without instance
2402   // variables, this ends the @interface line.
2403   addUnwrappedLine();
2404 
2405   parseObjCUntilAtEnd();
2406 }
2407 
2408 // Returns true for the declaration/definition form of @protocol,
2409 // false for the expression form.
2410 bool UnwrappedLineParser::parseObjCProtocol() {
2411   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2412   nextToken();
2413 
2414   if (FormatTok->is(tok::l_paren))
2415     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2416     return false;
2417 
2418   // The definition/declaration form,
2419   // @protocol Foo
2420   // - (int)someMethod;
2421   // @end
2422 
2423   nextToken(); // protocol name
2424 
2425   if (FormatTok->Tok.is(tok::less))
2426     parseObjCProtocolList();
2427 
2428   // Check for protocol declaration.
2429   if (FormatTok->Tok.is(tok::semi)) {
2430     nextToken();
2431     addUnwrappedLine();
2432     return true;
2433   }
2434 
2435   addUnwrappedLine();
2436   parseObjCUntilAtEnd();
2437   return true;
2438 }
2439 
2440 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2441   bool IsImport = FormatTok->is(Keywords.kw_import);
2442   assert(IsImport || FormatTok->is(tok::kw_export));
2443   nextToken();
2444 
2445   // Consume the "default" in "export default class/function".
2446   if (FormatTok->is(tok::kw_default))
2447     nextToken();
2448 
2449   // Consume "async function", "function" and "default function", so that these
2450   // get parsed as free-standing JS functions, i.e. do not require a trailing
2451   // semicolon.
2452   if (FormatTok->is(Keywords.kw_async))
2453     nextToken();
2454   if (FormatTok->is(Keywords.kw_function)) {
2455     nextToken();
2456     return;
2457   }
2458 
2459   // For imports, `export *`, `export {...}`, consume the rest of the line up
2460   // to the terminating `;`. For everything else, just return and continue
2461   // parsing the structural element, i.e. the declaration or expression for
2462   // `export default`.
2463   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2464       !FormatTok->isStringLiteral())
2465     return;
2466 
2467   while (!eof()) {
2468     if (FormatTok->is(tok::semi))
2469       return;
2470     if (Line->Tokens.empty()) {
2471       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2472       // import statement should terminate.
2473       return;
2474     }
2475     if (FormatTok->is(tok::l_brace)) {
2476       FormatTok->BlockKind = BK_Block;
2477       nextToken();
2478       parseBracedList();
2479     } else {
2480       nextToken();
2481     }
2482   }
2483 }
2484 
2485 void UnwrappedLineParser::parseStatementMacro() {
2486   nextToken();
2487   if (FormatTok->is(tok::l_paren))
2488     parseParens();
2489   if (FormatTok->is(tok::semi))
2490     nextToken();
2491   addUnwrappedLine();
2492 }
2493 
2494 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2495                                                  StringRef Prefix = "") {
2496   llvm::dbgs() << Prefix << "Line(" << Line.Level
2497                << ", FSC=" << Line.FirstStartColumn << ")"
2498                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2499   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2500                                                     E = Line.Tokens.end();
2501        I != E; ++I) {
2502     llvm::dbgs() << I->Tok->Tok.getName() << "["
2503                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2504                  << "] ";
2505   }
2506   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2507                                                     E = Line.Tokens.end();
2508        I != E; ++I) {
2509     const UnwrappedLineNode &Node = *I;
2510     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2511              I = Node.Children.begin(),
2512              E = Node.Children.end();
2513          I != E; ++I) {
2514       printDebugInfo(*I, "\nChild: ");
2515     }
2516   }
2517   llvm::dbgs() << "\n";
2518 }
2519 
2520 void UnwrappedLineParser::addUnwrappedLine() {
2521   if (Line->Tokens.empty())
2522     return;
2523   LLVM_DEBUG({
2524     if (CurrentLines == &Lines)
2525       printDebugInfo(*Line);
2526   });
2527   CurrentLines->push_back(std::move(*Line));
2528   Line->Tokens.clear();
2529   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2530   Line->FirstStartColumn = 0;
2531   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2532     CurrentLines->append(
2533         std::make_move_iterator(PreprocessorDirectives.begin()),
2534         std::make_move_iterator(PreprocessorDirectives.end()));
2535     PreprocessorDirectives.clear();
2536   }
2537   // Disconnect the current token from the last token on the previous line.
2538   FormatTok->Previous = nullptr;
2539 }
2540 
2541 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2542 
2543 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2544   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2545          FormatTok.NewlinesBefore > 0;
2546 }
2547 
2548 // Checks if \p FormatTok is a line comment that continues the line comment
2549 // section on \p Line.
2550 static bool
2551 continuesLineCommentSection(const FormatToken &FormatTok,
2552                             const UnwrappedLine &Line,
2553                             const llvm::Regex &CommentPragmasRegex) {
2554   if (Line.Tokens.empty())
2555     return false;
2556 
2557   StringRef IndentContent = FormatTok.TokenText;
2558   if (FormatTok.TokenText.startswith("//") ||
2559       FormatTok.TokenText.startswith("/*"))
2560     IndentContent = FormatTok.TokenText.substr(2);
2561   if (CommentPragmasRegex.match(IndentContent))
2562     return false;
2563 
2564   // If Line starts with a line comment, then FormatTok continues the comment
2565   // section if its original column is greater or equal to the original start
2566   // column of the line.
2567   //
2568   // Define the min column token of a line as follows: if a line ends in '{' or
2569   // contains a '{' followed by a line comment, then the min column token is
2570   // that '{'. Otherwise, the min column token of the line is the first token of
2571   // the line.
2572   //
2573   // If Line starts with a token other than a line comment, then FormatTok
2574   // continues the comment section if its original column is greater than the
2575   // original start column of the min column token of the line.
2576   //
2577   // For example, the second line comment continues the first in these cases:
2578   //
2579   // // first line
2580   // // second line
2581   //
2582   // and:
2583   //
2584   // // first line
2585   //  // second line
2586   //
2587   // and:
2588   //
2589   // int i; // first line
2590   //  // second line
2591   //
2592   // and:
2593   //
2594   // do { // first line
2595   //      // second line
2596   //   int i;
2597   // } while (true);
2598   //
2599   // and:
2600   //
2601   // enum {
2602   //   a, // first line
2603   //    // second line
2604   //   b
2605   // };
2606   //
2607   // The second line comment doesn't continue the first in these cases:
2608   //
2609   //   // first line
2610   //  // second line
2611   //
2612   // and:
2613   //
2614   // int i; // first line
2615   // // second line
2616   //
2617   // and:
2618   //
2619   // do { // first line
2620   //   // second line
2621   //   int i;
2622   // } while (true);
2623   //
2624   // and:
2625   //
2626   // enum {
2627   //   a, // first line
2628   //   // second line
2629   // };
2630   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2631 
2632   // Scan for '{//'. If found, use the column of '{' as a min column for line
2633   // comment section continuation.
2634   const FormatToken *PreviousToken = nullptr;
2635   for (const UnwrappedLineNode &Node : Line.Tokens) {
2636     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2637         isLineComment(*Node.Tok)) {
2638       MinColumnToken = PreviousToken;
2639       break;
2640     }
2641     PreviousToken = Node.Tok;
2642 
2643     // Grab the last newline preceding a token in this unwrapped line.
2644     if (Node.Tok->NewlinesBefore > 0) {
2645       MinColumnToken = Node.Tok;
2646     }
2647   }
2648   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2649     MinColumnToken = PreviousToken;
2650   }
2651 
2652   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2653                               MinColumnToken);
2654 }
2655 
2656 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2657   bool JustComments = Line->Tokens.empty();
2658   for (SmallVectorImpl<FormatToken *>::const_iterator
2659            I = CommentsBeforeNextToken.begin(),
2660            E = CommentsBeforeNextToken.end();
2661        I != E; ++I) {
2662     // Line comments that belong to the same line comment section are put on the
2663     // same line since later we might want to reflow content between them.
2664     // Additional fine-grained breaking of line comment sections is controlled
2665     // by the class BreakableLineCommentSection in case it is desirable to keep
2666     // several line comment sections in the same unwrapped line.
2667     //
2668     // FIXME: Consider putting separate line comment sections as children to the
2669     // unwrapped line instead.
2670     (*I)->ContinuesLineCommentSection =
2671         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2672     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2673       addUnwrappedLine();
2674     pushToken(*I);
2675   }
2676   if (NewlineBeforeNext && JustComments)
2677     addUnwrappedLine();
2678   CommentsBeforeNextToken.clear();
2679 }
2680 
2681 void UnwrappedLineParser::nextToken(int LevelDifference) {
2682   if (eof())
2683     return;
2684   flushComments(isOnNewLine(*FormatTok));
2685   pushToken(FormatTok);
2686   FormatToken *Previous = FormatTok;
2687   if (Style.Language != FormatStyle::LK_JavaScript)
2688     readToken(LevelDifference);
2689   else
2690     readTokenWithJavaScriptASI();
2691   FormatTok->Previous = Previous;
2692 }
2693 
2694 void UnwrappedLineParser::distributeComments(
2695     const SmallVectorImpl<FormatToken *> &Comments,
2696     const FormatToken *NextTok) {
2697   // Whether or not a line comment token continues a line is controlled by
2698   // the method continuesLineCommentSection, with the following caveat:
2699   //
2700   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2701   // that each comment line from the trail is aligned with the next token, if
2702   // the next token exists. If a trail exists, the beginning of the maximal
2703   // trail is marked as a start of a new comment section.
2704   //
2705   // For example in this code:
2706   //
2707   // int a; // line about a
2708   //   // line 1 about b
2709   //   // line 2 about b
2710   //   int b;
2711   //
2712   // the two lines about b form a maximal trail, so there are two sections, the
2713   // first one consisting of the single comment "// line about a" and the
2714   // second one consisting of the next two comments.
2715   if (Comments.empty())
2716     return;
2717   bool ShouldPushCommentsInCurrentLine = true;
2718   bool HasTrailAlignedWithNextToken = false;
2719   unsigned StartOfTrailAlignedWithNextToken = 0;
2720   if (NextTok) {
2721     // We are skipping the first element intentionally.
2722     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2723       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2724         HasTrailAlignedWithNextToken = true;
2725         StartOfTrailAlignedWithNextToken = i;
2726       }
2727     }
2728   }
2729   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2730     FormatToken *FormatTok = Comments[i];
2731     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2732       FormatTok->ContinuesLineCommentSection = false;
2733     } else {
2734       FormatTok->ContinuesLineCommentSection =
2735           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2736     }
2737     if (!FormatTok->ContinuesLineCommentSection &&
2738         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2739       ShouldPushCommentsInCurrentLine = false;
2740     }
2741     if (ShouldPushCommentsInCurrentLine) {
2742       pushToken(FormatTok);
2743     } else {
2744       CommentsBeforeNextToken.push_back(FormatTok);
2745     }
2746   }
2747 }
2748 
2749 void UnwrappedLineParser::readToken(int LevelDifference) {
2750   SmallVector<FormatToken *, 1> Comments;
2751   do {
2752     FormatTok = Tokens->getNextToken();
2753     assert(FormatTok);
2754     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2755            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2756       distributeComments(Comments, FormatTok);
2757       Comments.clear();
2758       // If there is an unfinished unwrapped line, we flush the preprocessor
2759       // directives only after that unwrapped line was finished later.
2760       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2761       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2762       assert((LevelDifference >= 0 ||
2763               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2764              "LevelDifference makes Line->Level negative");
2765       Line->Level += LevelDifference;
2766       // Comments stored before the preprocessor directive need to be output
2767       // before the preprocessor directive, at the same level as the
2768       // preprocessor directive, as we consider them to apply to the directive.
2769       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
2770           PPBranchLevel > 0)
2771         Line->Level += PPBranchLevel;
2772       flushComments(isOnNewLine(*FormatTok));
2773       parsePPDirective();
2774     }
2775     while (FormatTok->Type == TT_ConflictStart ||
2776            FormatTok->Type == TT_ConflictEnd ||
2777            FormatTok->Type == TT_ConflictAlternative) {
2778       if (FormatTok->Type == TT_ConflictStart) {
2779         conditionalCompilationStart(/*Unreachable=*/false);
2780       } else if (FormatTok->Type == TT_ConflictAlternative) {
2781         conditionalCompilationAlternative();
2782       } else if (FormatTok->Type == TT_ConflictEnd) {
2783         conditionalCompilationEnd();
2784       }
2785       FormatTok = Tokens->getNextToken();
2786       FormatTok->MustBreakBefore = true;
2787     }
2788 
2789     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2790         !Line->InPPDirective) {
2791       continue;
2792     }
2793 
2794     if (!FormatTok->Tok.is(tok::comment)) {
2795       distributeComments(Comments, FormatTok);
2796       Comments.clear();
2797       return;
2798     }
2799 
2800     Comments.push_back(FormatTok);
2801   } while (!eof());
2802 
2803   distributeComments(Comments, nullptr);
2804   Comments.clear();
2805 }
2806 
2807 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2808   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2809   if (MustBreakBeforeNextToken) {
2810     Line->Tokens.back().Tok->MustBreakBefore = true;
2811     MustBreakBeforeNextToken = false;
2812   }
2813 }
2814 
2815 } // end namespace format
2816 } // end namespace clang
2817