1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31   virtual FormatToken *getNextToken() = 0;
32 
33   virtual unsigned getPosition() = 0;
34   virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36 
37 namespace {
38 
39 class ScopedDeclarationState {
40 public:
41   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42                          bool MustBeDeclaration)
43       : Line(Line), Stack(Stack) {
44     Line.MustBeDeclaration = MustBeDeclaration;
45     Stack.push_back(MustBeDeclaration);
46   }
47   ~ScopedDeclarationState() {
48     Stack.pop_back();
49     if (!Stack.empty())
50       Line.MustBeDeclaration = Stack.back();
51     else
52       Line.MustBeDeclaration = true;
53   }
54 
55 private:
56   UnwrappedLine &Line;
57   std::vector<bool> &Stack;
58 };
59 
60 static bool isLineComment(const FormatToken &FormatTok) {
61   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63 
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
67 static bool continuesLineComment(const FormatToken &FormatTok,
68                                  const FormatToken *Previous,
69                                  const FormatToken *MinColumnToken) {
70   if (!Previous || !MinColumnToken)
71     return false;
72   unsigned MinContinueColumn =
73       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75          isLineComment(*Previous) &&
76          FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78 
79 class ScopedMacroState : public FormatTokenSource {
80 public:
81   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82                    FormatToken *&ResetToken)
83       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85         Token(nullptr), PreviousToken(nullptr) {
86     TokenSource = this;
87     Line.Level = 0;
88     Line.InPPDirective = true;
89   }
90 
91   ~ScopedMacroState() override {
92     TokenSource = PreviousTokenSource;
93     ResetToken = Token;
94     Line.InPPDirective = false;
95     Line.Level = PreviousLineLevel;
96   }
97 
98   FormatToken *getNextToken() override {
99     // The \c UnwrappedLineParser guards against this by never calling
100     // \c getNextToken() after it has encountered the first eof token.
101     assert(!eof());
102     PreviousToken = Token;
103     Token = PreviousTokenSource->getNextToken();
104     if (eof())
105       return getFakeEOF();
106     return Token;
107   }
108 
109   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
110 
111   FormatToken *setPosition(unsigned Position) override {
112     PreviousToken = nullptr;
113     Token = PreviousTokenSource->setPosition(Position);
114     return Token;
115   }
116 
117 private:
118   bool eof() {
119     return Token && Token->HasUnescapedNewline &&
120            !continuesLineComment(*Token, PreviousToken,
121                                  /*MinColumnToken=*/PreviousToken);
122   }
123 
124   FormatToken *getFakeEOF() {
125     static bool EOFInitialized = false;
126     static FormatToken FormatTok;
127     if (!EOFInitialized) {
128       FormatTok.Tok.startToken();
129       FormatTok.Tok.setKind(tok::eof);
130       EOFInitialized = true;
131     }
132     return &FormatTok;
133   }
134 
135   UnwrappedLine &Line;
136   FormatTokenSource *&TokenSource;
137   FormatToken *&ResetToken;
138   unsigned PreviousLineLevel;
139   FormatTokenSource *PreviousTokenSource;
140 
141   FormatToken *Token;
142   FormatToken *PreviousToken;
143 };
144 
145 } // end anonymous namespace
146 
147 class ScopedLineState {
148 public:
149   ScopedLineState(UnwrappedLineParser &Parser,
150                   bool SwitchToPreprocessorLines = false)
151       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
152     if (SwitchToPreprocessorLines)
153       Parser.CurrentLines = &Parser.PreprocessorDirectives;
154     else if (!Parser.Line->Tokens.empty())
155       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
156     PreBlockLine = std::move(Parser.Line);
157     Parser.Line = llvm::make_unique<UnwrappedLine>();
158     Parser.Line->Level = PreBlockLine->Level;
159     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
160   }
161 
162   ~ScopedLineState() {
163     if (!Parser.Line->Tokens.empty()) {
164       Parser.addUnwrappedLine();
165     }
166     assert(Parser.Line->Tokens.empty());
167     Parser.Line = std::move(PreBlockLine);
168     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
169       Parser.MustBreakBeforeNextToken = true;
170     Parser.CurrentLines = OriginalLines;
171   }
172 
173 private:
174   UnwrappedLineParser &Parser;
175 
176   std::unique_ptr<UnwrappedLine> PreBlockLine;
177   SmallVectorImpl<UnwrappedLine> *OriginalLines;
178 };
179 
180 class CompoundStatementIndenter {
181 public:
182   CompoundStatementIndenter(UnwrappedLineParser *Parser,
183                             const FormatStyle &Style, unsigned &LineLevel)
184       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
185     if (Style.BraceWrapping.AfterControlStatement)
186       Parser->addUnwrappedLine();
187     if (Style.BraceWrapping.IndentBraces)
188       ++LineLevel;
189   }
190   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
191 
192 private:
193   unsigned &LineLevel;
194   unsigned OldLineLevel;
195 };
196 
197 namespace {
198 
199 class IndexedTokenSource : public FormatTokenSource {
200 public:
201   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
202       : Tokens(Tokens), Position(-1) {}
203 
204   FormatToken *getNextToken() override {
205     ++Position;
206     return Tokens[Position];
207   }
208 
209   unsigned getPosition() override {
210     assert(Position >= 0);
211     return Position;
212   }
213 
214   FormatToken *setPosition(unsigned P) override {
215     Position = P;
216     return Tokens[Position];
217   }
218 
219   void reset() { Position = -1; }
220 
221 private:
222   ArrayRef<FormatToken *> Tokens;
223   int Position;
224 };
225 
226 } // end anonymous namespace
227 
228 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
229                                          const AdditionalKeywords &Keywords,
230                                          unsigned FirstStartColumn,
231                                          ArrayRef<FormatToken *> Tokens,
232                                          UnwrappedLineConsumer &Callback)
233     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
234       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
235       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
236       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
237       IfNdefCondition(nullptr), FoundIncludeGuardStart(false),
238       IncludeGuardRejected(false), FirstStartColumn(FirstStartColumn) {}
239 
240 void UnwrappedLineParser::reset() {
241   PPBranchLevel = -1;
242   IfNdefCondition = nullptr;
243   FoundIncludeGuardStart = false;
244   IncludeGuardRejected = false;
245   Line.reset(new UnwrappedLine);
246   CommentsBeforeNextToken.clear();
247   FormatTok = nullptr;
248   MustBreakBeforeNextToken = false;
249   PreprocessorDirectives.clear();
250   CurrentLines = &Lines;
251   DeclarationScopeStack.clear();
252   PPStack.clear();
253   Line->FirstStartColumn = FirstStartColumn;
254 }
255 
256 void UnwrappedLineParser::parse() {
257   IndexedTokenSource TokenSource(AllTokens);
258   Line->FirstStartColumn = FirstStartColumn;
259   do {
260     DEBUG(llvm::dbgs() << "----\n");
261     reset();
262     Tokens = &TokenSource;
263     TokenSource.reset();
264 
265     readToken();
266     parseFile();
267     // Create line with eof token.
268     pushToken(FormatTok);
269     addUnwrappedLine();
270 
271     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
272                                                   E = Lines.end();
273          I != E; ++I) {
274       Callback.consumeUnwrappedLine(*I);
275     }
276     Callback.finishRun();
277     Lines.clear();
278     while (!PPLevelBranchIndex.empty() &&
279            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
280       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
281       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
282     }
283     if (!PPLevelBranchIndex.empty()) {
284       ++PPLevelBranchIndex.back();
285       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
286       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
287     }
288   } while (!PPLevelBranchIndex.empty());
289 }
290 
291 void UnwrappedLineParser::parseFile() {
292   // The top-level context in a file always has declarations, except for pre-
293   // processor directives and JavaScript files.
294   bool MustBeDeclaration =
295       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
296   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
297                                           MustBeDeclaration);
298   if (Style.Language == FormatStyle::LK_TextProto)
299     parseBracedList();
300   else
301     parseLevel(/*HasOpeningBrace=*/false);
302   // Make sure to format the remaining tokens.
303   flushComments(true);
304   addUnwrappedLine();
305 }
306 
307 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
308   bool SwitchLabelEncountered = false;
309   do {
310     tok::TokenKind kind = FormatTok->Tok.getKind();
311     if (FormatTok->Type == TT_MacroBlockBegin) {
312       kind = tok::l_brace;
313     } else if (FormatTok->Type == TT_MacroBlockEnd) {
314       kind = tok::r_brace;
315     }
316 
317     switch (kind) {
318     case tok::comment:
319       nextToken();
320       addUnwrappedLine();
321       break;
322     case tok::l_brace:
323       // FIXME: Add parameter whether this can happen - if this happens, we must
324       // be in a non-declaration context.
325       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
326         continue;
327       parseBlock(/*MustBeDeclaration=*/false);
328       addUnwrappedLine();
329       break;
330     case tok::r_brace:
331       if (HasOpeningBrace)
332         return;
333       nextToken();
334       addUnwrappedLine();
335       break;
336     case tok::kw_default: {
337       unsigned StoredPosition = Tokens->getPosition();
338       FormatToken *Next = Tokens->getNextToken();
339       FormatTok = Tokens->setPosition(StoredPosition);
340       if (Next && Next->isNot(tok::colon)) {
341         // default not followed by ':' is not a case label; treat it like
342         // an identifier.
343         parseStructuralElement();
344         break;
345       }
346       // Else, if it is 'default:', fall through to the case handling.
347       LLVM_FALLTHROUGH;
348     }
349     case tok::kw_case:
350       if (Style.Language == FormatStyle::LK_JavaScript &&
351           Line->MustBeDeclaration) {
352         // A 'case: string' style field declaration.
353         parseStructuralElement();
354         break;
355       }
356       if (!SwitchLabelEncountered &&
357           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
358         ++Line->Level;
359       SwitchLabelEncountered = true;
360       parseStructuralElement();
361       break;
362     default:
363       parseStructuralElement();
364       break;
365     }
366   } while (!eof());
367 }
368 
369 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
370   // We'll parse forward through the tokens until we hit
371   // a closing brace or eof - note that getNextToken() will
372   // parse macros, so this will magically work inside macro
373   // definitions, too.
374   unsigned StoredPosition = Tokens->getPosition();
375   FormatToken *Tok = FormatTok;
376   const FormatToken *PrevTok = Tok->Previous;
377   // Keep a stack of positions of lbrace tokens. We will
378   // update information about whether an lbrace starts a
379   // braced init list or a different block during the loop.
380   SmallVector<FormatToken *, 8> LBraceStack;
381   assert(Tok->Tok.is(tok::l_brace));
382   do {
383     // Get next non-comment token.
384     FormatToken *NextTok;
385     unsigned ReadTokens = 0;
386     do {
387       NextTok = Tokens->getNextToken();
388       ++ReadTokens;
389     } while (NextTok->is(tok::comment));
390 
391     switch (Tok->Tok.getKind()) {
392     case tok::l_brace:
393       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
394         if (PrevTok->isOneOf(tok::colon, tok::less))
395           // A ':' indicates this code is in a type, or a braced list
396           // following a label in an object literal ({a: {b: 1}}).
397           // A '<' could be an object used in a comparison, but that is nonsense
398           // code (can never return true), so more likely it is a generic type
399           // argument (`X<{a: string; b: number}>`).
400           // The code below could be confused by semicolons between the
401           // individual members in a type member list, which would normally
402           // trigger BK_Block. In both cases, this must be parsed as an inline
403           // braced init.
404           Tok->BlockKind = BK_BracedInit;
405         else if (PrevTok->is(tok::r_paren))
406           // `) { }` can only occur in function or method declarations in JS.
407           Tok->BlockKind = BK_Block;
408       } else {
409         Tok->BlockKind = BK_Unknown;
410       }
411       LBraceStack.push_back(Tok);
412       break;
413     case tok::r_brace:
414       if (LBraceStack.empty())
415         break;
416       if (LBraceStack.back()->BlockKind == BK_Unknown) {
417         bool ProbablyBracedList = false;
418         if (Style.Language == FormatStyle::LK_Proto) {
419           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
420         } else {
421           // Using OriginalColumn to distinguish between ObjC methods and
422           // binary operators is a bit hacky.
423           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
424                                   NextTok->OriginalColumn == 0;
425 
426           // If there is a comma, semicolon or right paren after the closing
427           // brace, we assume this is a braced initializer list.  Note that
428           // regardless how we mark inner braces here, we will overwrite the
429           // BlockKind later if we parse a braced list (where all blocks
430           // inside are by default braced lists), or when we explicitly detect
431           // blocks (for example while parsing lambdas).
432           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
433           // braced list in JS.
434           ProbablyBracedList =
435               (Style.Language == FormatStyle::LK_JavaScript &&
436                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
437                                 Keywords.kw_as)) ||
438               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
439               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
440                                tok::r_paren, tok::r_square, tok::l_brace,
441                                tok::l_square, tok::ellipsis) ||
442               (NextTok->is(tok::identifier) &&
443                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
444               (NextTok->is(tok::semi) &&
445                (!ExpectClassBody || LBraceStack.size() != 1)) ||
446               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
447         }
448         if (ProbablyBracedList) {
449           Tok->BlockKind = BK_BracedInit;
450           LBraceStack.back()->BlockKind = BK_BracedInit;
451         } else {
452           Tok->BlockKind = BK_Block;
453           LBraceStack.back()->BlockKind = BK_Block;
454         }
455       }
456       LBraceStack.pop_back();
457       break;
458     case tok::at:
459     case tok::semi:
460     case tok::kw_if:
461     case tok::kw_while:
462     case tok::kw_for:
463     case tok::kw_switch:
464     case tok::kw_try:
465     case tok::kw___try:
466       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
467         LBraceStack.back()->BlockKind = BK_Block;
468       break;
469     default:
470       break;
471     }
472     PrevTok = Tok;
473     Tok = NextTok;
474   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
475 
476   // Assume other blocks for all unclosed opening braces.
477   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
478     if (LBraceStack[i]->BlockKind == BK_Unknown)
479       LBraceStack[i]->BlockKind = BK_Block;
480   }
481 
482   FormatTok = Tokens->setPosition(StoredPosition);
483 }
484 
// Mixes the std::hash value of \p v into the running hash \p seed. The result
// depends on the previous seed, so the order of combination matters.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>()(v);
  const std::size_t Mixed = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed = seed ^ Mixed;
}
490 
491 size_t UnwrappedLineParser::computePPHash() const {
492   size_t h = 0;
493   for (const auto &i : PPStack) {
494     hash_combine(h, size_t(i.Kind));
495     hash_combine(h, i.Line);
496   }
497   return h;
498 }
499 
500 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
501                                      bool MunchSemi) {
502   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
503          "'{' or macro block token expected");
504   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
505   FormatTok->BlockKind = BK_Block;
506 
507   size_t PPStartHash = computePPHash();
508 
509   unsigned InitialLevel = Line->Level;
510   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
511 
512   if (MacroBlock && FormatTok->is(tok::l_paren))
513     parseParens();
514 
515   size_t NbPreprocessorDirectives =
516       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
517   addUnwrappedLine();
518   size_t OpeningLineIndex =
519       CurrentLines->empty()
520           ? (UnwrappedLine::kInvalidIndex)
521           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
522 
523   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
524                                           MustBeDeclaration);
525   if (AddLevel)
526     ++Line->Level;
527   parseLevel(/*HasOpeningBrace=*/true);
528 
529   if (eof())
530     return;
531 
532   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
533                  : !FormatTok->is(tok::r_brace)) {
534     Line->Level = InitialLevel;
535     FormatTok->BlockKind = BK_Block;
536     return;
537   }
538 
539   size_t PPEndHash = computePPHash();
540 
541   // Munch the closing brace.
542   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
543 
544   if (MacroBlock && FormatTok->is(tok::l_paren))
545     parseParens();
546 
547   if (MunchSemi && FormatTok->Tok.is(tok::semi))
548     nextToken();
549   Line->Level = InitialLevel;
550 
551   if (PPStartHash == PPEndHash) {
552     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
553     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
554       // Update the opening line to add the forward reference as well
555       (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
556           CurrentLines->size() - 1;
557     }
558   }
559 }
560 
561 static bool isGoogScope(const UnwrappedLine &Line) {
562   // FIXME: Closure-library specific stuff should not be hard-coded but be
563   // configurable.
564   if (Line.Tokens.size() < 4)
565     return false;
566   auto I = Line.Tokens.begin();
567   if (I->Tok->TokenText != "goog")
568     return false;
569   ++I;
570   if (I->Tok->isNot(tok::period))
571     return false;
572   ++I;
573   if (I->Tok->TokenText != "scope")
574     return false;
575   ++I;
576   return I->Tok->is(tok::l_paren);
577 }
578 
579 static bool isIIFE(const UnwrappedLine &Line,
580                    const AdditionalKeywords &Keywords) {
581   // Look for the start of an immediately invoked anonymous function.
582   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
583   // This is commonly done in JavaScript to create a new, anonymous scope.
584   // Example: (function() { ... })()
585   if (Line.Tokens.size() < 3)
586     return false;
587   auto I = Line.Tokens.begin();
588   if (I->Tok->isNot(tok::l_paren))
589     return false;
590   ++I;
591   if (I->Tok->isNot(Keywords.kw_function))
592     return false;
593   ++I;
594   return I->Tok->is(tok::l_paren);
595 }
596 
597 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
598                                    const FormatToken &InitialToken) {
599   if (InitialToken.is(tok::kw_namespace))
600     return Style.BraceWrapping.AfterNamespace;
601   if (InitialToken.is(tok::kw_class))
602     return Style.BraceWrapping.AfterClass;
603   if (InitialToken.is(tok::kw_union))
604     return Style.BraceWrapping.AfterUnion;
605   if (InitialToken.is(tok::kw_struct))
606     return Style.BraceWrapping.AfterStruct;
607   return false;
608 }
609 
610 void UnwrappedLineParser::parseChildBlock() {
611   FormatTok->BlockKind = BK_Block;
612   nextToken();
613   {
614     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
615                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
616     ScopedLineState LineState(*this);
617     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
618                                             /*MustBeDeclaration=*/false);
619     Line->Level += SkipIndent ? 0 : 1;
620     parseLevel(/*HasOpeningBrace=*/true);
621     flushComments(isOnNewLine(*FormatTok));
622     Line->Level -= SkipIndent ? 0 : 1;
623   }
624   nextToken();
625 }
626 
627 void UnwrappedLineParser::parsePPDirective() {
628   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
629   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
630   nextToken();
631 
632   if (!FormatTok->Tok.getIdentifierInfo()) {
633     parsePPUnknown();
634     return;
635   }
636 
637   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
638   case tok::pp_define:
639     parsePPDefine();
640     return;
641   case tok::pp_if:
642     parsePPIf(/*IfDef=*/false);
643     break;
644   case tok::pp_ifdef:
645   case tok::pp_ifndef:
646     parsePPIf(/*IfDef=*/true);
647     break;
648   case tok::pp_else:
649     parsePPElse();
650     break;
651   case tok::pp_elif:
652     parsePPElIf();
653     break;
654   case tok::pp_endif:
655     parsePPEndIf();
656     break;
657   default:
658     parsePPUnknown();
659     break;
660   }
661 }
662 
663 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
664   size_t Line = CurrentLines->size();
665   if (CurrentLines == &PreprocessorDirectives)
666     Line += Lines.size();
667 
668   if (Unreachable ||
669       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
670     PPStack.push_back({PP_Unreachable, Line});
671   else
672     PPStack.push_back({PP_Conditional, Line});
673 }
674 
675 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
676   ++PPBranchLevel;
677   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
678   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
679     PPLevelBranchIndex.push_back(0);
680     PPLevelBranchCount.push_back(0);
681   }
682   PPChainBranchIndex.push(0);
683   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
684   conditionalCompilationCondition(Unreachable || Skip);
685 }
686 
687 void UnwrappedLineParser::conditionalCompilationAlternative() {
688   if (!PPStack.empty())
689     PPStack.pop_back();
690   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
691   if (!PPChainBranchIndex.empty())
692     ++PPChainBranchIndex.top();
693   conditionalCompilationCondition(
694       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
695       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
696 }
697 
698 void UnwrappedLineParser::conditionalCompilationEnd() {
699   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
700   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
701     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
702       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
703     }
704   }
705   // Guard against #endif's without #if.
706   if (PPBranchLevel > -1)
707     --PPBranchLevel;
708   if (!PPChainBranchIndex.empty())
709     PPChainBranchIndex.pop();
710   if (!PPStack.empty())
711     PPStack.pop_back();
712 }
713 
714 void UnwrappedLineParser::parsePPIf(bool IfDef) {
715   bool IfNDef = FormatTok->is(tok::pp_ifndef);
716   nextToken();
717   bool Unreachable = false;
718   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
719     Unreachable = true;
720   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
721     Unreachable = true;
722   conditionalCompilationStart(Unreachable);
723   FormatToken *IfCondition = FormatTok;
724   // If there's a #ifndef on the first line, and the only lines before it are
725   // comments, it could be an include guard.
726   bool MaybeIncludeGuard = IfNDef;
727   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) {
728     for (auto &Line : Lines) {
729       if (!Line.Tokens.front().Tok->is(tok::comment)) {
730         MaybeIncludeGuard = false;
731         IncludeGuardRejected = true;
732         break;
733       }
734     }
735   }
736   --PPBranchLevel;
737   parsePPUnknown();
738   ++PPBranchLevel;
739   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard)
740     IfNdefCondition = IfCondition;
741 }
742 
743 void UnwrappedLineParser::parsePPElse() {
744   // If a potential include guard has an #else, it's not an include guard.
745   if (FoundIncludeGuardStart && PPBranchLevel == 0)
746     FoundIncludeGuardStart = false;
747   conditionalCompilationAlternative();
748   if (PPBranchLevel > -1)
749     --PPBranchLevel;
750   parsePPUnknown();
751   ++PPBranchLevel;
752 }
753 
754 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
755 
756 void UnwrappedLineParser::parsePPEndIf() {
757   conditionalCompilationEnd();
758   parsePPUnknown();
759   // If the #endif of a potential include guard is the last thing in the file,
760   // then we count it as a real include guard and subtract one from every
761   // preprocessor indent.
762   unsigned TokenPosition = Tokens->getPosition();
763   FormatToken *PeekNext = AllTokens[TokenPosition];
764   if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) &&
765       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
766     for (auto &Line : Lines)
767       if (Line.InPPDirective && Line.Level > 0)
768         --Line.Level;
769 }
770 
771 void UnwrappedLineParser::parsePPDefine() {
772   nextToken();
773 
774   if (FormatTok->Tok.getKind() != tok::identifier) {
775     parsePPUnknown();
776     return;
777   }
778   if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) {
779     FoundIncludeGuardStart = true;
780     for (auto &Line : Lines) {
781       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
782         FoundIncludeGuardStart = false;
783         break;
784       }
785     }
786   }
787   IfNdefCondition = nullptr;
788   nextToken();
789   if (FormatTok->Tok.getKind() == tok::l_paren &&
790       FormatTok->WhitespaceRange.getBegin() ==
791           FormatTok->WhitespaceRange.getEnd()) {
792     parseParens();
793   }
794   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
795     Line->Level += PPBranchLevel + 1;
796   addUnwrappedLine();
797   ++Line->Level;
798 
799   // Errors during a preprocessor directive can only affect the layout of the
800   // preprocessor directive, and thus we ignore them. An alternative approach
801   // would be to use the same approach we use on the file level (no
802   // re-indentation if there was a structural error) within the macro
803   // definition.
804   parseFile();
805 }
806 
807 void UnwrappedLineParser::parsePPUnknown() {
808   do {
809     nextToken();
810   } while (!eof());
811   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
812     Line->Level += PPBranchLevel + 1;
813   addUnwrappedLine();
814   IfNdefCondition = nullptr;
815 }
816 
817 // Here we blacklist certain tokens that are not usually the first token in an
818 // unwrapped line. This is used in attempt to distinguish macro calls without
819 // trailing semicolons from other constructs split to several lines.
820 static bool tokenCanStartNewLine(const clang::Token &Tok) {
821   // Semicolon can be a null-statement, l_square can be a start of a macro or
822   // a C++11 attribute, but this doesn't seem to be common.
823   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
824          Tok.isNot(tok::l_square) &&
825          // Tokens that can only be used as binary operators and a part of
826          // overloaded operator names.
827          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
828          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
829          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
830          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
831          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
832          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
833          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
834          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
835          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
836          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
837          Tok.isNot(tok::lesslessequal) &&
838          // Colon is used in labels, base class lists, initializer lists,
839          // range-based for loops, ternary operator, but should never be the
840          // first token in an unwrapped line.
841          Tok.isNot(tok::colon) &&
842          // 'noexcept' is a trailing annotation.
843          Tok.isNot(tok::kw_noexcept);
844 }
845 
846 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
847                           const FormatToken *FormatTok) {
848   // FIXME: This returns true for C/C++ keywords like 'struct'.
849   return FormatTok->is(tok::identifier) &&
850          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
851           !FormatTok->isOneOf(
852               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
853               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
854               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
855               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
856               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
857               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
858               Keywords.kw_from));
859 }
860 
861 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
862                                  const FormatToken *FormatTok) {
863   return FormatTok->Tok.isLiteral() ||
864          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
865          mustBeJSIdent(Keywords, FormatTok);
866 }
867 
868 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
869 // when encountered after a value (see mustBeJSIdentOrValue).
870 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
871                            const FormatToken *FormatTok) {
872   return FormatTok->isOneOf(
873       tok::kw_return, Keywords.kw_yield,
874       // conditionals
875       tok::kw_if, tok::kw_else,
876       // loops
877       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
878       // switch/case
879       tok::kw_switch, tok::kw_case,
880       // exceptions
881       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
882       // declaration
883       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
884       Keywords.kw_async, Keywords.kw_function,
885       // import/export
886       Keywords.kw_import, tok::kw_export);
887 }
888 
889 // readTokenWithJavaScriptASI reads the next token and terminates the current
890 // line if JavaScript Automatic Semicolon Insertion must
891 // happen between the current token and the next token.
892 //
893 // This method is conservative - it cannot cover all edge cases of JavaScript,
894 // but only aims to correctly handle certain well known cases. It *must not*
895 // return true in speculative cases.
896 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
897   FormatToken *Previous = FormatTok;
898   readToken();
899   FormatToken *Next = FormatTok;
900 
901   bool IsOnSameLine =
902       CommentsBeforeNextToken.empty()
903           ? Next->NewlinesBefore == 0
904           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
905   if (IsOnSameLine)
906     return;
907 
908   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
909   bool PreviousStartsTemplateExpr =
910       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
911   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
912     // If the line contains an '@' sign, the previous token might be an
913     // annotation, which can precede another identifier/value.
914     bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
915                               [](UnwrappedLineNode &LineNode) {
916                                 return LineNode.Tok->is(tok::at);
917                               }) != Line->Tokens.end();
918     if (HasAt)
919       return;
920   }
921   if (Next->is(tok::exclaim) && PreviousMustBeValue)
922     return addUnwrappedLine();
923   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
924   bool NextEndsTemplateExpr =
925       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
926   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
927       (PreviousMustBeValue ||
928        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
929                          tok::minusminus)))
930     return addUnwrappedLine();
931   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
932       isJSDeclOrStmt(Keywords, Next))
933     return addUnwrappedLine();
934 }
935 
// Parses one structural element starting at the current token and emits the
// unwrapped line(s) it produces.
//
// The current token must not be an l_brace (asserted below).
//
// The function works in two stages:
//  1. A switch on the leading token hands well-known constructs to dedicated
//     parsers (namespaces, if/for/while/do/switch, labels, try blocks, extern
//     blocks, JavaScript import/export, for-each and block macros, ...). Cases
//     that `return` are done; cases that `break` continue with stage 2.
//  2. A loop that consumes the remaining tokens construct by construct. It
//     ends, among other cases, after a semicolon, at a closing brace, after a
//     block that is not a braced list, or when the end of input is reached.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen include: consume the include token and an optional string
  // literal, then finish the line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Stage 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Mark every token up to the matching '}' as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    // 'inline namespace' is handled like a plain namespace.
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript the keyword is just consumed; otherwise it is
    // parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." {' opens a block of declarations; whether the block adds
    // an indentation level depends on BraceWrapping.AfterExternBlock.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import' followed by an optional 'public', a string literal and
      // an optional semicolon.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals' / 'slots' style keywords followed by ':' end the line right
    // after the colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Stage 2: consume the rest of the element token by token.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      // '@{' starts a braced list.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterObjCDeclaration)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the structural element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace itself is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', optionally followed by an identifier or simple type specifier,
      // a parenthesized list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token, then restore the token position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Only all-uppercase identifiers that are function-like or at least
        // five characters long are treated as such macros.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list that is
      // closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1332 
1333 bool UnwrappedLineParser::tryToParseLambda() {
1334   if (!Style.isCpp()) {
1335     nextToken();
1336     return false;
1337   }
1338   assert(FormatTok->is(tok::l_square));
1339   FormatToken &LSquare = *FormatTok;
1340   if (!tryToParseLambdaIntroducer())
1341     return false;
1342 
1343   while (FormatTok->isNot(tok::l_brace)) {
1344     if (FormatTok->isSimpleTypeSpecifier()) {
1345       nextToken();
1346       continue;
1347     }
1348     switch (FormatTok->Tok.getKind()) {
1349     case tok::l_brace:
1350       break;
1351     case tok::l_paren:
1352       parseParens();
1353       break;
1354     case tok::amp:
1355     case tok::star:
1356     case tok::kw_const:
1357     case tok::comma:
1358     case tok::less:
1359     case tok::greater:
1360     case tok::identifier:
1361     case tok::numeric_constant:
1362     case tok::coloncolon:
1363     case tok::kw_mutable:
1364       nextToken();
1365       break;
1366     case tok::arrow:
1367       FormatTok->Type = TT_LambdaArrow;
1368       nextToken();
1369       break;
1370     default:
1371       return true;
1372     }
1373   }
1374   LSquare.Type = TT_LambdaLSquare;
1375   parseChildBlock();
1376   return true;
1377 }
1378 
1379 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1380   const FormatToken *Previous = FormatTok->Previous;
1381   if (Previous &&
1382       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1383                          tok::kw_delete) ||
1384        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1385        Previous->isSimpleTypeSpecifier())) {
1386     nextToken();
1387     return false;
1388   }
1389   nextToken();
1390   parseSquare(/*LambdaIntroducer=*/true);
1391   return true;
1392 }
1393 
1394 void UnwrappedLineParser::tryToParseJSFunction() {
1395   assert(FormatTok->is(Keywords.kw_function) ||
1396          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1397   if (FormatTok->is(Keywords.kw_async))
1398     nextToken();
1399   // Consume "function".
1400   nextToken();
1401 
1402   // Consume * (generator function). Treat it like C++'s overloaded operators.
1403   if (FormatTok->is(tok::star)) {
1404     FormatTok->Type = TT_OverloadedOperator;
1405     nextToken();
1406   }
1407 
1408   // Consume function name.
1409   if (FormatTok->is(tok::identifier))
1410     nextToken();
1411 
1412   if (FormatTok->isNot(tok::l_paren))
1413     return;
1414 
1415   // Parse formal parameter list.
1416   parseParens();
1417 
1418   if (FormatTok->is(tok::colon)) {
1419     // Parse a type definition.
1420     nextToken();
1421 
1422     // Eat the type declaration. For braced inline object types, balance braces,
1423     // otherwise just parse until finding an l_brace for the function body.
1424     if (FormatTok->is(tok::l_brace))
1425       tryToParseBracedList();
1426     else
1427       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1428         nextToken();
1429   }
1430 
1431   if (FormatTok->is(tok::semi))
1432     return;
1433 
1434   parseChildBlock();
1435 }
1436 
1437 bool UnwrappedLineParser::tryToParseBracedList() {
1438   if (FormatTok->BlockKind == BK_Unknown)
1439     calculateBraceTypes();
1440   assert(FormatTok->BlockKind != BK_Unknown);
1441   if (FormatTok->BlockKind == BK_Block)
1442     return false;
1443   nextToken();
1444   parseBracedList();
1445   return true;
1446 }
1447 
1448 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1449                                           tok::TokenKind ClosingBraceKind) {
1450   bool HasError = false;
1451 
1452   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1453   // replace this by using parseAssigmentExpression() inside.
1454   do {
1455     if (Style.Language == FormatStyle::LK_JavaScript) {
1456       if (FormatTok->is(Keywords.kw_function) ||
1457           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1458         tryToParseJSFunction();
1459         continue;
1460       }
1461       if (FormatTok->is(TT_JsFatArrow)) {
1462         nextToken();
1463         // Fat arrows can be followed by simple expressions or by child blocks
1464         // in curly braces.
1465         if (FormatTok->is(tok::l_brace)) {
1466           parseChildBlock();
1467           continue;
1468         }
1469       }
1470       if (FormatTok->is(tok::l_brace)) {
1471         // Could be a method inside of a braced list `{a() { return 1; }}`.
1472         if (tryToParseBracedList())
1473           continue;
1474         parseChildBlock();
1475       }
1476     }
1477     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1478       nextToken();
1479       return !HasError;
1480     }
1481     switch (FormatTok->Tok.getKind()) {
1482     case tok::caret:
1483       nextToken();
1484       if (FormatTok->is(tok::l_brace)) {
1485         parseChildBlock();
1486       }
1487       break;
1488     case tok::l_square:
1489       tryToParseLambda();
1490       break;
1491     case tok::l_paren:
1492       parseParens();
1493       // JavaScript can just have free standing methods and getters/setters in
1494       // object literals. Detect them by a "{" following ")".
1495       if (Style.Language == FormatStyle::LK_JavaScript) {
1496         if (FormatTok->is(tok::l_brace))
1497           parseChildBlock();
1498         break;
1499       }
1500       break;
1501     case tok::l_brace:
1502       // Assume there are no blocks inside a braced init list apart
1503       // from the ones we explicitly parse out (like lambdas).
1504       FormatTok->BlockKind = BK_BracedInit;
1505       nextToken();
1506       parseBracedList();
1507       break;
1508     case tok::less:
1509       if (Style.Language == FormatStyle::LK_Proto) {
1510         nextToken();
1511         parseBracedList(/*ContinueOnSemicolons=*/false,
1512                         /*ClosingBraceKind=*/tok::greater);
1513       } else {
1514         nextToken();
1515       }
1516       break;
1517     case tok::semi:
1518       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1519       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1520       // used for error recovery if we have otherwise determined that this is
1521       // a braced list.
1522       if (Style.Language == FormatStyle::LK_JavaScript) {
1523         nextToken();
1524         break;
1525       }
1526       HasError = true;
1527       if (!ContinueOnSemicolons)
1528         return !HasError;
1529       nextToken();
1530       break;
1531     case tok::comma:
1532       nextToken();
1533       break;
1534     default:
1535       nextToken();
1536       break;
1537     }
1538   } while (!eof());
1539   return false;
1540 }
1541 
1542 void UnwrappedLineParser::parseParens() {
1543   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1544   nextToken();
1545   do {
1546     switch (FormatTok->Tok.getKind()) {
1547     case tok::l_paren:
1548       parseParens();
1549       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1550         parseChildBlock();
1551       break;
1552     case tok::r_paren:
1553       nextToken();
1554       return;
1555     case tok::r_brace:
1556       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1557       return;
1558     case tok::l_square:
1559       tryToParseLambda();
1560       break;
1561     case tok::l_brace:
1562       if (!tryToParseBracedList())
1563         parseChildBlock();
1564       break;
1565     case tok::at:
1566       nextToken();
1567       if (FormatTok->Tok.is(tok::l_brace)) {
1568         nextToken();
1569         parseBracedList();
1570       }
1571       break;
1572     case tok::kw_class:
1573       if (Style.Language == FormatStyle::LK_JavaScript)
1574         parseRecord(/*ParseAsExpr=*/true);
1575       else
1576         nextToken();
1577       break;
1578     case tok::identifier:
1579       if (Style.Language == FormatStyle::LK_JavaScript &&
1580           (FormatTok->is(Keywords.kw_function) ||
1581            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1582         tryToParseJSFunction();
1583       else
1584         nextToken();
1585       break;
1586     default:
1587       nextToken();
1588       break;
1589     }
1590   } while (!eof());
1591 }
1592 
1593 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1594   if (!LambdaIntroducer) {
1595     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1596     if (tryToParseLambda())
1597       return;
1598   }
1599   do {
1600     switch (FormatTok->Tok.getKind()) {
1601     case tok::l_paren:
1602       parseParens();
1603       break;
1604     case tok::r_square:
1605       nextToken();
1606       return;
1607     case tok::r_brace:
1608       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1609       return;
1610     case tok::l_square:
1611       parseSquare();
1612       break;
1613     case tok::l_brace: {
1614       if (!tryToParseBracedList())
1615         parseChildBlock();
1616       break;
1617     }
1618     case tok::at:
1619       nextToken();
1620       if (FormatTok->Tok.is(tok::l_brace)) {
1621         nextToken();
1622         parseBracedList();
1623       }
1624       break;
1625     default:
1626       nextToken();
1627       break;
1628     }
1629   } while (!eof());
1630 }
1631 
1632 void UnwrappedLineParser::parseIfThenElse() {
1633   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1634   nextToken();
1635   if (FormatTok->Tok.is(tok::kw_constexpr))
1636     nextToken();
1637   if (FormatTok->Tok.is(tok::l_paren))
1638     parseParens();
1639   bool NeedsUnwrappedLine = false;
1640   if (FormatTok->Tok.is(tok::l_brace)) {
1641     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1642     parseBlock(/*MustBeDeclaration=*/false);
1643     if (Style.BraceWrapping.BeforeElse)
1644       addUnwrappedLine();
1645     else
1646       NeedsUnwrappedLine = true;
1647   } else {
1648     addUnwrappedLine();
1649     ++Line->Level;
1650     parseStructuralElement();
1651     --Line->Level;
1652   }
1653   if (FormatTok->Tok.is(tok::kw_else)) {
1654     nextToken();
1655     if (FormatTok->Tok.is(tok::l_brace)) {
1656       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1657       parseBlock(/*MustBeDeclaration=*/false);
1658       addUnwrappedLine();
1659     } else if (FormatTok->Tok.is(tok::kw_if)) {
1660       parseIfThenElse();
1661     } else {
1662       addUnwrappedLine();
1663       ++Line->Level;
1664       parseStructuralElement();
1665       if (FormatTok->is(tok::eof))
1666         addUnwrappedLine();
1667       --Line->Level;
1668     }
1669   } else if (NeedsUnwrappedLine) {
1670     addUnwrappedLine();
1671   }
1672 }
1673 
1674 void UnwrappedLineParser::parseTryCatch() {
1675   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1676   nextToken();
1677   bool NeedsUnwrappedLine = false;
1678   if (FormatTok->is(tok::colon)) {
1679     // We are in a function try block, what comes is an initializer list.
1680     nextToken();
1681     while (FormatTok->is(tok::identifier)) {
1682       nextToken();
1683       if (FormatTok->is(tok::l_paren))
1684         parseParens();
1685       if (FormatTok->is(tok::comma))
1686         nextToken();
1687     }
1688   }
1689   // Parse try with resource.
1690   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1691     parseParens();
1692   }
1693   if (FormatTok->is(tok::l_brace)) {
1694     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1695     parseBlock(/*MustBeDeclaration=*/false);
1696     if (Style.BraceWrapping.BeforeCatch) {
1697       addUnwrappedLine();
1698     } else {
1699       NeedsUnwrappedLine = true;
1700     }
1701   } else if (!FormatTok->is(tok::kw_catch)) {
1702     // The C++ standard requires a compound-statement after a try.
1703     // If there's none, we try to assume there's a structuralElement
1704     // and try to continue.
1705     addUnwrappedLine();
1706     ++Line->Level;
1707     parseStructuralElement();
1708     --Line->Level;
1709   }
1710   while (1) {
1711     if (FormatTok->is(tok::at))
1712       nextToken();
1713     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1714                              tok::kw___finally) ||
1715           ((Style.Language == FormatStyle::LK_Java ||
1716             Style.Language == FormatStyle::LK_JavaScript) &&
1717            FormatTok->is(Keywords.kw_finally)) ||
1718           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1719            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1720       break;
1721     nextToken();
1722     while (FormatTok->isNot(tok::l_brace)) {
1723       if (FormatTok->is(tok::l_paren)) {
1724         parseParens();
1725         continue;
1726       }
1727       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1728         return;
1729       nextToken();
1730     }
1731     NeedsUnwrappedLine = false;
1732     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1733     parseBlock(/*MustBeDeclaration=*/false);
1734     if (Style.BraceWrapping.BeforeCatch)
1735       addUnwrappedLine();
1736     else
1737       NeedsUnwrappedLine = true;
1738   }
1739   if (NeedsUnwrappedLine)
1740     addUnwrappedLine();
1741 }
1742 
1743 void UnwrappedLineParser::parseNamespace() {
1744   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1745 
1746   const FormatToken &InitialToken = *FormatTok;
1747   nextToken();
1748   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1749     nextToken();
1750   if (FormatTok->Tok.is(tok::l_brace)) {
1751     if (ShouldBreakBeforeBrace(Style, InitialToken))
1752       addUnwrappedLine();
1753 
1754     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1755                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1756                      DeclarationScopeStack.size() > 1);
1757     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1758     // Munch the semicolon after a namespace. This is more common than one would
1759     // think. Puttin the semicolon into its own line is very ugly.
1760     if (FormatTok->Tok.is(tok::semi))
1761       nextToken();
1762     addUnwrappedLine();
1763   }
1764   // FIXME: Add error handling.
1765 }
1766 
1767 void UnwrappedLineParser::parseNew() {
1768   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1769   nextToken();
1770   if (Style.Language != FormatStyle::LK_Java)
1771     return;
1772 
1773   // In Java, we can parse everything up to the parens, which aren't optional.
1774   do {
1775     // There should not be a ;, { or } before the new's open paren.
1776     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1777       return;
1778 
1779     // Consume the parens.
1780     if (FormatTok->is(tok::l_paren)) {
1781       parseParens();
1782 
1783       // If there is a class body of an anonymous class, consume that as child.
1784       if (FormatTok->is(tok::l_brace))
1785         parseChildBlock();
1786       return;
1787     }
1788     nextToken();
1789   } while (!eof());
1790 }
1791 
1792 void UnwrappedLineParser::parseForOrWhileLoop() {
1793   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1794          "'for', 'while' or foreach macro expected");
1795   nextToken();
1796   // JS' for await ( ...
1797   if (Style.Language == FormatStyle::LK_JavaScript &&
1798       FormatTok->is(Keywords.kw_await))
1799     nextToken();
1800   if (FormatTok->Tok.is(tok::l_paren))
1801     parseParens();
1802   if (FormatTok->Tok.is(tok::l_brace)) {
1803     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1804     parseBlock(/*MustBeDeclaration=*/false);
1805     addUnwrappedLine();
1806   } else {
1807     addUnwrappedLine();
1808     ++Line->Level;
1809     parseStructuralElement();
1810     --Line->Level;
1811   }
1812 }
1813 
1814 void UnwrappedLineParser::parseDoWhile() {
1815   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1816   nextToken();
1817   if (FormatTok->Tok.is(tok::l_brace)) {
1818     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1819     parseBlock(/*MustBeDeclaration=*/false);
1820     if (Style.BraceWrapping.IndentBraces)
1821       addUnwrappedLine();
1822   } else {
1823     addUnwrappedLine();
1824     ++Line->Level;
1825     parseStructuralElement();
1826     --Line->Level;
1827   }
1828 
1829   // FIXME: Add error handling.
1830   if (!FormatTok->Tok.is(tok::kw_while)) {
1831     addUnwrappedLine();
1832     return;
1833   }
1834 
1835   nextToken();
1836   parseStructuralElement();
1837 }
1838 
1839 void UnwrappedLineParser::parseLabel() {
1840   nextToken();
1841   unsigned OldLineLevel = Line->Level;
1842   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1843     --Line->Level;
1844   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1845     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1846     parseBlock(/*MustBeDeclaration=*/false);
1847     if (FormatTok->Tok.is(tok::kw_break)) {
1848       if (Style.BraceWrapping.AfterControlStatement)
1849         addUnwrappedLine();
1850       parseStructuralElement();
1851     }
1852     addUnwrappedLine();
1853   } else {
1854     if (FormatTok->is(tok::semi))
1855       nextToken();
1856     addUnwrappedLine();
1857   }
1858   Line->Level = OldLineLevel;
1859   if (FormatTok->isNot(tok::l_brace)) {
1860     parseStructuralElement();
1861     addUnwrappedLine();
1862   }
1863 }
1864 
1865 void UnwrappedLineParser::parseCaseLabel() {
1866   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1867   // FIXME: fix handling of complex expressions here.
1868   do {
1869     nextToken();
1870   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1871   parseLabel();
1872 }
1873 
1874 void UnwrappedLineParser::parseSwitch() {
1875   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1876   nextToken();
1877   if (FormatTok->Tok.is(tok::l_paren))
1878     parseParens();
1879   if (FormatTok->Tok.is(tok::l_brace)) {
1880     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1881     parseBlock(/*MustBeDeclaration=*/false);
1882     addUnwrappedLine();
1883   } else {
1884     addUnwrappedLine();
1885     ++Line->Level;
1886     parseStructuralElement();
1887     --Line->Level;
1888   }
1889 }
1890 
1891 void UnwrappedLineParser::parseAccessSpecifier() {
1892   nextToken();
1893   // Understand Qt's slots.
1894   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1895     nextToken();
1896   // Otherwise, we don't know what it is, and we'd better keep the next token.
1897   if (FormatTok->Tok.is(tok::colon))
1898     nextToken();
1899   addUnwrappedLine();
1900 }
1901 
1902 bool UnwrappedLineParser::parseEnum() {
1903   // Won't be 'enum' for NS_ENUMs.
1904   if (FormatTok->Tok.is(tok::kw_enum))
1905     nextToken();
1906 
1907   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1908   // declarations. An "enum" keyword followed by a colon would be a syntax
1909   // error and thus assume it is just an identifier.
1910   if (Style.Language == FormatStyle::LK_JavaScript &&
1911       FormatTok->isOneOf(tok::colon, tok::question))
1912     return false;
1913 
1914   // Eat up enum class ...
1915   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1916     nextToken();
1917 
1918   while (FormatTok->Tok.getIdentifierInfo() ||
1919          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1920                             tok::greater, tok::comma, tok::question)) {
1921     nextToken();
1922     // We can have macros or attributes in between 'enum' and the enum name.
1923     if (FormatTok->is(tok::l_paren))
1924       parseParens();
1925     if (FormatTok->is(tok::identifier)) {
1926       nextToken();
1927       // If there are two identifiers in a row, this is likely an elaborate
1928       // return type. In Java, this can be "implements", etc.
1929       if (Style.isCpp() && FormatTok->is(tok::identifier))
1930         return false;
1931     }
1932   }
1933 
1934   // Just a declaration or something is wrong.
1935   if (FormatTok->isNot(tok::l_brace))
1936     return true;
1937   FormatTok->BlockKind = BK_Block;
1938 
1939   if (Style.Language == FormatStyle::LK_Java) {
1940     // Java enums are different.
1941     parseJavaEnumBody();
1942     return true;
1943   }
1944   if (Style.Language == FormatStyle::LK_Proto) {
1945     parseBlock(/*MustBeDeclaration=*/true);
1946     return true;
1947   }
1948 
1949   // Parse enum body.
1950   nextToken();
1951   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1952   if (HasError) {
1953     if (FormatTok->is(tok::semi))
1954       nextToken();
1955     addUnwrappedLine();
1956   }
1957   return true;
1958 
1959   // There is no addUnwrappedLine() here so that we fall through to parsing a
1960   // structural element afterwards. Thus, in "enum A {} n, m;",
1961   // "} n, m;" will end up in one unwrapped line.
1962 }
1963 
1964 void UnwrappedLineParser::parseJavaEnumBody() {
1965   // Determine whether the enum is simple, i.e. does not have a semicolon or
1966   // constants with class bodies. Simple enums can be formatted like braced
1967   // lists, contracted to a single line, etc.
1968   unsigned StoredPosition = Tokens->getPosition();
1969   bool IsSimple = true;
1970   FormatToken *Tok = Tokens->getNextToken();
1971   while (Tok) {
1972     if (Tok->is(tok::r_brace))
1973       break;
1974     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1975       IsSimple = false;
1976       break;
1977     }
1978     // FIXME: This will also mark enums with braces in the arguments to enum
1979     // constants as "not simple". This is probably fine in practice, though.
1980     Tok = Tokens->getNextToken();
1981   }
1982   FormatTok = Tokens->setPosition(StoredPosition);
1983 
1984   if (IsSimple) {
1985     nextToken();
1986     parseBracedList();
1987     addUnwrappedLine();
1988     return;
1989   }
1990 
1991   // Parse the body of a more complex enum.
1992   // First add a line for everything up to the "{".
1993   nextToken();
1994   addUnwrappedLine();
1995   ++Line->Level;
1996 
1997   // Parse the enum constants.
1998   while (FormatTok) {
1999     if (FormatTok->is(tok::l_brace)) {
2000       // Parse the constant's class body.
2001       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2002                  /*MunchSemi=*/false);
2003     } else if (FormatTok->is(tok::l_paren)) {
2004       parseParens();
2005     } else if (FormatTok->is(tok::comma)) {
2006       nextToken();
2007       addUnwrappedLine();
2008     } else if (FormatTok->is(tok::semi)) {
2009       nextToken();
2010       addUnwrappedLine();
2011       break;
2012     } else if (FormatTok->is(tok::r_brace)) {
2013       addUnwrappedLine();
2014       break;
2015     } else {
2016       nextToken();
2017     }
2018   }
2019 
2020   // Parse the class body after the enum's ";" if any.
2021   parseLevel(/*HasOpeningBrace=*/true);
2022   nextToken();
2023   --Line->Level;
2024   addUnwrappedLine();
2025 }
2026 
2027 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2028   const FormatToken &InitialToken = *FormatTok;
2029   nextToken();
2030 
2031   // The actual identifier can be a nested name specifier, and in macros
2032   // it is often token-pasted.
2033   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2034                             tok::kw___attribute, tok::kw___declspec,
2035                             tok::kw_alignas) ||
2036          ((Style.Language == FormatStyle::LK_Java ||
2037            Style.Language == FormatStyle::LK_JavaScript) &&
2038           FormatTok->isOneOf(tok::period, tok::comma))) {
2039     if (Style.Language == FormatStyle::LK_JavaScript &&
2040         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2041       // JavaScript/TypeScript supports inline object types in
2042       // extends/implements positions:
2043       //     class Foo implements {bar: number} { }
2044       nextToken();
2045       if (FormatTok->is(tok::l_brace)) {
2046         tryToParseBracedList();
2047         continue;
2048       }
2049     }
2050     bool IsNonMacroIdentifier =
2051         FormatTok->is(tok::identifier) &&
2052         FormatTok->TokenText != FormatTok->TokenText.upper();
2053     nextToken();
2054     // We can have macros or attributes in between 'class' and the class name.
2055     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2056       parseParens();
2057   }
2058 
2059   // Note that parsing away template declarations here leads to incorrectly
2060   // accepting function declarations as record declarations.
2061   // In general, we cannot solve this problem. Consider:
2062   // class A<int> B() {}
2063   // which can be a function definition or a class definition when B() is a
2064   // macro. If we find enough real-world cases where this is a problem, we
2065   // can parse for the 'template' keyword in the beginning of the statement,
2066   // and thus rule out the record production in case there is no template
2067   // (this would still leave us with an ambiguity between template function
2068   // and class declarations).
2069   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2070     while (!eof()) {
2071       if (FormatTok->is(tok::l_brace)) {
2072         calculateBraceTypes(/*ExpectClassBody=*/true);
2073         if (!tryToParseBracedList())
2074           break;
2075       }
2076       if (FormatTok->Tok.is(tok::semi))
2077         return;
2078       nextToken();
2079     }
2080   }
2081   if (FormatTok->Tok.is(tok::l_brace)) {
2082     if (ParseAsExpr) {
2083       parseChildBlock();
2084     } else {
2085       if (ShouldBreakBeforeBrace(Style, InitialToken))
2086         addUnwrappedLine();
2087 
2088       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2089                  /*MunchSemi=*/false);
2090     }
2091   }
2092   // There is no addUnwrappedLine() here so that we fall through to parsing a
2093   // structural element afterwards. Thus, in "class A {} n, m;",
2094   // "} n, m;" will end up in one unwrapped line.
2095 }
2096 
2097 void UnwrappedLineParser::parseObjCProtocolList() {
2098   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2099   do
2100     nextToken();
2101   while (!eof() && FormatTok->Tok.isNot(tok::greater));
2102   nextToken(); // Skip '>'.
2103 }
2104 
2105 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2106   do {
2107     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2108       nextToken();
2109       addUnwrappedLine();
2110       break;
2111     }
2112     if (FormatTok->is(tok::l_brace)) {
2113       parseBlock(/*MustBeDeclaration=*/false);
2114       // In ObjC interfaces, nothing should be following the "}".
2115       addUnwrappedLine();
2116     } else if (FormatTok->is(tok::r_brace)) {
2117       // Ignore stray "}". parseStructuralElement doesn't consume them.
2118       nextToken();
2119       addUnwrappedLine();
2120     } else {
2121       parseStructuralElement();
2122     }
2123   } while (!eof());
2124 }
2125 
2126 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2127   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2128          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2129   nextToken();
2130   nextToken(); // interface name
2131 
2132   // @interface can be followed by either a base class, or a category.
2133   if (FormatTok->Tok.is(tok::colon)) {
2134     nextToken();
2135     nextToken(); // base class name
2136   } else if (FormatTok->Tok.is(tok::l_paren))
2137     // Skip category, if present.
2138     parseParens();
2139 
2140   if (FormatTok->Tok.is(tok::less))
2141     parseObjCProtocolList();
2142 
2143   if (FormatTok->Tok.is(tok::l_brace)) {
2144     if (Style.BraceWrapping.AfterObjCDeclaration)
2145       addUnwrappedLine();
2146     parseBlock(/*MustBeDeclaration=*/true);
2147   }
2148 
2149   // With instance variables, this puts '}' on its own line.  Without instance
2150   // variables, this ends the @interface line.
2151   addUnwrappedLine();
2152 
2153   parseObjCUntilAtEnd();
2154 }
2155 
2156 // Returns true for the declaration/definition form of @protocol,
2157 // false for the expression form.
2158 bool UnwrappedLineParser::parseObjCProtocol() {
2159   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2160   nextToken();
2161 
2162   if (FormatTok->is(tok::l_paren))
2163     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2164     return false;
2165 
2166   // The definition/declaration form,
2167   // @protocol Foo
2168   // - (int)someMethod;
2169   // @end
2170 
2171   nextToken(); // protocol name
2172 
2173   if (FormatTok->Tok.is(tok::less))
2174     parseObjCProtocolList();
2175 
2176   // Check for protocol declaration.
2177   if (FormatTok->Tok.is(tok::semi)) {
2178     nextToken();
2179     addUnwrappedLine();
2180     return true;
2181   }
2182 
2183   addUnwrappedLine();
2184   parseObjCUntilAtEnd();
2185   return true;
2186 }
2187 
2188 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2189   bool IsImport = FormatTok->is(Keywords.kw_import);
2190   assert(IsImport || FormatTok->is(tok::kw_export));
2191   nextToken();
2192 
2193   // Consume the "default" in "export default class/function".
2194   if (FormatTok->is(tok::kw_default))
2195     nextToken();
2196 
2197   // Consume "async function", "function" and "default function", so that these
2198   // get parsed as free-standing JS functions, i.e. do not require a trailing
2199   // semicolon.
2200   if (FormatTok->is(Keywords.kw_async))
2201     nextToken();
2202   if (FormatTok->is(Keywords.kw_function)) {
2203     nextToken();
2204     return;
2205   }
2206 
2207   // For imports, `export *`, `export {...}`, consume the rest of the line up
2208   // to the terminating `;`. For everything else, just return and continue
2209   // parsing the structural element, i.e. the declaration or expression for
2210   // `export default`.
2211   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2212       !FormatTok->isStringLiteral())
2213     return;
2214 
2215   while (!eof()) {
2216     if (FormatTok->is(tok::semi))
2217       return;
2218     if (Line->Tokens.empty()) {
2219       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2220       // import statement should terminate.
2221       return;
2222     }
2223     if (FormatTok->is(tok::l_brace)) {
2224       FormatTok->BlockKind = BK_Block;
2225       nextToken();
2226       parseBracedList();
2227     } else {
2228       nextToken();
2229     }
2230   }
2231 }
2232 
2233 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2234                                                  StringRef Prefix = "") {
2235   llvm::dbgs() << Prefix << "Line(" << Line.Level
2236                << ", FSC=" << Line.FirstStartColumn << ")"
2237                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2238   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2239                                                     E = Line.Tokens.end();
2240        I != E; ++I) {
2241     llvm::dbgs() << I->Tok->Tok.getName() << "["
2242                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2243                  << "] ";
2244   }
2245   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2246                                                     E = Line.Tokens.end();
2247        I != E; ++I) {
2248     const UnwrappedLineNode &Node = *I;
2249     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2250              I = Node.Children.begin(),
2251              E = Node.Children.end();
2252          I != E; ++I) {
2253       printDebugInfo(*I, "\nChild: ");
2254     }
2255   }
2256   llvm::dbgs() << "\n";
2257 }
2258 
2259 void UnwrappedLineParser::addUnwrappedLine() {
2260   if (Line->Tokens.empty())
2261     return;
2262   DEBUG({
2263     if (CurrentLines == &Lines)
2264       printDebugInfo(*Line);
2265   });
2266   CurrentLines->push_back(std::move(*Line));
2267   Line->Tokens.clear();
2268   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2269   Line->FirstStartColumn = 0;
2270   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2271     CurrentLines->append(
2272         std::make_move_iterator(PreprocessorDirectives.begin()),
2273         std::make_move_iterator(PreprocessorDirectives.end()));
2274     PreprocessorDirectives.clear();
2275   }
2276   // Disconnect the current token from the last token on the previous line.
2277   FormatTok->Previous = nullptr;
2278 }
2279 
2280 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2281 
2282 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2283   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2284          FormatTok.NewlinesBefore > 0;
2285 }
2286 
2287 // Checks if \p FormatTok is a line comment that continues the line comment
2288 // section on \p Line.
2289 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2290                                         const UnwrappedLine &Line,
2291                                         llvm::Regex &CommentPragmasRegex) {
2292   if (Line.Tokens.empty())
2293     return false;
2294 
2295   StringRef IndentContent = FormatTok.TokenText;
2296   if (FormatTok.TokenText.startswith("//") ||
2297       FormatTok.TokenText.startswith("/*"))
2298     IndentContent = FormatTok.TokenText.substr(2);
2299   if (CommentPragmasRegex.match(IndentContent))
2300     return false;
2301 
2302   // If Line starts with a line comment, then FormatTok continues the comment
2303   // section if its original column is greater or equal to the original start
2304   // column of the line.
2305   //
2306   // Define the min column token of a line as follows: if a line ends in '{' or
2307   // contains a '{' followed by a line comment, then the min column token is
2308   // that '{'. Otherwise, the min column token of the line is the first token of
2309   // the line.
2310   //
2311   // If Line starts with a token other than a line comment, then FormatTok
2312   // continues the comment section if its original column is greater than the
2313   // original start column of the min column token of the line.
2314   //
2315   // For example, the second line comment continues the first in these cases:
2316   //
2317   // // first line
2318   // // second line
2319   //
2320   // and:
2321   //
2322   // // first line
2323   //  // second line
2324   //
2325   // and:
2326   //
2327   // int i; // first line
2328   //  // second line
2329   //
2330   // and:
2331   //
2332   // do { // first line
2333   //      // second line
2334   //   int i;
2335   // } while (true);
2336   //
2337   // and:
2338   //
2339   // enum {
2340   //   a, // first line
2341   //    // second line
2342   //   b
2343   // };
2344   //
2345   // The second line comment doesn't continue the first in these cases:
2346   //
2347   //   // first line
2348   //  // second line
2349   //
2350   // and:
2351   //
2352   // int i; // first line
2353   // // second line
2354   //
2355   // and:
2356   //
2357   // do { // first line
2358   //   // second line
2359   //   int i;
2360   // } while (true);
2361   //
2362   // and:
2363   //
2364   // enum {
2365   //   a, // first line
2366   //   // second line
2367   // };
2368   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2369 
2370   // Scan for '{//'. If found, use the column of '{' as a min column for line
2371   // comment section continuation.
2372   const FormatToken *PreviousToken = nullptr;
2373   for (const UnwrappedLineNode &Node : Line.Tokens) {
2374     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2375         isLineComment(*Node.Tok)) {
2376       MinColumnToken = PreviousToken;
2377       break;
2378     }
2379     PreviousToken = Node.Tok;
2380 
2381     // Grab the last newline preceding a token in this unwrapped line.
2382     if (Node.Tok->NewlinesBefore > 0) {
2383       MinColumnToken = Node.Tok;
2384     }
2385   }
2386   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2387     MinColumnToken = PreviousToken;
2388   }
2389 
2390   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2391                               MinColumnToken);
2392 }
2393 
2394 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2395   bool JustComments = Line->Tokens.empty();
2396   for (SmallVectorImpl<FormatToken *>::const_iterator
2397            I = CommentsBeforeNextToken.begin(),
2398            E = CommentsBeforeNextToken.end();
2399        I != E; ++I) {
2400     // Line comments that belong to the same line comment section are put on the
2401     // same line since later we might want to reflow content between them.
2402     // Additional fine-grained breaking of line comment sections is controlled
2403     // by the class BreakableLineCommentSection in case it is desirable to keep
2404     // several line comment sections in the same unwrapped line.
2405     //
2406     // FIXME: Consider putting separate line comment sections as children to the
2407     // unwrapped line instead.
2408     (*I)->ContinuesLineCommentSection =
2409         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2410     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2411       addUnwrappedLine();
2412     pushToken(*I);
2413   }
2414   if (NewlineBeforeNext && JustComments)
2415     addUnwrappedLine();
2416   CommentsBeforeNextToken.clear();
2417 }
2418 
2419 void UnwrappedLineParser::nextToken(int LevelDifference) {
2420   if (eof())
2421     return;
2422   flushComments(isOnNewLine(*FormatTok));
2423   pushToken(FormatTok);
2424   FormatToken *Previous = FormatTok;
2425   if (Style.Language != FormatStyle::LK_JavaScript)
2426     readToken(LevelDifference);
2427   else
2428     readTokenWithJavaScriptASI();
2429   FormatTok->Previous = Previous;
2430 }
2431 
2432 void UnwrappedLineParser::distributeComments(
2433     const SmallVectorImpl<FormatToken *> &Comments,
2434     const FormatToken *NextTok) {
2435   // Whether or not a line comment token continues a line is controlled by
2436   // the method continuesLineCommentSection, with the following caveat:
2437   //
2438   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2439   // that each comment line from the trail is aligned with the next token, if
2440   // the next token exists. If a trail exists, the beginning of the maximal
2441   // trail is marked as a start of a new comment section.
2442   //
2443   // For example in this code:
2444   //
2445   // int a; // line about a
2446   //   // line 1 about b
2447   //   // line 2 about b
2448   //   int b;
2449   //
2450   // the two lines about b form a maximal trail, so there are two sections, the
2451   // first one consisting of the single comment "// line about a" and the
2452   // second one consisting of the next two comments.
2453   if (Comments.empty())
2454     return;
2455   bool ShouldPushCommentsInCurrentLine = true;
2456   bool HasTrailAlignedWithNextToken = false;
2457   unsigned StartOfTrailAlignedWithNextToken = 0;
2458   if (NextTok) {
2459     // We are skipping the first element intentionally.
2460     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2461       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2462         HasTrailAlignedWithNextToken = true;
2463         StartOfTrailAlignedWithNextToken = i;
2464       }
2465     }
2466   }
2467   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2468     FormatToken *FormatTok = Comments[i];
2469     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2470       FormatTok->ContinuesLineCommentSection = false;
2471     } else {
2472       FormatTok->ContinuesLineCommentSection =
2473           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2474     }
2475     if (!FormatTok->ContinuesLineCommentSection &&
2476         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2477       ShouldPushCommentsInCurrentLine = false;
2478     }
2479     if (ShouldPushCommentsInCurrentLine) {
2480       pushToken(FormatTok);
2481     } else {
2482       CommentsBeforeNextToken.push_back(FormatTok);
2483     }
2484   }
2485 }
2486 
// Reads tokens from the token source until FormatTok is a non-comment token.
//
// Comment tokens met on the way are collected and handed to
// distributeComments(). Preprocessor directives that start at a '#' are
// parsed in place via parsePPDirective(), merge-conflict marker tokens are
// fed to the conditional-compilation handlers, and tokens inside a branch
// marked PP_Unreachable are skipped.
//
// \p LevelDifference is added to Line->Level while a preprocessor directive
// found here is being parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments read since the last non-comment token.
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' that is the first token or follows an unescaped newline starts a
    // preprocessor directive, unless we are already inside one.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Merge-conflict markers are handled like conditional compilation: start,
    // alternative and end of a conflict map to the corresponding handlers.
    // The token following a marker must start a new line.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Skip tokens that sit in an unreachable preprocessor branch. Note that
    // 'continue' jumps to the loop condition, so the loop ends if such a
    // token is the eof token.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // The first non-comment token ends the search; the comments collected so
    // far are distributed relative to it.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // The loop condition only fails after the 'continue' above hit eof; there is
  // no next token for the remaining comments to be aligned with.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
2541 
2542 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2543   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2544   if (MustBreakBeforeNextToken) {
2545     Line->Tokens.back().Tok->MustBreakBefore = true;
2546     MustBreakBeforeNextToken = false;
2547   }
2548 }
2549 
2550 } // end namespace format
2551 } // end namespace clang
2552