1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 static bool isLineComment(const FormatToken &FormatTok) {
59   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
60 }
61 
62 // Checks if \p FormatTok is a line comment that continues the line comment
63 // \p Previous. The original column of \p MinColumnToken is used to determine
64 // whether \p FormatTok is indented enough to the right to continue \p Previous.
65 static bool continuesLineComment(const FormatToken &FormatTok,
66                                  const FormatToken *Previous,
67                                  const FormatToken *MinColumnToken) {
68   if (!Previous || !MinColumnToken)
69     return false;
70   unsigned MinContinueColumn =
71       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
72   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
73          isLineComment(*Previous) &&
74          FormatTok.OriginalColumn >= MinContinueColumn;
75 }
76 
77 class ScopedMacroState : public FormatTokenSource {
78 public:
79   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
80                    FormatToken *&ResetToken)
81       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
82         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
83         Token(nullptr), PreviousToken(nullptr) {
84     TokenSource = this;
85     Line.Level = 0;
86     Line.InPPDirective = true;
87   }
88 
89   ~ScopedMacroState() override {
90     TokenSource = PreviousTokenSource;
91     ResetToken = Token;
92     Line.InPPDirective = false;
93     Line.Level = PreviousLineLevel;
94   }
95 
96   FormatToken *getNextToken() override {
97     // The \c UnwrappedLineParser guards against this by never calling
98     // \c getNextToken() after it has encountered the first eof token.
99     assert(!eof());
100     PreviousToken = Token;
101     Token = PreviousTokenSource->getNextToken();
102     if (eof())
103       return getFakeEOF();
104     return Token;
105   }
106 
107   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
108 
109   FormatToken *setPosition(unsigned Position) override {
110     PreviousToken = nullptr;
111     Token = PreviousTokenSource->setPosition(Position);
112     return Token;
113   }
114 
115 private:
116   bool eof() {
117     return Token && Token->HasUnescapedNewline &&
118            !continuesLineComment(*Token, PreviousToken,
119                                  /*MinColumnToken=*/PreviousToken);
120   }
121 
122   FormatToken *getFakeEOF() {
123     static bool EOFInitialized = false;
124     static FormatToken FormatTok;
125     if (!EOFInitialized) {
126       FormatTok.Tok.startToken();
127       FormatTok.Tok.setKind(tok::eof);
128       EOFInitialized = true;
129     }
130     return &FormatTok;
131   }
132 
133   UnwrappedLine &Line;
134   FormatTokenSource *&TokenSource;
135   FormatToken *&ResetToken;
136   unsigned PreviousLineLevel;
137   FormatTokenSource *PreviousTokenSource;
138 
139   FormatToken *Token;
140   FormatToken *PreviousToken;
141 };
142 
143 } // end anonymous namespace
144 
/// Scope guard that parks the parser's current line and lets the parser work
/// on a fresh \c UnwrappedLine until the guard is destroyed.
///
/// While the guard is alive, finished lines are collected either in the
/// parser's \c PreprocessorDirectives (if \p SwitchToPreprocessorLines is
/// set) or in the \c Children of the last token of the parked line (if that
/// line has tokens); otherwise the line container is left unchanged.
class ScopedLineState {
public:
  ScopedLineState(UnwrappedLineParser &Parser,
                  bool SwitchToPreprocessorLines = false)
      : Parser(Parser), OriginalLines(Parser.CurrentLines) {
    if (SwitchToPreprocessorLines)
      Parser.CurrentLines = &Parser.PreprocessorDirectives;
    else if (!Parser.Line->Tokens.empty())
      Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
    // Park the line in progress and start a new one that inherits its level
    // and preprocessor-directive flag.
    PreBlockLine = std::move(Parser.Line);
    Parser.Line = llvm::make_unique<UnwrappedLine>();
    Parser.Line->Level = PreBlockLine->Level;
    Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
  }

  ~ScopedLineState() {
    // Flush whatever is still pending on the temporary line.
    if (!Parser.Line->Tokens.empty()) {
      Parser.addUnwrappedLine();
    }
    assert(Parser.Line->Tokens.empty());
    // Bring back the parked line and the original line container.
    Parser.Line = std::move(PreBlockLine);
    // Lines were being collected as preprocessor directives: request a break
    // before the next token.
    if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
      Parser.MustBreakBeforeNextToken = true;
    Parser.CurrentLines = OriginalLines;
  }

private:
  UnwrappedLineParser &Parser;

  // The line that was in progress when the guard was created.
  std::unique_ptr<UnwrappedLine> PreBlockLine;
  // The line container that was active when the guard was created.
  SmallVectorImpl<UnwrappedLine> *OriginalLines;
};
177 
178 class CompoundStatementIndenter {
179 public:
180   CompoundStatementIndenter(UnwrappedLineParser *Parser,
181                             const FormatStyle &Style, unsigned &LineLevel)
182       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
183     if (Style.BraceWrapping.AfterControlStatement)
184       Parser->addUnwrappedLine();
185     if (Style.BraceWrapping.IndentBraces)
186       ++LineLevel;
187   }
188   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
189 
190 private:
191   unsigned &LineLevel;
192   unsigned OldLineLevel;
193 };
194 
195 namespace {
196 
197 class IndexedTokenSource : public FormatTokenSource {
198 public:
199   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
200       : Tokens(Tokens), Position(-1) {}
201 
202   FormatToken *getNextToken() override {
203     ++Position;
204     return Tokens[Position];
205   }
206 
207   unsigned getPosition() override {
208     assert(Position >= 0);
209     return Position;
210   }
211 
212   FormatToken *setPosition(unsigned P) override {
213     Position = P;
214     return Tokens[Position];
215   }
216 
217   void reset() { Position = -1; }
218 
219 private:
220   ArrayRef<FormatToken *> Tokens;
221   int Position;
222 };
223 
224 } // end anonymous namespace
225 
/// Creates a parser over \p Tokens that reports its results to \p Callback.
///
/// The parser starts with a fresh, empty line, collects lines into \c Lines,
/// and has no active token source yet (\c Tokens member is null until
/// \c parse() installs one). The preprocessor branch level starts at -1 and
/// all include-guard tracking state is cleared. \p Style, \p Keywords and
/// \p FirstStartColumn are stored for use during parsing;
/// \c Style.CommentPragmas is used to build the comment-pragma regex.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IfNdefCondition(nullptr), FoundIncludeGuardStart(false),
      IncludeGuardRejected(false), FirstStartColumn(FirstStartColumn) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IfNdefCondition = nullptr;
241   FoundIncludeGuardStart = false;
242   IncludeGuardRejected = false;
243   Line.reset(new UnwrappedLine);
244   CommentsBeforeNextToken.clear();
245   FormatTok = nullptr;
246   MustBreakBeforeNextToken = false;
247   PreprocessorDirectives.clear();
248   CurrentLines = &Lines;
249   DeclarationScopeStack.clear();
250   PPStack.clear();
251   Line->FirstStartColumn = FirstStartColumn;
252 }
253 
254 void UnwrappedLineParser::parse() {
255   IndexedTokenSource TokenSource(AllTokens);
256   Line->FirstStartColumn = FirstStartColumn;
257   do {
258     DEBUG(llvm::dbgs() << "----\n");
259     reset();
260     Tokens = &TokenSource;
261     TokenSource.reset();
262 
263     readToken();
264     parseFile();
265     // Create line with eof token.
266     pushToken(FormatTok);
267     addUnwrappedLine();
268 
269     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
270                                                   E = Lines.end();
271          I != E; ++I) {
272       Callback.consumeUnwrappedLine(*I);
273     }
274     Callback.finishRun();
275     Lines.clear();
276     while (!PPLevelBranchIndex.empty() &&
277            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
278       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
279       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
280     }
281     if (!PPLevelBranchIndex.empty()) {
282       ++PPLevelBranchIndex.back();
283       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
284       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
285     }
286   } while (!PPLevelBranchIndex.empty());
287 }
288 
289 void UnwrappedLineParser::parseFile() {
290   // The top-level context in a file always has declarations, except for pre-
291   // processor directives and JavaScript files.
292   bool MustBeDeclaration =
293       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
294   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
295                                           MustBeDeclaration);
296   if (Style.Language == FormatStyle::LK_TextProto)
297     parseBracedList();
298   else
299     parseLevel(/*HasOpeningBrace=*/false);
300   // Make sure to format the remaining tokens.
301   flushComments(true);
302   addUnwrappedLine();
303 }
304 
/// Parses structural elements one after another until eof is reached.
///
/// \param HasOpeningBrace If true, the level was entered through an opening
/// brace and the function returns as soon as it sees a closing brace (or a
/// token typed \c TT_MacroBlockEnd), leaving that token unconsumed. If false,
/// a closing brace is consumed and put on a line of its own.
void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  // Set once the first 'case'/'default' label of this level has been handled,
  // so that the level is raised at most once.
  bool SwitchLabelEncountered = false;
  do {
    tok::TokenKind kind = FormatTok->Tok.getKind();
    // Macro block begin/end tokens are treated like braces.
    if (FormatTok->Type == TT_MacroBlockBegin) {
      kind = tok::l_brace;
    } else if (FormatTok->Type == TT_MacroBlockEnd) {
      kind = tok::r_brace;
    }

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      // 'continue' jumps to the loop condition, i.e. the eof check below.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false);
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace)
        return;
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default:
    case tok::kw_case:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Raise the level for the first label if case labels are indented, or
      // if this is a preprocessor directive line at level 1.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    default:
      parseStructuralElement();
      break;
    }
  } while (!eof());
}
354 
355 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
356   // We'll parse forward through the tokens until we hit
357   // a closing brace or eof - note that getNextToken() will
358   // parse macros, so this will magically work inside macro
359   // definitions, too.
360   unsigned StoredPosition = Tokens->getPosition();
361   FormatToken *Tok = FormatTok;
362   const FormatToken *PrevTok = Tok->Previous;
363   // Keep a stack of positions of lbrace tokens. We will
364   // update information about whether an lbrace starts a
365   // braced init list or a different block during the loop.
366   SmallVector<FormatToken *, 8> LBraceStack;
367   assert(Tok->Tok.is(tok::l_brace));
368   do {
369     // Get next non-comment token.
370     FormatToken *NextTok;
371     unsigned ReadTokens = 0;
372     do {
373       NextTok = Tokens->getNextToken();
374       ++ReadTokens;
375     } while (NextTok->is(tok::comment));
376 
377     switch (Tok->Tok.getKind()) {
378     case tok::l_brace:
379       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
380         if (PrevTok->is(tok::colon))
381           // A colon indicates this code is in a type, or a braced list
382           // following a label in an object literal ({a: {b: 1}}). The code
383           // below could be confused by semicolons between the individual
384           // members in a type member list, which would normally trigger
385           // BK_Block. In both cases, this must be parsed as an inline braced
386           // init.
387           Tok->BlockKind = BK_BracedInit;
388         else if (PrevTok->is(tok::r_paren))
389           // `) { }` can only occur in function or method declarations in JS.
390           Tok->BlockKind = BK_Block;
391       } else {
392         Tok->BlockKind = BK_Unknown;
393       }
394       LBraceStack.push_back(Tok);
395       break;
396     case tok::r_brace:
397       if (LBraceStack.empty())
398         break;
399       if (LBraceStack.back()->BlockKind == BK_Unknown) {
400         bool ProbablyBracedList = false;
401         if (Style.Language == FormatStyle::LK_Proto) {
402           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
403         } else {
404           // Using OriginalColumn to distinguish between ObjC methods and
405           // binary operators is a bit hacky.
406           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
407                                   NextTok->OriginalColumn == 0;
408 
409           // If there is a comma, semicolon or right paren after the closing
410           // brace, we assume this is a braced initializer list.  Note that
411           // regardless how we mark inner braces here, we will overwrite the
412           // BlockKind later if we parse a braced list (where all blocks
413           // inside are by default braced lists), or when we explicitly detect
414           // blocks (for example while parsing lambdas).
415           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
416           // braced list in JS.
417           ProbablyBracedList =
418               (Style.Language == FormatStyle::LK_JavaScript &&
419                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
420                                 Keywords.kw_as)) ||
421               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
422               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
423                                tok::r_paren, tok::r_square, tok::l_brace,
424                                tok::l_square, tok::ellipsis) ||
425               (NextTok->is(tok::identifier) &&
426                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
427               (NextTok->is(tok::semi) &&
428                (!ExpectClassBody || LBraceStack.size() != 1)) ||
429               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
430         }
431         if (ProbablyBracedList) {
432           Tok->BlockKind = BK_BracedInit;
433           LBraceStack.back()->BlockKind = BK_BracedInit;
434         } else {
435           Tok->BlockKind = BK_Block;
436           LBraceStack.back()->BlockKind = BK_Block;
437         }
438       }
439       LBraceStack.pop_back();
440       break;
441     case tok::at:
442     case tok::semi:
443     case tok::kw_if:
444     case tok::kw_while:
445     case tok::kw_for:
446     case tok::kw_switch:
447     case tok::kw_try:
448     case tok::kw___try:
449       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
450         LBraceStack.back()->BlockKind = BK_Block;
451       break;
452     default:
453       break;
454     }
455     PrevTok = Tok;
456     Tok = NextTok;
457   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
458 
459   // Assume other blocks for all unclosed opening braces.
460   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
461     if (LBraceStack[i]->BlockKind == BK_Unknown)
462       LBraceStack[i]->BlockKind = BK_Block;
463   }
464 
465   FormatTok = Tokens->setPosition(StoredPosition);
466 }
467 
// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
473 
474 size_t UnwrappedLineParser::computePPHash() const {
475   size_t h = 0;
476   for (const auto &i : PPStack) {
477     hash_combine(h, size_t(i.Kind));
478     hash_combine(h, i.Line);
479   }
480   return h;
481 }
482 
/// Parses a block that starts at the current '{' (or macro-block-begin
/// token) up to and including the matching closing token.
///
/// \param MustBeDeclaration Declaration flag that is in effect inside the
/// block.
/// \param AddLevel If true, the contents of the block are parsed one level
/// deeper than the line that opened it.
/// \param MunchSemi If true, a ';' directly after the closing token is
/// consumed as well.
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
                                     bool MunchSemi) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->BlockKind = BK_Block;

  // Snapshot of the preprocessor branch stack at the opening token; compared
  // against the state at the closing token further down.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);

  // A macro block opener may be followed by an argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  // Index of the line containing the opening token, or kInvalidIndex if no
  // line has been recorded.
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevel)
    ++Line->Level;
  parseLevel(/*HasOpeningBrace=*/true);

  if (eof())
    return;

  // parseLevel() stopped on something other than the expected closing token:
  // restore the level, mark the token as a block and bail out.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->BlockKind = BK_Block;
    return;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();
  Line->Level = InitialLevel;

  // Only link the opening and closing lines when the preprocessor branch
  // stack hashed the same at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      // NOTE(review): the forward reference is stored in the opening line's
      // MatchingOpeningBlockLineIndex field - confirm this is the intended
      // field for a link from the opening line to the closing line.
      (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }
}
543 
544 static bool isGoogScope(const UnwrappedLine &Line) {
545   // FIXME: Closure-library specific stuff should not be hard-coded but be
546   // configurable.
547   if (Line.Tokens.size() < 4)
548     return false;
549   auto I = Line.Tokens.begin();
550   if (I->Tok->TokenText != "goog")
551     return false;
552   ++I;
553   if (I->Tok->isNot(tok::period))
554     return false;
555   ++I;
556   if (I->Tok->TokenText != "scope")
557     return false;
558   ++I;
559   return I->Tok->is(tok::l_paren);
560 }
561 
562 static bool isIIFE(const UnwrappedLine &Line,
563                    const AdditionalKeywords &Keywords) {
564   // Look for the start of an immediately invoked anonymous function.
565   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
566   // This is commonly done in JavaScript to create a new, anonymous scope.
567   // Example: (function() { ... })()
568   if (Line.Tokens.size() < 3)
569     return false;
570   auto I = Line.Tokens.begin();
571   if (I->Tok->isNot(tok::l_paren))
572     return false;
573   ++I;
574   if (I->Tok->isNot(Keywords.kw_function))
575     return false;
576   ++I;
577   return I->Tok->is(tok::l_paren);
578 }
579 
580 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
581                                    const FormatToken &InitialToken) {
582   if (InitialToken.is(tok::kw_namespace))
583     return Style.BraceWrapping.AfterNamespace;
584   if (InitialToken.is(tok::kw_class))
585     return Style.BraceWrapping.AfterClass;
586   if (InitialToken.is(tok::kw_union))
587     return Style.BraceWrapping.AfterUnion;
588   if (InitialToken.is(tok::kw_struct))
589     return Style.BraceWrapping.AfterStruct;
590   return false;
591 }
592 
593 void UnwrappedLineParser::parseChildBlock() {
594   FormatTok->BlockKind = BK_Block;
595   nextToken();
596   {
597     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
598                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
599     ScopedLineState LineState(*this);
600     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
601                                             /*MustBeDeclaration=*/false);
602     Line->Level += SkipIndent ? 0 : 1;
603     parseLevel(/*HasOpeningBrace=*/true);
604     flushComments(isOnNewLine(*FormatTok));
605     Line->Level -= SkipIndent ? 0 : 1;
606   }
607   nextToken();
608 }
609 
610 void UnwrappedLineParser::parsePPDirective() {
611   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
612   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
613   nextToken();
614 
615   if (!FormatTok->Tok.getIdentifierInfo()) {
616     parsePPUnknown();
617     return;
618   }
619 
620   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
621   case tok::pp_define:
622     parsePPDefine();
623     return;
624   case tok::pp_if:
625     parsePPIf(/*IfDef=*/false);
626     break;
627   case tok::pp_ifdef:
628   case tok::pp_ifndef:
629     parsePPIf(/*IfDef=*/true);
630     break;
631   case tok::pp_else:
632     parsePPElse();
633     break;
634   case tok::pp_elif:
635     parsePPElIf();
636     break;
637   case tok::pp_endif:
638     parsePPEndIf();
639     break;
640   default:
641     parsePPUnknown();
642     break;
643   }
644 }
645 
646 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
647   size_t Line = CurrentLines->size();
648   if (CurrentLines == &PreprocessorDirectives)
649     Line += Lines.size();
650 
651   if (Unreachable ||
652       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
653     PPStack.push_back({PP_Unreachable, Line});
654   else
655     PPStack.push_back({PP_Conditional, Line});
656 }
657 
658 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
659   ++PPBranchLevel;
660   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
661   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
662     PPLevelBranchIndex.push_back(0);
663     PPLevelBranchCount.push_back(0);
664   }
665   PPChainBranchIndex.push(0);
666   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
667   conditionalCompilationCondition(Unreachable || Skip);
668 }
669 
670 void UnwrappedLineParser::conditionalCompilationAlternative() {
671   if (!PPStack.empty())
672     PPStack.pop_back();
673   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
674   if (!PPChainBranchIndex.empty())
675     ++PPChainBranchIndex.top();
676   conditionalCompilationCondition(
677       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
678       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
679 }
680 
681 void UnwrappedLineParser::conditionalCompilationEnd() {
682   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
683   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
684     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
685       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
686     }
687   }
688   // Guard against #endif's without #if.
689   if (PPBranchLevel > -1)
690     --PPBranchLevel;
691   if (!PPChainBranchIndex.empty())
692     PPChainBranchIndex.pop();
693   if (!PPStack.empty())
694     PPStack.pop_back();
695 }
696 
697 void UnwrappedLineParser::parsePPIf(bool IfDef) {
698   bool IfNDef = FormatTok->is(tok::pp_ifndef);
699   nextToken();
700   bool Unreachable = false;
701   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
702     Unreachable = true;
703   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
704     Unreachable = true;
705   conditionalCompilationStart(Unreachable);
706   FormatToken *IfCondition = FormatTok;
707   // If there's a #ifndef on the first line, and the only lines before it are
708   // comments, it could be an include guard.
709   bool MaybeIncludeGuard = IfNDef;
710   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) {
711     for (auto &Line : Lines) {
712       if (!Line.Tokens.front().Tok->is(tok::comment)) {
713         MaybeIncludeGuard = false;
714         IncludeGuardRejected = true;
715         break;
716       }
717     }
718   }
719   --PPBranchLevel;
720   parsePPUnknown();
721   ++PPBranchLevel;
722   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard)
723     IfNdefCondition = IfCondition;
724 }
725 
726 void UnwrappedLineParser::parsePPElse() {
727   // If a potential include guard has an #else, it's not an include guard.
728   if (FoundIncludeGuardStart && PPBranchLevel == 0)
729     FoundIncludeGuardStart = false;
730   conditionalCompilationAlternative();
731   if (PPBranchLevel > -1)
732     --PPBranchLevel;
733   parsePPUnknown();
734   ++PPBranchLevel;
735 }
736 
737 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
738 
739 void UnwrappedLineParser::parsePPEndIf() {
740   conditionalCompilationEnd();
741   parsePPUnknown();
742   // If the #endif of a potential include guard is the last thing in the file,
743   // then we count it as a real include guard and subtract one from every
744   // preprocessor indent.
745   unsigned TokenPosition = Tokens->getPosition();
746   FormatToken *PeekNext = AllTokens[TokenPosition];
747   if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) &&
748       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
749     for (auto &Line : Lines)
750       if (Line.InPPDirective && Line.Level > 0)
751         --Line.Level;
752 }
753 
754 void UnwrappedLineParser::parsePPDefine() {
755   nextToken();
756 
757   if (FormatTok->Tok.getKind() != tok::identifier) {
758     parsePPUnknown();
759     return;
760   }
761   if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) {
762     FoundIncludeGuardStart = true;
763     for (auto &Line : Lines) {
764       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
765         FoundIncludeGuardStart = false;
766         break;
767       }
768     }
769   }
770   IfNdefCondition = nullptr;
771   nextToken();
772   if (FormatTok->Tok.getKind() == tok::l_paren &&
773       FormatTok->WhitespaceRange.getBegin() ==
774           FormatTok->WhitespaceRange.getEnd()) {
775     parseParens();
776   }
777   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
778     Line->Level += PPBranchLevel + 1;
779   addUnwrappedLine();
780   ++Line->Level;
781 
782   // Errors during a preprocessor directive can only affect the layout of the
783   // preprocessor directive, and thus we ignore them. An alternative approach
784   // would be to use the same approach we use on the file level (no
785   // re-indentation if there was a structural error) within the macro
786   // definition.
787   parseFile();
788 }
789 
790 void UnwrappedLineParser::parsePPUnknown() {
791   do {
792     nextToken();
793   } while (!eof());
794   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
795     Line->Level += PPBranchLevel + 1;
796   addUnwrappedLine();
797   IfNdefCondition = nullptr;
798 }
799 
800 // Here we blacklist certain tokens that are not usually the first token in an
801 // unwrapped line. This is used in attempt to distinguish macro calls without
802 // trailing semicolons from other constructs split to several lines.
803 static bool tokenCanStartNewLine(const clang::Token &Tok) {
804   // Semicolon can be a null-statement, l_square can be a start of a macro or
805   // a C++11 attribute, but this doesn't seem to be common.
806   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
807          Tok.isNot(tok::l_square) &&
808          // Tokens that can only be used as binary operators and a part of
809          // overloaded operator names.
810          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
811          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
812          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
813          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
814          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
815          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
816          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
817          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
818          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
819          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
820          Tok.isNot(tok::lesslessequal) &&
821          // Colon is used in labels, base class lists, initializer lists,
822          // range-based for loops, ternary operator, but should never be the
823          // first token in an unwrapped line.
824          Tok.isNot(tok::colon) &&
825          // 'noexcept' is a trailing annotation.
826          Tok.isNot(tok::kw_noexcept);
827 }
828 
829 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
830                           const FormatToken *FormatTok) {
831   // FIXME: This returns true for C/C++ keywords like 'struct'.
832   return FormatTok->is(tok::identifier) &&
833          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
834           !FormatTok->isOneOf(
835               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
836               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
837               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
838               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
839               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
840               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
841               Keywords.kw_from));
842 }
843 
844 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
845                                  const FormatToken *FormatTok) {
846   return FormatTok->Tok.isLiteral() ||
847          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
848          mustBeJSIdent(Keywords, FormatTok);
849 }
850 
851 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
852 // when encountered after a value (see mustBeJSIdentOrValue).
853 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
854                            const FormatToken *FormatTok) {
855   return FormatTok->isOneOf(
856       tok::kw_return, Keywords.kw_yield,
857       // conditionals
858       tok::kw_if, tok::kw_else,
859       // loops
860       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
861       // switch/case
862       tok::kw_switch, tok::kw_case,
863       // exceptions
864       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
865       // declaration
866       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
867       Keywords.kw_async, Keywords.kw_function,
868       // import/export
869       Keywords.kw_import, tok::kw_export);
870 }
871 
872 // readTokenWithJavaScriptASI reads the next token and terminates the current
873 // line if JavaScript Automatic Semicolon Insertion must
874 // happen between the current token and the next token.
875 //
876 // This method is conservative - it cannot cover all edge cases of JavaScript,
877 // but only aims to correctly handle certain well known cases. It *must not*
878 // return true in speculative cases.
879 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
880   FormatToken *Previous = FormatTok;
881   readToken();
882   FormatToken *Next = FormatTok;
883 
884   bool IsOnSameLine =
885       CommentsBeforeNextToken.empty()
886           ? Next->NewlinesBefore == 0
887           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
888   if (IsOnSameLine)
889     return;
890 
891   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
892   bool PreviousStartsTemplateExpr =
893       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
894   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
895     // If the token before the previous one is an '@', the previous token is an
896     // annotation and can precede another identifier/value.
897     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
898     if (PrePrevious->is(tok::at))
899       return;
900   }
901   if (Next->is(tok::exclaim) && PreviousMustBeValue)
902     return addUnwrappedLine();
903   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
904   bool NextEndsTemplateExpr =
905       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
906   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
907       (PreviousMustBeValue ||
908        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
909                          tok::minusminus)))
910     return addUnwrappedLine();
911   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
912       isJSDeclOrStmt(Keywords, Next))
913     return addUnwrappedLine();
914 }
915 
// Parses one structural element starting at the current token and adds the
// unwrapped line(s) for it.
//
// The first switch looks at the token the element starts with; many keywords
// are handed to a dedicated parse* method there, which finishes the element.
// Everything that breaks out of that switch continues in the loop below, which
// consumes tokens construct by construct until something ends the element
// (e.g. a ';', a '}', or a block) or the end of the input is reached.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include directive, optionally followed by a string literal,
  // forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    // '@' directly followed by '{' is parsed as a braced list.
    if (FormatTok->Tok.is(tok::l_brace)) {
      nextToken();
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::objc_autoreleasepool:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterObjCDeclaration)
          addUnwrappedLine();
        parseBlock(/*MustBeDeclaration=*/false);
      }
      addUnwrappedLine();
      return;
    case tok::objc_try:
      // This branch isn't strictly necessary (the kw_try case below would
      // do this too after the tok::at is parsed above).  But be explicit.
      parseTryCatch();
      return;
    default:
      break;
    }
    break;
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Mark every token up to the closing brace as finalized; the closing
      // brace itself ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    // 'inline namespace' is handled like a plain namespace.
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript the keyword is simply consumed; in all other
    // languages it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern' followed by a string literal and a '{' is parsed as a block
    // of declarations.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import', an optional 'public', a string literal and an
      // optional ';' form a line of their own.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals'/'slots' (and their Q_ variants) followed by ':' form a line of
    // their own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic part: consume the rest of the structural element.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^', then optionally an identifier or simple type specifier, a
      // parenthesized list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then restore the token position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Only all-uppercase names that are either at least five characters
        // long or function-like are treated as such macros.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: '= <' opens a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1313 
1314 bool UnwrappedLineParser::tryToParseLambda() {
1315   if (!Style.isCpp()) {
1316     nextToken();
1317     return false;
1318   }
1319   assert(FormatTok->is(tok::l_square));
1320   FormatToken &LSquare = *FormatTok;
1321   if (!tryToParseLambdaIntroducer())
1322     return false;
1323 
1324   while (FormatTok->isNot(tok::l_brace)) {
1325     if (FormatTok->isSimpleTypeSpecifier()) {
1326       nextToken();
1327       continue;
1328     }
1329     switch (FormatTok->Tok.getKind()) {
1330     case tok::l_brace:
1331       break;
1332     case tok::l_paren:
1333       parseParens();
1334       break;
1335     case tok::amp:
1336     case tok::star:
1337     case tok::kw_const:
1338     case tok::comma:
1339     case tok::less:
1340     case tok::greater:
1341     case tok::identifier:
1342     case tok::numeric_constant:
1343     case tok::coloncolon:
1344     case tok::kw_mutable:
1345       nextToken();
1346       break;
1347     case tok::arrow:
1348       FormatTok->Type = TT_LambdaArrow;
1349       nextToken();
1350       break;
1351     default:
1352       return true;
1353     }
1354   }
1355   LSquare.Type = TT_LambdaLSquare;
1356   parseChildBlock();
1357   return true;
1358 }
1359 
1360 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1361   const FormatToken *Previous = FormatTok->Previous;
1362   if (Previous &&
1363       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1364                          tok::kw_delete) ||
1365        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1366        Previous->isSimpleTypeSpecifier())) {
1367     nextToken();
1368     return false;
1369   }
1370   nextToken();
1371   parseSquare(/*LambdaIntroducer=*/true);
1372   return true;
1373 }
1374 
1375 void UnwrappedLineParser::tryToParseJSFunction() {
1376   assert(FormatTok->is(Keywords.kw_function) ||
1377          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1378   if (FormatTok->is(Keywords.kw_async))
1379     nextToken();
1380   // Consume "function".
1381   nextToken();
1382 
1383   // Consume * (generator function). Treat it like C++'s overloaded operators.
1384   if (FormatTok->is(tok::star)) {
1385     FormatTok->Type = TT_OverloadedOperator;
1386     nextToken();
1387   }
1388 
1389   // Consume function name.
1390   if (FormatTok->is(tok::identifier))
1391     nextToken();
1392 
1393   if (FormatTok->isNot(tok::l_paren))
1394     return;
1395 
1396   // Parse formal parameter list.
1397   parseParens();
1398 
1399   if (FormatTok->is(tok::colon)) {
1400     // Parse a type definition.
1401     nextToken();
1402 
1403     // Eat the type declaration. For braced inline object types, balance braces,
1404     // otherwise just parse until finding an l_brace for the function body.
1405     if (FormatTok->is(tok::l_brace))
1406       tryToParseBracedList();
1407     else
1408       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1409         nextToken();
1410   }
1411 
1412   if (FormatTok->is(tok::semi))
1413     return;
1414 
1415   parseChildBlock();
1416 }
1417 
1418 bool UnwrappedLineParser::tryToParseBracedList() {
1419   if (FormatTok->BlockKind == BK_Unknown)
1420     calculateBraceTypes();
1421   assert(FormatTok->BlockKind != BK_Unknown);
1422   if (FormatTok->BlockKind == BK_Block)
1423     return false;
1424   nextToken();
1425   parseBracedList();
1426   return true;
1427 }
1428 
1429 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1430                                           tok::TokenKind ClosingBraceKind) {
1431   bool HasError = false;
1432 
1433   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1434   // replace this by using parseAssigmentExpression() inside.
1435   do {
1436     if (Style.Language == FormatStyle::LK_JavaScript) {
1437       if (FormatTok->is(Keywords.kw_function) ||
1438           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1439         tryToParseJSFunction();
1440         continue;
1441       }
1442       if (FormatTok->is(TT_JsFatArrow)) {
1443         nextToken();
1444         // Fat arrows can be followed by simple expressions or by child blocks
1445         // in curly braces.
1446         if (FormatTok->is(tok::l_brace)) {
1447           parseChildBlock();
1448           continue;
1449         }
1450       }
1451       if (FormatTok->is(tok::l_brace)) {
1452         // Could be a method inside of a braced list `{a() { return 1; }}`.
1453         if (tryToParseBracedList())
1454           continue;
1455         parseChildBlock();
1456       }
1457     }
1458     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1459       nextToken();
1460       return !HasError;
1461     }
1462     switch (FormatTok->Tok.getKind()) {
1463     case tok::caret:
1464       nextToken();
1465       if (FormatTok->is(tok::l_brace)) {
1466         parseChildBlock();
1467       }
1468       break;
1469     case tok::l_square:
1470       tryToParseLambda();
1471       break;
1472     case tok::l_paren:
1473       parseParens();
1474       // JavaScript can just have free standing methods and getters/setters in
1475       // object literals. Detect them by a "{" following ")".
1476       if (Style.Language == FormatStyle::LK_JavaScript) {
1477         if (FormatTok->is(tok::l_brace))
1478           parseChildBlock();
1479         break;
1480       }
1481       break;
1482     case tok::l_brace:
1483       // Assume there are no blocks inside a braced init list apart
1484       // from the ones we explicitly parse out (like lambdas).
1485       FormatTok->BlockKind = BK_BracedInit;
1486       nextToken();
1487       parseBracedList();
1488       break;
1489     case tok::less:
1490       if (Style.Language == FormatStyle::LK_Proto) {
1491         nextToken();
1492         parseBracedList(/*ContinueOnSemicolons=*/false,
1493                         /*ClosingBraceKind=*/tok::greater);
1494       } else {
1495         nextToken();
1496       }
1497       break;
1498     case tok::semi:
1499       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1500       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1501       // used for error recovery if we have otherwise determined that this is
1502       // a braced list.
1503       if (Style.Language == FormatStyle::LK_JavaScript) {
1504         nextToken();
1505         break;
1506       }
1507       HasError = true;
1508       if (!ContinueOnSemicolons)
1509         return !HasError;
1510       nextToken();
1511       break;
1512     case tok::comma:
1513       nextToken();
1514       break;
1515     default:
1516       nextToken();
1517       break;
1518     }
1519   } while (!eof());
1520   return false;
1521 }
1522 
1523 void UnwrappedLineParser::parseParens() {
1524   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1525   nextToken();
1526   do {
1527     switch (FormatTok->Tok.getKind()) {
1528     case tok::l_paren:
1529       parseParens();
1530       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1531         parseChildBlock();
1532       break;
1533     case tok::r_paren:
1534       nextToken();
1535       return;
1536     case tok::r_brace:
1537       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1538       return;
1539     case tok::l_square:
1540       tryToParseLambda();
1541       break;
1542     case tok::l_brace:
1543       if (!tryToParseBracedList())
1544         parseChildBlock();
1545       break;
1546     case tok::at:
1547       nextToken();
1548       if (FormatTok->Tok.is(tok::l_brace)) {
1549         nextToken();
1550         parseBracedList();
1551       }
1552       break;
1553     case tok::kw_class:
1554       if (Style.Language == FormatStyle::LK_JavaScript)
1555         parseRecord(/*ParseAsExpr=*/true);
1556       else
1557         nextToken();
1558       break;
1559     case tok::identifier:
1560       if (Style.Language == FormatStyle::LK_JavaScript &&
1561           (FormatTok->is(Keywords.kw_function) ||
1562            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1563         tryToParseJSFunction();
1564       else
1565         nextToken();
1566       break;
1567     default:
1568       nextToken();
1569       break;
1570     }
1571   } while (!eof());
1572 }
1573 
1574 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1575   if (!LambdaIntroducer) {
1576     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1577     if (tryToParseLambda())
1578       return;
1579   }
1580   do {
1581     switch (FormatTok->Tok.getKind()) {
1582     case tok::l_paren:
1583       parseParens();
1584       break;
1585     case tok::r_square:
1586       nextToken();
1587       return;
1588     case tok::r_brace:
1589       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1590       return;
1591     case tok::l_square:
1592       parseSquare();
1593       break;
1594     case tok::l_brace: {
1595       if (!tryToParseBracedList())
1596         parseChildBlock();
1597       break;
1598     }
1599     case tok::at:
1600       nextToken();
1601       if (FormatTok->Tok.is(tok::l_brace)) {
1602         nextToken();
1603         parseBracedList();
1604       }
1605       break;
1606     default:
1607       nextToken();
1608       break;
1609     }
1610   } while (!eof());
1611 }
1612 
1613 void UnwrappedLineParser::parseIfThenElse() {
1614   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1615   nextToken();
1616   if (FormatTok->Tok.is(tok::kw_constexpr))
1617     nextToken();
1618   if (FormatTok->Tok.is(tok::l_paren))
1619     parseParens();
1620   bool NeedsUnwrappedLine = false;
1621   if (FormatTok->Tok.is(tok::l_brace)) {
1622     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1623     parseBlock(/*MustBeDeclaration=*/false);
1624     if (Style.BraceWrapping.BeforeElse)
1625       addUnwrappedLine();
1626     else
1627       NeedsUnwrappedLine = true;
1628   } else {
1629     addUnwrappedLine();
1630     ++Line->Level;
1631     parseStructuralElement();
1632     --Line->Level;
1633   }
1634   if (FormatTok->Tok.is(tok::kw_else)) {
1635     nextToken();
1636     if (FormatTok->Tok.is(tok::l_brace)) {
1637       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1638       parseBlock(/*MustBeDeclaration=*/false);
1639       addUnwrappedLine();
1640     } else if (FormatTok->Tok.is(tok::kw_if)) {
1641       parseIfThenElse();
1642     } else {
1643       addUnwrappedLine();
1644       ++Line->Level;
1645       parseStructuralElement();
1646       if (FormatTok->is(tok::eof))
1647         addUnwrappedLine();
1648       --Line->Level;
1649     }
1650   } else if (NeedsUnwrappedLine) {
1651     addUnwrappedLine();
1652   }
1653 }
1654 
1655 void UnwrappedLineParser::parseTryCatch() {
1656   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1657   nextToken();
1658   bool NeedsUnwrappedLine = false;
1659   if (FormatTok->is(tok::colon)) {
1660     // We are in a function try block, what comes is an initializer list.
1661     nextToken();
1662     while (FormatTok->is(tok::identifier)) {
1663       nextToken();
1664       if (FormatTok->is(tok::l_paren))
1665         parseParens();
1666       if (FormatTok->is(tok::comma))
1667         nextToken();
1668     }
1669   }
1670   // Parse try with resource.
1671   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1672     parseParens();
1673   }
1674   if (FormatTok->is(tok::l_brace)) {
1675     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1676     parseBlock(/*MustBeDeclaration=*/false);
1677     if (Style.BraceWrapping.BeforeCatch) {
1678       addUnwrappedLine();
1679     } else {
1680       NeedsUnwrappedLine = true;
1681     }
1682   } else if (!FormatTok->is(tok::kw_catch)) {
1683     // The C++ standard requires a compound-statement after a try.
1684     // If there's none, we try to assume there's a structuralElement
1685     // and try to continue.
1686     addUnwrappedLine();
1687     ++Line->Level;
1688     parseStructuralElement();
1689     --Line->Level;
1690   }
1691   while (1) {
1692     if (FormatTok->is(tok::at))
1693       nextToken();
1694     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1695                              tok::kw___finally) ||
1696           ((Style.Language == FormatStyle::LK_Java ||
1697             Style.Language == FormatStyle::LK_JavaScript) &&
1698            FormatTok->is(Keywords.kw_finally)) ||
1699           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1700            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1701       break;
1702     nextToken();
1703     while (FormatTok->isNot(tok::l_brace)) {
1704       if (FormatTok->is(tok::l_paren)) {
1705         parseParens();
1706         continue;
1707       }
1708       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1709         return;
1710       nextToken();
1711     }
1712     NeedsUnwrappedLine = false;
1713     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1714     parseBlock(/*MustBeDeclaration=*/false);
1715     if (Style.BraceWrapping.BeforeCatch)
1716       addUnwrappedLine();
1717     else
1718       NeedsUnwrappedLine = true;
1719   }
1720   if (NeedsUnwrappedLine)
1721     addUnwrappedLine();
1722 }
1723 
1724 void UnwrappedLineParser::parseNamespace() {
1725   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1726 
1727   const FormatToken &InitialToken = *FormatTok;
1728   nextToken();
1729   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1730     nextToken();
1731   if (FormatTok->Tok.is(tok::l_brace)) {
1732     if (ShouldBreakBeforeBrace(Style, InitialToken))
1733       addUnwrappedLine();
1734 
1735     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1736                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1737                      DeclarationScopeStack.size() > 1);
1738     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1739     // Munch the semicolon after a namespace. This is more common than one would
1740     // think. Puttin the semicolon into its own line is very ugly.
1741     if (FormatTok->Tok.is(tok::semi))
1742       nextToken();
1743     addUnwrappedLine();
1744   }
1745   // FIXME: Add error handling.
1746 }
1747 
1748 void UnwrappedLineParser::parseNew() {
1749   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1750   nextToken();
1751   if (Style.Language != FormatStyle::LK_Java)
1752     return;
1753 
1754   // In Java, we can parse everything up to the parens, which aren't optional.
1755   do {
1756     // There should not be a ;, { or } before the new's open paren.
1757     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1758       return;
1759 
1760     // Consume the parens.
1761     if (FormatTok->is(tok::l_paren)) {
1762       parseParens();
1763 
1764       // If there is a class body of an anonymous class, consume that as child.
1765       if (FormatTok->is(tok::l_brace))
1766         parseChildBlock();
1767       return;
1768     }
1769     nextToken();
1770   } while (!eof());
1771 }
1772 
1773 void UnwrappedLineParser::parseForOrWhileLoop() {
1774   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1775          "'for', 'while' or foreach macro expected");
1776   nextToken();
1777   // JS' for await ( ...
1778   if (Style.Language == FormatStyle::LK_JavaScript &&
1779       FormatTok->is(Keywords.kw_await))
1780     nextToken();
1781   if (FormatTok->Tok.is(tok::l_paren))
1782     parseParens();
1783   if (FormatTok->Tok.is(tok::l_brace)) {
1784     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1785     parseBlock(/*MustBeDeclaration=*/false);
1786     addUnwrappedLine();
1787   } else {
1788     addUnwrappedLine();
1789     ++Line->Level;
1790     parseStructuralElement();
1791     --Line->Level;
1792   }
1793 }
1794 
1795 void UnwrappedLineParser::parseDoWhile() {
1796   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1797   nextToken();
1798   if (FormatTok->Tok.is(tok::l_brace)) {
1799     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1800     parseBlock(/*MustBeDeclaration=*/false);
1801     if (Style.BraceWrapping.IndentBraces)
1802       addUnwrappedLine();
1803   } else {
1804     addUnwrappedLine();
1805     ++Line->Level;
1806     parseStructuralElement();
1807     --Line->Level;
1808   }
1809 
1810   // FIXME: Add error handling.
1811   if (!FormatTok->Tok.is(tok::kw_while)) {
1812     addUnwrappedLine();
1813     return;
1814   }
1815 
1816   nextToken();
1817   parseStructuralElement();
1818 }
1819 
1820 void UnwrappedLineParser::parseLabel() {
1821   nextToken();
1822   unsigned OldLineLevel = Line->Level;
1823   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1824     --Line->Level;
1825   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1826     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1827     parseBlock(/*MustBeDeclaration=*/false);
1828     if (FormatTok->Tok.is(tok::kw_break)) {
1829       if (Style.BraceWrapping.AfterControlStatement)
1830         addUnwrappedLine();
1831       parseStructuralElement();
1832     }
1833     addUnwrappedLine();
1834   } else {
1835     if (FormatTok->is(tok::semi))
1836       nextToken();
1837     addUnwrappedLine();
1838   }
1839   Line->Level = OldLineLevel;
1840   if (FormatTok->isNot(tok::l_brace)) {
1841     parseStructuralElement();
1842     addUnwrappedLine();
1843   }
1844 }
1845 
1846 void UnwrappedLineParser::parseCaseLabel() {
1847   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1848   // FIXME: fix handling of complex expressions here.
1849   do {
1850     nextToken();
1851   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1852   parseLabel();
1853 }
1854 
1855 void UnwrappedLineParser::parseSwitch() {
1856   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1857   nextToken();
1858   if (FormatTok->Tok.is(tok::l_paren))
1859     parseParens();
1860   if (FormatTok->Tok.is(tok::l_brace)) {
1861     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1862     parseBlock(/*MustBeDeclaration=*/false);
1863     addUnwrappedLine();
1864   } else {
1865     addUnwrappedLine();
1866     ++Line->Level;
1867     parseStructuralElement();
1868     --Line->Level;
1869   }
1870 }
1871 
1872 void UnwrappedLineParser::parseAccessSpecifier() {
1873   nextToken();
1874   // Understand Qt's slots.
1875   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1876     nextToken();
1877   // Otherwise, we don't know what it is, and we'd better keep the next token.
1878   if (FormatTok->Tok.is(tok::colon))
1879     nextToken();
1880   addUnwrappedLine();
1881 }
1882 
1883 bool UnwrappedLineParser::parseEnum() {
1884   // Won't be 'enum' for NS_ENUMs.
1885   if (FormatTok->Tok.is(tok::kw_enum))
1886     nextToken();
1887 
1888   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1889   // declarations. An "enum" keyword followed by a colon would be a syntax
1890   // error and thus assume it is just an identifier.
1891   if (Style.Language == FormatStyle::LK_JavaScript &&
1892       FormatTok->isOneOf(tok::colon, tok::question))
1893     return false;
1894 
1895   // Eat up enum class ...
1896   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1897     nextToken();
1898 
1899   while (FormatTok->Tok.getIdentifierInfo() ||
1900          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1901                             tok::greater, tok::comma, tok::question)) {
1902     nextToken();
1903     // We can have macros or attributes in between 'enum' and the enum name.
1904     if (FormatTok->is(tok::l_paren))
1905       parseParens();
1906     if (FormatTok->is(tok::identifier)) {
1907       nextToken();
1908       // If there are two identifiers in a row, this is likely an elaborate
1909       // return type. In Java, this can be "implements", etc.
1910       if (Style.isCpp() && FormatTok->is(tok::identifier))
1911         return false;
1912     }
1913   }
1914 
1915   // Just a declaration or something is wrong.
1916   if (FormatTok->isNot(tok::l_brace))
1917     return true;
1918   FormatTok->BlockKind = BK_Block;
1919 
1920   if (Style.Language == FormatStyle::LK_Java) {
1921     // Java enums are different.
1922     parseJavaEnumBody();
1923     return true;
1924   }
1925   if (Style.Language == FormatStyle::LK_Proto) {
1926     parseBlock(/*MustBeDeclaration=*/true);
1927     return true;
1928   }
1929 
1930   // Parse enum body.
1931   nextToken();
1932   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1933   if (HasError) {
1934     if (FormatTok->is(tok::semi))
1935       nextToken();
1936     addUnwrappedLine();
1937   }
1938   return true;
1939 
1940   // There is no addUnwrappedLine() here so that we fall through to parsing a
1941   // structural element afterwards. Thus, in "enum A {} n, m;",
1942   // "} n, m;" will end up in one unwrapped line.
1943 }
1944 
1945 void UnwrappedLineParser::parseJavaEnumBody() {
1946   // Determine whether the enum is simple, i.e. does not have a semicolon or
1947   // constants with class bodies. Simple enums can be formatted like braced
1948   // lists, contracted to a single line, etc.
1949   unsigned StoredPosition = Tokens->getPosition();
1950   bool IsSimple = true;
1951   FormatToken *Tok = Tokens->getNextToken();
1952   while (Tok) {
1953     if (Tok->is(tok::r_brace))
1954       break;
1955     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1956       IsSimple = false;
1957       break;
1958     }
1959     // FIXME: This will also mark enums with braces in the arguments to enum
1960     // constants as "not simple". This is probably fine in practice, though.
1961     Tok = Tokens->getNextToken();
1962   }
1963   FormatTok = Tokens->setPosition(StoredPosition);
1964 
1965   if (IsSimple) {
1966     nextToken();
1967     parseBracedList();
1968     addUnwrappedLine();
1969     return;
1970   }
1971 
1972   // Parse the body of a more complex enum.
1973   // First add a line for everything up to the "{".
1974   nextToken();
1975   addUnwrappedLine();
1976   ++Line->Level;
1977 
1978   // Parse the enum constants.
1979   while (FormatTok) {
1980     if (FormatTok->is(tok::l_brace)) {
1981       // Parse the constant's class body.
1982       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1983                  /*MunchSemi=*/false);
1984     } else if (FormatTok->is(tok::l_paren)) {
1985       parseParens();
1986     } else if (FormatTok->is(tok::comma)) {
1987       nextToken();
1988       addUnwrappedLine();
1989     } else if (FormatTok->is(tok::semi)) {
1990       nextToken();
1991       addUnwrappedLine();
1992       break;
1993     } else if (FormatTok->is(tok::r_brace)) {
1994       addUnwrappedLine();
1995       break;
1996     } else {
1997       nextToken();
1998     }
1999   }
2000 
2001   // Parse the class body after the enum's ";" if any.
2002   parseLevel(/*HasOpeningBrace=*/true);
2003   nextToken();
2004   --Line->Level;
2005   addUnwrappedLine();
2006 }
2007 
2008 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2009   const FormatToken &InitialToken = *FormatTok;
2010   nextToken();
2011 
2012   // The actual identifier can be a nested name specifier, and in macros
2013   // it is often token-pasted.
2014   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2015                             tok::kw___attribute, tok::kw___declspec,
2016                             tok::kw_alignas) ||
2017          ((Style.Language == FormatStyle::LK_Java ||
2018            Style.Language == FormatStyle::LK_JavaScript) &&
2019           FormatTok->isOneOf(tok::period, tok::comma))) {
2020     if (Style.Language == FormatStyle::LK_JavaScript &&
2021         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2022       // JavaScript/TypeScript supports inline object types in
2023       // extends/implements positions:
2024       //     class Foo implements {bar: number} { }
2025       nextToken();
2026       if (FormatTok->is(tok::l_brace)) {
2027         tryToParseBracedList();
2028         continue;
2029       }
2030     }
2031     bool IsNonMacroIdentifier =
2032         FormatTok->is(tok::identifier) &&
2033         FormatTok->TokenText != FormatTok->TokenText.upper();
2034     nextToken();
2035     // We can have macros or attributes in between 'class' and the class name.
2036     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2037       parseParens();
2038   }
2039 
2040   // Note that parsing away template declarations here leads to incorrectly
2041   // accepting function declarations as record declarations.
2042   // In general, we cannot solve this problem. Consider:
2043   // class A<int> B() {}
2044   // which can be a function definition or a class definition when B() is a
2045   // macro. If we find enough real-world cases where this is a problem, we
2046   // can parse for the 'template' keyword in the beginning of the statement,
2047   // and thus rule out the record production in case there is no template
2048   // (this would still leave us with an ambiguity between template function
2049   // and class declarations).
2050   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2051     while (!eof()) {
2052       if (FormatTok->is(tok::l_brace)) {
2053         calculateBraceTypes(/*ExpectClassBody=*/true);
2054         if (!tryToParseBracedList())
2055           break;
2056       }
2057       if (FormatTok->Tok.is(tok::semi))
2058         return;
2059       nextToken();
2060     }
2061   }
2062   if (FormatTok->Tok.is(tok::l_brace)) {
2063     if (ParseAsExpr) {
2064       parseChildBlock();
2065     } else {
2066       if (ShouldBreakBeforeBrace(Style, InitialToken))
2067         addUnwrappedLine();
2068 
2069       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2070                  /*MunchSemi=*/false);
2071     }
2072   }
2073   // There is no addUnwrappedLine() here so that we fall through to parsing a
2074   // structural element afterwards. Thus, in "class A {} n, m;",
2075   // "} n, m;" will end up in one unwrapped line.
2076 }
2077 
2078 void UnwrappedLineParser::parseObjCProtocolList() {
2079   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2080   do
2081     nextToken();
2082   while (!eof() && FormatTok->Tok.isNot(tok::greater));
2083   nextToken(); // Skip '>'.
2084 }
2085 
2086 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2087   do {
2088     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2089       nextToken();
2090       addUnwrappedLine();
2091       break;
2092     }
2093     if (FormatTok->is(tok::l_brace)) {
2094       parseBlock(/*MustBeDeclaration=*/false);
2095       // In ObjC interfaces, nothing should be following the "}".
2096       addUnwrappedLine();
2097     } else if (FormatTok->is(tok::r_brace)) {
2098       // Ignore stray "}". parseStructuralElement doesn't consume them.
2099       nextToken();
2100       addUnwrappedLine();
2101     } else {
2102       parseStructuralElement();
2103     }
2104   } while (!eof());
2105 }
2106 
2107 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2108   nextToken();
2109   nextToken(); // interface name
2110 
2111   // @interface can be followed by either a base class, or a category.
2112   if (FormatTok->Tok.is(tok::colon)) {
2113     nextToken();
2114     nextToken(); // base class name
2115   } else if (FormatTok->Tok.is(tok::l_paren))
2116     // Skip category, if present.
2117     parseParens();
2118 
2119   if (FormatTok->Tok.is(tok::less))
2120     parseObjCProtocolList();
2121 
2122   if (FormatTok->Tok.is(tok::l_brace)) {
2123     if (Style.BraceWrapping.AfterObjCDeclaration)
2124       addUnwrappedLine();
2125     parseBlock(/*MustBeDeclaration=*/true);
2126   }
2127 
2128   // With instance variables, this puts '}' on its own line.  Without instance
2129   // variables, this ends the @interface line.
2130   addUnwrappedLine();
2131 
2132   parseObjCUntilAtEnd();
2133 }
2134 
2135 void UnwrappedLineParser::parseObjCProtocol() {
2136   nextToken();
2137   nextToken(); // protocol name
2138 
2139   if (FormatTok->Tok.is(tok::less))
2140     parseObjCProtocolList();
2141 
2142   // Check for protocol declaration.
2143   if (FormatTok->Tok.is(tok::semi)) {
2144     nextToken();
2145     return addUnwrappedLine();
2146   }
2147 
2148   addUnwrappedLine();
2149   parseObjCUntilAtEnd();
2150 }
2151 
2152 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2153   bool IsImport = FormatTok->is(Keywords.kw_import);
2154   assert(IsImport || FormatTok->is(tok::kw_export));
2155   nextToken();
2156 
2157   // Consume the "default" in "export default class/function".
2158   if (FormatTok->is(tok::kw_default))
2159     nextToken();
2160 
2161   // Consume "async function", "function" and "default function", so that these
2162   // get parsed as free-standing JS functions, i.e. do not require a trailing
2163   // semicolon.
2164   if (FormatTok->is(Keywords.kw_async))
2165     nextToken();
2166   if (FormatTok->is(Keywords.kw_function)) {
2167     nextToken();
2168     return;
2169   }
2170 
2171   // For imports, `export *`, `export {...}`, consume the rest of the line up
2172   // to the terminating `;`. For everything else, just return and continue
2173   // parsing the structural element, i.e. the declaration or expression for
2174   // `export default`.
2175   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2176       !FormatTok->isStringLiteral())
2177     return;
2178 
2179   while (!eof()) {
2180     if (FormatTok->is(tok::semi))
2181       return;
2182     if (Line->Tokens.empty()) {
2183       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2184       // import statement should terminate.
2185       return;
2186     }
2187     if (FormatTok->is(tok::l_brace)) {
2188       FormatTok->BlockKind = BK_Block;
2189       nextToken();
2190       parseBracedList();
2191     } else {
2192       nextToken();
2193     }
2194   }
2195 }
2196 
2197 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2198                                                  StringRef Prefix = "") {
2199   llvm::dbgs() << Prefix << "Line(" << Line.Level
2200                << ", FSC=" << Line.FirstStartColumn << ")"
2201                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2202   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2203                                                     E = Line.Tokens.end();
2204        I != E; ++I) {
2205     llvm::dbgs() << I->Tok->Tok.getName() << "["
2206                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2207                  << "] ";
2208   }
2209   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2210                                                     E = Line.Tokens.end();
2211        I != E; ++I) {
2212     const UnwrappedLineNode &Node = *I;
2213     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2214              I = Node.Children.begin(),
2215              E = Node.Children.end();
2216          I != E; ++I) {
2217       printDebugInfo(*I, "\nChild: ");
2218     }
2219   }
2220   llvm::dbgs() << "\n";
2221 }
2222 
2223 void UnwrappedLineParser::addUnwrappedLine() {
2224   if (Line->Tokens.empty())
2225     return;
2226   DEBUG({
2227     if (CurrentLines == &Lines)
2228       printDebugInfo(*Line);
2229   });
2230   CurrentLines->push_back(std::move(*Line));
2231   Line->Tokens.clear();
2232   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2233   Line->FirstStartColumn = 0;
2234   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2235     CurrentLines->append(
2236         std::make_move_iterator(PreprocessorDirectives.begin()),
2237         std::make_move_iterator(PreprocessorDirectives.end()));
2238     PreprocessorDirectives.clear();
2239   }
2240   // Disconnect the current token from the last token on the previous line.
2241   FormatTok->Previous = nullptr;
2242 }
2243 
2244 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2245 
2246 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2247   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2248          FormatTok.NewlinesBefore > 0;
2249 }
2250 
2251 // Checks if \p FormatTok is a line comment that continues the line comment
2252 // section on \p Line.
2253 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2254                                         const UnwrappedLine &Line,
2255                                         llvm::Regex &CommentPragmasRegex) {
2256   if (Line.Tokens.empty())
2257     return false;
2258 
2259   StringRef IndentContent = FormatTok.TokenText;
2260   if (FormatTok.TokenText.startswith("//") ||
2261       FormatTok.TokenText.startswith("/*"))
2262     IndentContent = FormatTok.TokenText.substr(2);
2263   if (CommentPragmasRegex.match(IndentContent))
2264     return false;
2265 
2266   // If Line starts with a line comment, then FormatTok continues the comment
2267   // section if its original column is greater or equal to the original start
2268   // column of the line.
2269   //
2270   // Define the min column token of a line as follows: if a line ends in '{' or
2271   // contains a '{' followed by a line comment, then the min column token is
2272   // that '{'. Otherwise, the min column token of the line is the first token of
2273   // the line.
2274   //
2275   // If Line starts with a token other than a line comment, then FormatTok
2276   // continues the comment section if its original column is greater than the
2277   // original start column of the min column token of the line.
2278   //
2279   // For example, the second line comment continues the first in these cases:
2280   //
2281   // // first line
2282   // // second line
2283   //
2284   // and:
2285   //
2286   // // first line
2287   //  // second line
2288   //
2289   // and:
2290   //
2291   // int i; // first line
2292   //  // second line
2293   //
2294   // and:
2295   //
2296   // do { // first line
2297   //      // second line
2298   //   int i;
2299   // } while (true);
2300   //
2301   // and:
2302   //
2303   // enum {
2304   //   a, // first line
2305   //    // second line
2306   //   b
2307   // };
2308   //
2309   // The second line comment doesn't continue the first in these cases:
2310   //
2311   //   // first line
2312   //  // second line
2313   //
2314   // and:
2315   //
2316   // int i; // first line
2317   // // second line
2318   //
2319   // and:
2320   //
2321   // do { // first line
2322   //   // second line
2323   //   int i;
2324   // } while (true);
2325   //
2326   // and:
2327   //
2328   // enum {
2329   //   a, // first line
2330   //   // second line
2331   // };
2332   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2333 
2334   // Scan for '{//'. If found, use the column of '{' as a min column for line
2335   // comment section continuation.
2336   const FormatToken *PreviousToken = nullptr;
2337   for (const UnwrappedLineNode &Node : Line.Tokens) {
2338     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2339         isLineComment(*Node.Tok)) {
2340       MinColumnToken = PreviousToken;
2341       break;
2342     }
2343     PreviousToken = Node.Tok;
2344 
2345     // Grab the last newline preceding a token in this unwrapped line.
2346     if (Node.Tok->NewlinesBefore > 0) {
2347       MinColumnToken = Node.Tok;
2348     }
2349   }
2350   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2351     MinColumnToken = PreviousToken;
2352   }
2353 
2354   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2355                               MinColumnToken);
2356 }
2357 
2358 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2359   bool JustComments = Line->Tokens.empty();
2360   for (SmallVectorImpl<FormatToken *>::const_iterator
2361            I = CommentsBeforeNextToken.begin(),
2362            E = CommentsBeforeNextToken.end();
2363        I != E; ++I) {
2364     // Line comments that belong to the same line comment section are put on the
2365     // same line since later we might want to reflow content between them.
2366     // Additional fine-grained breaking of line comment sections is controlled
2367     // by the class BreakableLineCommentSection in case it is desirable to keep
2368     // several line comment sections in the same unwrapped line.
2369     //
2370     // FIXME: Consider putting separate line comment sections as children to the
2371     // unwrapped line instead.
2372     (*I)->ContinuesLineCommentSection =
2373         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2374     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2375       addUnwrappedLine();
2376     pushToken(*I);
2377   }
2378   if (NewlineBeforeNext && JustComments)
2379     addUnwrappedLine();
2380   CommentsBeforeNextToken.clear();
2381 }
2382 
2383 void UnwrappedLineParser::nextToken(int LevelDifference) {
2384   if (eof())
2385     return;
2386   flushComments(isOnNewLine(*FormatTok));
2387   pushToken(FormatTok);
2388   FormatToken *Previous = FormatTok;
2389   if (Style.Language != FormatStyle::LK_JavaScript)
2390     readToken(LevelDifference);
2391   else
2392     readTokenWithJavaScriptASI();
2393   FormatTok->Previous = Previous;
2394 }
2395 
2396 void UnwrappedLineParser::distributeComments(
2397     const SmallVectorImpl<FormatToken *> &Comments,
2398     const FormatToken *NextTok) {
2399   // Whether or not a line comment token continues a line is controlled by
2400   // the method continuesLineCommentSection, with the following caveat:
2401   //
2402   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2403   // that each comment line from the trail is aligned with the next token, if
2404   // the next token exists. If a trail exists, the beginning of the maximal
2405   // trail is marked as a start of a new comment section.
2406   //
2407   // For example in this code:
2408   //
2409   // int a; // line about a
2410   //   // line 1 about b
2411   //   // line 2 about b
2412   //   int b;
2413   //
2414   // the two lines about b form a maximal trail, so there are two sections, the
2415   // first one consisting of the single comment "// line about a" and the
2416   // second one consisting of the next two comments.
2417   if (Comments.empty())
2418     return;
2419   bool ShouldPushCommentsInCurrentLine = true;
2420   bool HasTrailAlignedWithNextToken = false;
2421   unsigned StartOfTrailAlignedWithNextToken = 0;
2422   if (NextTok) {
2423     // We are skipping the first element intentionally.
2424     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2425       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2426         HasTrailAlignedWithNextToken = true;
2427         StartOfTrailAlignedWithNextToken = i;
2428       }
2429     }
2430   }
2431   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2432     FormatToken *FormatTok = Comments[i];
2433     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2434       FormatTok->ContinuesLineCommentSection = false;
2435     } else {
2436       FormatTok->ContinuesLineCommentSection =
2437           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2438     }
2439     if (!FormatTok->ContinuesLineCommentSection &&
2440         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2441       ShouldPushCommentsInCurrentLine = false;
2442     }
2443     if (ShouldPushCommentsInCurrentLine) {
2444       pushToken(FormatTok);
2445     } else {
2446       CommentsBeforeNextToken.push_back(FormatTok);
2447     }
2448   }
2449 }
2450 
2451 void UnwrappedLineParser::readToken(int LevelDifference) {
2452   SmallVector<FormatToken *, 1> Comments;
2453   do {
2454     FormatTok = Tokens->getNextToken();
2455     assert(FormatTok);
2456     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2457            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2458       distributeComments(Comments, FormatTok);
2459       Comments.clear();
2460       // If there is an unfinished unwrapped line, we flush the preprocessor
2461       // directives only after that unwrapped line was finished later.
2462       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2463       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2464       assert((LevelDifference >= 0 ||
2465               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2466              "LevelDifference makes Line->Level negative");
2467       Line->Level += LevelDifference;
2468       // Comments stored before the preprocessor directive need to be output
2469       // before the preprocessor directive, at the same level as the
2470       // preprocessor directive, as we consider them to apply to the directive.
2471       flushComments(isOnNewLine(*FormatTok));
2472       parsePPDirective();
2473     }
2474     while (FormatTok->Type == TT_ConflictStart ||
2475            FormatTok->Type == TT_ConflictEnd ||
2476            FormatTok->Type == TT_ConflictAlternative) {
2477       if (FormatTok->Type == TT_ConflictStart) {
2478         conditionalCompilationStart(/*Unreachable=*/false);
2479       } else if (FormatTok->Type == TT_ConflictAlternative) {
2480         conditionalCompilationAlternative();
2481       } else if (FormatTok->Type == TT_ConflictEnd) {
2482         conditionalCompilationEnd();
2483       }
2484       FormatTok = Tokens->getNextToken();
2485       FormatTok->MustBreakBefore = true;
2486     }
2487 
2488     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2489         !Line->InPPDirective) {
2490       continue;
2491     }
2492 
2493     if (!FormatTok->Tok.is(tok::comment)) {
2494       distributeComments(Comments, FormatTok);
2495       Comments.clear();
2496       return;
2497     }
2498 
2499     Comments.push_back(FormatTok);
2500   } while (!eof());
2501 
2502   distributeComments(Comments, nullptr);
2503   Comments.clear();
2504 }
2505 
2506 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2507   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2508   if (MustBreakBeforeNextToken) {
2509     Line->Tokens.back().Tok->MustBreakBefore = true;
2510     MustBreakBeforeNextToken = false;
2511   }
2512 }
2513 
2514 } // end namespace format
2515 } // end namespace clang
2516