1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 static bool isLineComment(const FormatToken &FormatTok) {
59   return FormatTok.is(tok::comment) &&
60          FormatTok.TokenText.startswith("//");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     TokenSource = this;
86     Line.Level = 0;
87     Line.InPPDirective = true;
88   }
89 
90   ~ScopedMacroState() override {
91     TokenSource = PreviousTokenSource;
92     ResetToken = Token;
93     Line.InPPDirective = false;
94     Line.Level = PreviousLineLevel;
95   }
96 
97   FormatToken *getNextToken() override {
98     // The \c UnwrappedLineParser guards against this by never calling
99     // \c getNextToken() after it has encountered the first eof token.
100     assert(!eof());
101     PreviousToken = Token;
102     Token = PreviousTokenSource->getNextToken();
103     if (eof())
104       return getFakeEOF();
105     return Token;
106   }
107 
108   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
109 
110   FormatToken *setPosition(unsigned Position) override {
111     PreviousToken = nullptr;
112     Token = PreviousTokenSource->setPosition(Position);
113     return Token;
114   }
115 
116 private:
117   bool eof() {
118     return Token && Token->HasUnescapedNewline &&
119            !continuesLineComment(*Token, PreviousToken,
120                                  /*MinColumnToken=*/PreviousToken);
121   }
122 
123   FormatToken *getFakeEOF() {
124     static bool EOFInitialized = false;
125     static FormatToken FormatTok;
126     if (!EOFInitialized) {
127       FormatTok.Tok.startToken();
128       FormatTok.Tok.setKind(tok::eof);
129       EOFInitialized = true;
130     }
131     return &FormatTok;
132   }
133 
134   UnwrappedLine &Line;
135   FormatTokenSource *&TokenSource;
136   FormatToken *&ResetToken;
137   unsigned PreviousLineLevel;
138   FormatTokenSource *PreviousTokenSource;
139 
140   FormatToken *Token;
141   FormatToken *PreviousToken;
142 };
143 
144 } // end anonymous namespace
145 
146 class ScopedLineState {
147 public:
148   ScopedLineState(UnwrappedLineParser &Parser,
149                   bool SwitchToPreprocessorLines = false)
150       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
151     if (SwitchToPreprocessorLines)
152       Parser.CurrentLines = &Parser.PreprocessorDirectives;
153     else if (!Parser.Line->Tokens.empty())
154       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
155     PreBlockLine = std::move(Parser.Line);
156     Parser.Line = llvm::make_unique<UnwrappedLine>();
157     Parser.Line->Level = PreBlockLine->Level;
158     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
159   }
160 
161   ~ScopedLineState() {
162     if (!Parser.Line->Tokens.empty()) {
163       Parser.addUnwrappedLine();
164     }
165     assert(Parser.Line->Tokens.empty());
166     Parser.Line = std::move(PreBlockLine);
167     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
168       Parser.MustBreakBeforeNextToken = true;
169     Parser.CurrentLines = OriginalLines;
170   }
171 
172 private:
173   UnwrappedLineParser &Parser;
174 
175   std::unique_ptr<UnwrappedLine> PreBlockLine;
176   SmallVectorImpl<UnwrappedLine> *OriginalLines;
177 };
178 
179 class CompoundStatementIndenter {
180 public:
181   CompoundStatementIndenter(UnwrappedLineParser *Parser,
182                             const FormatStyle &Style, unsigned &LineLevel)
183       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
184     if (Style.BraceWrapping.AfterControlStatement)
185       Parser->addUnwrappedLine();
186     if (Style.BraceWrapping.IndentBraces)
187       ++LineLevel;
188   }
189   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
190 
191 private:
192   unsigned &LineLevel;
193   unsigned OldLineLevel;
194 };
195 
196 namespace {
197 
198 class IndexedTokenSource : public FormatTokenSource {
199 public:
200   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
201       : Tokens(Tokens), Position(-1) {}
202 
203   FormatToken *getNextToken() override {
204     ++Position;
205     return Tokens[Position];
206   }
207 
208   unsigned getPosition() override {
209     assert(Position >= 0);
210     return Position;
211   }
212 
213   FormatToken *setPosition(unsigned P) override {
214     Position = P;
215     return Tokens[Position];
216   }
217 
218   void reset() { Position = -1; }
219 
220 private:
221   ArrayRef<FormatToken *> Tokens;
222   int Position;
223 };
224 
225 } // end anonymous namespace
226 
/// \brief Sets up a parser over \p Tokens that reports to \p Callback.
///
/// Only member initialization happens here. The token source pointer starts
/// out null and the preprocessor branch level starts at -1; include guard
/// tracking starts with nothing found and nothing rejected.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IfNdefCondition(nullptr), FoundIncludeGuardStart(false),
      IncludeGuardRejected(false) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IfNdefCondition = nullptr;
241   FoundIncludeGuardStart = false;
242   IncludeGuardRejected = false;
243   Line.reset(new UnwrappedLine);
244   CommentsBeforeNextToken.clear();
245   FormatTok = nullptr;
246   MustBreakBeforeNextToken = false;
247   PreprocessorDirectives.clear();
248   CurrentLines = &Lines;
249   DeclarationScopeStack.clear();
250   PPStack.clear();
251 }
252 
253 void UnwrappedLineParser::parse() {
254   IndexedTokenSource TokenSource(AllTokens);
255   do {
256     DEBUG(llvm::dbgs() << "----\n");
257     reset();
258     Tokens = &TokenSource;
259     TokenSource.reset();
260 
261     readToken();
262     parseFile();
263     // Create line with eof token.
264     pushToken(FormatTok);
265     addUnwrappedLine();
266 
267     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
268                                                   E = Lines.end();
269          I != E; ++I) {
270       Callback.consumeUnwrappedLine(*I);
271     }
272     Callback.finishRun();
273     Lines.clear();
274     while (!PPLevelBranchIndex.empty() &&
275            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
276       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
277       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
278     }
279     if (!PPLevelBranchIndex.empty()) {
280       ++PPLevelBranchIndex.back();
281       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
282       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
283     }
284   } while (!PPLevelBranchIndex.empty());
285 }
286 
287 void UnwrappedLineParser::parseFile() {
288   // The top-level context in a file always has declarations, except for pre-
289   // processor directives and JavaScript files.
290   bool MustBeDeclaration =
291       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
292   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
293                                           MustBeDeclaration);
294   if (Style.Language == FormatStyle::LK_TextProto)
295     parseBracedList();
296   else
297     parseLevel(/*HasOpeningBrace=*/false);
298   // Make sure to format the remaining tokens.
299   flushComments(true);
300   addUnwrappedLine();
301 }
302 
303 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
304   bool SwitchLabelEncountered = false;
305   do {
306     tok::TokenKind kind = FormatTok->Tok.getKind();
307     if (FormatTok->Type == TT_MacroBlockBegin) {
308       kind = tok::l_brace;
309     } else if (FormatTok->Type == TT_MacroBlockEnd) {
310       kind = tok::r_brace;
311     }
312 
313     switch (kind) {
314     case tok::comment:
315       nextToken();
316       addUnwrappedLine();
317       break;
318     case tok::l_brace:
319       // FIXME: Add parameter whether this can happen - if this happens, we must
320       // be in a non-declaration context.
321       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
322         continue;
323       parseBlock(/*MustBeDeclaration=*/false);
324       addUnwrappedLine();
325       break;
326     case tok::r_brace:
327       if (HasOpeningBrace)
328         return;
329       nextToken();
330       addUnwrappedLine();
331       break;
332     case tok::kw_default:
333     case tok::kw_case:
334       if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) {
335         // A 'case: string' style field declaration.
336         parseStructuralElement();
337         break;
338       }
339       if (!SwitchLabelEncountered &&
340           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
341         ++Line->Level;
342       SwitchLabelEncountered = true;
343       parseStructuralElement();
344       break;
345     default:
346       parseStructuralElement();
347       break;
348     }
349   } while (!eof());
350 }
351 
352 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
353   // We'll parse forward through the tokens until we hit
354   // a closing brace or eof - note that getNextToken() will
355   // parse macros, so this will magically work inside macro
356   // definitions, too.
357   unsigned StoredPosition = Tokens->getPosition();
358   FormatToken *Tok = FormatTok;
359   const FormatToken *PrevTok = getPreviousToken();
360   // Keep a stack of positions of lbrace tokens. We will
361   // update information about whether an lbrace starts a
362   // braced init list or a different block during the loop.
363   SmallVector<FormatToken *, 8> LBraceStack;
364   assert(Tok->Tok.is(tok::l_brace));
365   do {
366     // Get next non-comment token.
367     FormatToken *NextTok;
368     unsigned ReadTokens = 0;
369     do {
370       NextTok = Tokens->getNextToken();
371       ++ReadTokens;
372     } while (NextTok->is(tok::comment));
373 
374     switch (Tok->Tok.getKind()) {
375     case tok::l_brace:
376       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
377         if (PrevTok->is(tok::colon))
378           // A colon indicates this code is in a type, or a braced list
379           // following a label in an object literal ({a: {b: 1}}). The code
380           // below could be confused by semicolons between the individual
381           // members in a type member list, which would normally trigger
382           // BK_Block. In both cases, this must be parsed as an inline braced
383           // init.
384           Tok->BlockKind = BK_BracedInit;
385         else if (PrevTok->is(tok::r_paren))
386           // `) { }` can only occur in function or method declarations in JS.
387           Tok->BlockKind = BK_Block;
388       } else {
389         Tok->BlockKind = BK_Unknown;
390       }
391       LBraceStack.push_back(Tok);
392       break;
393     case tok::r_brace:
394       if (LBraceStack.empty())
395         break;
396       if (LBraceStack.back()->BlockKind == BK_Unknown) {
397         bool ProbablyBracedList = false;
398         if (Style.Language == FormatStyle::LK_Proto) {
399           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
400         } else {
401           // Using OriginalColumn to distinguish between ObjC methods and
402           // binary operators is a bit hacky.
403           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
404                                   NextTok->OriginalColumn == 0;
405 
406           // If there is a comma, semicolon or right paren after the closing
407           // brace, we assume this is a braced initializer list.  Note that
408           // regardless how we mark inner braces here, we will overwrite the
409           // BlockKind later if we parse a braced list (where all blocks
410           // inside are by default braced lists), or when we explicitly detect
411           // blocks (for example while parsing lambdas).
412           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
413           // braced list in JS.
414           ProbablyBracedList =
415               (Style.Language == FormatStyle::LK_JavaScript &&
416                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
417                                 Keywords.kw_as)) ||
418               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
419               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
420                                tok::r_paren, tok::r_square, tok::l_brace,
421                                tok::l_square, tok::ellipsis) ||
422               (NextTok->is(tok::identifier) &&
423                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
424               (NextTok->is(tok::semi) &&
425                (!ExpectClassBody || LBraceStack.size() != 1)) ||
426               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
427         }
428         if (ProbablyBracedList) {
429           Tok->BlockKind = BK_BracedInit;
430           LBraceStack.back()->BlockKind = BK_BracedInit;
431         } else {
432           Tok->BlockKind = BK_Block;
433           LBraceStack.back()->BlockKind = BK_Block;
434         }
435       }
436       LBraceStack.pop_back();
437       break;
438     case tok::at:
439     case tok::semi:
440     case tok::kw_if:
441     case tok::kw_while:
442     case tok::kw_for:
443     case tok::kw_switch:
444     case tok::kw_try:
445     case tok::kw___try:
446       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
447         LBraceStack.back()->BlockKind = BK_Block;
448       break;
449     default:
450       break;
451     }
452     PrevTok = Tok;
453     Tok = NextTok;
454   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
455 
456   // Assume other blocks for all unclosed opening braces.
457   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
458     if (LBraceStack[i]->BlockKind == BK_Unknown)
459       LBraceStack[i]->BlockKind = BK_Block;
460   }
461 
462   FormatTok = Tokens->setPosition(StoredPosition);
463 }
464 
// Mixes the std::hash of \p v into \p seed (shift/xor scheme with the
// constant 0x9e3779b9). The result depends on the order of the calls.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
470 
471 size_t UnwrappedLineParser::computePPHash() const {
472   size_t h = 0;
473   for (const auto &i : PPStack) {
474     hash_combine(h, size_t(i.Kind));
475     hash_combine(h, i.Line);
476   }
477   return h;
478 }
479 
480 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
481                                      bool MunchSemi) {
482   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
483          "'{' or macro block token expected");
484   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
485   FormatTok->BlockKind = BK_Block;
486 
487   size_t PPStartHash = computePPHash();
488 
489   unsigned InitialLevel = Line->Level;
490   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
491 
492   if (MacroBlock && FormatTok->is(tok::l_paren))
493     parseParens();
494 
495   size_t NbPreprocessorDirectives =
496       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
497   addUnwrappedLine();
498   size_t OpeningLineIndex =
499       CurrentLines->empty()
500           ? (UnwrappedLine::kInvalidIndex)
501           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
502 
503   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
504                                           MustBeDeclaration);
505   if (AddLevel)
506     ++Line->Level;
507   parseLevel(/*HasOpeningBrace=*/true);
508 
509   if (eof())
510     return;
511 
512   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
513                  : !FormatTok->is(tok::r_brace)) {
514     Line->Level = InitialLevel;
515     FormatTok->BlockKind = BK_Block;
516     return;
517   }
518 
519   size_t PPEndHash = computePPHash();
520 
521   // Munch the closing brace.
522   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
523 
524   if (MacroBlock && FormatTok->is(tok::l_paren))
525     parseParens();
526 
527   if (MunchSemi && FormatTok->Tok.is(tok::semi))
528     nextToken();
529   Line->Level = InitialLevel;
530 
531   if (PPStartHash == PPEndHash) {
532     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
533     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
534       // Update the opening line to add the forward reference as well
535       (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
536           CurrentLines->size() - 1;
537     }
538   }
539 }
540 
541 static bool isGoogScope(const UnwrappedLine &Line) {
542   // FIXME: Closure-library specific stuff should not be hard-coded but be
543   // configurable.
544   if (Line.Tokens.size() < 4)
545     return false;
546   auto I = Line.Tokens.begin();
547   if (I->Tok->TokenText != "goog")
548     return false;
549   ++I;
550   if (I->Tok->isNot(tok::period))
551     return false;
552   ++I;
553   if (I->Tok->TokenText != "scope")
554     return false;
555   ++I;
556   return I->Tok->is(tok::l_paren);
557 }
558 
559 static bool isIIFE(const UnwrappedLine &Line,
560                    const AdditionalKeywords &Keywords) {
561   // Look for the start of an immediately invoked anonymous function.
562   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
563   // This is commonly done in JavaScript to create a new, anonymous scope.
564   // Example: (function() { ... })()
565   if (Line.Tokens.size() < 3)
566     return false;
567   auto I = Line.Tokens.begin();
568   if (I->Tok->isNot(tok::l_paren))
569     return false;
570   ++I;
571   if (I->Tok->isNot(Keywords.kw_function))
572     return false;
573   ++I;
574   return I->Tok->is(tok::l_paren);
575 }
576 
577 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
578                                    const FormatToken &InitialToken) {
579   if (InitialToken.is(tok::kw_namespace))
580     return Style.BraceWrapping.AfterNamespace;
581   if (InitialToken.is(tok::kw_class))
582     return Style.BraceWrapping.AfterClass;
583   if (InitialToken.is(tok::kw_union))
584     return Style.BraceWrapping.AfterUnion;
585   if (InitialToken.is(tok::kw_struct))
586     return Style.BraceWrapping.AfterStruct;
587   return false;
588 }
589 
590 void UnwrappedLineParser::parseChildBlock() {
591   FormatTok->BlockKind = BK_Block;
592   nextToken();
593   {
594     bool SkipIndent =
595         (Style.Language == FormatStyle::LK_JavaScript &&
596          (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
597     ScopedLineState LineState(*this);
598     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
599                                             /*MustBeDeclaration=*/false);
600     Line->Level += SkipIndent ? 0 : 1;
601     parseLevel(/*HasOpeningBrace=*/true);
602     flushComments(isOnNewLine(*FormatTok));
603     Line->Level -= SkipIndent ? 0 : 1;
604   }
605   nextToken();
606 }
607 
608 void UnwrappedLineParser::parsePPDirective() {
609   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
610   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
611   nextToken();
612 
613   if (!FormatTok->Tok.getIdentifierInfo()) {
614     parsePPUnknown();
615     return;
616   }
617 
618   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
619   case tok::pp_define:
620     parsePPDefine();
621     return;
622   case tok::pp_if:
623     parsePPIf(/*IfDef=*/false);
624     break;
625   case tok::pp_ifdef:
626   case tok::pp_ifndef:
627     parsePPIf(/*IfDef=*/true);
628     break;
629   case tok::pp_else:
630     parsePPElse();
631     break;
632   case tok::pp_elif:
633     parsePPElIf();
634     break;
635   case tok::pp_endif:
636     parsePPEndIf();
637     break;
638   default:
639     parsePPUnknown();
640     break;
641   }
642 }
643 
644 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
645   size_t Line = CurrentLines->size();
646   if (CurrentLines == &PreprocessorDirectives)
647     Line += Lines.size();
648 
649   if (Unreachable ||
650       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
651     PPStack.push_back({PP_Unreachable, Line});
652   else
653     PPStack.push_back({PP_Conditional, Line});
654 }
655 
656 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
657   ++PPBranchLevel;
658   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
659   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
660     PPLevelBranchIndex.push_back(0);
661     PPLevelBranchCount.push_back(0);
662   }
663   PPChainBranchIndex.push(0);
664   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
665   conditionalCompilationCondition(Unreachable || Skip);
666 }
667 
668 void UnwrappedLineParser::conditionalCompilationAlternative() {
669   if (!PPStack.empty())
670     PPStack.pop_back();
671   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
672   if (!PPChainBranchIndex.empty())
673     ++PPChainBranchIndex.top();
674   conditionalCompilationCondition(
675       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
676       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
677 }
678 
679 void UnwrappedLineParser::conditionalCompilationEnd() {
680   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
681   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
682     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
683       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
684     }
685   }
686   // Guard against #endif's without #if.
687   if (PPBranchLevel > -1)
688     --PPBranchLevel;
689   if (!PPChainBranchIndex.empty())
690     PPChainBranchIndex.pop();
691   if (!PPStack.empty())
692     PPStack.pop_back();
693 }
694 
695 void UnwrappedLineParser::parsePPIf(bool IfDef) {
696   bool IfNDef = FormatTok->is(tok::pp_ifndef);
697   nextToken();
698   bool Unreachable = false;
699   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
700     Unreachable = true;
701   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
702     Unreachable = true;
703   conditionalCompilationStart(Unreachable);
704   FormatToken *IfCondition = FormatTok;
705   // If there's a #ifndef on the first line, and the only lines before it are
706   // comments, it could be an include guard.
707   bool MaybeIncludeGuard = IfNDef;
708   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) {
709     for (auto &Line : Lines) {
710       if (!Line.Tokens.front().Tok->is(tok::comment)) {
711         MaybeIncludeGuard = false;
712         IncludeGuardRejected = true;
713         break;
714       }
715     }
716   }
717   --PPBranchLevel;
718   parsePPUnknown();
719   ++PPBranchLevel;
720   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard)
721     IfNdefCondition = IfCondition;
722 }
723 
724 void UnwrappedLineParser::parsePPElse() {
725   // If a potential include guard has an #else, it's not an include guard.
726   if (FoundIncludeGuardStart && PPBranchLevel == 0)
727     FoundIncludeGuardStart = false;
728   conditionalCompilationAlternative();
729   if (PPBranchLevel > -1)
730     --PPBranchLevel;
731   parsePPUnknown();
732   ++PPBranchLevel;
733 }
734 
// #elif is handled exactly like #else.
void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
736 
737 void UnwrappedLineParser::parsePPEndIf() {
738   conditionalCompilationEnd();
739   parsePPUnknown();
740   // If the #endif of a potential include guard is the last thing in the file,
741   // then we count it as a real include guard and subtract one from every
742   // preprocessor indent.
743   unsigned TokenPosition = Tokens->getPosition();
744   FormatToken *PeekNext = AllTokens[TokenPosition];
745   if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof)) {
746     for (auto &Line : Lines) {
747       if (Line.InPPDirective && Line.Level > 0)
748         --Line.Level;
749     }
750   }
751 }
752 
753 void UnwrappedLineParser::parsePPDefine() {
754   nextToken();
755 
756   if (FormatTok->Tok.getKind() != tok::identifier) {
757     parsePPUnknown();
758     return;
759   }
760   if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) {
761     FoundIncludeGuardStart = true;
762     for (auto &Line : Lines) {
763       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
764         FoundIncludeGuardStart = false;
765         break;
766       }
767     }
768   }
769   IfNdefCondition = nullptr;
770   nextToken();
771   if (FormatTok->Tok.getKind() == tok::l_paren &&
772       FormatTok->WhitespaceRange.getBegin() ==
773           FormatTok->WhitespaceRange.getEnd()) {
774     parseParens();
775   }
776   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
777     Line->Level += PPBranchLevel + 1;
778   addUnwrappedLine();
779   ++Line->Level;
780 
781   // Errors during a preprocessor directive can only affect the layout of the
782   // preprocessor directive, and thus we ignore them. An alternative approach
783   // would be to use the same approach we use on the file level (no
784   // re-indentation if there was a structural error) within the macro
785   // definition.
786   parseFile();
787 }
788 
789 void UnwrappedLineParser::parsePPUnknown() {
790   do {
791     nextToken();
792   } while (!eof());
793   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
794     Line->Level += PPBranchLevel + 1;
795   addUnwrappedLine();
796   IfNdefCondition = nullptr;
797 }
798 
799 // Here we blacklist certain tokens that are not usually the first token in an
800 // unwrapped line. This is used in attempt to distinguish macro calls without
801 // trailing semicolons from other constructs split to several lines.
802 static bool tokenCanStartNewLine(const clang::Token &Tok) {
803   // Semicolon can be a null-statement, l_square can be a start of a macro or
804   // a C++11 attribute, but this doesn't seem to be common.
805   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
806          Tok.isNot(tok::l_square) &&
807          // Tokens that can only be used as binary operators and a part of
808          // overloaded operator names.
809          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
810          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
811          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
812          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
813          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
814          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
815          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
816          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
817          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
818          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
819          Tok.isNot(tok::lesslessequal) &&
820          // Colon is used in labels, base class lists, initializer lists,
821          // range-based for loops, ternary operator, but should never be the
822          // first token in an unwrapped line.
823          Tok.isNot(tok::colon) &&
824          // 'noexcept' is a trailing annotation.
825          Tok.isNot(tok::kw_noexcept);
826 }
827 
828 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
829                           const FormatToken *FormatTok) {
830   // FIXME: This returns true for C/C++ keywords like 'struct'.
831   return FormatTok->is(tok::identifier) &&
832          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
833           !FormatTok->isOneOf(
834               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
835               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
836               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
837               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
838               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
839               Keywords.kw_instanceof, Keywords.kw_interface,
840               Keywords.kw_throws, Keywords.kw_from));
841 }
842 
843 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
844                                  const FormatToken *FormatTok) {
845   return FormatTok->Tok.isLiteral() ||
846          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
847          mustBeJSIdent(Keywords, FormatTok);
848 }
849 
850 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
851 // when encountered after a value (see mustBeJSIdentOrValue).
852 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
853                            const FormatToken *FormatTok) {
854   return FormatTok->isOneOf(
855       tok::kw_return, Keywords.kw_yield,
856       // conditionals
857       tok::kw_if, tok::kw_else,
858       // loops
859       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
860       // switch/case
861       tok::kw_switch, tok::kw_case,
862       // exceptions
863       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
864       // declaration
865       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
866       Keywords.kw_async, Keywords.kw_function,
867       // import/export
868       Keywords.kw_import, tok::kw_export);
869 }
870 
871 // readTokenWithJavaScriptASI reads the next token and terminates the current
872 // line if JavaScript Automatic Semicolon Insertion must
873 // happen between the current token and the next token.
874 //
875 // This method is conservative - it cannot cover all edge cases of JavaScript,
876 // but only aims to correctly handle certain well known cases. It *must not*
877 // return true in speculative cases.
878 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
879   FormatToken *Previous = FormatTok;
880   readToken();
881   FormatToken *Next = FormatTok;
882 
883   bool IsOnSameLine =
884       CommentsBeforeNextToken.empty()
885           ? Next->NewlinesBefore == 0
886           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
887   if (IsOnSameLine)
888     return;
889 
890   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
891   bool PreviousStartsTemplateExpr =
892       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
893   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
894     // If the token before the previous one is an '@', the previous token is an
895     // annotation and can precede another identifier/value.
896     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
897     if (PrePrevious->is(tok::at))
898       return;
899   }
900   if (Next->is(tok::exclaim) && PreviousMustBeValue)
901     return addUnwrappedLine();
902   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
903   bool NextEndsTemplateExpr =
904       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
905   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
906       (PreviousMustBeValue ||
907        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
908                          tok::minusminus)))
909     return addUnwrappedLine();
910   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
911       isJSDeclOrStmt(Keywords, Next))
912     return addUnwrappedLine();
913 }
914 
// Parses one structural element starting at the current token and emits the
// unwrapped line(s) for it.
//
// The function works in two phases:
//  1. A dispatch on the kind of the first token hands well-known constructs
//     (Objective-C '@' directives, asm blocks, namespaces, access specifiers,
//     control-flow statements, extern blocks, JS import/export, macros, ...)
//     to their dedicated parsers; most of these cases return directly.
//  2. Otherwise tokens are consumed in a loop until something that terminates
//     the element is found (e.g. ';', '}', or a '{' that opens a block rather
//     than a braced list), or until the end of the input.
//
// Must not be called while the current token is '{' (see the assert).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include directive optionally followed by a string literal
  // forms its own line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    // '@{' starts a braced list.
    if (FormatTok->Tok.is(tok::l_brace)) {
      nextToken();
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::objc_autoreleasepool:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterObjCDeclaration)
          addUnwrappedLine();
        parseBlock(/*MustBeDeclaration=*/false);
      }
      addUnwrappedLine();
      return;
    case tok::objc_try:
      // This branch isn't strictly necessary (the kw_try case below would
      // do this too after the tok::at is parsed above).  But be explicit.
      parseTryCatch();
      return;
    default:
      break;
    }
    break;
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Mark every token up to the closing brace as finalized; the closing
      // brace itself ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    // 'inline namespace' is parsed like a regular namespace.
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript the keyword is only consumed; in other languages
    // it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." {' opens a declaration block that does not add an
    // indentation level.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import' with an optional 'public', a string literal and an
      // optional ';'.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // In C++, the signals/slots keywords followed by ':' form their own line.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume tokens until the end of the structural element.
  do {
    const FormatToken *Previous = getPreviousToken();
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' is not consumed here; only the current line is ended.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by an identifier or simple type specifier, a
      // parenthesized list and a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then restore the token position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier is all upper case, is either at least five
        // characters long or function-like, and the next token is on a new
        // line and can start a line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list closed
      // by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
               FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1307 
1308 bool UnwrappedLineParser::tryToParseLambda() {
1309   if (!Style.isCpp()) {
1310     nextToken();
1311     return false;
1312   }
1313   const FormatToken* Previous = getPreviousToken();
1314   if (Previous &&
1315       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1316                          tok::kw_delete) ||
1317        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1318     nextToken();
1319     return false;
1320   }
1321   assert(FormatTok->is(tok::l_square));
1322   FormatToken &LSquare = *FormatTok;
1323   if (!tryToParseLambdaIntroducer())
1324     return false;
1325 
1326   while (FormatTok->isNot(tok::l_brace)) {
1327     if (FormatTok->isSimpleTypeSpecifier()) {
1328       nextToken();
1329       continue;
1330     }
1331     switch (FormatTok->Tok.getKind()) {
1332     case tok::l_brace:
1333       break;
1334     case tok::l_paren:
1335       parseParens();
1336       break;
1337     case tok::amp:
1338     case tok::star:
1339     case tok::kw_const:
1340     case tok::comma:
1341     case tok::less:
1342     case tok::greater:
1343     case tok::identifier:
1344     case tok::numeric_constant:
1345     case tok::coloncolon:
1346     case tok::kw_mutable:
1347       nextToken();
1348       break;
1349     case tok::arrow:
1350       FormatTok->Type = TT_LambdaArrow;
1351       nextToken();
1352       break;
1353     default:
1354       return true;
1355     }
1356   }
1357   LSquare.Type = TT_LambdaLSquare;
1358   parseChildBlock();
1359   return true;
1360 }
1361 
1362 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1363   nextToken();
1364   if (FormatTok->is(tok::equal)) {
1365     nextToken();
1366     if (FormatTok->is(tok::r_square)) {
1367       nextToken();
1368       return true;
1369     }
1370     if (FormatTok->isNot(tok::comma))
1371       return false;
1372     nextToken();
1373   } else if (FormatTok->is(tok::amp)) {
1374     nextToken();
1375     if (FormatTok->is(tok::r_square)) {
1376       nextToken();
1377       return true;
1378     }
1379     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1380       return false;
1381     }
1382     if (FormatTok->is(tok::comma))
1383       nextToken();
1384   } else if (FormatTok->is(tok::r_square)) {
1385     nextToken();
1386     return true;
1387   }
1388   do {
1389     if (FormatTok->is(tok::amp))
1390       nextToken();
1391     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1392       return false;
1393     nextToken();
1394     if (FormatTok->is(tok::ellipsis))
1395       nextToken();
1396     if (FormatTok->is(tok::comma)) {
1397       nextToken();
1398     } else if (FormatTok->is(tok::r_square)) {
1399       nextToken();
1400       return true;
1401     } else {
1402       return false;
1403     }
1404   } while (!eof());
1405   return false;
1406 }
1407 
1408 void UnwrappedLineParser::tryToParseJSFunction() {
1409   assert(FormatTok->is(Keywords.kw_function) ||
1410          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1411   if (FormatTok->is(Keywords.kw_async))
1412     nextToken();
1413   // Consume "function".
1414   nextToken();
1415 
1416   // Consume * (generator function). Treat it like C++'s overloaded operators.
1417   if (FormatTok->is(tok::star)) {
1418     FormatTok->Type = TT_OverloadedOperator;
1419     nextToken();
1420   }
1421 
1422   // Consume function name.
1423   if (FormatTok->is(tok::identifier))
1424     nextToken();
1425 
1426   if (FormatTok->isNot(tok::l_paren))
1427     return;
1428 
1429   // Parse formal parameter list.
1430   parseParens();
1431 
1432   if (FormatTok->is(tok::colon)) {
1433     // Parse a type definition.
1434     nextToken();
1435 
1436     // Eat the type declaration. For braced inline object types, balance braces,
1437     // otherwise just parse until finding an l_brace for the function body.
1438     if (FormatTok->is(tok::l_brace))
1439       tryToParseBracedList();
1440     else
1441       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1442         nextToken();
1443   }
1444 
1445   if (FormatTok->is(tok::semi))
1446     return;
1447 
1448   parseChildBlock();
1449 }
1450 
1451 bool UnwrappedLineParser::tryToParseBracedList() {
1452   if (FormatTok->BlockKind == BK_Unknown)
1453     calculateBraceTypes();
1454   assert(FormatTok->BlockKind != BK_Unknown);
1455   if (FormatTok->BlockKind == BK_Block)
1456     return false;
1457   nextToken();
1458   parseBracedList();
1459   return true;
1460 }
1461 
1462 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1463                                           tok::TokenKind ClosingBraceKind) {
1464   bool HasError = false;
1465 
1466   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1467   // replace this by using parseAssigmentExpression() inside.
1468   do {
1469     if (Style.Language == FormatStyle::LK_JavaScript) {
1470       if (FormatTok->is(Keywords.kw_function) ||
1471           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1472         tryToParseJSFunction();
1473         continue;
1474       }
1475       if (FormatTok->is(TT_JsFatArrow)) {
1476         nextToken();
1477         // Fat arrows can be followed by simple expressions or by child blocks
1478         // in curly braces.
1479         if (FormatTok->is(tok::l_brace)) {
1480           parseChildBlock();
1481           continue;
1482         }
1483       }
1484       if (FormatTok->is(tok::l_brace)) {
1485         // Could be a method inside of a braced list `{a() { return 1; }}`.
1486         if (tryToParseBracedList())
1487           continue;
1488         parseChildBlock();
1489       }
1490     }
1491     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1492       nextToken();
1493       return !HasError;
1494     }
1495     switch (FormatTok->Tok.getKind()) {
1496     case tok::caret:
1497       nextToken();
1498       if (FormatTok->is(tok::l_brace)) {
1499         parseChildBlock();
1500       }
1501       break;
1502     case tok::l_square:
1503       tryToParseLambda();
1504       break;
1505     case tok::l_paren:
1506       parseParens();
1507       // JavaScript can just have free standing methods and getters/setters in
1508       // object literals. Detect them by a "{" following ")".
1509       if (Style.Language == FormatStyle::LK_JavaScript) {
1510         if (FormatTok->is(tok::l_brace))
1511           parseChildBlock();
1512         break;
1513       }
1514       break;
1515     case tok::l_brace:
1516       // Assume there are no blocks inside a braced init list apart
1517       // from the ones we explicitly parse out (like lambdas).
1518       FormatTok->BlockKind = BK_BracedInit;
1519       nextToken();
1520       parseBracedList();
1521       break;
1522     case tok::less:
1523       if (Style.Language == FormatStyle::LK_Proto) {
1524         nextToken();
1525         parseBracedList(/*ContinueOnSemicolons=*/false,
1526                         /*ClosingBraceKind=*/tok::greater);
1527       } else {
1528         nextToken();
1529       }
1530       break;
1531     case tok::semi:
1532       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1533       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1534       // used for error recovery if we have otherwise determined that this is
1535       // a braced list.
1536       if (Style.Language == FormatStyle::LK_JavaScript) {
1537         nextToken();
1538         break;
1539       }
1540       HasError = true;
1541       if (!ContinueOnSemicolons)
1542         return !HasError;
1543       nextToken();
1544       break;
1545     case tok::comma:
1546       nextToken();
1547       break;
1548     default:
1549       nextToken();
1550       break;
1551     }
1552   } while (!eof());
1553   return false;
1554 }
1555 
1556 void UnwrappedLineParser::parseParens() {
1557   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1558   nextToken();
1559   do {
1560     switch (FormatTok->Tok.getKind()) {
1561     case tok::l_paren:
1562       parseParens();
1563       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1564         parseChildBlock();
1565       break;
1566     case tok::r_paren:
1567       nextToken();
1568       return;
1569     case tok::r_brace:
1570       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1571       return;
1572     case tok::l_square:
1573       tryToParseLambda();
1574       break;
1575     case tok::l_brace:
1576       if (!tryToParseBracedList())
1577         parseChildBlock();
1578       break;
1579     case tok::at:
1580       nextToken();
1581       if (FormatTok->Tok.is(tok::l_brace)) {
1582         nextToken();
1583         parseBracedList();
1584       }
1585       break;
1586     case tok::kw_class:
1587       if (Style.Language == FormatStyle::LK_JavaScript)
1588         parseRecord(/*ParseAsExpr=*/true);
1589       else
1590         nextToken();
1591       break;
1592     case tok::identifier:
1593       if (Style.Language == FormatStyle::LK_JavaScript &&
1594           (FormatTok->is(Keywords.kw_function) ||
1595            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1596         tryToParseJSFunction();
1597       else
1598         nextToken();
1599       break;
1600     default:
1601       nextToken();
1602       break;
1603     }
1604   } while (!eof());
1605 }
1606 
1607 void UnwrappedLineParser::parseSquare() {
1608   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1609   if (tryToParseLambda())
1610     return;
1611   do {
1612     switch (FormatTok->Tok.getKind()) {
1613     case tok::l_paren:
1614       parseParens();
1615       break;
1616     case tok::r_square:
1617       nextToken();
1618       return;
1619     case tok::r_brace:
1620       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1621       return;
1622     case tok::l_square:
1623       parseSquare();
1624       break;
1625     case tok::l_brace: {
1626       if (!tryToParseBracedList())
1627         parseChildBlock();
1628       break;
1629     }
1630     case tok::at:
1631       nextToken();
1632       if (FormatTok->Tok.is(tok::l_brace)) {
1633         nextToken();
1634         parseBracedList();
1635       }
1636       break;
1637     default:
1638       nextToken();
1639       break;
1640     }
1641   } while (!eof());
1642 }
1643 
1644 void UnwrappedLineParser::parseIfThenElse() {
1645   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1646   nextToken();
1647   if (FormatTok->Tok.is(tok::kw_constexpr))
1648     nextToken();
1649   if (FormatTok->Tok.is(tok::l_paren))
1650     parseParens();
1651   bool NeedsUnwrappedLine = false;
1652   if (FormatTok->Tok.is(tok::l_brace)) {
1653     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1654     parseBlock(/*MustBeDeclaration=*/false);
1655     if (Style.BraceWrapping.BeforeElse)
1656       addUnwrappedLine();
1657     else
1658       NeedsUnwrappedLine = true;
1659   } else {
1660     addUnwrappedLine();
1661     ++Line->Level;
1662     parseStructuralElement();
1663     --Line->Level;
1664   }
1665   if (FormatTok->Tok.is(tok::kw_else)) {
1666     nextToken();
1667     if (FormatTok->Tok.is(tok::l_brace)) {
1668       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1669       parseBlock(/*MustBeDeclaration=*/false);
1670       addUnwrappedLine();
1671     } else if (FormatTok->Tok.is(tok::kw_if)) {
1672       parseIfThenElse();
1673     } else {
1674       addUnwrappedLine();
1675       ++Line->Level;
1676       parseStructuralElement();
1677       if (FormatTok->is(tok::eof))
1678         addUnwrappedLine();
1679       --Line->Level;
1680     }
1681   } else if (NeedsUnwrappedLine) {
1682     addUnwrappedLine();
1683   }
1684 }
1685 
1686 void UnwrappedLineParser::parseTryCatch() {
1687   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1688   nextToken();
1689   bool NeedsUnwrappedLine = false;
1690   if (FormatTok->is(tok::colon)) {
1691     // We are in a function try block, what comes is an initializer list.
1692     nextToken();
1693     while (FormatTok->is(tok::identifier)) {
1694       nextToken();
1695       if (FormatTok->is(tok::l_paren))
1696         parseParens();
1697       if (FormatTok->is(tok::comma))
1698         nextToken();
1699     }
1700   }
1701   // Parse try with resource.
1702   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1703     parseParens();
1704   }
1705   if (FormatTok->is(tok::l_brace)) {
1706     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1707     parseBlock(/*MustBeDeclaration=*/false);
1708     if (Style.BraceWrapping.BeforeCatch) {
1709       addUnwrappedLine();
1710     } else {
1711       NeedsUnwrappedLine = true;
1712     }
1713   } else if (!FormatTok->is(tok::kw_catch)) {
1714     // The C++ standard requires a compound-statement after a try.
1715     // If there's none, we try to assume there's a structuralElement
1716     // and try to continue.
1717     addUnwrappedLine();
1718     ++Line->Level;
1719     parseStructuralElement();
1720     --Line->Level;
1721   }
1722   while (1) {
1723     if (FormatTok->is(tok::at))
1724       nextToken();
1725     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1726                              tok::kw___finally) ||
1727           ((Style.Language == FormatStyle::LK_Java ||
1728             Style.Language == FormatStyle::LK_JavaScript) &&
1729            FormatTok->is(Keywords.kw_finally)) ||
1730           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1731            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1732       break;
1733     nextToken();
1734     while (FormatTok->isNot(tok::l_brace)) {
1735       if (FormatTok->is(tok::l_paren)) {
1736         parseParens();
1737         continue;
1738       }
1739       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1740         return;
1741       nextToken();
1742     }
1743     NeedsUnwrappedLine = false;
1744     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1745     parseBlock(/*MustBeDeclaration=*/false);
1746     if (Style.BraceWrapping.BeforeCatch)
1747       addUnwrappedLine();
1748     else
1749       NeedsUnwrappedLine = true;
1750   }
1751   if (NeedsUnwrappedLine)
1752     addUnwrappedLine();
1753 }
1754 
1755 void UnwrappedLineParser::parseNamespace() {
1756   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1757 
1758   const FormatToken &InitialToken = *FormatTok;
1759   nextToken();
1760   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1761     nextToken();
1762   if (FormatTok->Tok.is(tok::l_brace)) {
1763     if (ShouldBreakBeforeBrace(Style, InitialToken))
1764       addUnwrappedLine();
1765 
1766     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1767                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1768                      DeclarationScopeStack.size() > 1);
1769     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1770     // Munch the semicolon after a namespace. This is more common than one would
1771     // think. Puttin the semicolon into its own line is very ugly.
1772     if (FormatTok->Tok.is(tok::semi))
1773       nextToken();
1774     addUnwrappedLine();
1775   }
1776   // FIXME: Add error handling.
1777 }
1778 
1779 void UnwrappedLineParser::parseNew() {
1780   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1781   nextToken();
1782   if (Style.Language != FormatStyle::LK_Java)
1783     return;
1784 
1785   // In Java, we can parse everything up to the parens, which aren't optional.
1786   do {
1787     // There should not be a ;, { or } before the new's open paren.
1788     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1789       return;
1790 
1791     // Consume the parens.
1792     if (FormatTok->is(tok::l_paren)) {
1793       parseParens();
1794 
1795       // If there is a class body of an anonymous class, consume that as child.
1796       if (FormatTok->is(tok::l_brace))
1797         parseChildBlock();
1798       return;
1799     }
1800     nextToken();
1801   } while (!eof());
1802 }
1803 
1804 void UnwrappedLineParser::parseForOrWhileLoop() {
1805   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1806          "'for', 'while' or foreach macro expected");
1807   nextToken();
1808   // JS' for await ( ...
1809   if (Style.Language == FormatStyle::LK_JavaScript &&
1810       FormatTok->is(Keywords.kw_await))
1811     nextToken();
1812   if (FormatTok->Tok.is(tok::l_paren))
1813     parseParens();
1814   if (FormatTok->Tok.is(tok::l_brace)) {
1815     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1816     parseBlock(/*MustBeDeclaration=*/false);
1817     addUnwrappedLine();
1818   } else {
1819     addUnwrappedLine();
1820     ++Line->Level;
1821     parseStructuralElement();
1822     --Line->Level;
1823   }
1824 }
1825 
1826 void UnwrappedLineParser::parseDoWhile() {
1827   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1828   nextToken();
1829   if (FormatTok->Tok.is(tok::l_brace)) {
1830     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1831     parseBlock(/*MustBeDeclaration=*/false);
1832     if (Style.BraceWrapping.IndentBraces)
1833       addUnwrappedLine();
1834   } else {
1835     addUnwrappedLine();
1836     ++Line->Level;
1837     parseStructuralElement();
1838     --Line->Level;
1839   }
1840 
1841   // FIXME: Add error handling.
1842   if (!FormatTok->Tok.is(tok::kw_while)) {
1843     addUnwrappedLine();
1844     return;
1845   }
1846 
1847   nextToken();
1848   parseStructuralElement();
1849 }
1850 
1851 void UnwrappedLineParser::parseLabel() {
1852   nextToken();
1853   unsigned OldLineLevel = Line->Level;
1854   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1855     --Line->Level;
1856   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1857     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1858     parseBlock(/*MustBeDeclaration=*/false);
1859     if (FormatTok->Tok.is(tok::kw_break)) {
1860       if (Style.BraceWrapping.AfterControlStatement)
1861         addUnwrappedLine();
1862       parseStructuralElement();
1863     }
1864     addUnwrappedLine();
1865   } else {
1866     if (FormatTok->is(tok::semi))
1867       nextToken();
1868     addUnwrappedLine();
1869   }
1870   Line->Level = OldLineLevel;
1871   if (FormatTok->isNot(tok::l_brace)) {
1872     parseStructuralElement();
1873     addUnwrappedLine();
1874   }
1875 }
1876 
1877 void UnwrappedLineParser::parseCaseLabel() {
1878   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1879   // FIXME: fix handling of complex expressions here.
1880   do {
1881     nextToken();
1882   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1883   parseLabel();
1884 }
1885 
1886 void UnwrappedLineParser::parseSwitch() {
1887   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1888   nextToken();
1889   if (FormatTok->Tok.is(tok::l_paren))
1890     parseParens();
1891   if (FormatTok->Tok.is(tok::l_brace)) {
1892     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1893     parseBlock(/*MustBeDeclaration=*/false);
1894     addUnwrappedLine();
1895   } else {
1896     addUnwrappedLine();
1897     ++Line->Level;
1898     parseStructuralElement();
1899     --Line->Level;
1900   }
1901 }
1902 
1903 void UnwrappedLineParser::parseAccessSpecifier() {
1904   nextToken();
1905   // Understand Qt's slots.
1906   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1907     nextToken();
1908   // Otherwise, we don't know what it is, and we'd better keep the next token.
1909   if (FormatTok->Tok.is(tok::colon))
1910     nextToken();
1911   addUnwrappedLine();
1912 }
1913 
1914 bool UnwrappedLineParser::parseEnum() {
1915   // Won't be 'enum' for NS_ENUMs.
1916   if (FormatTok->Tok.is(tok::kw_enum))
1917     nextToken();
1918 
1919   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1920   // declarations. An "enum" keyword followed by a colon would be a syntax
1921   // error and thus assume it is just an identifier.
1922   if (Style.Language == FormatStyle::LK_JavaScript &&
1923       FormatTok->isOneOf(tok::colon, tok::question))
1924     return false;
1925 
1926   // Eat up enum class ...
1927   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1928     nextToken();
1929 
1930   while (FormatTok->Tok.getIdentifierInfo() ||
1931          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1932                             tok::greater, tok::comma, tok::question)) {
1933     nextToken();
1934     // We can have macros or attributes in between 'enum' and the enum name.
1935     if (FormatTok->is(tok::l_paren))
1936       parseParens();
1937     if (FormatTok->is(tok::identifier)) {
1938       nextToken();
1939       // If there are two identifiers in a row, this is likely an elaborate
1940       // return type. In Java, this can be "implements", etc.
1941       if (Style.isCpp() && FormatTok->is(tok::identifier))
1942         return false;
1943     }
1944   }
1945 
1946   // Just a declaration or something is wrong.
1947   if (FormatTok->isNot(tok::l_brace))
1948     return true;
1949   FormatTok->BlockKind = BK_Block;
1950 
1951   if (Style.Language == FormatStyle::LK_Java) {
1952     // Java enums are different.
1953     parseJavaEnumBody();
1954     return true;
1955   }
1956   if (Style.Language == FormatStyle::LK_Proto) {
1957     parseBlock(/*MustBeDeclaration=*/true);
1958     return true;
1959   }
1960 
1961   // Parse enum body.
1962   nextToken();
1963   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1964   if (HasError) {
1965     if (FormatTok->is(tok::semi))
1966       nextToken();
1967     addUnwrappedLine();
1968   }
1969   return true;
1970 
1971   // There is no addUnwrappedLine() here so that we fall through to parsing a
1972   // structural element afterwards. Thus, in "enum A {} n, m;",
1973   // "} n, m;" will end up in one unwrapped line.
1974 }
1975 
1976 void UnwrappedLineParser::parseJavaEnumBody() {
1977   // Determine whether the enum is simple, i.e. does not have a semicolon or
1978   // constants with class bodies. Simple enums can be formatted like braced
1979   // lists, contracted to a single line, etc.
1980   unsigned StoredPosition = Tokens->getPosition();
1981   bool IsSimple = true;
1982   FormatToken *Tok = Tokens->getNextToken();
1983   while (Tok) {
1984     if (Tok->is(tok::r_brace))
1985       break;
1986     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1987       IsSimple = false;
1988       break;
1989     }
1990     // FIXME: This will also mark enums with braces in the arguments to enum
1991     // constants as "not simple". This is probably fine in practice, though.
1992     Tok = Tokens->getNextToken();
1993   }
1994   FormatTok = Tokens->setPosition(StoredPosition);
1995 
1996   if (IsSimple) {
1997     nextToken();
1998     parseBracedList();
1999     addUnwrappedLine();
2000     return;
2001   }
2002 
2003   // Parse the body of a more complex enum.
2004   // First add a line for everything up to the "{".
2005   nextToken();
2006   addUnwrappedLine();
2007   ++Line->Level;
2008 
2009   // Parse the enum constants.
2010   while (FormatTok) {
2011     if (FormatTok->is(tok::l_brace)) {
2012       // Parse the constant's class body.
2013       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2014                  /*MunchSemi=*/false);
2015     } else if (FormatTok->is(tok::l_paren)) {
2016       parseParens();
2017     } else if (FormatTok->is(tok::comma)) {
2018       nextToken();
2019       addUnwrappedLine();
2020     } else if (FormatTok->is(tok::semi)) {
2021       nextToken();
2022       addUnwrappedLine();
2023       break;
2024     } else if (FormatTok->is(tok::r_brace)) {
2025       addUnwrappedLine();
2026       break;
2027     } else {
2028       nextToken();
2029     }
2030   }
2031 
2032   // Parse the class body after the enum's ";" if any.
2033   parseLevel(/*HasOpeningBrace=*/true);
2034   nextToken();
2035   --Line->Level;
2036   addUnwrappedLine();
2037 }
2038 
2039 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2040   const FormatToken &InitialToken = *FormatTok;
2041   nextToken();
2042 
2043   // The actual identifier can be a nested name specifier, and in macros
2044   // it is often token-pasted.
2045   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2046                             tok::kw___attribute, tok::kw___declspec,
2047                             tok::kw_alignas) ||
2048          ((Style.Language == FormatStyle::LK_Java ||
2049            Style.Language == FormatStyle::LK_JavaScript) &&
2050           FormatTok->isOneOf(tok::period, tok::comma))) {
2051     if (Style.Language == FormatStyle::LK_JavaScript &&
2052         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2053       // JavaScript/TypeScript supports inline object types in
2054       // extends/implements positions:
2055       //     class Foo implements {bar: number} { }
2056       nextToken();
2057       if (FormatTok->is(tok::l_brace)) {
2058         tryToParseBracedList();
2059         continue;
2060       }
2061     }
2062     bool IsNonMacroIdentifier =
2063         FormatTok->is(tok::identifier) &&
2064         FormatTok->TokenText != FormatTok->TokenText.upper();
2065     nextToken();
2066     // We can have macros or attributes in between 'class' and the class name.
2067     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2068       parseParens();
2069   }
2070 
2071   // Note that parsing away template declarations here leads to incorrectly
2072   // accepting function declarations as record declarations.
2073   // In general, we cannot solve this problem. Consider:
2074   // class A<int> B() {}
2075   // which can be a function definition or a class definition when B() is a
2076   // macro. If we find enough real-world cases where this is a problem, we
2077   // can parse for the 'template' keyword in the beginning of the statement,
2078   // and thus rule out the record production in case there is no template
2079   // (this would still leave us with an ambiguity between template function
2080   // and class declarations).
2081   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2082     while (!eof()) {
2083       if (FormatTok->is(tok::l_brace)) {
2084         calculateBraceTypes(/*ExpectClassBody=*/true);
2085         if (!tryToParseBracedList())
2086           break;
2087       }
2088       if (FormatTok->Tok.is(tok::semi))
2089         return;
2090       nextToken();
2091     }
2092   }
2093   if (FormatTok->Tok.is(tok::l_brace)) {
2094     if (ParseAsExpr) {
2095       parseChildBlock();
2096     } else {
2097       if (ShouldBreakBeforeBrace(Style, InitialToken))
2098         addUnwrappedLine();
2099 
2100       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2101                  /*MunchSemi=*/false);
2102     }
2103   }
2104   // There is no addUnwrappedLine() here so that we fall through to parsing a
2105   // structural element afterwards. Thus, in "class A {} n, m;",
2106   // "} n, m;" will end up in one unwrapped line.
2107 }
2108 
2109 void UnwrappedLineParser::parseObjCProtocolList() {
2110   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2111   do
2112     nextToken();
2113   while (!eof() && FormatTok->Tok.isNot(tok::greater));
2114   nextToken(); // Skip '>'.
2115 }
2116 
2117 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2118   do {
2119     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2120       nextToken();
2121       addUnwrappedLine();
2122       break;
2123     }
2124     if (FormatTok->is(tok::l_brace)) {
2125       parseBlock(/*MustBeDeclaration=*/false);
2126       // In ObjC interfaces, nothing should be following the "}".
2127       addUnwrappedLine();
2128     } else if (FormatTok->is(tok::r_brace)) {
2129       // Ignore stray "}". parseStructuralElement doesn't consume them.
2130       nextToken();
2131       addUnwrappedLine();
2132     } else {
2133       parseStructuralElement();
2134     }
2135   } while (!eof());
2136 }
2137 
2138 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2139   nextToken();
2140   nextToken(); // interface name
2141 
2142   // @interface can be followed by either a base class, or a category.
2143   if (FormatTok->Tok.is(tok::colon)) {
2144     nextToken();
2145     nextToken(); // base class name
2146   } else if (FormatTok->Tok.is(tok::l_paren))
2147     // Skip category, if present.
2148     parseParens();
2149 
2150   if (FormatTok->Tok.is(tok::less))
2151     parseObjCProtocolList();
2152 
2153   if (FormatTok->Tok.is(tok::l_brace)) {
2154     if (Style.BraceWrapping.AfterObjCDeclaration)
2155       addUnwrappedLine();
2156     parseBlock(/*MustBeDeclaration=*/true);
2157   }
2158 
2159   // With instance variables, this puts '}' on its own line.  Without instance
2160   // variables, this ends the @interface line.
2161   addUnwrappedLine();
2162 
2163   parseObjCUntilAtEnd();
2164 }
2165 
2166 void UnwrappedLineParser::parseObjCProtocol() {
2167   nextToken();
2168   nextToken(); // protocol name
2169 
2170   if (FormatTok->Tok.is(tok::less))
2171     parseObjCProtocolList();
2172 
2173   // Check for protocol declaration.
2174   if (FormatTok->Tok.is(tok::semi)) {
2175     nextToken();
2176     return addUnwrappedLine();
2177   }
2178 
2179   addUnwrappedLine();
2180   parseObjCUntilAtEnd();
2181 }
2182 
2183 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2184   bool IsImport = FormatTok->is(Keywords.kw_import);
2185   assert(IsImport || FormatTok->is(tok::kw_export));
2186   nextToken();
2187 
2188   // Consume the "default" in "export default class/function".
2189   if (FormatTok->is(tok::kw_default))
2190     nextToken();
2191 
2192   // Consume "async function", "function" and "default function", so that these
2193   // get parsed as free-standing JS functions, i.e. do not require a trailing
2194   // semicolon.
2195   if (FormatTok->is(Keywords.kw_async))
2196     nextToken();
2197   if (FormatTok->is(Keywords.kw_function)) {
2198     nextToken();
2199     return;
2200   }
2201 
2202   // For imports, `export *`, `export {...}`, consume the rest of the line up
2203   // to the terminating `;`. For everything else, just return and continue
2204   // parsing the structural element, i.e. the declaration or expression for
2205   // `export default`.
2206   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2207       !FormatTok->isStringLiteral())
2208     return;
2209 
2210   while (!eof()) {
2211     if (FormatTok->is(tok::semi))
2212       return;
2213     if (Line->Tokens.size() == 0) {
2214       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2215       // import statement should terminate.
2216       return;
2217     }
2218     if (FormatTok->is(tok::l_brace)) {
2219       FormatTok->BlockKind = BK_Block;
2220       nextToken();
2221       parseBracedList();
2222     } else {
2223       nextToken();
2224     }
2225   }
2226 }
2227 
2228 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2229                                                  StringRef Prefix = "") {
2230   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2231                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2232   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2233                                                     E = Line.Tokens.end();
2234        I != E; ++I) {
2235     llvm::dbgs() << I->Tok->Tok.getName() << "["
2236                  << "T=" << I->Tok->Type
2237                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2238   }
2239   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2240                                                     E = Line.Tokens.end();
2241        I != E; ++I) {
2242     const UnwrappedLineNode &Node = *I;
2243     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2244              I = Node.Children.begin(),
2245              E = Node.Children.end();
2246          I != E; ++I) {
2247       printDebugInfo(*I, "\nChild: ");
2248     }
2249   }
2250   llvm::dbgs() << "\n";
2251 }
2252 
2253 void UnwrappedLineParser::addUnwrappedLine() {
2254   if (Line->Tokens.empty())
2255     return;
2256   DEBUG({
2257     if (CurrentLines == &Lines)
2258       printDebugInfo(*Line);
2259   });
2260   CurrentLines->push_back(std::move(*Line));
2261   Line->Tokens.clear();
2262   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2263   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2264     CurrentLines->append(
2265         std::make_move_iterator(PreprocessorDirectives.begin()),
2266         std::make_move_iterator(PreprocessorDirectives.end()));
2267     PreprocessorDirectives.clear();
2268   }
2269 }
2270 
2271 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2272 
2273 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2274   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2275          FormatTok.NewlinesBefore > 0;
2276 }
2277 
2278 // Checks if \p FormatTok is a line comment that continues the line comment
2279 // section on \p Line.
2280 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2281                                         const UnwrappedLine &Line,
2282                                         llvm::Regex &CommentPragmasRegex) {
2283   if (Line.Tokens.empty())
2284     return false;
2285 
2286   StringRef IndentContent = FormatTok.TokenText;
2287   if (FormatTok.TokenText.startswith("//") ||
2288       FormatTok.TokenText.startswith("/*"))
2289     IndentContent = FormatTok.TokenText.substr(2);
2290   if (CommentPragmasRegex.match(IndentContent))
2291     return false;
2292 
2293   // If Line starts with a line comment, then FormatTok continues the comment
2294   // section if its original column is greater or equal to the original start
2295   // column of the line.
2296   //
2297   // Define the min column token of a line as follows: if a line ends in '{' or
2298   // contains a '{' followed by a line comment, then the min column token is
2299   // that '{'. Otherwise, the min column token of the line is the first token of
2300   // the line.
2301   //
2302   // If Line starts with a token other than a line comment, then FormatTok
2303   // continues the comment section if its original column is greater than the
2304   // original start column of the min column token of the line.
2305   //
2306   // For example, the second line comment continues the first in these cases:
2307   //
2308   // // first line
2309   // // second line
2310   //
2311   // and:
2312   //
2313   // // first line
2314   //  // second line
2315   //
2316   // and:
2317   //
2318   // int i; // first line
2319   //  // second line
2320   //
2321   // and:
2322   //
2323   // do { // first line
2324   //      // second line
2325   //   int i;
2326   // } while (true);
2327   //
2328   // and:
2329   //
2330   // enum {
2331   //   a, // first line
2332   //    // second line
2333   //   b
2334   // };
2335   //
2336   // The second line comment doesn't continue the first in these cases:
2337   //
2338   //   // first line
2339   //  // second line
2340   //
2341   // and:
2342   //
2343   // int i; // first line
2344   // // second line
2345   //
2346   // and:
2347   //
2348   // do { // first line
2349   //   // second line
2350   //   int i;
2351   // } while (true);
2352   //
2353   // and:
2354   //
2355   // enum {
2356   //   a, // first line
2357   //   // second line
2358   // };
2359   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2360 
2361   // Scan for '{//'. If found, use the column of '{' as a min column for line
2362   // comment section continuation.
2363   const FormatToken *PreviousToken = nullptr;
2364   for (const UnwrappedLineNode &Node : Line.Tokens) {
2365     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2366         isLineComment(*Node.Tok)) {
2367       MinColumnToken = PreviousToken;
2368       break;
2369     }
2370     PreviousToken = Node.Tok;
2371 
2372     // Grab the last newline preceding a token in this unwrapped line.
2373     if (Node.Tok->NewlinesBefore > 0) {
2374       MinColumnToken = Node.Tok;
2375     }
2376   }
2377   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2378     MinColumnToken = PreviousToken;
2379   }
2380 
2381   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2382                               MinColumnToken);
2383 }
2384 
2385 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2386   bool JustComments = Line->Tokens.empty();
2387   for (SmallVectorImpl<FormatToken *>::const_iterator
2388            I = CommentsBeforeNextToken.begin(),
2389            E = CommentsBeforeNextToken.end();
2390        I != E; ++I) {
2391     // Line comments that belong to the same line comment section are put on the
2392     // same line since later we might want to reflow content between them.
2393     // Additional fine-grained breaking of line comment sections is controlled
2394     // by the class BreakableLineCommentSection in case it is desirable to keep
2395     // several line comment sections in the same unwrapped line.
2396     //
2397     // FIXME: Consider putting separate line comment sections as children to the
2398     // unwrapped line instead.
2399     (*I)->ContinuesLineCommentSection =
2400         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2401     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2402       addUnwrappedLine();
2403     pushToken(*I);
2404   }
2405   if (NewlineBeforeNext && JustComments)
2406     addUnwrappedLine();
2407   CommentsBeforeNextToken.clear();
2408 }
2409 
2410 void UnwrappedLineParser::nextToken(int LevelDifference) {
2411   if (eof())
2412     return;
2413   flushComments(isOnNewLine(*FormatTok));
2414   pushToken(FormatTok);
2415   if (Style.Language != FormatStyle::LK_JavaScript)
2416     readToken(LevelDifference);
2417   else
2418     readTokenWithJavaScriptASI();
2419 }
2420 
2421 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2422   // FIXME: This is a dirty way to access the previous token. Find a better
2423   // solution.
2424   if (!Line || Line->Tokens.empty())
2425     return nullptr;
2426   return Line->Tokens.back().Tok;
2427 }
2428 
2429 void UnwrappedLineParser::distributeComments(
2430     const SmallVectorImpl<FormatToken *> &Comments,
2431     const FormatToken *NextTok) {
2432   // Whether or not a line comment token continues a line is controlled by
2433   // the method continuesLineCommentSection, with the following caveat:
2434   //
2435   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2436   // that each comment line from the trail is aligned with the next token, if
2437   // the next token exists. If a trail exists, the beginning of the maximal
2438   // trail is marked as a start of a new comment section.
2439   //
2440   // For example in this code:
2441   //
2442   // int a; // line about a
2443   //   // line 1 about b
2444   //   // line 2 about b
2445   //   int b;
2446   //
2447   // the two lines about b form a maximal trail, so there are two sections, the
2448   // first one consisting of the single comment "// line about a" and the
2449   // second one consisting of the next two comments.
2450   if (Comments.empty())
2451     return;
2452   bool ShouldPushCommentsInCurrentLine = true;
2453   bool HasTrailAlignedWithNextToken = false;
2454   unsigned StartOfTrailAlignedWithNextToken = 0;
2455   if (NextTok) {
2456     // We are skipping the first element intentionally.
2457     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2458       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2459         HasTrailAlignedWithNextToken = true;
2460         StartOfTrailAlignedWithNextToken = i;
2461       }
2462     }
2463   }
2464   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2465     FormatToken *FormatTok = Comments[i];
2466     if (HasTrailAlignedWithNextToken &&
2467         i == StartOfTrailAlignedWithNextToken) {
2468       FormatTok->ContinuesLineCommentSection = false;
2469     } else {
2470       FormatTok->ContinuesLineCommentSection =
2471           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2472     }
2473     if (!FormatTok->ContinuesLineCommentSection &&
2474         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2475       ShouldPushCommentsInCurrentLine = false;
2476     }
2477     if (ShouldPushCommentsInCurrentLine) {
2478       pushToken(FormatTok);
2479     } else {
2480       CommentsBeforeNextToken.push_back(FormatTok);
2481     }
2482   }
2483 }
2484 
2485 void UnwrappedLineParser::readToken(int LevelDifference) {
2486   SmallVector<FormatToken *, 1> Comments;
2487   do {
2488     FormatTok = Tokens->getNextToken();
2489     assert(FormatTok);
2490     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2491            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2492       distributeComments(Comments, FormatTok);
2493       Comments.clear();
2494       // If there is an unfinished unwrapped line, we flush the preprocessor
2495       // directives only after that unwrapped line was finished later.
2496       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2497       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2498       assert((LevelDifference >= 0 ||
2499               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2500              "LevelDifference makes Line->Level negative");
2501       Line->Level += LevelDifference;
2502       // Comments stored before the preprocessor directive need to be output
2503       // before the preprocessor directive, at the same level as the
2504       // preprocessor directive, as we consider them to apply to the directive.
2505       flushComments(isOnNewLine(*FormatTok));
2506       parsePPDirective();
2507     }
2508     while (FormatTok->Type == TT_ConflictStart ||
2509            FormatTok->Type == TT_ConflictEnd ||
2510            FormatTok->Type == TT_ConflictAlternative) {
2511       if (FormatTok->Type == TT_ConflictStart) {
2512         conditionalCompilationStart(/*Unreachable=*/false);
2513       } else if (FormatTok->Type == TT_ConflictAlternative) {
2514         conditionalCompilationAlternative();
2515       } else if (FormatTok->Type == TT_ConflictEnd) {
2516         conditionalCompilationEnd();
2517       }
2518       FormatTok = Tokens->getNextToken();
2519       FormatTok->MustBreakBefore = true;
2520     }
2521 
2522     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2523         !Line->InPPDirective) {
2524       continue;
2525     }
2526 
2527     if (!FormatTok->Tok.is(tok::comment)) {
2528       distributeComments(Comments, FormatTok);
2529       Comments.clear();
2530       return;
2531     }
2532 
2533     Comments.push_back(FormatTok);
2534   } while (!eof());
2535 
2536   distributeComments(Comments, nullptr);
2537   Comments.clear();
2538 }
2539 
2540 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2541   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2542   if (MustBreakBeforeNextToken) {
2543     Line->Tokens.back().Tok->MustBreakBefore = true;
2544     MustBreakBeforeNextToken = false;
2545   }
2546 }
2547 
2548 } // end namespace format
2549 } // end namespace clang
2550