1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 static bool isLineComment(const FormatToken &FormatTok) {
59   return FormatTok.is(tok::comment) &&
60          FormatTok.TokenText.startswith("//");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     TokenSource = this;
86     Line.Level = 0;
87     Line.InPPDirective = true;
88   }
89 
90   ~ScopedMacroState() override {
91     TokenSource = PreviousTokenSource;
92     ResetToken = Token;
93     Line.InPPDirective = false;
94     Line.Level = PreviousLineLevel;
95   }
96 
97   FormatToken *getNextToken() override {
98     // The \c UnwrappedLineParser guards against this by never calling
99     // \c getNextToken() after it has encountered the first eof token.
100     assert(!eof());
101     PreviousToken = Token;
102     Token = PreviousTokenSource->getNextToken();
103     if (eof())
104       return getFakeEOF();
105     return Token;
106   }
107 
108   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
109 
110   FormatToken *setPosition(unsigned Position) override {
111     PreviousToken = nullptr;
112     Token = PreviousTokenSource->setPosition(Position);
113     return Token;
114   }
115 
116 private:
117   bool eof() {
118     return Token && Token->HasUnescapedNewline &&
119            !continuesLineComment(*Token, PreviousToken,
120                                  /*MinColumnToken=*/PreviousToken);
121   }
122 
123   FormatToken *getFakeEOF() {
124     static bool EOFInitialized = false;
125     static FormatToken FormatTok;
126     if (!EOFInitialized) {
127       FormatTok.Tok.startToken();
128       FormatTok.Tok.setKind(tok::eof);
129       EOFInitialized = true;
130     }
131     return &FormatTok;
132   }
133 
134   UnwrappedLine &Line;
135   FormatTokenSource *&TokenSource;
136   FormatToken *&ResetToken;
137   unsigned PreviousLineLevel;
138   FormatTokenSource *PreviousTokenSource;
139 
140   FormatToken *Token;
141   FormatToken *PreviousToken;
142 };
143 
144 } // end anonymous namespace
145 
146 class ScopedLineState {
147 public:
148   ScopedLineState(UnwrappedLineParser &Parser,
149                   bool SwitchToPreprocessorLines = false)
150       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
151     if (SwitchToPreprocessorLines)
152       Parser.CurrentLines = &Parser.PreprocessorDirectives;
153     else if (!Parser.Line->Tokens.empty())
154       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
155     PreBlockLine = std::move(Parser.Line);
156     Parser.Line = llvm::make_unique<UnwrappedLine>();
157     Parser.Line->Level = PreBlockLine->Level;
158     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
159   }
160 
161   ~ScopedLineState() {
162     if (!Parser.Line->Tokens.empty()) {
163       Parser.addUnwrappedLine();
164     }
165     assert(Parser.Line->Tokens.empty());
166     Parser.Line = std::move(PreBlockLine);
167     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
168       Parser.MustBreakBeforeNextToken = true;
169     Parser.CurrentLines = OriginalLines;
170   }
171 
172 private:
173   UnwrappedLineParser &Parser;
174 
175   std::unique_ptr<UnwrappedLine> PreBlockLine;
176   SmallVectorImpl<UnwrappedLine> *OriginalLines;
177 };
178 
179 class CompoundStatementIndenter {
180 public:
181   CompoundStatementIndenter(UnwrappedLineParser *Parser,
182                             const FormatStyle &Style, unsigned &LineLevel)
183       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
184     if (Style.BraceWrapping.AfterControlStatement)
185       Parser->addUnwrappedLine();
186     if (Style.BraceWrapping.IndentBraces)
187       ++LineLevel;
188   }
189   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
190 
191 private:
192   unsigned &LineLevel;
193   unsigned OldLineLevel;
194 };
195 
196 namespace {
197 
198 class IndexedTokenSource : public FormatTokenSource {
199 public:
200   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
201       : Tokens(Tokens), Position(-1) {}
202 
203   FormatToken *getNextToken() override {
204     ++Position;
205     return Tokens[Position];
206   }
207 
208   unsigned getPosition() override {
209     assert(Position >= 0);
210     return Position;
211   }
212 
213   FormatToken *setPosition(unsigned P) override {
214     Position = P;
215     return Tokens[Position];
216   }
217 
218   void reset() { Position = -1; }
219 
220 private:
221   ArrayRef<FormatToken *> Tokens;
222   int Position;
223 };
224 
225 } // end anonymous namespace
226 
227 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
228                                          const AdditionalKeywords &Keywords,
229                                          ArrayRef<FormatToken *> Tokens,
230                                          UnwrappedLineConsumer &Callback)
231     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
232       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
233       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
234       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
235       IfNdefCondition(nullptr), FoundIncludeGuardStart(false),
236       IncludeGuardRejected(false) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IfNdefCondition = nullptr;
241   FoundIncludeGuardStart = false;
242   IncludeGuardRejected = false;
243   Line.reset(new UnwrappedLine);
244   CommentsBeforeNextToken.clear();
245   FormatTok = nullptr;
246   MustBreakBeforeNextToken = false;
247   PreprocessorDirectives.clear();
248   CurrentLines = &Lines;
249   DeclarationScopeStack.clear();
250   PPStack.clear();
251 }
252 
253 void UnwrappedLineParser::parse() {
254   IndexedTokenSource TokenSource(AllTokens);
255   do {
256     DEBUG(llvm::dbgs() << "----\n");
257     reset();
258     Tokens = &TokenSource;
259     TokenSource.reset();
260 
261     readToken();
262     parseFile();
263     // Create line with eof token.
264     pushToken(FormatTok);
265     addUnwrappedLine();
266 
267     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
268                                                   E = Lines.end();
269          I != E; ++I) {
270       Callback.consumeUnwrappedLine(*I);
271     }
272     Callback.finishRun();
273     Lines.clear();
274     while (!PPLevelBranchIndex.empty() &&
275            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
276       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
277       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
278     }
279     if (!PPLevelBranchIndex.empty()) {
280       ++PPLevelBranchIndex.back();
281       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
282       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
283     }
284   } while (!PPLevelBranchIndex.empty());
285 }
286 
287 void UnwrappedLineParser::parseFile() {
288   // The top-level context in a file always has declarations, except for pre-
289   // processor directives and JavaScript files.
290   bool MustBeDeclaration =
291       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
292   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
293                                           MustBeDeclaration);
294   if (Style.Language == FormatStyle::LK_TextProto)
295     parseBracedList();
296   else
297     parseLevel(/*HasOpeningBrace=*/false);
298   // Make sure to format the remaining tokens.
299   flushComments(true);
300   addUnwrappedLine();
301 }
302 
303 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
304   bool SwitchLabelEncountered = false;
305   do {
306     tok::TokenKind kind = FormatTok->Tok.getKind();
307     if (FormatTok->Type == TT_MacroBlockBegin) {
308       kind = tok::l_brace;
309     } else if (FormatTok->Type == TT_MacroBlockEnd) {
310       kind = tok::r_brace;
311     }
312 
313     switch (kind) {
314     case tok::comment:
315       nextToken();
316       addUnwrappedLine();
317       break;
318     case tok::l_brace:
319       // FIXME: Add parameter whether this can happen - if this happens, we must
320       // be in a non-declaration context.
321       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
322         continue;
323       parseBlock(/*MustBeDeclaration=*/false);
324       addUnwrappedLine();
325       break;
326     case tok::r_brace:
327       if (HasOpeningBrace)
328         return;
329       nextToken();
330       addUnwrappedLine();
331       break;
332     case tok::kw_default:
333     case tok::kw_case:
334       if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) {
335         // A 'case: string' style field declaration.
336         parseStructuralElement();
337         break;
338       }
339       if (!SwitchLabelEncountered &&
340           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
341         ++Line->Level;
342       SwitchLabelEncountered = true;
343       parseStructuralElement();
344       break;
345     default:
346       parseStructuralElement();
347       break;
348     }
349   } while (!eof());
350 }
351 
352 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
353   // We'll parse forward through the tokens until we hit
354   // a closing brace or eof - note that getNextToken() will
355   // parse macros, so this will magically work inside macro
356   // definitions, too.
357   unsigned StoredPosition = Tokens->getPosition();
358   FormatToken *Tok = FormatTok;
359   const FormatToken *PrevTok = getPreviousToken();
360   // Keep a stack of positions of lbrace tokens. We will
361   // update information about whether an lbrace starts a
362   // braced init list or a different block during the loop.
363   SmallVector<FormatToken *, 8> LBraceStack;
364   assert(Tok->Tok.is(tok::l_brace));
365   do {
366     // Get next non-comment token.
367     FormatToken *NextTok;
368     unsigned ReadTokens = 0;
369     do {
370       NextTok = Tokens->getNextToken();
371       ++ReadTokens;
372     } while (NextTok->is(tok::comment));
373 
374     switch (Tok->Tok.getKind()) {
375     case tok::l_brace:
376       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
377         if (PrevTok->is(tok::colon))
378           // A colon indicates this code is in a type, or a braced list
379           // following a label in an object literal ({a: {b: 1}}). The code
380           // below could be confused by semicolons between the individual
381           // members in a type member list, which would normally trigger
382           // BK_Block. In both cases, this must be parsed as an inline braced
383           // init.
384           Tok->BlockKind = BK_BracedInit;
385         else if (PrevTok->is(tok::r_paren))
386           // `) { }` can only occur in function or method declarations in JS.
387           Tok->BlockKind = BK_Block;
388       } else {
389         Tok->BlockKind = BK_Unknown;
390       }
391       LBraceStack.push_back(Tok);
392       break;
393     case tok::r_brace:
394       if (LBraceStack.empty())
395         break;
396       if (LBraceStack.back()->BlockKind == BK_Unknown) {
397         bool ProbablyBracedList = false;
398         if (Style.Language == FormatStyle::LK_Proto) {
399           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
400         } else {
401           // Using OriginalColumn to distinguish between ObjC methods and
402           // binary operators is a bit hacky.
403           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
404                                   NextTok->OriginalColumn == 0;
405 
406           // If there is a comma, semicolon or right paren after the closing
407           // brace, we assume this is a braced initializer list.  Note that
408           // regardless how we mark inner braces here, we will overwrite the
409           // BlockKind later if we parse a braced list (where all blocks
410           // inside are by default braced lists), or when we explicitly detect
411           // blocks (for example while parsing lambdas).
412           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
413           // braced list in JS.
414           ProbablyBracedList =
415               (Style.Language == FormatStyle::LK_JavaScript &&
416                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
417                                 Keywords.kw_as)) ||
418               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
419               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
420                                tok::r_paren, tok::r_square, tok::l_brace,
421                                tok::l_square, tok::ellipsis) ||
422               (NextTok->is(tok::identifier) &&
423                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
424               (NextTok->is(tok::semi) &&
425                (!ExpectClassBody || LBraceStack.size() != 1)) ||
426               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
427         }
428         if (ProbablyBracedList) {
429           Tok->BlockKind = BK_BracedInit;
430           LBraceStack.back()->BlockKind = BK_BracedInit;
431         } else {
432           Tok->BlockKind = BK_Block;
433           LBraceStack.back()->BlockKind = BK_Block;
434         }
435       }
436       LBraceStack.pop_back();
437       break;
438     case tok::at:
439     case tok::semi:
440     case tok::kw_if:
441     case tok::kw_while:
442     case tok::kw_for:
443     case tok::kw_switch:
444     case tok::kw_try:
445     case tok::kw___try:
446       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
447         LBraceStack.back()->BlockKind = BK_Block;
448       break;
449     default:
450       break;
451     }
452     PrevTok = Tok;
453     Tok = NextTok;
454   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
455 
456   // Assume other blocks for all unclosed opening braces.
457   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
458     if (LBraceStack[i]->BlockKind == BK_Unknown)
459       LBraceStack[i]->BlockKind = BK_Block;
460   }
461 
462   FormatTok = Tokens->setPosition(StoredPosition);
463 }
464 
/// \brief Mixes the \c std::hash of \p v into the running hash \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
470 
471 size_t UnwrappedLineParser::computePPHash() const {
472   size_t h = 0;
473   for (const auto &i : PPStack) {
474     hash_combine(h, size_t(i.Kind));
475     hash_combine(h, i.Line);
476   }
477   return h;
478 }
479 
480 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
481                                      bool MunchSemi) {
482   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
483          "'{' or macro block token expected");
484   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
485   FormatTok->BlockKind = BK_Block;
486 
487   size_t PPStartHash = computePPHash();
488 
489   unsigned InitialLevel = Line->Level;
490   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
491 
492   if (MacroBlock && FormatTok->is(tok::l_paren))
493     parseParens();
494 
495   size_t NbPreprocessorDirectives =
496       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
497   addUnwrappedLine();
498   size_t OpeningLineIndex =
499       CurrentLines->empty()
500           ? (UnwrappedLine::kInvalidIndex)
501           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
502 
503   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
504                                           MustBeDeclaration);
505   if (AddLevel)
506     ++Line->Level;
507   parseLevel(/*HasOpeningBrace=*/true);
508 
509   if (eof())
510     return;
511 
512   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
513                  : !FormatTok->is(tok::r_brace)) {
514     Line->Level = InitialLevel;
515     FormatTok->BlockKind = BK_Block;
516     return;
517   }
518 
519   size_t PPEndHash = computePPHash();
520 
521   // Munch the closing brace.
522   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
523 
524   if (MacroBlock && FormatTok->is(tok::l_paren))
525     parseParens();
526 
527   if (MunchSemi && FormatTok->Tok.is(tok::semi))
528     nextToken();
529   Line->Level = InitialLevel;
530 
531   if (PPStartHash == PPEndHash) {
532     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
533     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
534       // Update the opening line to add the forward reference as well
535       (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
536           CurrentLines->size() - 1;
537     }
538   }
539 }
540 
541 static bool isGoogScope(const UnwrappedLine &Line) {
542   // FIXME: Closure-library specific stuff should not be hard-coded but be
543   // configurable.
544   if (Line.Tokens.size() < 4)
545     return false;
546   auto I = Line.Tokens.begin();
547   if (I->Tok->TokenText != "goog")
548     return false;
549   ++I;
550   if (I->Tok->isNot(tok::period))
551     return false;
552   ++I;
553   if (I->Tok->TokenText != "scope")
554     return false;
555   ++I;
556   return I->Tok->is(tok::l_paren);
557 }
558 
559 static bool isIIFE(const UnwrappedLine &Line,
560                    const AdditionalKeywords &Keywords) {
561   // Look for the start of an immediately invoked anonymous function.
562   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
563   // This is commonly done in JavaScript to create a new, anonymous scope.
564   // Example: (function() { ... })()
565   if (Line.Tokens.size() < 3)
566     return false;
567   auto I = Line.Tokens.begin();
568   if (I->Tok->isNot(tok::l_paren))
569     return false;
570   ++I;
571   if (I->Tok->isNot(Keywords.kw_function))
572     return false;
573   ++I;
574   return I->Tok->is(tok::l_paren);
575 }
576 
577 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
578                                    const FormatToken &InitialToken) {
579   if (InitialToken.is(tok::kw_namespace))
580     return Style.BraceWrapping.AfterNamespace;
581   if (InitialToken.is(tok::kw_class))
582     return Style.BraceWrapping.AfterClass;
583   if (InitialToken.is(tok::kw_union))
584     return Style.BraceWrapping.AfterUnion;
585   if (InitialToken.is(tok::kw_struct))
586     return Style.BraceWrapping.AfterStruct;
587   return false;
588 }
589 
590 void UnwrappedLineParser::parseChildBlock() {
591   FormatTok->BlockKind = BK_Block;
592   nextToken();
593   {
594     bool SkipIndent =
595         (Style.Language == FormatStyle::LK_JavaScript &&
596          (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
597     ScopedLineState LineState(*this);
598     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
599                                             /*MustBeDeclaration=*/false);
600     Line->Level += SkipIndent ? 0 : 1;
601     parseLevel(/*HasOpeningBrace=*/true);
602     flushComments(isOnNewLine(*FormatTok));
603     Line->Level -= SkipIndent ? 0 : 1;
604   }
605   nextToken();
606 }
607 
608 void UnwrappedLineParser::parsePPDirective() {
609   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
610   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
611   nextToken();
612 
613   if (!FormatTok->Tok.getIdentifierInfo()) {
614     parsePPUnknown();
615     return;
616   }
617 
618   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
619   case tok::pp_define:
620     parsePPDefine();
621     return;
622   case tok::pp_if:
623     parsePPIf(/*IfDef=*/false);
624     break;
625   case tok::pp_ifdef:
626   case tok::pp_ifndef:
627     parsePPIf(/*IfDef=*/true);
628     break;
629   case tok::pp_else:
630     parsePPElse();
631     break;
632   case tok::pp_elif:
633     parsePPElIf();
634     break;
635   case tok::pp_endif:
636     parsePPEndIf();
637     break;
638   default:
639     parsePPUnknown();
640     break;
641   }
642 }
643 
644 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
645   size_t Line = CurrentLines->size();
646   if (CurrentLines == &PreprocessorDirectives)
647     Line += Lines.size();
648 
649   if (Unreachable ||
650       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
651     PPStack.push_back({PP_Unreachable, Line});
652   else
653     PPStack.push_back({PP_Conditional, Line});
654 }
655 
656 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
657   ++PPBranchLevel;
658   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
659   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
660     PPLevelBranchIndex.push_back(0);
661     PPLevelBranchCount.push_back(0);
662   }
663   PPChainBranchIndex.push(0);
664   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
665   conditionalCompilationCondition(Unreachable || Skip);
666 }
667 
668 void UnwrappedLineParser::conditionalCompilationAlternative() {
669   if (!PPStack.empty())
670     PPStack.pop_back();
671   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
672   if (!PPChainBranchIndex.empty())
673     ++PPChainBranchIndex.top();
674   conditionalCompilationCondition(
675       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
676       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
677 }
678 
679 void UnwrappedLineParser::conditionalCompilationEnd() {
680   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
681   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
682     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
683       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
684     }
685   }
686   // Guard against #endif's without #if.
687   if (PPBranchLevel > -1)
688     --PPBranchLevel;
689   if (!PPChainBranchIndex.empty())
690     PPChainBranchIndex.pop();
691   if (!PPStack.empty())
692     PPStack.pop_back();
693 }
694 
695 void UnwrappedLineParser::parsePPIf(bool IfDef) {
696   bool IfNDef = FormatTok->is(tok::pp_ifndef);
697   nextToken();
698   bool Unreachable = false;
699   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
700     Unreachable = true;
701   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
702     Unreachable = true;
703   conditionalCompilationStart(Unreachable);
704   FormatToken *IfCondition = FormatTok;
705   // If there's a #ifndef on the first line, and the only lines before it are
706   // comments, it could be an include guard.
707   bool MaybeIncludeGuard = IfNDef;
708   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) {
709     for (auto &Line : Lines) {
710       if (!Line.Tokens.front().Tok->is(tok::comment)) {
711         MaybeIncludeGuard = false;
712         IncludeGuardRejected = true;
713         break;
714       }
715     }
716   }
717   --PPBranchLevel;
718   parsePPUnknown();
719   ++PPBranchLevel;
720   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard)
721     IfNdefCondition = IfCondition;
722 }
723 
724 void UnwrappedLineParser::parsePPElse() {
725   // If a potential include guard has an #else, it's not an include guard.
726   if (FoundIncludeGuardStart && PPBranchLevel == 0)
727     FoundIncludeGuardStart = false;
728   conditionalCompilationAlternative();
729   if (PPBranchLevel > -1)
730     --PPBranchLevel;
731   parsePPUnknown();
732   ++PPBranchLevel;
733 }
734 
735 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
736 
737 void UnwrappedLineParser::parsePPEndIf() {
738   conditionalCompilationEnd();
739   parsePPUnknown();
740   // If the #endif of a potential include guard is the last thing in the file,
741   // then we count it as a real include guard and subtract one from every
742   // preprocessor indent.
743   unsigned TokenPosition = Tokens->getPosition();
744   FormatToken *PeekNext = AllTokens[TokenPosition];
745   if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) &&
746       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
747     for (auto &Line : Lines)
748       if (Line.InPPDirective && Line.Level > 0)
749         --Line.Level;
750 }
751 
752 void UnwrappedLineParser::parsePPDefine() {
753   nextToken();
754 
755   if (FormatTok->Tok.getKind() != tok::identifier) {
756     parsePPUnknown();
757     return;
758   }
759   if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) {
760     FoundIncludeGuardStart = true;
761     for (auto &Line : Lines) {
762       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
763         FoundIncludeGuardStart = false;
764         break;
765       }
766     }
767   }
768   IfNdefCondition = nullptr;
769   nextToken();
770   if (FormatTok->Tok.getKind() == tok::l_paren &&
771       FormatTok->WhitespaceRange.getBegin() ==
772           FormatTok->WhitespaceRange.getEnd()) {
773     parseParens();
774   }
775   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
776     Line->Level += PPBranchLevel + 1;
777   addUnwrappedLine();
778   ++Line->Level;
779 
780   // Errors during a preprocessor directive can only affect the layout of the
781   // preprocessor directive, and thus we ignore them. An alternative approach
782   // would be to use the same approach we use on the file level (no
783   // re-indentation if there was a structural error) within the macro
784   // definition.
785   parseFile();
786 }
787 
788 void UnwrappedLineParser::parsePPUnknown() {
789   do {
790     nextToken();
791   } while (!eof());
792   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
793     Line->Level += PPBranchLevel + 1;
794   addUnwrappedLine();
795   IfNdefCondition = nullptr;
796 }
797 
798 // Here we blacklist certain tokens that are not usually the first token in an
799 // unwrapped line. This is used in attempt to distinguish macro calls without
800 // trailing semicolons from other constructs split to several lines.
801 static bool tokenCanStartNewLine(const clang::Token &Tok) {
802   // Semicolon can be a null-statement, l_square can be a start of a macro or
803   // a C++11 attribute, but this doesn't seem to be common.
804   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
805          Tok.isNot(tok::l_square) &&
806          // Tokens that can only be used as binary operators and a part of
807          // overloaded operator names.
808          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
809          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
810          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
811          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
812          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
813          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
814          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
815          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
816          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
817          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
818          Tok.isNot(tok::lesslessequal) &&
819          // Colon is used in labels, base class lists, initializer lists,
820          // range-based for loops, ternary operator, but should never be the
821          // first token in an unwrapped line.
822          Tok.isNot(tok::colon) &&
823          // 'noexcept' is a trailing annotation.
824          Tok.isNot(tok::kw_noexcept);
825 }
826 
827 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
828                           const FormatToken *FormatTok) {
829   // FIXME: This returns true for C/C++ keywords like 'struct'.
830   return FormatTok->is(tok::identifier) &&
831          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
832           !FormatTok->isOneOf(
833               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
834               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
835               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
836               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
837               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
838               Keywords.kw_instanceof, Keywords.kw_interface,
839               Keywords.kw_throws, Keywords.kw_from));
840 }
841 
842 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
843                                  const FormatToken *FormatTok) {
844   return FormatTok->Tok.isLiteral() ||
845          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
846          mustBeJSIdent(Keywords, FormatTok);
847 }
848 
849 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
850 // when encountered after a value (see mustBeJSIdentOrValue).
851 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
852                            const FormatToken *FormatTok) {
853   return FormatTok->isOneOf(
854       tok::kw_return, Keywords.kw_yield,
855       // conditionals
856       tok::kw_if, tok::kw_else,
857       // loops
858       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
859       // switch/case
860       tok::kw_switch, tok::kw_case,
861       // exceptions
862       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
863       // declaration
864       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
865       Keywords.kw_async, Keywords.kw_function,
866       // import/export
867       Keywords.kw_import, tok::kw_export);
868 }
869 
870 // readTokenWithJavaScriptASI reads the next token and terminates the current
871 // line if JavaScript Automatic Semicolon Insertion must
872 // happen between the current token and the next token.
873 //
874 // This method is conservative - it cannot cover all edge cases of JavaScript,
875 // but only aims to correctly handle certain well known cases. It *must not*
876 // return true in speculative cases.
877 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
878   FormatToken *Previous = FormatTok;
879   readToken();
880   FormatToken *Next = FormatTok;
881 
882   bool IsOnSameLine =
883       CommentsBeforeNextToken.empty()
884           ? Next->NewlinesBefore == 0
885           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
886   if (IsOnSameLine)
887     return;
888 
889   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
890   bool PreviousStartsTemplateExpr =
891       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
892   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
893     // If the token before the previous one is an '@', the previous token is an
894     // annotation and can precede another identifier/value.
895     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
896     if (PrePrevious->is(tok::at))
897       return;
898   }
899   if (Next->is(tok::exclaim) && PreviousMustBeValue)
900     return addUnwrappedLine();
901   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
902   bool NextEndsTemplateExpr =
903       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
904   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
905       (PreviousMustBeValue ||
906        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
907                          tok::minusminus)))
908     return addUnwrappedLine();
909   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
910       isJSDeclOrStmt(Keywords, Next))
911     return addUnwrappedLine();
912 }
913 
// Parses one structural element starting at the current token, which must
// not be an opening brace.
//
// The first switch hands off to a dedicated parser for tokens that start a
// specific construct (namespace, if, loops, switch/case labels, try, access
// specifiers, Objective-C '@' keywords, ...). Those cases that 'break' instead
// of returning fall into the generic loop below, which consumes tokens until
// something ends the element (';', '}', a block, a label, a macro-like
// identifier followed by a newline, or end of file).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // In TableGen, an include directive with its optional string literal forms
  // an unwrapped line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    // '@{' is parsed as a braced list; the element then continues below.
    if (FormatTok->Tok.is(tok::l_brace)) {
      nextToken();
      parseBracedList();
      break;
    }
    // Otherwise dispatch on the Objective-C keyword following the '@'.
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::objc_autoreleasepool:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterObjCDeclaration)
          addUnwrappedLine();
        parseBlock(/*MustBeDeclaration=*/false);
      }
      addUnwrappedLine();
      return;
    case tok::objc_try:
      // This branch isn't strictly necessary (the kw_try case below would
      // do this too after the tok::at is parsed above).  But be explicit.
      parseTryCatch();
      return;
    default:
      break;
    }
    break;
  case tok::kw_asm:
    nextToken();
    // For 'asm { ... }', both braces are typed TT_InlineASMBrace and every
    // token in between is marked Finalized.
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    // 'inline namespace' is parsed like a namespace; any other use of
    // 'inline' continues as a regular declaration.
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript the keyword is simply consumed; in all other
    // languages it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." { ... }': parse the braces as a declaration block without
    // adding an indentation level.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import [public] "file" [;]' forms its own unwrapped line.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals:' / 'slots:' style sections (and their Q_ variants) followed
    // by a colon end the line right after the colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic part: consume tokens one at a time until a case below returns or
  // end of file is reached.
  do {
    const FormatToken *Previous = getPreviousToken();
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      // Consume the operator token right after 'operator' so it is not
      // handled by one of the other cases of this switch.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type/identifier, a parameter list and a
      // braced body that is parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Treat the identifier as a macro only if it is all upper case, is
        // function-like or at least five characters long, is followed by a
        // newline, and the next token can start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list that is
      // closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
               FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1306 
1307 bool UnwrappedLineParser::tryToParseLambda() {
1308   if (!Style.isCpp()) {
1309     nextToken();
1310     return false;
1311   }
1312   const FormatToken* Previous = getPreviousToken();
1313   if (Previous &&
1314       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1315                          tok::kw_delete) ||
1316        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1317     nextToken();
1318     return false;
1319   }
1320   assert(FormatTok->is(tok::l_square));
1321   FormatToken &LSquare = *FormatTok;
1322   if (!tryToParseLambdaIntroducer())
1323     return false;
1324 
1325   while (FormatTok->isNot(tok::l_brace)) {
1326     if (FormatTok->isSimpleTypeSpecifier()) {
1327       nextToken();
1328       continue;
1329     }
1330     switch (FormatTok->Tok.getKind()) {
1331     case tok::l_brace:
1332       break;
1333     case tok::l_paren:
1334       parseParens();
1335       break;
1336     case tok::amp:
1337     case tok::star:
1338     case tok::kw_const:
1339     case tok::comma:
1340     case tok::less:
1341     case tok::greater:
1342     case tok::identifier:
1343     case tok::numeric_constant:
1344     case tok::coloncolon:
1345     case tok::kw_mutable:
1346       nextToken();
1347       break;
1348     case tok::arrow:
1349       FormatTok->Type = TT_LambdaArrow;
1350       nextToken();
1351       break;
1352     default:
1353       return true;
1354     }
1355   }
1356   LSquare.Type = TT_LambdaLSquare;
1357   parseChildBlock();
1358   return true;
1359 }
1360 
1361 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1362   nextToken();
1363   if (FormatTok->is(tok::equal)) {
1364     nextToken();
1365     if (FormatTok->is(tok::r_square)) {
1366       nextToken();
1367       return true;
1368     }
1369     if (FormatTok->isNot(tok::comma))
1370       return false;
1371     nextToken();
1372   } else if (FormatTok->is(tok::amp)) {
1373     nextToken();
1374     if (FormatTok->is(tok::r_square)) {
1375       nextToken();
1376       return true;
1377     }
1378     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1379       return false;
1380     }
1381     if (FormatTok->is(tok::comma))
1382       nextToken();
1383   } else if (FormatTok->is(tok::r_square)) {
1384     nextToken();
1385     return true;
1386   }
1387   do {
1388     if (FormatTok->is(tok::amp))
1389       nextToken();
1390     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1391       return false;
1392     nextToken();
1393     if (FormatTok->is(tok::ellipsis))
1394       nextToken();
1395     if (FormatTok->is(tok::comma)) {
1396       nextToken();
1397     } else if (FormatTok->is(tok::r_square)) {
1398       nextToken();
1399       return true;
1400     } else {
1401       return false;
1402     }
1403   } while (!eof());
1404   return false;
1405 }
1406 
1407 void UnwrappedLineParser::tryToParseJSFunction() {
1408   assert(FormatTok->is(Keywords.kw_function) ||
1409          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1410   if (FormatTok->is(Keywords.kw_async))
1411     nextToken();
1412   // Consume "function".
1413   nextToken();
1414 
1415   // Consume * (generator function). Treat it like C++'s overloaded operators.
1416   if (FormatTok->is(tok::star)) {
1417     FormatTok->Type = TT_OverloadedOperator;
1418     nextToken();
1419   }
1420 
1421   // Consume function name.
1422   if (FormatTok->is(tok::identifier))
1423     nextToken();
1424 
1425   if (FormatTok->isNot(tok::l_paren))
1426     return;
1427 
1428   // Parse formal parameter list.
1429   parseParens();
1430 
1431   if (FormatTok->is(tok::colon)) {
1432     // Parse a type definition.
1433     nextToken();
1434 
1435     // Eat the type declaration. For braced inline object types, balance braces,
1436     // otherwise just parse until finding an l_brace for the function body.
1437     if (FormatTok->is(tok::l_brace))
1438       tryToParseBracedList();
1439     else
1440       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1441         nextToken();
1442   }
1443 
1444   if (FormatTok->is(tok::semi))
1445     return;
1446 
1447   parseChildBlock();
1448 }
1449 
1450 bool UnwrappedLineParser::tryToParseBracedList() {
1451   if (FormatTok->BlockKind == BK_Unknown)
1452     calculateBraceTypes();
1453   assert(FormatTok->BlockKind != BK_Unknown);
1454   if (FormatTok->BlockKind == BK_Block)
1455     return false;
1456   nextToken();
1457   parseBracedList();
1458   return true;
1459 }
1460 
1461 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1462                                           tok::TokenKind ClosingBraceKind) {
1463   bool HasError = false;
1464 
1465   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1466   // replace this by using parseAssigmentExpression() inside.
1467   do {
1468     if (Style.Language == FormatStyle::LK_JavaScript) {
1469       if (FormatTok->is(Keywords.kw_function) ||
1470           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1471         tryToParseJSFunction();
1472         continue;
1473       }
1474       if (FormatTok->is(TT_JsFatArrow)) {
1475         nextToken();
1476         // Fat arrows can be followed by simple expressions or by child blocks
1477         // in curly braces.
1478         if (FormatTok->is(tok::l_brace)) {
1479           parseChildBlock();
1480           continue;
1481         }
1482       }
1483       if (FormatTok->is(tok::l_brace)) {
1484         // Could be a method inside of a braced list `{a() { return 1; }}`.
1485         if (tryToParseBracedList())
1486           continue;
1487         parseChildBlock();
1488       }
1489     }
1490     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1491       nextToken();
1492       return !HasError;
1493     }
1494     switch (FormatTok->Tok.getKind()) {
1495     case tok::caret:
1496       nextToken();
1497       if (FormatTok->is(tok::l_brace)) {
1498         parseChildBlock();
1499       }
1500       break;
1501     case tok::l_square:
1502       tryToParseLambda();
1503       break;
1504     case tok::l_paren:
1505       parseParens();
1506       // JavaScript can just have free standing methods and getters/setters in
1507       // object literals. Detect them by a "{" following ")".
1508       if (Style.Language == FormatStyle::LK_JavaScript) {
1509         if (FormatTok->is(tok::l_brace))
1510           parseChildBlock();
1511         break;
1512       }
1513       break;
1514     case tok::l_brace:
1515       // Assume there are no blocks inside a braced init list apart
1516       // from the ones we explicitly parse out (like lambdas).
1517       FormatTok->BlockKind = BK_BracedInit;
1518       nextToken();
1519       parseBracedList();
1520       break;
1521     case tok::less:
1522       if (Style.Language == FormatStyle::LK_Proto) {
1523         nextToken();
1524         parseBracedList(/*ContinueOnSemicolons=*/false,
1525                         /*ClosingBraceKind=*/tok::greater);
1526       } else {
1527         nextToken();
1528       }
1529       break;
1530     case tok::semi:
1531       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1532       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1533       // used for error recovery if we have otherwise determined that this is
1534       // a braced list.
1535       if (Style.Language == FormatStyle::LK_JavaScript) {
1536         nextToken();
1537         break;
1538       }
1539       HasError = true;
1540       if (!ContinueOnSemicolons)
1541         return !HasError;
1542       nextToken();
1543       break;
1544     case tok::comma:
1545       nextToken();
1546       break;
1547     default:
1548       nextToken();
1549       break;
1550     }
1551   } while (!eof());
1552   return false;
1553 }
1554 
1555 void UnwrappedLineParser::parseParens() {
1556   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1557   nextToken();
1558   do {
1559     switch (FormatTok->Tok.getKind()) {
1560     case tok::l_paren:
1561       parseParens();
1562       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1563         parseChildBlock();
1564       break;
1565     case tok::r_paren:
1566       nextToken();
1567       return;
1568     case tok::r_brace:
1569       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1570       return;
1571     case tok::l_square:
1572       tryToParseLambda();
1573       break;
1574     case tok::l_brace:
1575       if (!tryToParseBracedList())
1576         parseChildBlock();
1577       break;
1578     case tok::at:
1579       nextToken();
1580       if (FormatTok->Tok.is(tok::l_brace)) {
1581         nextToken();
1582         parseBracedList();
1583       }
1584       break;
1585     case tok::kw_class:
1586       if (Style.Language == FormatStyle::LK_JavaScript)
1587         parseRecord(/*ParseAsExpr=*/true);
1588       else
1589         nextToken();
1590       break;
1591     case tok::identifier:
1592       if (Style.Language == FormatStyle::LK_JavaScript &&
1593           (FormatTok->is(Keywords.kw_function) ||
1594            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1595         tryToParseJSFunction();
1596       else
1597         nextToken();
1598       break;
1599     default:
1600       nextToken();
1601       break;
1602     }
1603   } while (!eof());
1604 }
1605 
1606 void UnwrappedLineParser::parseSquare() {
1607   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1608   if (tryToParseLambda())
1609     return;
1610   do {
1611     switch (FormatTok->Tok.getKind()) {
1612     case tok::l_paren:
1613       parseParens();
1614       break;
1615     case tok::r_square:
1616       nextToken();
1617       return;
1618     case tok::r_brace:
1619       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1620       return;
1621     case tok::l_square:
1622       parseSquare();
1623       break;
1624     case tok::l_brace: {
1625       if (!tryToParseBracedList())
1626         parseChildBlock();
1627       break;
1628     }
1629     case tok::at:
1630       nextToken();
1631       if (FormatTok->Tok.is(tok::l_brace)) {
1632         nextToken();
1633         parseBracedList();
1634       }
1635       break;
1636     default:
1637       nextToken();
1638       break;
1639     }
1640   } while (!eof());
1641 }
1642 
1643 void UnwrappedLineParser::parseIfThenElse() {
1644   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1645   nextToken();
1646   if (FormatTok->Tok.is(tok::kw_constexpr))
1647     nextToken();
1648   if (FormatTok->Tok.is(tok::l_paren))
1649     parseParens();
1650   bool NeedsUnwrappedLine = false;
1651   if (FormatTok->Tok.is(tok::l_brace)) {
1652     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1653     parseBlock(/*MustBeDeclaration=*/false);
1654     if (Style.BraceWrapping.BeforeElse)
1655       addUnwrappedLine();
1656     else
1657       NeedsUnwrappedLine = true;
1658   } else {
1659     addUnwrappedLine();
1660     ++Line->Level;
1661     parseStructuralElement();
1662     --Line->Level;
1663   }
1664   if (FormatTok->Tok.is(tok::kw_else)) {
1665     nextToken();
1666     if (FormatTok->Tok.is(tok::l_brace)) {
1667       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1668       parseBlock(/*MustBeDeclaration=*/false);
1669       addUnwrappedLine();
1670     } else if (FormatTok->Tok.is(tok::kw_if)) {
1671       parseIfThenElse();
1672     } else {
1673       addUnwrappedLine();
1674       ++Line->Level;
1675       parseStructuralElement();
1676       if (FormatTok->is(tok::eof))
1677         addUnwrappedLine();
1678       --Line->Level;
1679     }
1680   } else if (NeedsUnwrappedLine) {
1681     addUnwrappedLine();
1682   }
1683 }
1684 
1685 void UnwrappedLineParser::parseTryCatch() {
1686   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1687   nextToken();
1688   bool NeedsUnwrappedLine = false;
1689   if (FormatTok->is(tok::colon)) {
1690     // We are in a function try block, what comes is an initializer list.
1691     nextToken();
1692     while (FormatTok->is(tok::identifier)) {
1693       nextToken();
1694       if (FormatTok->is(tok::l_paren))
1695         parseParens();
1696       if (FormatTok->is(tok::comma))
1697         nextToken();
1698     }
1699   }
1700   // Parse try with resource.
1701   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1702     parseParens();
1703   }
1704   if (FormatTok->is(tok::l_brace)) {
1705     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1706     parseBlock(/*MustBeDeclaration=*/false);
1707     if (Style.BraceWrapping.BeforeCatch) {
1708       addUnwrappedLine();
1709     } else {
1710       NeedsUnwrappedLine = true;
1711     }
1712   } else if (!FormatTok->is(tok::kw_catch)) {
1713     // The C++ standard requires a compound-statement after a try.
1714     // If there's none, we try to assume there's a structuralElement
1715     // and try to continue.
1716     addUnwrappedLine();
1717     ++Line->Level;
1718     parseStructuralElement();
1719     --Line->Level;
1720   }
1721   while (1) {
1722     if (FormatTok->is(tok::at))
1723       nextToken();
1724     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1725                              tok::kw___finally) ||
1726           ((Style.Language == FormatStyle::LK_Java ||
1727             Style.Language == FormatStyle::LK_JavaScript) &&
1728            FormatTok->is(Keywords.kw_finally)) ||
1729           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1730            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1731       break;
1732     nextToken();
1733     while (FormatTok->isNot(tok::l_brace)) {
1734       if (FormatTok->is(tok::l_paren)) {
1735         parseParens();
1736         continue;
1737       }
1738       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1739         return;
1740       nextToken();
1741     }
1742     NeedsUnwrappedLine = false;
1743     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1744     parseBlock(/*MustBeDeclaration=*/false);
1745     if (Style.BraceWrapping.BeforeCatch)
1746       addUnwrappedLine();
1747     else
1748       NeedsUnwrappedLine = true;
1749   }
1750   if (NeedsUnwrappedLine)
1751     addUnwrappedLine();
1752 }
1753 
1754 void UnwrappedLineParser::parseNamespace() {
1755   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1756 
1757   const FormatToken &InitialToken = *FormatTok;
1758   nextToken();
1759   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1760     nextToken();
1761   if (FormatTok->Tok.is(tok::l_brace)) {
1762     if (ShouldBreakBeforeBrace(Style, InitialToken))
1763       addUnwrappedLine();
1764 
1765     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1766                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1767                      DeclarationScopeStack.size() > 1);
1768     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1769     // Munch the semicolon after a namespace. This is more common than one would
1770     // think. Puttin the semicolon into its own line is very ugly.
1771     if (FormatTok->Tok.is(tok::semi))
1772       nextToken();
1773     addUnwrappedLine();
1774   }
1775   // FIXME: Add error handling.
1776 }
1777 
1778 void UnwrappedLineParser::parseNew() {
1779   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1780   nextToken();
1781   if (Style.Language != FormatStyle::LK_Java)
1782     return;
1783 
1784   // In Java, we can parse everything up to the parens, which aren't optional.
1785   do {
1786     // There should not be a ;, { or } before the new's open paren.
1787     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1788       return;
1789 
1790     // Consume the parens.
1791     if (FormatTok->is(tok::l_paren)) {
1792       parseParens();
1793 
1794       // If there is a class body of an anonymous class, consume that as child.
1795       if (FormatTok->is(tok::l_brace))
1796         parseChildBlock();
1797       return;
1798     }
1799     nextToken();
1800   } while (!eof());
1801 }
1802 
1803 void UnwrappedLineParser::parseForOrWhileLoop() {
1804   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1805          "'for', 'while' or foreach macro expected");
1806   nextToken();
1807   // JS' for await ( ...
1808   if (Style.Language == FormatStyle::LK_JavaScript &&
1809       FormatTok->is(Keywords.kw_await))
1810     nextToken();
1811   if (FormatTok->Tok.is(tok::l_paren))
1812     parseParens();
1813   if (FormatTok->Tok.is(tok::l_brace)) {
1814     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1815     parseBlock(/*MustBeDeclaration=*/false);
1816     addUnwrappedLine();
1817   } else {
1818     addUnwrappedLine();
1819     ++Line->Level;
1820     parseStructuralElement();
1821     --Line->Level;
1822   }
1823 }
1824 
1825 void UnwrappedLineParser::parseDoWhile() {
1826   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1827   nextToken();
1828   if (FormatTok->Tok.is(tok::l_brace)) {
1829     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1830     parseBlock(/*MustBeDeclaration=*/false);
1831     if (Style.BraceWrapping.IndentBraces)
1832       addUnwrappedLine();
1833   } else {
1834     addUnwrappedLine();
1835     ++Line->Level;
1836     parseStructuralElement();
1837     --Line->Level;
1838   }
1839 
1840   // FIXME: Add error handling.
1841   if (!FormatTok->Tok.is(tok::kw_while)) {
1842     addUnwrappedLine();
1843     return;
1844   }
1845 
1846   nextToken();
1847   parseStructuralElement();
1848 }
1849 
1850 void UnwrappedLineParser::parseLabel() {
1851   nextToken();
1852   unsigned OldLineLevel = Line->Level;
1853   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1854     --Line->Level;
1855   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1856     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1857     parseBlock(/*MustBeDeclaration=*/false);
1858     if (FormatTok->Tok.is(tok::kw_break)) {
1859       if (Style.BraceWrapping.AfterControlStatement)
1860         addUnwrappedLine();
1861       parseStructuralElement();
1862     }
1863     addUnwrappedLine();
1864   } else {
1865     if (FormatTok->is(tok::semi))
1866       nextToken();
1867     addUnwrappedLine();
1868   }
1869   Line->Level = OldLineLevel;
1870   if (FormatTok->isNot(tok::l_brace)) {
1871     parseStructuralElement();
1872     addUnwrappedLine();
1873   }
1874 }
1875 
1876 void UnwrappedLineParser::parseCaseLabel() {
1877   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1878   // FIXME: fix handling of complex expressions here.
1879   do {
1880     nextToken();
1881   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1882   parseLabel();
1883 }
1884 
1885 void UnwrappedLineParser::parseSwitch() {
1886   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1887   nextToken();
1888   if (FormatTok->Tok.is(tok::l_paren))
1889     parseParens();
1890   if (FormatTok->Tok.is(tok::l_brace)) {
1891     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1892     parseBlock(/*MustBeDeclaration=*/false);
1893     addUnwrappedLine();
1894   } else {
1895     addUnwrappedLine();
1896     ++Line->Level;
1897     parseStructuralElement();
1898     --Line->Level;
1899   }
1900 }
1901 
1902 void UnwrappedLineParser::parseAccessSpecifier() {
1903   nextToken();
1904   // Understand Qt's slots.
1905   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1906     nextToken();
1907   // Otherwise, we don't know what it is, and we'd better keep the next token.
1908   if (FormatTok->Tok.is(tok::colon))
1909     nextToken();
1910   addUnwrappedLine();
1911 }
1912 
1913 bool UnwrappedLineParser::parseEnum() {
1914   // Won't be 'enum' for NS_ENUMs.
1915   if (FormatTok->Tok.is(tok::kw_enum))
1916     nextToken();
1917 
1918   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1919   // declarations. An "enum" keyword followed by a colon would be a syntax
1920   // error and thus assume it is just an identifier.
1921   if (Style.Language == FormatStyle::LK_JavaScript &&
1922       FormatTok->isOneOf(tok::colon, tok::question))
1923     return false;
1924 
1925   // Eat up enum class ...
1926   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1927     nextToken();
1928 
1929   while (FormatTok->Tok.getIdentifierInfo() ||
1930          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1931                             tok::greater, tok::comma, tok::question)) {
1932     nextToken();
1933     // We can have macros or attributes in between 'enum' and the enum name.
1934     if (FormatTok->is(tok::l_paren))
1935       parseParens();
1936     if (FormatTok->is(tok::identifier)) {
1937       nextToken();
1938       // If there are two identifiers in a row, this is likely an elaborate
1939       // return type. In Java, this can be "implements", etc.
1940       if (Style.isCpp() && FormatTok->is(tok::identifier))
1941         return false;
1942     }
1943   }
1944 
1945   // Just a declaration or something is wrong.
1946   if (FormatTok->isNot(tok::l_brace))
1947     return true;
1948   FormatTok->BlockKind = BK_Block;
1949 
1950   if (Style.Language == FormatStyle::LK_Java) {
1951     // Java enums are different.
1952     parseJavaEnumBody();
1953     return true;
1954   }
1955   if (Style.Language == FormatStyle::LK_Proto) {
1956     parseBlock(/*MustBeDeclaration=*/true);
1957     return true;
1958   }
1959 
1960   // Parse enum body.
1961   nextToken();
1962   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1963   if (HasError) {
1964     if (FormatTok->is(tok::semi))
1965       nextToken();
1966     addUnwrappedLine();
1967   }
1968   return true;
1969 
1970   // There is no addUnwrappedLine() here so that we fall through to parsing a
1971   // structural element afterwards. Thus, in "enum A {} n, m;",
1972   // "} n, m;" will end up in one unwrapped line.
1973 }
1974 
1975 void UnwrappedLineParser::parseJavaEnumBody() {
1976   // Determine whether the enum is simple, i.e. does not have a semicolon or
1977   // constants with class bodies. Simple enums can be formatted like braced
1978   // lists, contracted to a single line, etc.
1979   unsigned StoredPosition = Tokens->getPosition();
1980   bool IsSimple = true;
1981   FormatToken *Tok = Tokens->getNextToken();
1982   while (Tok) {
1983     if (Tok->is(tok::r_brace))
1984       break;
1985     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1986       IsSimple = false;
1987       break;
1988     }
1989     // FIXME: This will also mark enums with braces in the arguments to enum
1990     // constants as "not simple". This is probably fine in practice, though.
1991     Tok = Tokens->getNextToken();
1992   }
1993   FormatTok = Tokens->setPosition(StoredPosition);
1994 
1995   if (IsSimple) {
1996     nextToken();
1997     parseBracedList();
1998     addUnwrappedLine();
1999     return;
2000   }
2001 
2002   // Parse the body of a more complex enum.
2003   // First add a line for everything up to the "{".
2004   nextToken();
2005   addUnwrappedLine();
2006   ++Line->Level;
2007 
2008   // Parse the enum constants.
2009   while (FormatTok) {
2010     if (FormatTok->is(tok::l_brace)) {
2011       // Parse the constant's class body.
2012       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2013                  /*MunchSemi=*/false);
2014     } else if (FormatTok->is(tok::l_paren)) {
2015       parseParens();
2016     } else if (FormatTok->is(tok::comma)) {
2017       nextToken();
2018       addUnwrappedLine();
2019     } else if (FormatTok->is(tok::semi)) {
2020       nextToken();
2021       addUnwrappedLine();
2022       break;
2023     } else if (FormatTok->is(tok::r_brace)) {
2024       addUnwrappedLine();
2025       break;
2026     } else {
2027       nextToken();
2028     }
2029   }
2030 
2031   // Parse the class body after the enum's ";" if any.
2032   parseLevel(/*HasOpeningBrace=*/true);
2033   nextToken();
2034   --Line->Level;
2035   addUnwrappedLine();
2036 }
2037 
2038 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2039   const FormatToken &InitialToken = *FormatTok;
2040   nextToken();
2041 
2042   // The actual identifier can be a nested name specifier, and in macros
2043   // it is often token-pasted.
2044   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2045                             tok::kw___attribute, tok::kw___declspec,
2046                             tok::kw_alignas) ||
2047          ((Style.Language == FormatStyle::LK_Java ||
2048            Style.Language == FormatStyle::LK_JavaScript) &&
2049           FormatTok->isOneOf(tok::period, tok::comma))) {
2050     if (Style.Language == FormatStyle::LK_JavaScript &&
2051         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2052       // JavaScript/TypeScript supports inline object types in
2053       // extends/implements positions:
2054       //     class Foo implements {bar: number} { }
2055       nextToken();
2056       if (FormatTok->is(tok::l_brace)) {
2057         tryToParseBracedList();
2058         continue;
2059       }
2060     }
2061     bool IsNonMacroIdentifier =
2062         FormatTok->is(tok::identifier) &&
2063         FormatTok->TokenText != FormatTok->TokenText.upper();
2064     nextToken();
2065     // We can have macros or attributes in between 'class' and the class name.
2066     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2067       parseParens();
2068   }
2069 
2070   // Note that parsing away template declarations here leads to incorrectly
2071   // accepting function declarations as record declarations.
2072   // In general, we cannot solve this problem. Consider:
2073   // class A<int> B() {}
2074   // which can be a function definition or a class definition when B() is a
2075   // macro. If we find enough real-world cases where this is a problem, we
2076   // can parse for the 'template' keyword in the beginning of the statement,
2077   // and thus rule out the record production in case there is no template
2078   // (this would still leave us with an ambiguity between template function
2079   // and class declarations).
2080   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2081     while (!eof()) {
2082       if (FormatTok->is(tok::l_brace)) {
2083         calculateBraceTypes(/*ExpectClassBody=*/true);
2084         if (!tryToParseBracedList())
2085           break;
2086       }
2087       if (FormatTok->Tok.is(tok::semi))
2088         return;
2089       nextToken();
2090     }
2091   }
2092   if (FormatTok->Tok.is(tok::l_brace)) {
2093     if (ParseAsExpr) {
2094       parseChildBlock();
2095     } else {
2096       if (ShouldBreakBeforeBrace(Style, InitialToken))
2097         addUnwrappedLine();
2098 
2099       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2100                  /*MunchSemi=*/false);
2101     }
2102   }
2103   // There is no addUnwrappedLine() here so that we fall through to parsing a
2104   // structural element afterwards. Thus, in "class A {} n, m;",
2105   // "} n, m;" will end up in one unwrapped line.
2106 }
2107 
2108 void UnwrappedLineParser::parseObjCProtocolList() {
2109   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2110   do
2111     nextToken();
2112   while (!eof() && FormatTok->Tok.isNot(tok::greater));
2113   nextToken(); // Skip '>'.
2114 }
2115 
2116 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2117   do {
2118     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2119       nextToken();
2120       addUnwrappedLine();
2121       break;
2122     }
2123     if (FormatTok->is(tok::l_brace)) {
2124       parseBlock(/*MustBeDeclaration=*/false);
2125       // In ObjC interfaces, nothing should be following the "}".
2126       addUnwrappedLine();
2127     } else if (FormatTok->is(tok::r_brace)) {
2128       // Ignore stray "}". parseStructuralElement doesn't consume them.
2129       nextToken();
2130       addUnwrappedLine();
2131     } else {
2132       parseStructuralElement();
2133     }
2134   } while (!eof());
2135 }
2136 
2137 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2138   nextToken();
2139   nextToken(); // interface name
2140 
2141   // @interface can be followed by either a base class, or a category.
2142   if (FormatTok->Tok.is(tok::colon)) {
2143     nextToken();
2144     nextToken(); // base class name
2145   } else if (FormatTok->Tok.is(tok::l_paren))
2146     // Skip category, if present.
2147     parseParens();
2148 
2149   if (FormatTok->Tok.is(tok::less))
2150     parseObjCProtocolList();
2151 
2152   if (FormatTok->Tok.is(tok::l_brace)) {
2153     if (Style.BraceWrapping.AfterObjCDeclaration)
2154       addUnwrappedLine();
2155     parseBlock(/*MustBeDeclaration=*/true);
2156   }
2157 
2158   // With instance variables, this puts '}' on its own line.  Without instance
2159   // variables, this ends the @interface line.
2160   addUnwrappedLine();
2161 
2162   parseObjCUntilAtEnd();
2163 }
2164 
2165 void UnwrappedLineParser::parseObjCProtocol() {
2166   nextToken();
2167   nextToken(); // protocol name
2168 
2169   if (FormatTok->Tok.is(tok::less))
2170     parseObjCProtocolList();
2171 
2172   // Check for protocol declaration.
2173   if (FormatTok->Tok.is(tok::semi)) {
2174     nextToken();
2175     return addUnwrappedLine();
2176   }
2177 
2178   addUnwrappedLine();
2179   parseObjCUntilAtEnd();
2180 }
2181 
2182 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2183   bool IsImport = FormatTok->is(Keywords.kw_import);
2184   assert(IsImport || FormatTok->is(tok::kw_export));
2185   nextToken();
2186 
2187   // Consume the "default" in "export default class/function".
2188   if (FormatTok->is(tok::kw_default))
2189     nextToken();
2190 
2191   // Consume "async function", "function" and "default function", so that these
2192   // get parsed as free-standing JS functions, i.e. do not require a trailing
2193   // semicolon.
2194   if (FormatTok->is(Keywords.kw_async))
2195     nextToken();
2196   if (FormatTok->is(Keywords.kw_function)) {
2197     nextToken();
2198     return;
2199   }
2200 
2201   // For imports, `export *`, `export {...}`, consume the rest of the line up
2202   // to the terminating `;`. For everything else, just return and continue
2203   // parsing the structural element, i.e. the declaration or expression for
2204   // `export default`.
2205   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2206       !FormatTok->isStringLiteral())
2207     return;
2208 
2209   while (!eof()) {
2210     if (FormatTok->is(tok::semi))
2211       return;
2212     if (Line->Tokens.size() == 0) {
2213       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2214       // import statement should terminate.
2215       return;
2216     }
2217     if (FormatTok->is(tok::l_brace)) {
2218       FormatTok->BlockKind = BK_Block;
2219       nextToken();
2220       parseBracedList();
2221     } else {
2222       nextToken();
2223     }
2224   }
2225 }
2226 
2227 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2228                                                  StringRef Prefix = "") {
2229   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2230                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2231   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2232                                                     E = Line.Tokens.end();
2233        I != E; ++I) {
2234     llvm::dbgs() << I->Tok->Tok.getName() << "["
2235                  << "T=" << I->Tok->Type
2236                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2237   }
2238   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2239                                                     E = Line.Tokens.end();
2240        I != E; ++I) {
2241     const UnwrappedLineNode &Node = *I;
2242     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2243              I = Node.Children.begin(),
2244              E = Node.Children.end();
2245          I != E; ++I) {
2246       printDebugInfo(*I, "\nChild: ");
2247     }
2248   }
2249   llvm::dbgs() << "\n";
2250 }
2251 
2252 void UnwrappedLineParser::addUnwrappedLine() {
2253   if (Line->Tokens.empty())
2254     return;
2255   DEBUG({
2256     if (CurrentLines == &Lines)
2257       printDebugInfo(*Line);
2258   });
2259   CurrentLines->push_back(std::move(*Line));
2260   Line->Tokens.clear();
2261   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2262   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2263     CurrentLines->append(
2264         std::make_move_iterator(PreprocessorDirectives.begin()),
2265         std::make_move_iterator(PreprocessorDirectives.end()));
2266     PreprocessorDirectives.clear();
2267   }
2268 }
2269 
2270 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2271 
2272 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2273   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2274          FormatTok.NewlinesBefore > 0;
2275 }
2276 
2277 // Checks if \p FormatTok is a line comment that continues the line comment
2278 // section on \p Line.
2279 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2280                                         const UnwrappedLine &Line,
2281                                         llvm::Regex &CommentPragmasRegex) {
2282   if (Line.Tokens.empty())
2283     return false;
2284 
2285   StringRef IndentContent = FormatTok.TokenText;
2286   if (FormatTok.TokenText.startswith("//") ||
2287       FormatTok.TokenText.startswith("/*"))
2288     IndentContent = FormatTok.TokenText.substr(2);
2289   if (CommentPragmasRegex.match(IndentContent))
2290     return false;
2291 
2292   // If Line starts with a line comment, then FormatTok continues the comment
2293   // section if its original column is greater or equal to the original start
2294   // column of the line.
2295   //
2296   // Define the min column token of a line as follows: if a line ends in '{' or
2297   // contains a '{' followed by a line comment, then the min column token is
2298   // that '{'. Otherwise, the min column token of the line is the first token of
2299   // the line.
2300   //
2301   // If Line starts with a token other than a line comment, then FormatTok
2302   // continues the comment section if its original column is greater than the
2303   // original start column of the min column token of the line.
2304   //
2305   // For example, the second line comment continues the first in these cases:
2306   //
2307   // // first line
2308   // // second line
2309   //
2310   // and:
2311   //
2312   // // first line
2313   //  // second line
2314   //
2315   // and:
2316   //
2317   // int i; // first line
2318   //  // second line
2319   //
2320   // and:
2321   //
2322   // do { // first line
2323   //      // second line
2324   //   int i;
2325   // } while (true);
2326   //
2327   // and:
2328   //
2329   // enum {
2330   //   a, // first line
2331   //    // second line
2332   //   b
2333   // };
2334   //
2335   // The second line comment doesn't continue the first in these cases:
2336   //
2337   //   // first line
2338   //  // second line
2339   //
2340   // and:
2341   //
2342   // int i; // first line
2343   // // second line
2344   //
2345   // and:
2346   //
2347   // do { // first line
2348   //   // second line
2349   //   int i;
2350   // } while (true);
2351   //
2352   // and:
2353   //
2354   // enum {
2355   //   a, // first line
2356   //   // second line
2357   // };
2358   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2359 
2360   // Scan for '{//'. If found, use the column of '{' as a min column for line
2361   // comment section continuation.
2362   const FormatToken *PreviousToken = nullptr;
2363   for (const UnwrappedLineNode &Node : Line.Tokens) {
2364     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2365         isLineComment(*Node.Tok)) {
2366       MinColumnToken = PreviousToken;
2367       break;
2368     }
2369     PreviousToken = Node.Tok;
2370 
2371     // Grab the last newline preceding a token in this unwrapped line.
2372     if (Node.Tok->NewlinesBefore > 0) {
2373       MinColumnToken = Node.Tok;
2374     }
2375   }
2376   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2377     MinColumnToken = PreviousToken;
2378   }
2379 
2380   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2381                               MinColumnToken);
2382 }
2383 
2384 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2385   bool JustComments = Line->Tokens.empty();
2386   for (SmallVectorImpl<FormatToken *>::const_iterator
2387            I = CommentsBeforeNextToken.begin(),
2388            E = CommentsBeforeNextToken.end();
2389        I != E; ++I) {
2390     // Line comments that belong to the same line comment section are put on the
2391     // same line since later we might want to reflow content between them.
2392     // Additional fine-grained breaking of line comment sections is controlled
2393     // by the class BreakableLineCommentSection in case it is desirable to keep
2394     // several line comment sections in the same unwrapped line.
2395     //
2396     // FIXME: Consider putting separate line comment sections as children to the
2397     // unwrapped line instead.
2398     (*I)->ContinuesLineCommentSection =
2399         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2400     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2401       addUnwrappedLine();
2402     pushToken(*I);
2403   }
2404   if (NewlineBeforeNext && JustComments)
2405     addUnwrappedLine();
2406   CommentsBeforeNextToken.clear();
2407 }
2408 
2409 void UnwrappedLineParser::nextToken(int LevelDifference) {
2410   if (eof())
2411     return;
2412   flushComments(isOnNewLine(*FormatTok));
2413   pushToken(FormatTok);
2414   if (Style.Language != FormatStyle::LK_JavaScript)
2415     readToken(LevelDifference);
2416   else
2417     readTokenWithJavaScriptASI();
2418 }
2419 
2420 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2421   // FIXME: This is a dirty way to access the previous token. Find a better
2422   // solution.
2423   if (!Line || Line->Tokens.empty())
2424     return nullptr;
2425   return Line->Tokens.back().Tok;
2426 }
2427 
2428 void UnwrappedLineParser::distributeComments(
2429     const SmallVectorImpl<FormatToken *> &Comments,
2430     const FormatToken *NextTok) {
2431   // Whether or not a line comment token continues a line is controlled by
2432   // the method continuesLineCommentSection, with the following caveat:
2433   //
2434   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2435   // that each comment line from the trail is aligned with the next token, if
2436   // the next token exists. If a trail exists, the beginning of the maximal
2437   // trail is marked as a start of a new comment section.
2438   //
2439   // For example in this code:
2440   //
2441   // int a; // line about a
2442   //   // line 1 about b
2443   //   // line 2 about b
2444   //   int b;
2445   //
2446   // the two lines about b form a maximal trail, so there are two sections, the
2447   // first one consisting of the single comment "// line about a" and the
2448   // second one consisting of the next two comments.
2449   if (Comments.empty())
2450     return;
2451   bool ShouldPushCommentsInCurrentLine = true;
2452   bool HasTrailAlignedWithNextToken = false;
2453   unsigned StartOfTrailAlignedWithNextToken = 0;
2454   if (NextTok) {
2455     // We are skipping the first element intentionally.
2456     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2457       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2458         HasTrailAlignedWithNextToken = true;
2459         StartOfTrailAlignedWithNextToken = i;
2460       }
2461     }
2462   }
2463   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2464     FormatToken *FormatTok = Comments[i];
2465     if (HasTrailAlignedWithNextToken &&
2466         i == StartOfTrailAlignedWithNextToken) {
2467       FormatTok->ContinuesLineCommentSection = false;
2468     } else {
2469       FormatTok->ContinuesLineCommentSection =
2470           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2471     }
2472     if (!FormatTok->ContinuesLineCommentSection &&
2473         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2474       ShouldPushCommentsInCurrentLine = false;
2475     }
2476     if (ShouldPushCommentsInCurrentLine) {
2477       pushToken(FormatTok);
2478     } else {
2479       CommentsBeforeNextToken.push_back(FormatTok);
2480     }
2481   }
2482 }
2483 
2484 void UnwrappedLineParser::readToken(int LevelDifference) {
2485   SmallVector<FormatToken *, 1> Comments;
2486   do {
2487     FormatTok = Tokens->getNextToken();
2488     assert(FormatTok);
2489     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2490            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2491       distributeComments(Comments, FormatTok);
2492       Comments.clear();
2493       // If there is an unfinished unwrapped line, we flush the preprocessor
2494       // directives only after that unwrapped line was finished later.
2495       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2496       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2497       assert((LevelDifference >= 0 ||
2498               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2499              "LevelDifference makes Line->Level negative");
2500       Line->Level += LevelDifference;
2501       // Comments stored before the preprocessor directive need to be output
2502       // before the preprocessor directive, at the same level as the
2503       // preprocessor directive, as we consider them to apply to the directive.
2504       flushComments(isOnNewLine(*FormatTok));
2505       parsePPDirective();
2506     }
2507     while (FormatTok->Type == TT_ConflictStart ||
2508            FormatTok->Type == TT_ConflictEnd ||
2509            FormatTok->Type == TT_ConflictAlternative) {
2510       if (FormatTok->Type == TT_ConflictStart) {
2511         conditionalCompilationStart(/*Unreachable=*/false);
2512       } else if (FormatTok->Type == TT_ConflictAlternative) {
2513         conditionalCompilationAlternative();
2514       } else if (FormatTok->Type == TT_ConflictEnd) {
2515         conditionalCompilationEnd();
2516       }
2517       FormatTok = Tokens->getNextToken();
2518       FormatTok->MustBreakBefore = true;
2519     }
2520 
2521     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2522         !Line->InPPDirective) {
2523       continue;
2524     }
2525 
2526     if (!FormatTok->Tok.is(tok::comment)) {
2527       distributeComments(Comments, FormatTok);
2528       Comments.clear();
2529       return;
2530     }
2531 
2532     Comments.push_back(FormatTok);
2533   } while (!eof());
2534 
2535   distributeComments(Comments, nullptr);
2536   Comments.clear();
2537 }
2538 
2539 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2540   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2541   if (MustBreakBeforeNextToken) {
2542     Line->Tokens.back().Tok->MustBreakBefore = true;
2543     MustBreakBeforeNextToken = false;
2544   }
2545 }
2546 
2547 } // end namespace format
2548 } // end namespace clang
2549