1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         Token(nullptr) {
65     TokenSource = this;
66     Line.Level = 0;
67     Line.InPPDirective = true;
68   }
69 
70   ~ScopedMacroState() override {
71     TokenSource = PreviousTokenSource;
72     ResetToken = Token;
73     Line.InPPDirective = false;
74     Line.Level = PreviousLineLevel;
75   }
76 
77   FormatToken *getNextToken() override {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89   FormatToken *setPosition(unsigned Position) override {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113 
114   FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
119 class ScopedLineState {
120 public:
121   ScopedLineState(UnwrappedLineParser &Parser,
122                   bool SwitchToPreprocessorLines = false)
123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124     if (SwitchToPreprocessorLines)
125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
126     else if (!Parser.Line->Tokens.empty())
127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128     PreBlockLine = std::move(Parser.Line);
129     Parser.Line = llvm::make_unique<UnwrappedLine>();
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line = std::move(PreBlockLine);
140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141       Parser.MustBreakBeforeNextToken = true;
142     Parser.CurrentLines = OriginalLines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147 
148   std::unique_ptr<UnwrappedLine> PreBlockLine;
149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
152 class CompoundStatementIndenter {
153 public:
154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
155                             const FormatStyle &Style, unsigned &LineLevel)
156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
157     if (Style.BraceWrapping.AfterControlStatement)
158       Parser->addUnwrappedLine();
159     if (Style.BraceWrapping.IndentBraces)
160       ++LineLevel;
161   }
162   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
163 
164 private:
165   unsigned &LineLevel;
166   unsigned OldLineLevel;
167 };
168 
169 namespace {
170 
171 class IndexedTokenSource : public FormatTokenSource {
172 public:
173   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
174       : Tokens(Tokens), Position(-1) {}
175 
176   FormatToken *getNextToken() override {
177     ++Position;
178     return Tokens[Position];
179   }
180 
181   unsigned getPosition() override {
182     assert(Position >= 0);
183     return Position;
184   }
185 
186   FormatToken *setPosition(unsigned P) override {
187     Position = P;
188     return Tokens[Position];
189   }
190 
191   void reset() { Position = -1; }
192 
193 private:
194   ArrayRef<FormatToken *> Tokens;
195   int Position;
196 };
197 
198 } // end anonymous namespace
199 
200 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
201                                          const AdditionalKeywords &Keywords,
202                                          ArrayRef<FormatToken *> Tokens,
203                                          UnwrappedLineConsumer &Callback)
204     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
205       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
206       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
207       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
208 
209 void UnwrappedLineParser::reset() {
210   PPBranchLevel = -1;
211   Line.reset(new UnwrappedLine);
212   CommentsBeforeNextToken.clear();
213   FormatTok = nullptr;
214   MustBreakBeforeNextToken = false;
215   PreprocessorDirectives.clear();
216   CurrentLines = &Lines;
217   DeclarationScopeStack.clear();
218   PPStack.clear();
219 }
220 
221 void UnwrappedLineParser::parse() {
222   IndexedTokenSource TokenSource(AllTokens);
223   do {
224     DEBUG(llvm::dbgs() << "----\n");
225     reset();
226     Tokens = &TokenSource;
227     TokenSource.reset();
228 
229     readToken();
230     parseFile();
231     // Create line with eof token.
232     pushToken(FormatTok);
233     addUnwrappedLine();
234 
235     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
236                                                   E = Lines.end();
237          I != E; ++I) {
238       Callback.consumeUnwrappedLine(*I);
239     }
240     Callback.finishRun();
241     Lines.clear();
242     while (!PPLevelBranchIndex.empty() &&
243            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
244       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
245       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
246     }
247     if (!PPLevelBranchIndex.empty()) {
248       ++PPLevelBranchIndex.back();
249       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
250       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
251     }
252   } while (!PPLevelBranchIndex.empty());
253 }
254 
255 void UnwrappedLineParser::parseFile() {
256   // The top-level context in a file always has declarations, except for pre-
257   // processor directives and JavaScript files.
258   bool MustBeDeclaration =
259       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
260   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
261                                           MustBeDeclaration);
262   parseLevel(/*HasOpeningBrace=*/false);
263   // Make sure to format the remaining tokens.
264   flushComments(true);
265   addUnwrappedLine();
266 }
267 
268 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
269   bool SwitchLabelEncountered = false;
270   do {
271     tok::TokenKind kind = FormatTok->Tok.getKind();
272     if (FormatTok->Type == TT_MacroBlockBegin) {
273       kind = tok::l_brace;
274     } else if (FormatTok->Type == TT_MacroBlockEnd) {
275       kind = tok::r_brace;
276     }
277 
278     switch (kind) {
279     case tok::comment:
280       nextToken();
281       addUnwrappedLine();
282       break;
283     case tok::l_brace:
284       // FIXME: Add parameter whether this can happen - if this happens, we must
285       // be in a non-declaration context.
286       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
287         continue;
288       parseBlock(/*MustBeDeclaration=*/false);
289       addUnwrappedLine();
290       break;
291     case tok::r_brace:
292       if (HasOpeningBrace)
293         return;
294       nextToken();
295       addUnwrappedLine();
296       break;
297     case tok::kw_default:
298     case tok::kw_case:
299       if (!SwitchLabelEncountered &&
300           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
301         ++Line->Level;
302       SwitchLabelEncountered = true;
303       parseStructuralElement();
304       break;
305     default:
306       parseStructuralElement();
307       break;
308     }
309   } while (!eof());
310 }
311 
312 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
313   // We'll parse forward through the tokens until we hit
314   // a closing brace or eof - note that getNextToken() will
315   // parse macros, so this will magically work inside macro
316   // definitions, too.
317   unsigned StoredPosition = Tokens->getPosition();
318   FormatToken *Tok = FormatTok;
319   const FormatToken *PrevTok = getPreviousToken();
320   // Keep a stack of positions of lbrace tokens. We will
321   // update information about whether an lbrace starts a
322   // braced init list or a different block during the loop.
323   SmallVector<FormatToken *, 8> LBraceStack;
324   assert(Tok->Tok.is(tok::l_brace));
325   do {
326     // Get next non-comment token.
327     FormatToken *NextTok;
328     unsigned ReadTokens = 0;
329     do {
330       NextTok = Tokens->getNextToken();
331       ++ReadTokens;
332     } while (NextTok->is(tok::comment));
333 
334     switch (Tok->Tok.getKind()) {
335     case tok::l_brace:
336       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
337           PrevTok->is(tok::colon))
338         // A colon indicates this code is in a type, or a braced list following
339         // a label in an object literal ({a: {b: 1}}).
340         // The code below could be confused by semicolons between the individual
341         // members in a type member list, which would normally trigger BK_Block.
342         // In both cases, this must be parsed as an inline braced init.
343         Tok->BlockKind = BK_BracedInit;
344       else
345         Tok->BlockKind = BK_Unknown;
346       LBraceStack.push_back(Tok);
347       break;
348     case tok::r_brace:
349       if (LBraceStack.empty())
350         break;
351       if (LBraceStack.back()->BlockKind == BK_Unknown) {
352         bool ProbablyBracedList = false;
353         if (Style.Language == FormatStyle::LK_Proto) {
354           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
355         } else {
356           // Using OriginalColumn to distinguish between ObjC methods and
357           // binary operators is a bit hacky.
358           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
359                                   NextTok->OriginalColumn == 0;
360 
361           // If there is a comma, semicolon or right paren after the closing
362           // brace, we assume this is a braced initializer list.  Note that
363           // regardless how we mark inner braces here, we will overwrite the
364           // BlockKind later if we parse a braced list (where all blocks
365           // inside are by default braced lists), or when we explicitly detect
366           // blocks (for example while parsing lambdas).
367           ProbablyBracedList =
368               (Style.Language == FormatStyle::LK_JavaScript &&
369                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
370                                 Keywords.kw_as)) ||
371               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
372               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
373                                tok::r_paren, tok::r_square, tok::l_brace,
374                                tok::l_square, tok::ellipsis) ||
375               (NextTok->is(tok::identifier) &&
376                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
377               (NextTok->is(tok::semi) &&
378                (!ExpectClassBody || LBraceStack.size() != 1)) ||
379               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
380         }
381         if (ProbablyBracedList) {
382           Tok->BlockKind = BK_BracedInit;
383           LBraceStack.back()->BlockKind = BK_BracedInit;
384         } else {
385           Tok->BlockKind = BK_Block;
386           LBraceStack.back()->BlockKind = BK_Block;
387         }
388       }
389       LBraceStack.pop_back();
390       break;
391     case tok::at:
392     case tok::semi:
393     case tok::kw_if:
394     case tok::kw_while:
395     case tok::kw_for:
396     case tok::kw_switch:
397     case tok::kw_try:
398     case tok::kw___try:
399       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
400         LBraceStack.back()->BlockKind = BK_Block;
401       break;
402     default:
403       break;
404     }
405     PrevTok = Tok;
406     Tok = NextTok;
407   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
408 
409   // Assume other blocks for all unclosed opening braces.
410   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
411     if (LBraceStack[i]->BlockKind == BK_Unknown)
412       LBraceStack[i]->BlockKind = BK_Block;
413   }
414 
415   FormatTok = Tokens->setPosition(StoredPosition);
416 }
417 
418 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
419                                      bool MunchSemi) {
420   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
421          "'{' or macro block token expected");
422   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
423   FormatTok->BlockKind = BK_Block;
424 
425   unsigned InitialLevel = Line->Level;
426   nextToken();
427 
428   if (MacroBlock && FormatTok->is(tok::l_paren))
429     parseParens();
430 
431   addUnwrappedLine();
432   size_t OpeningLineIndex =
433       Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1);
434 
435   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
436                                           MustBeDeclaration);
437   if (AddLevel)
438     ++Line->Level;
439   parseLevel(/*HasOpeningBrace=*/true);
440 
441   if (eof())
442     return;
443 
444   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
445                  : !FormatTok->is(tok::r_brace)) {
446     Line->Level = InitialLevel;
447     FormatTok->BlockKind = BK_Block;
448     return;
449   }
450 
451   nextToken(); // Munch the closing brace.
452 
453   if (MacroBlock && FormatTok->is(tok::l_paren))
454     parseParens();
455 
456   if (MunchSemi && FormatTok->Tok.is(tok::semi))
457     nextToken();
458   Line->Level = InitialLevel;
459   Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
460 }
461 
462 static bool isGoogScope(const UnwrappedLine &Line) {
463   // FIXME: Closure-library specific stuff should not be hard-coded but be
464   // configurable.
465   if (Line.Tokens.size() < 4)
466     return false;
467   auto I = Line.Tokens.begin();
468   if (I->Tok->TokenText != "goog")
469     return false;
470   ++I;
471   if (I->Tok->isNot(tok::period))
472     return false;
473   ++I;
474   if (I->Tok->TokenText != "scope")
475     return false;
476   ++I;
477   return I->Tok->is(tok::l_paren);
478 }
479 
480 static bool isIIFE(const UnwrappedLine &Line,
481                    const AdditionalKeywords &Keywords) {
482   // Look for the start of an immediately invoked anonymous function.
483   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
484   // This is commonly done in JavaScript to create a new, anonymous scope.
485   // Example: (function() { ... })()
486   if (Line.Tokens.size() < 3)
487     return false;
488   auto I = Line.Tokens.begin();
489   if (I->Tok->isNot(tok::l_paren))
490     return false;
491   ++I;
492   if (I->Tok->isNot(Keywords.kw_function))
493     return false;
494   ++I;
495   return I->Tok->is(tok::l_paren);
496 }
497 
498 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
499                                    const FormatToken &InitialToken) {
500   if (InitialToken.is(tok::kw_namespace))
501     return Style.BraceWrapping.AfterNamespace;
502   if (InitialToken.is(tok::kw_class))
503     return Style.BraceWrapping.AfterClass;
504   if (InitialToken.is(tok::kw_union))
505     return Style.BraceWrapping.AfterUnion;
506   if (InitialToken.is(tok::kw_struct))
507     return Style.BraceWrapping.AfterStruct;
508   return false;
509 }
510 
511 void UnwrappedLineParser::parseChildBlock() {
512   FormatTok->BlockKind = BK_Block;
513   nextToken();
514   {
515     bool SkipIndent =
516         (Style.Language == FormatStyle::LK_JavaScript &&
517          (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
518     ScopedLineState LineState(*this);
519     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
520                                             /*MustBeDeclaration=*/false);
521     Line->Level += SkipIndent ? 0 : 1;
522     parseLevel(/*HasOpeningBrace=*/true);
523     flushComments(isOnNewLine(*FormatTok));
524     Line->Level -= SkipIndent ? 0 : 1;
525   }
526   nextToken();
527 }
528 
529 void UnwrappedLineParser::parsePPDirective() {
530   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
531   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
532   nextToken();
533 
534   if (!FormatTok->Tok.getIdentifierInfo()) {
535     parsePPUnknown();
536     return;
537   }
538 
539   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
540   case tok::pp_define:
541     parsePPDefine();
542     return;
543   case tok::pp_if:
544     parsePPIf(/*IfDef=*/false);
545     break;
546   case tok::pp_ifdef:
547   case tok::pp_ifndef:
548     parsePPIf(/*IfDef=*/true);
549     break;
550   case tok::pp_else:
551     parsePPElse();
552     break;
553   case tok::pp_elif:
554     parsePPElIf();
555     break;
556   case tok::pp_endif:
557     parsePPEndIf();
558     break;
559   default:
560     parsePPUnknown();
561     break;
562   }
563 }
564 
565 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
566   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
567     PPStack.push_back(PP_Unreachable);
568   else
569     PPStack.push_back(PP_Conditional);
570 }
571 
572 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
573   ++PPBranchLevel;
574   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
575   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
576     PPLevelBranchIndex.push_back(0);
577     PPLevelBranchCount.push_back(0);
578   }
579   PPChainBranchIndex.push(0);
580   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
581   conditionalCompilationCondition(Unreachable || Skip);
582 }
583 
584 void UnwrappedLineParser::conditionalCompilationAlternative() {
585   if (!PPStack.empty())
586     PPStack.pop_back();
587   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
588   if (!PPChainBranchIndex.empty())
589     ++PPChainBranchIndex.top();
590   conditionalCompilationCondition(
591       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
592       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
593 }
594 
595 void UnwrappedLineParser::conditionalCompilationEnd() {
596   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
597   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
598     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
599       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
600     }
601   }
602   // Guard against #endif's without #if.
603   if (PPBranchLevel > 0)
604     --PPBranchLevel;
605   if (!PPChainBranchIndex.empty())
606     PPChainBranchIndex.pop();
607   if (!PPStack.empty())
608     PPStack.pop_back();
609 }
610 
611 void UnwrappedLineParser::parsePPIf(bool IfDef) {
612   bool IfNDef = FormatTok->is(tok::pp_ifndef);
613   nextToken();
614   bool Unreachable = false;
615   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
616     Unreachable = true;
617   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
618     Unreachable = true;
619   conditionalCompilationStart(Unreachable);
620   parsePPUnknown();
621 }
622 
/// \brief Handles an #else directive: switches the conditional-compilation
/// state to the next branch, then consumes the rest of the directive.
void UnwrappedLineParser::parsePPElse() {
  conditionalCompilationAlternative();
  parsePPUnknown();
}
627 
/// \brief Handles an #elif directive exactly like an #else; the condition is
/// not evaluated.
void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
629 
/// \brief Handles an #endif directive: closes the current conditional chain,
/// then consumes the rest of the directive.
void UnwrappedLineParser::parsePPEndIf() {
  conditionalCompilationEnd();
  parsePPUnknown();
}
634 
635 void UnwrappedLineParser::parsePPDefine() {
636   nextToken();
637 
638   if (FormatTok->Tok.getKind() != tok::identifier) {
639     parsePPUnknown();
640     return;
641   }
642   nextToken();
643   if (FormatTok->Tok.getKind() == tok::l_paren &&
644       FormatTok->WhitespaceRange.getBegin() ==
645           FormatTok->WhitespaceRange.getEnd()) {
646     parseParens();
647   }
648   addUnwrappedLine();
649   Line->Level = 1;
650 
651   // Errors during a preprocessor directive can only affect the layout of the
652   // preprocessor directive, and thus we ignore them. An alternative approach
653   // would be to use the same approach we use on the file level (no
654   // re-indentation if there was a structural error) within the macro
655   // definition.
656   parseFile();
657 }
658 
659 void UnwrappedLineParser::parsePPUnknown() {
660   do {
661     nextToken();
662   } while (!eof());
663   addUnwrappedLine();
664 }
665 
666 // Here we blacklist certain tokens that are not usually the first token in an
667 // unwrapped line. This is used in attempt to distinguish macro calls without
668 // trailing semicolons from other constructs split to several lines.
669 static bool tokenCanStartNewLine(const clang::Token &Tok) {
670   // Semicolon can be a null-statement, l_square can be a start of a macro or
671   // a C++11 attribute, but this doesn't seem to be common.
672   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
673          Tok.isNot(tok::l_square) &&
674          // Tokens that can only be used as binary operators and a part of
675          // overloaded operator names.
676          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
677          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
678          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
679          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
680          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
681          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
682          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
683          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
684          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
685          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
686          Tok.isNot(tok::lesslessequal) &&
687          // Colon is used in labels, base class lists, initializer lists,
688          // range-based for loops, ternary operator, but should never be the
689          // first token in an unwrapped line.
690          Tok.isNot(tok::colon) &&
691          // 'noexcept' is a trailing annotation.
692          Tok.isNot(tok::kw_noexcept);
693 }
694 
695 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
696                           const FormatToken *FormatTok) {
697   // FIXME: This returns true for C/C++ keywords like 'struct'.
698   return FormatTok->is(tok::identifier) &&
699          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
700           !FormatTok->isOneOf(
701               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
702               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
703               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
704               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
705               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
706               Keywords.kw_instanceof, Keywords.kw_interface,
707               Keywords.kw_throws));
708 }
709 
710 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
711                                  const FormatToken *FormatTok) {
712   return FormatTok->Tok.isLiteral() ||
713          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
714          mustBeJSIdent(Keywords, FormatTok);
715 }
716 
717 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
718 // when encountered after a value (see mustBeJSIdentOrValue).
719 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
720                            const FormatToken *FormatTok) {
721   return FormatTok->isOneOf(
722       tok::kw_return, Keywords.kw_yield,
723       // conditionals
724       tok::kw_if, tok::kw_else,
725       // loops
726       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
727       // switch/case
728       tok::kw_switch, tok::kw_case,
729       // exceptions
730       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
731       // declaration
732       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
733       Keywords.kw_async, Keywords.kw_function,
734       // import/export
735       Keywords.kw_import, tok::kw_export);
736 }
737 
// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // If comments are pending before the next token, the first of them decides
  // whether a line break separates the previous token from what follows.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  // No line break, no semicolon insertion.
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
    // If the token before the previous one is an '@', the previous token is an
    // annotation and can precede another identifier/value.
    const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
    if (PrePrevious->is(tok::at))
      return;
  }
  // A value followed by '!' on the next line: end the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on the next line, following a value or one of ']', ')', '++',
  // '--', ends the line, unless either side belongs to a template string
  // expression ("${" ... "}").
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A value followed by a keyword that starts a declaration or statement.
  if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}
780 
781 void UnwrappedLineParser::parseStructuralElement() {
782   assert(!FormatTok->is(tok::l_brace));
783   if (Style.Language == FormatStyle::LK_TableGen &&
784       FormatTok->is(tok::pp_include)) {
785     nextToken();
786     if (FormatTok->is(tok::string_literal))
787       nextToken();
788     addUnwrappedLine();
789     return;
790   }
791   switch (FormatTok->Tok.getKind()) {
792   case tok::at:
793     nextToken();
794     if (FormatTok->Tok.is(tok::l_brace)) {
795       parseBracedList();
796       break;
797     }
798     switch (FormatTok->Tok.getObjCKeywordID()) {
799     case tok::objc_public:
800     case tok::objc_protected:
801     case tok::objc_package:
802     case tok::objc_private:
803       return parseAccessSpecifier();
804     case tok::objc_interface:
805     case tok::objc_implementation:
806       return parseObjCInterfaceOrImplementation();
807     case tok::objc_protocol:
808       return parseObjCProtocol();
809     case tok::objc_end:
810       return; // Handled by the caller.
811     case tok::objc_optional:
812     case tok::objc_required:
813       nextToken();
814       addUnwrappedLine();
815       return;
816     case tok::objc_autoreleasepool:
817       nextToken();
818       if (FormatTok->Tok.is(tok::l_brace)) {
819         if (Style.BraceWrapping.AfterObjCDeclaration)
820           addUnwrappedLine();
821         parseBlock(/*MustBeDeclaration=*/false);
822       }
823       addUnwrappedLine();
824       return;
825     case tok::objc_try:
826       // This branch isn't strictly necessary (the kw_try case below would
827       // do this too after the tok::at is parsed above).  But be explicit.
828       parseTryCatch();
829       return;
830     default:
831       break;
832     }
833     break;
834   case tok::kw_asm:
835     nextToken();
836     if (FormatTok->is(tok::l_brace)) {
837       FormatTok->Type = TT_InlineASMBrace;
838       nextToken();
839       while (FormatTok && FormatTok->isNot(tok::eof)) {
840         if (FormatTok->is(tok::r_brace)) {
841           FormatTok->Type = TT_InlineASMBrace;
842           nextToken();
843           addUnwrappedLine();
844           break;
845         }
846         FormatTok->Finalized = true;
847         nextToken();
848       }
849     }
850     break;
851   case tok::kw_namespace:
852     parseNamespace();
853     return;
854   case tok::kw_inline:
855     nextToken();
856     if (FormatTok->Tok.is(tok::kw_namespace)) {
857       parseNamespace();
858       return;
859     }
860     break;
861   case tok::kw_public:
862   case tok::kw_protected:
863   case tok::kw_private:
864     if (Style.Language == FormatStyle::LK_Java ||
865         Style.Language == FormatStyle::LK_JavaScript)
866       nextToken();
867     else
868       parseAccessSpecifier();
869     return;
870   case tok::kw_if:
871     parseIfThenElse();
872     return;
873   case tok::kw_for:
874   case tok::kw_while:
875     parseForOrWhileLoop();
876     return;
877   case tok::kw_do:
878     parseDoWhile();
879     return;
880   case tok::kw_switch:
881     parseSwitch();
882     return;
883   case tok::kw_default:
884     nextToken();
885     parseLabel();
886     return;
887   case tok::kw_case:
888     parseCaseLabel();
889     return;
890   case tok::kw_try:
891   case tok::kw___try:
892     parseTryCatch();
893     return;
894   case tok::kw_extern:
895     nextToken();
896     if (FormatTok->Tok.is(tok::string_literal)) {
897       nextToken();
898       if (FormatTok->Tok.is(tok::l_brace)) {
899         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
900         addUnwrappedLine();
901         return;
902       }
903     }
904     break;
905   case tok::kw_export:
906     if (Style.Language == FormatStyle::LK_JavaScript) {
907       parseJavaScriptEs6ImportExport();
908       return;
909     }
910     break;
911   case tok::identifier:
912     if (FormatTok->is(TT_ForEachMacro)) {
913       parseForOrWhileLoop();
914       return;
915     }
916     if (FormatTok->is(TT_MacroBlockBegin)) {
917       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
918                  /*MunchSemi=*/false);
919       return;
920     }
921     if (FormatTok->is(Keywords.kw_import)) {
922       if (Style.Language == FormatStyle::LK_JavaScript) {
923         parseJavaScriptEs6ImportExport();
924         return;
925       }
926       if (Style.Language == FormatStyle::LK_Proto) {
927         nextToken();
928         if (FormatTok->is(tok::kw_public))
929           nextToken();
930         if (!FormatTok->is(tok::string_literal))
931           return;
932         nextToken();
933         if (FormatTok->is(tok::semi))
934           nextToken();
935         addUnwrappedLine();
936         return;
937       }
938     }
939     if (Style.isCpp() &&
940         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
941                            Keywords.kw_slots, Keywords.kw_qslots)) {
942       nextToken();
943       if (FormatTok->is(tok::colon)) {
944         nextToken();
945         addUnwrappedLine();
946         return;
947       }
948     }
949     // In all other cases, parse the declaration.
950     break;
951   default:
952     break;
953   }
954   do {
955     const FormatToken *Previous = getPreviousToken();
956     switch (FormatTok->Tok.getKind()) {
957     case tok::at:
958       nextToken();
959       if (FormatTok->Tok.is(tok::l_brace))
960         parseBracedList();
961       break;
962     case tok::kw_enum:
963       // Ignore if this is part of "template <enum ...".
964       if (Previous && Previous->is(tok::less)) {
965         nextToken();
966         break;
967       }
968 
969       // parseEnum falls through and does not yet add an unwrapped line as an
970       // enum definition can start a structural element.
971       if (!parseEnum())
972         break;
973       // This only applies for C++.
974       if (!Style.isCpp()) {
975         addUnwrappedLine();
976         return;
977       }
978       break;
979     case tok::kw_typedef:
980       nextToken();
981       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
982                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
983         parseEnum();
984       break;
985     case tok::kw_struct:
986     case tok::kw_union:
987     case tok::kw_class:
988       // parseRecord falls through and does not yet add an unwrapped line as a
989       // record declaration or definition can start a structural element.
990       parseRecord();
991       // This does not apply for Java and JavaScript.
992       if (Style.Language == FormatStyle::LK_Java ||
993           Style.Language == FormatStyle::LK_JavaScript) {
994         if (FormatTok->is(tok::semi))
995           nextToken();
996         addUnwrappedLine();
997         return;
998       }
999       break;
1000     case tok::period:
1001       nextToken();
1002       // In Java, classes have an implicit static member "class".
1003       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1004           FormatTok->is(tok::kw_class))
1005         nextToken();
1006       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1007           FormatTok->Tok.getIdentifierInfo())
1008         // JavaScript only has pseudo keywords, all keywords are allowed to
1009         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1010         nextToken();
1011       break;
1012     case tok::semi:
1013       nextToken();
1014       addUnwrappedLine();
1015       return;
1016     case tok::r_brace:
1017       addUnwrappedLine();
1018       return;
1019     case tok::l_paren:
1020       parseParens();
1021       break;
1022     case tok::kw_operator:
1023       nextToken();
1024       if (FormatTok->isBinaryOperator())
1025         nextToken();
1026       break;
1027     case tok::caret:
1028       nextToken();
1029       if (FormatTok->Tok.isAnyIdentifier() ||
1030           FormatTok->isSimpleTypeSpecifier())
1031         nextToken();
1032       if (FormatTok->is(tok::l_paren))
1033         parseParens();
1034       if (FormatTok->is(tok::l_brace))
1035         parseChildBlock();
1036       break;
1037     case tok::l_brace:
1038       if (!tryToParseBracedList()) {
1039         // A block outside of parentheses must be the last part of a
1040         // structural element.
1041         // FIXME: Figure out cases where this is not true, and add projections
1042         // for them (the one we know is missing are lambdas).
1043         if (Style.BraceWrapping.AfterFunction)
1044           addUnwrappedLine();
1045         FormatTok->Type = TT_FunctionLBrace;
1046         parseBlock(/*MustBeDeclaration=*/false);
1047         addUnwrappedLine();
1048         return;
1049       }
1050       // Otherwise this was a braced init list, and the structural
1051       // element continues.
1052       break;
1053     case tok::kw_try:
1054       // We arrive here when parsing function-try blocks.
1055       parseTryCatch();
1056       return;
1057     case tok::identifier: {
1058       if (FormatTok->is(TT_MacroBlockEnd)) {
1059         addUnwrappedLine();
1060         return;
1061       }
1062 
1063       // Function declarations (as opposed to function expressions) are parsed
1064       // on their own unwrapped line by continuing this loop. Function
1065       // expressions (functions that are not on their own line) must not create
1066       // a new unwrapped line, so they are special cased below.
1067       size_t TokenCount = Line->Tokens.size();
1068       if (Style.Language == FormatStyle::LK_JavaScript &&
1069           FormatTok->is(Keywords.kw_function) &&
1070           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1071                                                      Keywords.kw_async)))) {
1072         tryToParseJSFunction();
1073         break;
1074       }
1075       if ((Style.Language == FormatStyle::LK_JavaScript ||
1076            Style.Language == FormatStyle::LK_Java) &&
1077           FormatTok->is(Keywords.kw_interface)) {
1078         if (Style.Language == FormatStyle::LK_JavaScript) {
1079           // In JavaScript/TypeScript, "interface" can be used as a standalone
1080           // identifier, e.g. in `var interface = 1;`. If "interface" is
1081           // followed by another identifier, it is very like to be an actual
1082           // interface declaration.
1083           unsigned StoredPosition = Tokens->getPosition();
1084           FormatToken *Next = Tokens->getNextToken();
1085           FormatTok = Tokens->setPosition(StoredPosition);
1086           if (Next && !mustBeJSIdent(Keywords, Next)) {
1087             nextToken();
1088             break;
1089           }
1090         }
1091         parseRecord();
1092         addUnwrappedLine();
1093         return;
1094       }
1095 
1096       // See if the following token should start a new unwrapped line.
1097       StringRef Text = FormatTok->TokenText;
1098       nextToken();
1099       if (Line->Tokens.size() == 1 &&
1100           // JS doesn't have macros, and within classes colons indicate fields,
1101           // not labels.
1102           Style.Language != FormatStyle::LK_JavaScript) {
1103         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1104           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1105           parseLabel();
1106           return;
1107         }
1108         // Recognize function-like macro usages without trailing semicolon as
1109         // well as free-standing macros like Q_OBJECT.
1110         bool FunctionLike = FormatTok->is(tok::l_paren);
1111         if (FunctionLike)
1112           parseParens();
1113 
1114         bool FollowedByNewline =
1115             CommentsBeforeNextToken.empty()
1116                 ? FormatTok->NewlinesBefore > 0
1117                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1118 
1119         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1120             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1121           addUnwrappedLine();
1122           return;
1123         }
1124       }
1125       break;
1126     }
1127     case tok::equal:
1128       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1129       // TT_JsFatArrow. The always start an expression or a child block if
1130       // followed by a curly.
1131       if (FormatTok->is(TT_JsFatArrow)) {
1132         nextToken();
1133         if (FormatTok->is(tok::l_brace))
1134           parseChildBlock();
1135         break;
1136       }
1137 
1138       nextToken();
1139       if (FormatTok->Tok.is(tok::l_brace)) {
1140         parseBracedList();
1141       }
1142       break;
1143     case tok::l_square:
1144       parseSquare();
1145       break;
1146     case tok::kw_new:
1147       parseNew();
1148       break;
1149     default:
1150       nextToken();
1151       break;
1152     }
1153   } while (!eof());
1154 }
1155 
1156 bool UnwrappedLineParser::tryToParseLambda() {
1157   if (!Style.isCpp()) {
1158     nextToken();
1159     return false;
1160   }
1161   const FormatToken* Previous = getPreviousToken();
1162   if (Previous &&
1163       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1164                          tok::kw_delete) ||
1165        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1166     nextToken();
1167     return false;
1168   }
1169   assert(FormatTok->is(tok::l_square));
1170   FormatToken &LSquare = *FormatTok;
1171   if (!tryToParseLambdaIntroducer())
1172     return false;
1173 
1174   while (FormatTok->isNot(tok::l_brace)) {
1175     if (FormatTok->isSimpleTypeSpecifier()) {
1176       nextToken();
1177       continue;
1178     }
1179     switch (FormatTok->Tok.getKind()) {
1180     case tok::l_brace:
1181       break;
1182     case tok::l_paren:
1183       parseParens();
1184       break;
1185     case tok::amp:
1186     case tok::star:
1187     case tok::kw_const:
1188     case tok::comma:
1189     case tok::less:
1190     case tok::greater:
1191     case tok::identifier:
1192     case tok::numeric_constant:
1193     case tok::coloncolon:
1194     case tok::kw_mutable:
1195       nextToken();
1196       break;
1197     case tok::arrow:
1198       FormatTok->Type = TT_LambdaArrow;
1199       nextToken();
1200       break;
1201     default:
1202       return true;
1203     }
1204   }
1205   LSquare.Type = TT_LambdaLSquare;
1206   parseChildBlock();
1207   return true;
1208 }
1209 
1210 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1211   nextToken();
1212   if (FormatTok->is(tok::equal)) {
1213     nextToken();
1214     if (FormatTok->is(tok::r_square)) {
1215       nextToken();
1216       return true;
1217     }
1218     if (FormatTok->isNot(tok::comma))
1219       return false;
1220     nextToken();
1221   } else if (FormatTok->is(tok::amp)) {
1222     nextToken();
1223     if (FormatTok->is(tok::r_square)) {
1224       nextToken();
1225       return true;
1226     }
1227     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1228       return false;
1229     }
1230     if (FormatTok->is(tok::comma))
1231       nextToken();
1232   } else if (FormatTok->is(tok::r_square)) {
1233     nextToken();
1234     return true;
1235   }
1236   do {
1237     if (FormatTok->is(tok::amp))
1238       nextToken();
1239     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1240       return false;
1241     nextToken();
1242     if (FormatTok->is(tok::ellipsis))
1243       nextToken();
1244     if (FormatTok->is(tok::comma)) {
1245       nextToken();
1246     } else if (FormatTok->is(tok::r_square)) {
1247       nextToken();
1248       return true;
1249     } else {
1250       return false;
1251     }
1252   } while (!eof());
1253   return false;
1254 }
1255 
1256 void UnwrappedLineParser::tryToParseJSFunction() {
1257   assert(FormatTok->is(Keywords.kw_function) ||
1258          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1259   if (FormatTok->is(Keywords.kw_async))
1260     nextToken();
1261   // Consume "function".
1262   nextToken();
1263 
1264   // Consume * (generator function). Treat it like C++'s overloaded operators.
1265   if (FormatTok->is(tok::star)) {
1266     FormatTok->Type = TT_OverloadedOperator;
1267     nextToken();
1268   }
1269 
1270   // Consume function name.
1271   if (FormatTok->is(tok::identifier))
1272     nextToken();
1273 
1274   if (FormatTok->isNot(tok::l_paren))
1275     return;
1276 
1277   // Parse formal parameter list.
1278   parseParens();
1279 
1280   if (FormatTok->is(tok::colon)) {
1281     // Parse a type definition.
1282     nextToken();
1283 
1284     // Eat the type declaration. For braced inline object types, balance braces,
1285     // otherwise just parse until finding an l_brace for the function body.
1286     if (FormatTok->is(tok::l_brace))
1287       tryToParseBracedList();
1288     else
1289       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1290         nextToken();
1291   }
1292 
1293   if (FormatTok->is(tok::semi))
1294     return;
1295 
1296   parseChildBlock();
1297 }
1298 
1299 bool UnwrappedLineParser::tryToParseBracedList() {
1300   if (FormatTok->BlockKind == BK_Unknown)
1301     calculateBraceTypes();
1302   assert(FormatTok->BlockKind != BK_Unknown);
1303   if (FormatTok->BlockKind == BK_Block)
1304     return false;
1305   parseBracedList();
1306   return true;
1307 }
1308 
1309 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1310   bool HasError = false;
1311   nextToken();
1312 
1313   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1314   // replace this by using parseAssigmentExpression() inside.
1315   do {
1316     if (Style.Language == FormatStyle::LK_JavaScript) {
1317       if (FormatTok->is(Keywords.kw_function) ||
1318           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1319         tryToParseJSFunction();
1320         continue;
1321       }
1322       if (FormatTok->is(TT_JsFatArrow)) {
1323         nextToken();
1324         // Fat arrows can be followed by simple expressions or by child blocks
1325         // in curly braces.
1326         if (FormatTok->is(tok::l_brace)) {
1327           parseChildBlock();
1328           continue;
1329         }
1330       }
1331       if (FormatTok->is(tok::l_brace)) {
1332         // Could be a method inside of a braced list `{a() { return 1; }}`.
1333         if (tryToParseBracedList())
1334           continue;
1335         parseChildBlock();
1336       }
1337     }
1338     switch (FormatTok->Tok.getKind()) {
1339     case tok::caret:
1340       nextToken();
1341       if (FormatTok->is(tok::l_brace)) {
1342         parseChildBlock();
1343       }
1344       break;
1345     case tok::l_square:
1346       tryToParseLambda();
1347       break;
1348     case tok::l_paren:
1349       parseParens();
1350       // JavaScript can just have free standing methods and getters/setters in
1351       // object literals. Detect them by a "{" following ")".
1352       if (Style.Language == FormatStyle::LK_JavaScript) {
1353         if (FormatTok->is(tok::l_brace))
1354           parseChildBlock();
1355         break;
1356       }
1357       break;
1358     case tok::l_brace:
1359       // Assume there are no blocks inside a braced init list apart
1360       // from the ones we explicitly parse out (like lambdas).
1361       FormatTok->BlockKind = BK_BracedInit;
1362       parseBracedList();
1363       break;
1364     case tok::r_brace:
1365       nextToken();
1366       return !HasError;
1367     case tok::semi:
1368       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1369       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1370       // used for error recovery if we have otherwise determined that this is
1371       // a braced list.
1372       if (Style.Language == FormatStyle::LK_JavaScript) {
1373         nextToken();
1374         break;
1375       }
1376       HasError = true;
1377       if (!ContinueOnSemicolons)
1378         return !HasError;
1379       nextToken();
1380       break;
1381     case tok::comma:
1382       nextToken();
1383       break;
1384     default:
1385       nextToken();
1386       break;
1387     }
1388   } while (!eof());
1389   return false;
1390 }
1391 
1392 void UnwrappedLineParser::parseParens() {
1393   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1394   nextToken();
1395   do {
1396     switch (FormatTok->Tok.getKind()) {
1397     case tok::l_paren:
1398       parseParens();
1399       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1400         parseChildBlock();
1401       break;
1402     case tok::r_paren:
1403       nextToken();
1404       return;
1405     case tok::r_brace:
1406       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1407       return;
1408     case tok::l_square:
1409       tryToParseLambda();
1410       break;
1411     case tok::l_brace:
1412       if (!tryToParseBracedList())
1413         parseChildBlock();
1414       break;
1415     case tok::at:
1416       nextToken();
1417       if (FormatTok->Tok.is(tok::l_brace))
1418         parseBracedList();
1419       break;
1420     case tok::kw_class:
1421       if (Style.Language == FormatStyle::LK_JavaScript)
1422         parseRecord(/*ParseAsExpr=*/true);
1423       else
1424         nextToken();
1425       break;
1426     case tok::identifier:
1427       if (Style.Language == FormatStyle::LK_JavaScript &&
1428           (FormatTok->is(Keywords.kw_function) ||
1429            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1430         tryToParseJSFunction();
1431       else
1432         nextToken();
1433       break;
1434     default:
1435       nextToken();
1436       break;
1437     }
1438   } while (!eof());
1439 }
1440 
1441 void UnwrappedLineParser::parseSquare() {
1442   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1443   if (tryToParseLambda())
1444     return;
1445   do {
1446     switch (FormatTok->Tok.getKind()) {
1447     case tok::l_paren:
1448       parseParens();
1449       break;
1450     case tok::r_square:
1451       nextToken();
1452       return;
1453     case tok::r_brace:
1454       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1455       return;
1456     case tok::l_square:
1457       parseSquare();
1458       break;
1459     case tok::l_brace: {
1460       if (!tryToParseBracedList())
1461         parseChildBlock();
1462       break;
1463     }
1464     case tok::at:
1465       nextToken();
1466       if (FormatTok->Tok.is(tok::l_brace))
1467         parseBracedList();
1468       break;
1469     default:
1470       nextToken();
1471       break;
1472     }
1473   } while (!eof());
1474 }
1475 
1476 void UnwrappedLineParser::parseIfThenElse() {
1477   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1478   nextToken();
1479   if (FormatTok->Tok.is(tok::l_paren))
1480     parseParens();
1481   bool NeedsUnwrappedLine = false;
1482   if (FormatTok->Tok.is(tok::l_brace)) {
1483     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1484     parseBlock(/*MustBeDeclaration=*/false);
1485     if (Style.BraceWrapping.BeforeElse)
1486       addUnwrappedLine();
1487     else
1488       NeedsUnwrappedLine = true;
1489   } else {
1490     addUnwrappedLine();
1491     ++Line->Level;
1492     parseStructuralElement();
1493     --Line->Level;
1494   }
1495   if (FormatTok->Tok.is(tok::kw_else)) {
1496     nextToken();
1497     if (FormatTok->Tok.is(tok::l_brace)) {
1498       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1499       parseBlock(/*MustBeDeclaration=*/false);
1500       addUnwrappedLine();
1501     } else if (FormatTok->Tok.is(tok::kw_if)) {
1502       parseIfThenElse();
1503     } else {
1504       addUnwrappedLine();
1505       ++Line->Level;
1506       parseStructuralElement();
1507       if (FormatTok->is(tok::eof))
1508         addUnwrappedLine();
1509       --Line->Level;
1510     }
1511   } else if (NeedsUnwrappedLine) {
1512     addUnwrappedLine();
1513   }
1514 }
1515 
1516 void UnwrappedLineParser::parseTryCatch() {
1517   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1518   nextToken();
1519   bool NeedsUnwrappedLine = false;
1520   if (FormatTok->is(tok::colon)) {
1521     // We are in a function try block, what comes is an initializer list.
1522     nextToken();
1523     while (FormatTok->is(tok::identifier)) {
1524       nextToken();
1525       if (FormatTok->is(tok::l_paren))
1526         parseParens();
1527       if (FormatTok->is(tok::comma))
1528         nextToken();
1529     }
1530   }
1531   // Parse try with resource.
1532   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1533     parseParens();
1534   }
1535   if (FormatTok->is(tok::l_brace)) {
1536     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1537     parseBlock(/*MustBeDeclaration=*/false);
1538     if (Style.BraceWrapping.BeforeCatch) {
1539       addUnwrappedLine();
1540     } else {
1541       NeedsUnwrappedLine = true;
1542     }
1543   } else if (!FormatTok->is(tok::kw_catch)) {
1544     // The C++ standard requires a compound-statement after a try.
1545     // If there's none, we try to assume there's a structuralElement
1546     // and try to continue.
1547     addUnwrappedLine();
1548     ++Line->Level;
1549     parseStructuralElement();
1550     --Line->Level;
1551   }
1552   while (1) {
1553     if (FormatTok->is(tok::at))
1554       nextToken();
1555     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1556                              tok::kw___finally) ||
1557           ((Style.Language == FormatStyle::LK_Java ||
1558             Style.Language == FormatStyle::LK_JavaScript) &&
1559            FormatTok->is(Keywords.kw_finally)) ||
1560           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1561            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1562       break;
1563     nextToken();
1564     while (FormatTok->isNot(tok::l_brace)) {
1565       if (FormatTok->is(tok::l_paren)) {
1566         parseParens();
1567         continue;
1568       }
1569       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1570         return;
1571       nextToken();
1572     }
1573     NeedsUnwrappedLine = false;
1574     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1575     parseBlock(/*MustBeDeclaration=*/false);
1576     if (Style.BraceWrapping.BeforeCatch)
1577       addUnwrappedLine();
1578     else
1579       NeedsUnwrappedLine = true;
1580   }
1581   if (NeedsUnwrappedLine)
1582     addUnwrappedLine();
1583 }
1584 
1585 void UnwrappedLineParser::parseNamespace() {
1586   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1587 
1588   const FormatToken &InitialToken = *FormatTok;
1589   nextToken();
1590   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1591     nextToken();
1592   if (FormatTok->Tok.is(tok::l_brace)) {
1593     if (ShouldBreakBeforeBrace(Style, InitialToken))
1594       addUnwrappedLine();
1595 
1596     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1597                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1598                      DeclarationScopeStack.size() > 1);
1599     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1600     // Munch the semicolon after a namespace. This is more common than one would
1601     // think. Puttin the semicolon into its own line is very ugly.
1602     if (FormatTok->Tok.is(tok::semi))
1603       nextToken();
1604     addUnwrappedLine();
1605   }
1606   // FIXME: Add error handling.
1607 }
1608 
1609 void UnwrappedLineParser::parseNew() {
1610   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1611   nextToken();
1612   if (Style.Language != FormatStyle::LK_Java)
1613     return;
1614 
1615   // In Java, we can parse everything up to the parens, which aren't optional.
1616   do {
1617     // There should not be a ;, { or } before the new's open paren.
1618     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1619       return;
1620 
1621     // Consume the parens.
1622     if (FormatTok->is(tok::l_paren)) {
1623       parseParens();
1624 
1625       // If there is a class body of an anonymous class, consume that as child.
1626       if (FormatTok->is(tok::l_brace))
1627         parseChildBlock();
1628       return;
1629     }
1630     nextToken();
1631   } while (!eof());
1632 }
1633 
1634 void UnwrappedLineParser::parseForOrWhileLoop() {
1635   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1636          "'for', 'while' or foreach macro expected");
1637   nextToken();
1638   if (FormatTok->Tok.is(tok::l_paren))
1639     parseParens();
1640   if (FormatTok->Tok.is(tok::l_brace)) {
1641     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1642     parseBlock(/*MustBeDeclaration=*/false);
1643     addUnwrappedLine();
1644   } else {
1645     addUnwrappedLine();
1646     ++Line->Level;
1647     parseStructuralElement();
1648     --Line->Level;
1649   }
1650 }
1651 
1652 void UnwrappedLineParser::parseDoWhile() {
1653   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1654   nextToken();
1655   if (FormatTok->Tok.is(tok::l_brace)) {
1656     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1657     parseBlock(/*MustBeDeclaration=*/false);
1658     if (Style.BraceWrapping.IndentBraces)
1659       addUnwrappedLine();
1660   } else {
1661     addUnwrappedLine();
1662     ++Line->Level;
1663     parseStructuralElement();
1664     --Line->Level;
1665   }
1666 
1667   // FIXME: Add error handling.
1668   if (!FormatTok->Tok.is(tok::kw_while)) {
1669     addUnwrappedLine();
1670     return;
1671   }
1672 
1673   nextToken();
1674   parseStructuralElement();
1675 }
1676 
1677 void UnwrappedLineParser::parseLabel() {
1678   nextToken();
1679   unsigned OldLineLevel = Line->Level;
1680   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1681     --Line->Level;
1682   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1683     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1684     parseBlock(/*MustBeDeclaration=*/false);
1685     if (FormatTok->Tok.is(tok::kw_break)) {
1686       if (Style.BraceWrapping.AfterControlStatement)
1687         addUnwrappedLine();
1688       parseStructuralElement();
1689     }
1690     addUnwrappedLine();
1691   } else {
1692     if (FormatTok->is(tok::semi))
1693       nextToken();
1694     addUnwrappedLine();
1695   }
1696   Line->Level = OldLineLevel;
1697   if (FormatTok->isNot(tok::l_brace)) {
1698     parseStructuralElement();
1699     addUnwrappedLine();
1700   }
1701 }
1702 
1703 void UnwrappedLineParser::parseCaseLabel() {
1704   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1705   // FIXME: fix handling of complex expressions here.
1706   do {
1707     nextToken();
1708   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1709   parseLabel();
1710 }
1711 
1712 void UnwrappedLineParser::parseSwitch() {
1713   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1714   nextToken();
1715   if (FormatTok->Tok.is(tok::l_paren))
1716     parseParens();
1717   if (FormatTok->Tok.is(tok::l_brace)) {
1718     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1719     parseBlock(/*MustBeDeclaration=*/false);
1720     addUnwrappedLine();
1721   } else {
1722     addUnwrappedLine();
1723     ++Line->Level;
1724     parseStructuralElement();
1725     --Line->Level;
1726   }
1727 }
1728 
1729 void UnwrappedLineParser::parseAccessSpecifier() {
1730   nextToken();
1731   // Understand Qt's slots.
1732   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1733     nextToken();
1734   // Otherwise, we don't know what it is, and we'd better keep the next token.
1735   if (FormatTok->Tok.is(tok::colon))
1736     nextToken();
1737   addUnwrappedLine();
1738 }
1739 
1740 bool UnwrappedLineParser::parseEnum() {
1741   // Won't be 'enum' for NS_ENUMs.
1742   if (FormatTok->Tok.is(tok::kw_enum))
1743     nextToken();
1744 
1745   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1746   // declarations. An "enum" keyword followed by a colon would be a syntax
1747   // error and thus assume it is just an identifier.
1748   if (Style.Language == FormatStyle::LK_JavaScript &&
1749       FormatTok->isOneOf(tok::colon, tok::question))
1750     return false;
1751 
1752   // Eat up enum class ...
1753   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1754     nextToken();
1755 
1756   while (FormatTok->Tok.getIdentifierInfo() ||
1757          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1758                             tok::greater, tok::comma, tok::question)) {
1759     nextToken();
1760     // We can have macros or attributes in between 'enum' and the enum name.
1761     if (FormatTok->is(tok::l_paren))
1762       parseParens();
1763     if (FormatTok->is(tok::identifier)) {
1764       nextToken();
1765       // If there are two identifiers in a row, this is likely an elaborate
1766       // return type. In Java, this can be "implements", etc.
1767       if (Style.isCpp() && FormatTok->is(tok::identifier))
1768         return false;
1769     }
1770   }
1771 
1772   // Just a declaration or something is wrong.
1773   if (FormatTok->isNot(tok::l_brace))
1774     return true;
1775   FormatTok->BlockKind = BK_Block;
1776 
1777   if (Style.Language == FormatStyle::LK_Java) {
1778     // Java enums are different.
1779     parseJavaEnumBody();
1780     return true;
1781   }
1782   if (Style.Language == FormatStyle::LK_Proto) {
1783     parseBlock(/*MustBeDeclaration=*/true);
1784     return true;
1785   }
1786 
1787   // Parse enum body.
1788   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1789   if (HasError) {
1790     if (FormatTok->is(tok::semi))
1791       nextToken();
1792     addUnwrappedLine();
1793   }
1794   return true;
1795 
1796   // There is no addUnwrappedLine() here so that we fall through to parsing a
1797   // structural element afterwards. Thus, in "enum A {} n, m;",
1798   // "} n, m;" will end up in one unwrapped line.
1799 }
1800 
1801 void UnwrappedLineParser::parseJavaEnumBody() {
1802   // Determine whether the enum is simple, i.e. does not have a semicolon or
1803   // constants with class bodies. Simple enums can be formatted like braced
1804   // lists, contracted to a single line, etc.
1805   unsigned StoredPosition = Tokens->getPosition();
1806   bool IsSimple = true;
1807   FormatToken *Tok = Tokens->getNextToken();
1808   while (Tok) {
1809     if (Tok->is(tok::r_brace))
1810       break;
1811     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1812       IsSimple = false;
1813       break;
1814     }
1815     // FIXME: This will also mark enums with braces in the arguments to enum
1816     // constants as "not simple". This is probably fine in practice, though.
1817     Tok = Tokens->getNextToken();
1818   }
1819   FormatTok = Tokens->setPosition(StoredPosition);
1820 
1821   if (IsSimple) {
1822     parseBracedList();
1823     addUnwrappedLine();
1824     return;
1825   }
1826 
1827   // Parse the body of a more complex enum.
1828   // First add a line for everything up to the "{".
1829   nextToken();
1830   addUnwrappedLine();
1831   ++Line->Level;
1832 
1833   // Parse the enum constants.
1834   while (FormatTok) {
1835     if (FormatTok->is(tok::l_brace)) {
1836       // Parse the constant's class body.
1837       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1838                  /*MunchSemi=*/false);
1839     } else if (FormatTok->is(tok::l_paren)) {
1840       parseParens();
1841     } else if (FormatTok->is(tok::comma)) {
1842       nextToken();
1843       addUnwrappedLine();
1844     } else if (FormatTok->is(tok::semi)) {
1845       nextToken();
1846       addUnwrappedLine();
1847       break;
1848     } else if (FormatTok->is(tok::r_brace)) {
1849       addUnwrappedLine();
1850       break;
1851     } else {
1852       nextToken();
1853     }
1854   }
1855 
1856   // Parse the class body after the enum's ";" if any.
1857   parseLevel(/*HasOpeningBrace=*/true);
1858   nextToken();
1859   --Line->Level;
1860   addUnwrappedLine();
1861 }
1862 
1863 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
1864   const FormatToken &InitialToken = *FormatTok;
1865   nextToken();
1866 
1867   // The actual identifier can be a nested name specifier, and in macros
1868   // it is often token-pasted.
1869   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1870                             tok::kw___attribute, tok::kw___declspec,
1871                             tok::kw_alignas) ||
1872          ((Style.Language == FormatStyle::LK_Java ||
1873            Style.Language == FormatStyle::LK_JavaScript) &&
1874           FormatTok->isOneOf(tok::period, tok::comma))) {
1875     bool IsNonMacroIdentifier =
1876         FormatTok->is(tok::identifier) &&
1877         FormatTok->TokenText != FormatTok->TokenText.upper();
1878     nextToken();
1879     // We can have macros or attributes in between 'class' and the class name.
1880     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1881       parseParens();
1882   }
1883 
1884   // Note that parsing away template declarations here leads to incorrectly
1885   // accepting function declarations as record declarations.
1886   // In general, we cannot solve this problem. Consider:
1887   // class A<int> B() {}
1888   // which can be a function definition or a class definition when B() is a
1889   // macro. If we find enough real-world cases where this is a problem, we
1890   // can parse for the 'template' keyword in the beginning of the statement,
1891   // and thus rule out the record production in case there is no template
1892   // (this would still leave us with an ambiguity between template function
1893   // and class declarations).
1894   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1895     while (!eof()) {
1896       if (FormatTok->is(tok::l_brace)) {
1897         calculateBraceTypes(/*ExpectClassBody=*/true);
1898         if (!tryToParseBracedList())
1899           break;
1900       }
1901       if (FormatTok->Tok.is(tok::semi))
1902         return;
1903       nextToken();
1904     }
1905   }
1906   if (FormatTok->Tok.is(tok::l_brace)) {
1907     if (ParseAsExpr) {
1908       parseChildBlock();
1909     } else {
1910       if (ShouldBreakBeforeBrace(Style, InitialToken))
1911         addUnwrappedLine();
1912 
1913       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1914                  /*MunchSemi=*/false);
1915     }
1916   }
1917   // There is no addUnwrappedLine() here so that we fall through to parsing a
1918   // structural element afterwards. Thus, in "class A {} n, m;",
1919   // "} n, m;" will end up in one unwrapped line.
1920 }
1921 
1922 void UnwrappedLineParser::parseObjCProtocolList() {
1923   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1924   do
1925     nextToken();
1926   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1927   nextToken(); // Skip '>'.
1928 }
1929 
1930 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1931   do {
1932     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1933       nextToken();
1934       addUnwrappedLine();
1935       break;
1936     }
1937     if (FormatTok->is(tok::l_brace)) {
1938       parseBlock(/*MustBeDeclaration=*/false);
1939       // In ObjC interfaces, nothing should be following the "}".
1940       addUnwrappedLine();
1941     } else if (FormatTok->is(tok::r_brace)) {
1942       // Ignore stray "}". parseStructuralElement doesn't consume them.
1943       nextToken();
1944       addUnwrappedLine();
1945     } else {
1946       parseStructuralElement();
1947     }
1948   } while (!eof());
1949 }
1950 
1951 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1952   nextToken();
1953   nextToken(); // interface name
1954 
1955   // @interface can be followed by either a base class, or a category.
1956   if (FormatTok->Tok.is(tok::colon)) {
1957     nextToken();
1958     nextToken(); // base class name
1959   } else if (FormatTok->Tok.is(tok::l_paren))
1960     // Skip category, if present.
1961     parseParens();
1962 
1963   if (FormatTok->Tok.is(tok::less))
1964     parseObjCProtocolList();
1965 
1966   if (FormatTok->Tok.is(tok::l_brace)) {
1967     if (Style.BraceWrapping.AfterObjCDeclaration)
1968       addUnwrappedLine();
1969     parseBlock(/*MustBeDeclaration=*/true);
1970   }
1971 
1972   // With instance variables, this puts '}' on its own line.  Without instance
1973   // variables, this ends the @interface line.
1974   addUnwrappedLine();
1975 
1976   parseObjCUntilAtEnd();
1977 }
1978 
1979 void UnwrappedLineParser::parseObjCProtocol() {
1980   nextToken();
1981   nextToken(); // protocol name
1982 
1983   if (FormatTok->Tok.is(tok::less))
1984     parseObjCProtocolList();
1985 
1986   // Check for protocol declaration.
1987   if (FormatTok->Tok.is(tok::semi)) {
1988     nextToken();
1989     return addUnwrappedLine();
1990   }
1991 
1992   addUnwrappedLine();
1993   parseObjCUntilAtEnd();
1994 }
1995 
1996 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1997   bool IsImport = FormatTok->is(Keywords.kw_import);
1998   assert(IsImport || FormatTok->is(tok::kw_export));
1999   nextToken();
2000 
2001   // Consume the "default" in "export default class/function".
2002   if (FormatTok->is(tok::kw_default))
2003     nextToken();
2004 
2005   // Consume "async function", "function" and "default function", so that these
2006   // get parsed as free-standing JS functions, i.e. do not require a trailing
2007   // semicolon.
2008   if (FormatTok->is(Keywords.kw_async))
2009     nextToken();
2010   if (FormatTok->is(Keywords.kw_function)) {
2011     nextToken();
2012     return;
2013   }
2014 
2015   // For imports, `export *`, `export {...}`, consume the rest of the line up
2016   // to the terminating `;`. For everything else, just return and continue
2017   // parsing the structural element, i.e. the declaration or expression for
2018   // `export default`.
2019   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2020       !FormatTok->isStringLiteral())
2021     return;
2022 
2023   while (!eof()) {
2024     if (FormatTok->is(tok::semi))
2025       return;
2026     if (Line->Tokens.size() == 0) {
2027       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2028       // import statement should terminate.
2029       return;
2030     }
2031     if (FormatTok->is(tok::l_brace)) {
2032       FormatTok->BlockKind = BK_Block;
2033       parseBracedList();
2034     } else {
2035       nextToken();
2036     }
2037   }
2038 }
2039 
2040 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2041                                                  StringRef Prefix = "") {
2042   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2043                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2044   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2045                                                     E = Line.Tokens.end();
2046        I != E; ++I) {
2047     llvm::dbgs() << I->Tok->Tok.getName() << "["
2048                  << "T=" << I->Tok->Type
2049                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2050   }
2051   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2052                                                     E = Line.Tokens.end();
2053        I != E; ++I) {
2054     const UnwrappedLineNode &Node = *I;
2055     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2056              I = Node.Children.begin(),
2057              E = Node.Children.end();
2058          I != E; ++I) {
2059       printDebugInfo(*I, "\nChild: ");
2060     }
2061   }
2062   llvm::dbgs() << "\n";
2063 }
2064 
2065 void UnwrappedLineParser::addUnwrappedLine() {
2066   if (Line->Tokens.empty())
2067     return;
2068   DEBUG({
2069     if (CurrentLines == &Lines)
2070       printDebugInfo(*Line);
2071   });
2072   CurrentLines->push_back(std::move(*Line));
2073   Line->Tokens.clear();
2074   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2075   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2076     CurrentLines->append(
2077         std::make_move_iterator(PreprocessorDirectives.begin()),
2078         std::make_move_iterator(PreprocessorDirectives.end()));
2079     PreprocessorDirectives.clear();
2080   }
2081 }
2082 
2083 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2084 
2085 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2086   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2087          FormatTok.NewlinesBefore > 0;
2088 }
2089 
2090 static bool isLineComment(const FormatToken &FormatTok) {
2091   return FormatTok.is(tok::comment) &&
2092          FormatTok.TokenText.startswith("//");
2093 }
2094 
2095 // Checks if \p FormatTok is a line comment that continues the line comment
2096 // section on \p Line.
2097 static bool continuesLineComment(const FormatToken &FormatTok,
2098                                  const UnwrappedLine &Line,
2099                                  llvm::Regex &CommentPragmasRegex) {
2100   if (Line.Tokens.empty())
2101     return false;
2102 
2103   StringRef IndentContent = FormatTok.TokenText;
2104   if (FormatTok.TokenText.startswith("//") ||
2105       FormatTok.TokenText.startswith("/*"))
2106     IndentContent = FormatTok.TokenText.substr(2);
2107   if (CommentPragmasRegex.match(IndentContent))
2108     return false;
2109 
2110   // If Line starts with a line comment, then FormatTok continues the comment
2111   // section if its original column is greater or equal to the original start
2112   // column of the line.
2113   //
2114   // Define the min column token of a line as follows: if a line ends in '{' or
2115   // contains a '{' followed by a line comment, then the min column token is
2116   // that '{'. Otherwise, the min column token of the line is the first token of
2117   // the line.
2118   //
2119   // If Line starts with a token other than a line comment, then FormatTok
2120   // continues the comment section if its original column is greater than the
2121   // original start column of the min column token of the line.
2122   //
2123   // For example, the second line comment continues the first in these cases:
2124   //
2125   // // first line
2126   // // second line
2127   //
2128   // and:
2129   //
2130   // // first line
2131   //  // second line
2132   //
2133   // and:
2134   //
2135   // int i; // first line
2136   //  // second line
2137   //
2138   // and:
2139   //
2140   // do { // first line
2141   //      // second line
2142   //   int i;
2143   // } while (true);
2144   //
2145   // and:
2146   //
2147   // enum {
2148   //   a, // first line
2149   //    // second line
2150   //   b
2151   // };
2152   //
2153   // The second line comment doesn't continue the first in these cases:
2154   //
2155   //   // first line
2156   //  // second line
2157   //
2158   // and:
2159   //
2160   // int i; // first line
2161   // // second line
2162   //
2163   // and:
2164   //
2165   // do { // first line
2166   //   // second line
2167   //   int i;
2168   // } while (true);
2169   //
2170   // and:
2171   //
2172   // enum {
2173   //   a, // first line
2174   //   // second line
2175   // };
2176   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2177 
2178   // Scan for '{//'. If found, use the column of '{' as a min column for line
2179   // comment section continuation.
2180   const FormatToken *PreviousToken = nullptr;
2181   for (const UnwrappedLineNode &Node : Line.Tokens) {
2182     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2183         isLineComment(*Node.Tok)) {
2184       MinColumnToken = PreviousToken;
2185       break;
2186     }
2187     PreviousToken = Node.Tok;
2188 
2189     // Grab the last newline preceding a token in this unwrapped line.
2190     if (Node.Tok->NewlinesBefore > 0) {
2191       MinColumnToken = Node.Tok;
2192     }
2193   }
2194   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2195     MinColumnToken = PreviousToken;
2196   }
2197 
2198   unsigned MinContinueColumn =
2199       MinColumnToken->OriginalColumn +
2200       (isLineComment(*MinColumnToken) ? 0 : 1);
2201   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
2202          isLineComment(*(Line.Tokens.back().Tok)) &&
2203          FormatTok.OriginalColumn >= MinContinueColumn;
2204 }
2205 
2206 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2207   bool JustComments = Line->Tokens.empty();
2208   for (SmallVectorImpl<FormatToken *>::const_iterator
2209            I = CommentsBeforeNextToken.begin(),
2210            E = CommentsBeforeNextToken.end();
2211        I != E; ++I) {
2212     // Line comments that belong to the same line comment section are put on the
2213     // same line since later we might want to reflow content between them.
2214     // Additional fine-grained breaking of line comment sections is controlled
2215     // by the class BreakableLineCommentSection in case it is desirable to keep
2216     // several line comment sections in the same unwrapped line.
2217     //
2218     // FIXME: Consider putting separate line comment sections as children to the
2219     // unwrapped line instead.
2220     (*I)->ContinuesLineCommentSection =
2221         continuesLineComment(**I, *Line, CommentPragmasRegex);
2222     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2223       addUnwrappedLine();
2224     pushToken(*I);
2225   }
2226   if (NewlineBeforeNext && JustComments)
2227     addUnwrappedLine();
2228   CommentsBeforeNextToken.clear();
2229 }
2230 
2231 void UnwrappedLineParser::nextToken() {
2232   if (eof())
2233     return;
2234   flushComments(isOnNewLine(*FormatTok));
2235   pushToken(FormatTok);
2236   if (Style.Language != FormatStyle::LK_JavaScript)
2237     readToken();
2238   else
2239     readTokenWithJavaScriptASI();
2240 }
2241 
2242 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2243   // FIXME: This is a dirty way to access the previous token. Find a better
2244   // solution.
2245   if (!Line || Line->Tokens.empty())
2246     return nullptr;
2247   return Line->Tokens.back().Tok;
2248 }
2249 
2250 void UnwrappedLineParser::distributeComments(
2251     const SmallVectorImpl<FormatToken *> &Comments,
2252     const FormatToken *NextTok) {
2253   // Whether or not a line comment token continues a line is controlled by
2254   // the method continuesLineComment, with the following caveat:
2255   //
2256   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2257   // that each comment line from the trail is aligned with the next token, if
2258   // the next token exists. If a trail exists, the beginning of the maximal
2259   // trail is marked as a start of a new comment section.
2260   //
2261   // For example in this code:
2262   //
2263   // int a; // line about a
2264   //   // line 1 about b
2265   //   // line 2 about b
2266   //   int b;
2267   //
2268   // the two lines about b form a maximal trail, so there are two sections, the
2269   // first one consisting of the single comment "// line about a" and the
2270   // second one consisting of the next two comments.
2271   if (Comments.empty())
2272     return;
2273   bool ShouldPushCommentsInCurrentLine = true;
2274   bool HasTrailAlignedWithNextToken = false;
2275   unsigned StartOfTrailAlignedWithNextToken = 0;
2276   if (NextTok) {
2277     // We are skipping the first element intentionally.
2278     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2279       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2280         HasTrailAlignedWithNextToken = true;
2281         StartOfTrailAlignedWithNextToken = i;
2282       }
2283     }
2284   }
2285   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2286     FormatToken *FormatTok = Comments[i];
2287     if (HasTrailAlignedWithNextToken &&
2288         i == StartOfTrailAlignedWithNextToken) {
2289       FormatTok->ContinuesLineCommentSection = false;
2290     } else {
2291       FormatTok->ContinuesLineCommentSection =
2292           continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
2293     }
2294     if (!FormatTok->ContinuesLineCommentSection &&
2295         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2296       ShouldPushCommentsInCurrentLine = false;
2297     }
2298     if (ShouldPushCommentsInCurrentLine) {
2299       pushToken(FormatTok);
2300     } else {
2301       CommentsBeforeNextToken.push_back(FormatTok);
2302     }
2303   }
2304 }
2305 
2306 void UnwrappedLineParser::readToken() {
2307   SmallVector<FormatToken *, 1> Comments;
2308   do {
2309     FormatTok = Tokens->getNextToken();
2310     assert(FormatTok);
2311     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2312            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2313       distributeComments(Comments, FormatTok);
2314       Comments.clear();
2315       // If there is an unfinished unwrapped line, we flush the preprocessor
2316       // directives only after that unwrapped line was finished later.
2317       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2318       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2319       // Comments stored before the preprocessor directive need to be output
2320       // before the preprocessor directive, at the same level as the
2321       // preprocessor directive, as we consider them to apply to the directive.
2322       flushComments(isOnNewLine(*FormatTok));
2323       parsePPDirective();
2324     }
2325     while (FormatTok->Type == TT_ConflictStart ||
2326            FormatTok->Type == TT_ConflictEnd ||
2327            FormatTok->Type == TT_ConflictAlternative) {
2328       if (FormatTok->Type == TT_ConflictStart) {
2329         conditionalCompilationStart(/*Unreachable=*/false);
2330       } else if (FormatTok->Type == TT_ConflictAlternative) {
2331         conditionalCompilationAlternative();
2332       } else if (FormatTok->Type == TT_ConflictEnd) {
2333         conditionalCompilationEnd();
2334       }
2335       FormatTok = Tokens->getNextToken();
2336       FormatTok->MustBreakBefore = true;
2337     }
2338 
2339     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
2340         !Line->InPPDirective) {
2341       continue;
2342     }
2343 
2344     if (!FormatTok->Tok.is(tok::comment)) {
2345       distributeComments(Comments, FormatTok);
2346       Comments.clear();
2347       return;
2348     }
2349 
2350     Comments.push_back(FormatTok);
2351   } while (!eof());
2352 
2353   distributeComments(Comments, nullptr);
2354   Comments.clear();
2355 }
2356 
2357 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2358   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2359   if (MustBreakBeforeNextToken) {
2360     Line->Tokens.back().Tok->MustBreakBefore = true;
2361     MustBreakBeforeNextToken = false;
2362   }
2363 }
2364 
2365 } // end namespace format
2366 } // end namespace clang
2367