1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         Token(nullptr) {
65     TokenSource = this;
66     Line.Level = 0;
67     Line.InPPDirective = true;
68   }
69 
70   ~ScopedMacroState() override {
71     TokenSource = PreviousTokenSource;
72     ResetToken = Token;
73     Line.InPPDirective = false;
74     Line.Level = PreviousLineLevel;
75   }
76 
77   FormatToken *getNextToken() override {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89   FormatToken *setPosition(unsigned Position) override {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113 
114   FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
119 class ScopedLineState {
120 public:
121   ScopedLineState(UnwrappedLineParser &Parser,
122                   bool SwitchToPreprocessorLines = false)
123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124     if (SwitchToPreprocessorLines)
125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
126     else if (!Parser.Line->Tokens.empty())
127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128     PreBlockLine = std::move(Parser.Line);
129     Parser.Line = llvm::make_unique<UnwrappedLine>();
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line = std::move(PreBlockLine);
140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141       Parser.MustBreakBeforeNextToken = true;
142     Parser.CurrentLines = OriginalLines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147 
148   std::unique_ptr<UnwrappedLine> PreBlockLine;
149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
152 class CompoundStatementIndenter {
153 public:
154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
155                             const FormatStyle &Style, unsigned &LineLevel)
156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
157     if (Style.BraceWrapping.AfterControlStatement)
158       Parser->addUnwrappedLine();
159     if (Style.BraceWrapping.IndentBraces)
160       ++LineLevel;
161   }
162   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
163 
164 private:
165   unsigned &LineLevel;
166   unsigned OldLineLevel;
167 };
168 
169 namespace {
170 
171 class IndexedTokenSource : public FormatTokenSource {
172 public:
173   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
174       : Tokens(Tokens), Position(-1) {}
175 
176   FormatToken *getNextToken() override {
177     ++Position;
178     return Tokens[Position];
179   }
180 
181   unsigned getPosition() override {
182     assert(Position >= 0);
183     return Position;
184   }
185 
186   FormatToken *setPosition(unsigned P) override {
187     Position = P;
188     return Tokens[Position];
189   }
190 
191   void reset() { Position = -1; }
192 
193 private:
194   ArrayRef<FormatToken *> Tokens;
195   int Position;
196 };
197 
198 } // end anonymous namespace
199 
200 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
201                                          const AdditionalKeywords &Keywords,
202                                          ArrayRef<FormatToken *> Tokens,
203                                          UnwrappedLineConsumer &Callback)
204     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
205       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
206       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
207       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
208 
209 void UnwrappedLineParser::reset() {
210   PPBranchLevel = -1;
211   Line.reset(new UnwrappedLine);
212   CommentsBeforeNextToken.clear();
213   FormatTok = nullptr;
214   MustBreakBeforeNextToken = false;
215   PreprocessorDirectives.clear();
216   CurrentLines = &Lines;
217   DeclarationScopeStack.clear();
218   PPStack.clear();
219 }
220 
221 void UnwrappedLineParser::parse() {
222   IndexedTokenSource TokenSource(AllTokens);
223   do {
224     DEBUG(llvm::dbgs() << "----\n");
225     reset();
226     Tokens = &TokenSource;
227     TokenSource.reset();
228 
229     readToken();
230     parseFile();
231     // Create line with eof token.
232     pushToken(FormatTok);
233     addUnwrappedLine();
234 
235     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
236                                                   E = Lines.end();
237          I != E; ++I) {
238       Callback.consumeUnwrappedLine(*I);
239     }
240     Callback.finishRun();
241     Lines.clear();
242     while (!PPLevelBranchIndex.empty() &&
243            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
244       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
245       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
246     }
247     if (!PPLevelBranchIndex.empty()) {
248       ++PPLevelBranchIndex.back();
249       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
250       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
251     }
252   } while (!PPLevelBranchIndex.empty());
253 }
254 
255 void UnwrappedLineParser::parseFile() {
256   // The top-level context in a file always has declarations, except for pre-
257   // processor directives and JavaScript files.
258   bool MustBeDeclaration =
259       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
260   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
261                                           MustBeDeclaration);
262   parseLevel(/*HasOpeningBrace=*/false);
263   // Make sure to format the remaining tokens.
264   flushComments(true);
265   addUnwrappedLine();
266 }
267 
268 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
269   bool SwitchLabelEncountered = false;
270   do {
271     tok::TokenKind kind = FormatTok->Tok.getKind();
272     if (FormatTok->Type == TT_MacroBlockBegin) {
273       kind = tok::l_brace;
274     } else if (FormatTok->Type == TT_MacroBlockEnd) {
275       kind = tok::r_brace;
276     }
277 
278     switch (kind) {
279     case tok::comment:
280       nextToken();
281       addUnwrappedLine();
282       break;
283     case tok::l_brace:
284       // FIXME: Add parameter whether this can happen - if this happens, we must
285       // be in a non-declaration context.
286       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
287         continue;
288       parseBlock(/*MustBeDeclaration=*/false);
289       addUnwrappedLine();
290       break;
291     case tok::r_brace:
292       if (HasOpeningBrace)
293         return;
294       nextToken();
295       addUnwrappedLine();
296       break;
297     case tok::kw_default:
298     case tok::kw_case:
299       if (!SwitchLabelEncountered &&
300           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
301         ++Line->Level;
302       SwitchLabelEncountered = true;
303       parseStructuralElement();
304       break;
305     default:
306       parseStructuralElement();
307       break;
308     }
309   } while (!eof());
310 }
311 
312 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
313   // We'll parse forward through the tokens until we hit
314   // a closing brace or eof - note that getNextToken() will
315   // parse macros, so this will magically work inside macro
316   // definitions, too.
317   unsigned StoredPosition = Tokens->getPosition();
318   FormatToken *Tok = FormatTok;
319   const FormatToken *PrevTok = getPreviousToken();
320   // Keep a stack of positions of lbrace tokens. We will
321   // update information about whether an lbrace starts a
322   // braced init list or a different block during the loop.
323   SmallVector<FormatToken *, 8> LBraceStack;
324   assert(Tok->Tok.is(tok::l_brace));
325   do {
326     // Get next non-comment token.
327     FormatToken *NextTok;
328     unsigned ReadTokens = 0;
329     do {
330       NextTok = Tokens->getNextToken();
331       ++ReadTokens;
332     } while (NextTok->is(tok::comment));
333 
334     switch (Tok->Tok.getKind()) {
335     case tok::l_brace:
336       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
337           PrevTok->is(tok::colon))
338         // A colon indicates this code is in a type, or a braced list following
339         // a label in an object literal ({a: {b: 1}}).
340         // The code below could be confused by semicolons between the individual
341         // members in a type member list, which would normally trigger BK_Block.
342         // In both cases, this must be parsed as an inline braced init.
343         Tok->BlockKind = BK_BracedInit;
344       else
345         Tok->BlockKind = BK_Unknown;
346       LBraceStack.push_back(Tok);
347       break;
348     case tok::r_brace:
349       if (LBraceStack.empty())
350         break;
351       if (LBraceStack.back()->BlockKind == BK_Unknown) {
352         bool ProbablyBracedList = false;
353         if (Style.Language == FormatStyle::LK_Proto) {
354           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
355         } else {
356           // Using OriginalColumn to distinguish between ObjC methods and
357           // binary operators is a bit hacky.
358           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
359                                   NextTok->OriginalColumn == 0;
360 
361           // If there is a comma, semicolon or right paren after the closing
362           // brace, we assume this is a braced initializer list.  Note that
363           // regardless how we mark inner braces here, we will overwrite the
364           // BlockKind later if we parse a braced list (where all blocks
365           // inside are by default braced lists), or when we explicitly detect
366           // blocks (for example while parsing lambdas).
367           ProbablyBracedList =
368               (Style.Language == FormatStyle::LK_JavaScript &&
369                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
370                                 Keywords.kw_as)) ||
371               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
372                                tok::r_paren, tok::r_square, tok::l_brace,
373                                tok::l_square, tok::l_paren, tok::ellipsis) ||
374               (NextTok->is(tok::identifier) &&
375                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
376               (NextTok->is(tok::semi) &&
377                (!ExpectClassBody || LBraceStack.size() != 1)) ||
378               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
379         }
380         if (ProbablyBracedList) {
381           Tok->BlockKind = BK_BracedInit;
382           LBraceStack.back()->BlockKind = BK_BracedInit;
383         } else {
384           Tok->BlockKind = BK_Block;
385           LBraceStack.back()->BlockKind = BK_Block;
386         }
387       }
388       LBraceStack.pop_back();
389       break;
390     case tok::at:
391     case tok::semi:
392     case tok::kw_if:
393     case tok::kw_while:
394     case tok::kw_for:
395     case tok::kw_switch:
396     case tok::kw_try:
397     case tok::kw___try:
398       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
399         LBraceStack.back()->BlockKind = BK_Block;
400       break;
401     default:
402       break;
403     }
404     PrevTok = Tok;
405     Tok = NextTok;
406   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
407 
408   // Assume other blocks for all unclosed opening braces.
409   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
410     if (LBraceStack[i]->BlockKind == BK_Unknown)
411       LBraceStack[i]->BlockKind = BK_Block;
412   }
413 
414   FormatTok = Tokens->setPosition(StoredPosition);
415 }
416 
417 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
418                                      bool MunchSemi) {
419   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
420          "'{' or macro block token expected");
421   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
422   FormatTok->BlockKind = BK_Block;
423 
424   unsigned InitialLevel = Line->Level;
425   nextToken();
426 
427   if (MacroBlock && FormatTok->is(tok::l_paren))
428     parseParens();
429 
430   addUnwrappedLine();
431   size_t OpeningLineIndex =
432       Lines.empty() ? (UnwrappedLine::kInvalidIndex) : (Lines.size() - 1);
433 
434   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
435                                           MustBeDeclaration);
436   if (AddLevel)
437     ++Line->Level;
438   parseLevel(/*HasOpeningBrace=*/true);
439 
440   if (eof())
441     return;
442 
443   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
444                  : !FormatTok->is(tok::r_brace)) {
445     Line->Level = InitialLevel;
446     FormatTok->BlockKind = BK_Block;
447     return;
448   }
449 
450   nextToken(); // Munch the closing brace.
451 
452   if (MacroBlock && FormatTok->is(tok::l_paren))
453     parseParens();
454 
455   if (MunchSemi && FormatTok->Tok.is(tok::semi))
456     nextToken();
457   Line->Level = InitialLevel;
458   Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
459 }
460 
461 static bool isGoogScope(const UnwrappedLine &Line) {
462   // FIXME: Closure-library specific stuff should not be hard-coded but be
463   // configurable.
464   if (Line.Tokens.size() < 4)
465     return false;
466   auto I = Line.Tokens.begin();
467   if (I->Tok->TokenText != "goog")
468     return false;
469   ++I;
470   if (I->Tok->isNot(tok::period))
471     return false;
472   ++I;
473   if (I->Tok->TokenText != "scope")
474     return false;
475   ++I;
476   return I->Tok->is(tok::l_paren);
477 }
478 
479 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
480                                    const FormatToken &InitialToken) {
481   if (InitialToken.is(tok::kw_namespace))
482     return Style.BraceWrapping.AfterNamespace;
483   if (InitialToken.is(tok::kw_class))
484     return Style.BraceWrapping.AfterClass;
485   if (InitialToken.is(tok::kw_union))
486     return Style.BraceWrapping.AfterUnion;
487   if (InitialToken.is(tok::kw_struct))
488     return Style.BraceWrapping.AfterStruct;
489   return false;
490 }
491 
492 void UnwrappedLineParser::parseChildBlock() {
493   FormatTok->BlockKind = BK_Block;
494   nextToken();
495   {
496     bool GoogScope =
497         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
498     ScopedLineState LineState(*this);
499     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
500                                             /*MustBeDeclaration=*/false);
501     Line->Level += GoogScope ? 0 : 1;
502     parseLevel(/*HasOpeningBrace=*/true);
503     flushComments(isOnNewLine(*FormatTok));
504     Line->Level -= GoogScope ? 0 : 1;
505   }
506   nextToken();
507 }
508 
509 void UnwrappedLineParser::parsePPDirective() {
510   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
511   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
512   nextToken();
513 
514   if (!FormatTok->Tok.getIdentifierInfo()) {
515     parsePPUnknown();
516     return;
517   }
518 
519   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
520   case tok::pp_define:
521     parsePPDefine();
522     return;
523   case tok::pp_if:
524     parsePPIf(/*IfDef=*/false);
525     break;
526   case tok::pp_ifdef:
527   case tok::pp_ifndef:
528     parsePPIf(/*IfDef=*/true);
529     break;
530   case tok::pp_else:
531     parsePPElse();
532     break;
533   case tok::pp_elif:
534     parsePPElIf();
535     break;
536   case tok::pp_endif:
537     parsePPEndIf();
538     break;
539   default:
540     parsePPUnknown();
541     break;
542   }
543 }
544 
545 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
546   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
547     PPStack.push_back(PP_Unreachable);
548   else
549     PPStack.push_back(PP_Conditional);
550 }
551 
552 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
553   ++PPBranchLevel;
554   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
555   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
556     PPLevelBranchIndex.push_back(0);
557     PPLevelBranchCount.push_back(0);
558   }
559   PPChainBranchIndex.push(0);
560   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
561   conditionalCompilationCondition(Unreachable || Skip);
562 }
563 
564 void UnwrappedLineParser::conditionalCompilationAlternative() {
565   if (!PPStack.empty())
566     PPStack.pop_back();
567   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
568   if (!PPChainBranchIndex.empty())
569     ++PPChainBranchIndex.top();
570   conditionalCompilationCondition(
571       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
572       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
573 }
574 
575 void UnwrappedLineParser::conditionalCompilationEnd() {
576   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
577   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
578     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
579       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
580     }
581   }
582   // Guard against #endif's without #if.
583   if (PPBranchLevel > 0)
584     --PPBranchLevel;
585   if (!PPChainBranchIndex.empty())
586     PPChainBranchIndex.pop();
587   if (!PPStack.empty())
588     PPStack.pop_back();
589 }
590 
591 void UnwrappedLineParser::parsePPIf(bool IfDef) {
592   bool IfNDef = FormatTok->is(tok::pp_ifndef);
593   nextToken();
594   bool Unreachable = false;
595   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
596     Unreachable = true;
597   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
598     Unreachable = true;
599   conditionalCompilationStart(Unreachable);
600   parsePPUnknown();
601 }
602 
603 void UnwrappedLineParser::parsePPElse() {
604   conditionalCompilationAlternative();
605   parsePPUnknown();
606 }
607 
608 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
609 
610 void UnwrappedLineParser::parsePPEndIf() {
611   conditionalCompilationEnd();
612   parsePPUnknown();
613 }
614 
615 void UnwrappedLineParser::parsePPDefine() {
616   nextToken();
617 
618   if (FormatTok->Tok.getKind() != tok::identifier) {
619     parsePPUnknown();
620     return;
621   }
622   nextToken();
623   if (FormatTok->Tok.getKind() == tok::l_paren &&
624       FormatTok->WhitespaceRange.getBegin() ==
625           FormatTok->WhitespaceRange.getEnd()) {
626     parseParens();
627   }
628   addUnwrappedLine();
629   Line->Level = 1;
630 
631   // Errors during a preprocessor directive can only affect the layout of the
632   // preprocessor directive, and thus we ignore them. An alternative approach
633   // would be to use the same approach we use on the file level (no
634   // re-indentation if there was a structural error) within the macro
635   // definition.
636   parseFile();
637 }
638 
639 void UnwrappedLineParser::parsePPUnknown() {
640   do {
641     nextToken();
642   } while (!eof());
643   addUnwrappedLine();
644 }
645 
646 // Here we blacklist certain tokens that are not usually the first token in an
647 // unwrapped line. This is used in attempt to distinguish macro calls without
648 // trailing semicolons from other constructs split to several lines.
649 static bool tokenCanStartNewLine(const clang::Token &Tok) {
650   // Semicolon can be a null-statement, l_square can be a start of a macro or
651   // a C++11 attribute, but this doesn't seem to be common.
652   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
653          Tok.isNot(tok::l_square) &&
654          // Tokens that can only be used as binary operators and a part of
655          // overloaded operator names.
656          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
657          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
658          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
659          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
660          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
661          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
662          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
663          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
664          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
665          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
666          Tok.isNot(tok::lesslessequal) &&
667          // Colon is used in labels, base class lists, initializer lists,
668          // range-based for loops, ternary operator, but should never be the
669          // first token in an unwrapped line.
670          Tok.isNot(tok::colon) &&
671          // 'noexcept' is a trailing annotation.
672          Tok.isNot(tok::kw_noexcept);
673 }
674 
675 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
676                           const FormatToken *FormatTok) {
677   // FIXME: This returns true for C/C++ keywords like 'struct'.
678   return FormatTok->is(tok::identifier) &&
679          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
680           !FormatTok->isOneOf(
681               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
682               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
683               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
684               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
685               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
686               Keywords.kw_instanceof, Keywords.kw_interface,
687               Keywords.kw_throws));
688 }
689 
690 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
691                                  const FormatToken *FormatTok) {
692   return FormatTok->Tok.isLiteral() ||
693          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
694          mustBeJSIdent(Keywords, FormatTok);
695 }
696 
697 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
698 // when encountered after a value (see mustBeJSIdentOrValue).
699 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
700                            const FormatToken *FormatTok) {
701   return FormatTok->isOneOf(
702       tok::kw_return, Keywords.kw_yield,
703       // conditionals
704       tok::kw_if, tok::kw_else,
705       // loops
706       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
707       // switch/case
708       tok::kw_switch, tok::kw_case,
709       // exceptions
710       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
711       // declaration
712       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
713       Keywords.kw_async, Keywords.kw_function,
714       // import/export
715       Keywords.kw_import, tok::kw_export);
716 }
717 
718 // readTokenWithJavaScriptASI reads the next token and terminates the current
719 // line if JavaScript Automatic Semicolon Insertion must
720 // happen between the current token and the next token.
721 //
722 // This method is conservative - it cannot cover all edge cases of JavaScript,
723 // but only aims to correctly handle certain well known cases. It *must not*
724 // return true in speculative cases.
725 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
726   FormatToken *Previous = FormatTok;
727   readToken();
728   FormatToken *Next = FormatTok;
729 
730   bool IsOnSameLine =
731       CommentsBeforeNextToken.empty()
732           ? Next->NewlinesBefore == 0
733           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
734   if (IsOnSameLine)
735     return;
736 
737   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
738   bool PreviousStartsTemplateExpr =
739       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
740   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
741     // If the token before the previous one is an '@', the previous token is an
742     // annotation and can precede another identifier/value.
743     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
744     if (PrePrevious->is(tok::at))
745       return;
746   }
747   if (Next->is(tok::exclaim) && PreviousMustBeValue)
748     return addUnwrappedLine();
749   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
750   bool NextEndsTemplateExpr =
751       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
752   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
753       (PreviousMustBeValue ||
754        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
755                          tok::minusminus)))
756     return addUnwrappedLine();
757   if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
758     return addUnwrappedLine();
759 }
760 
761 void UnwrappedLineParser::parseStructuralElement() {
762   assert(!FormatTok->is(tok::l_brace));
763   if (Style.Language == FormatStyle::LK_TableGen &&
764       FormatTok->is(tok::pp_include)) {
765     nextToken();
766     if (FormatTok->is(tok::string_literal))
767       nextToken();
768     addUnwrappedLine();
769     return;
770   }
771   switch (FormatTok->Tok.getKind()) {
772   case tok::at:
773     nextToken();
774     if (FormatTok->Tok.is(tok::l_brace)) {
775       parseBracedList();
776       break;
777     }
778     switch (FormatTok->Tok.getObjCKeywordID()) {
779     case tok::objc_public:
780     case tok::objc_protected:
781     case tok::objc_package:
782     case tok::objc_private:
783       return parseAccessSpecifier();
784     case tok::objc_interface:
785     case tok::objc_implementation:
786       return parseObjCInterfaceOrImplementation();
787     case tok::objc_protocol:
788       return parseObjCProtocol();
789     case tok::objc_end:
790       return; // Handled by the caller.
791     case tok::objc_optional:
792     case tok::objc_required:
793       nextToken();
794       addUnwrappedLine();
795       return;
796     case tok::objc_autoreleasepool:
797       nextToken();
798       if (FormatTok->Tok.is(tok::l_brace)) {
799         if (Style.BraceWrapping.AfterObjCDeclaration)
800           addUnwrappedLine();
801         parseBlock(/*MustBeDeclaration=*/false);
802       }
803       addUnwrappedLine();
804       return;
805     case tok::objc_try:
806       // This branch isn't strictly necessary (the kw_try case below would
807       // do this too after the tok::at is parsed above).  But be explicit.
808       parseTryCatch();
809       return;
810     default:
811       break;
812     }
813     break;
814   case tok::kw_asm:
815     nextToken();
816     if (FormatTok->is(tok::l_brace)) {
817       FormatTok->Type = TT_InlineASMBrace;
818       nextToken();
819       while (FormatTok && FormatTok->isNot(tok::eof)) {
820         if (FormatTok->is(tok::r_brace)) {
821           FormatTok->Type = TT_InlineASMBrace;
822           nextToken();
823           addUnwrappedLine();
824           break;
825         }
826         FormatTok->Finalized = true;
827         nextToken();
828       }
829     }
830     break;
831   case tok::kw_namespace:
832     parseNamespace();
833     return;
834   case tok::kw_inline:
835     nextToken();
836     if (FormatTok->Tok.is(tok::kw_namespace)) {
837       parseNamespace();
838       return;
839     }
840     break;
841   case tok::kw_public:
842   case tok::kw_protected:
843   case tok::kw_private:
844     if (Style.Language == FormatStyle::LK_Java ||
845         Style.Language == FormatStyle::LK_JavaScript)
846       nextToken();
847     else
848       parseAccessSpecifier();
849     return;
850   case tok::kw_if:
851     parseIfThenElse();
852     return;
853   case tok::kw_for:
854   case tok::kw_while:
855     parseForOrWhileLoop();
856     return;
857   case tok::kw_do:
858     parseDoWhile();
859     return;
860   case tok::kw_switch:
861     parseSwitch();
862     return;
863   case tok::kw_default:
864     nextToken();
865     parseLabel();
866     return;
867   case tok::kw_case:
868     parseCaseLabel();
869     return;
870   case tok::kw_try:
871   case tok::kw___try:
872     parseTryCatch();
873     return;
874   case tok::kw_extern:
875     nextToken();
876     if (FormatTok->Tok.is(tok::string_literal)) {
877       nextToken();
878       if (FormatTok->Tok.is(tok::l_brace)) {
879         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
880         addUnwrappedLine();
881         return;
882       }
883     }
884     break;
885   case tok::kw_export:
886     if (Style.Language == FormatStyle::LK_JavaScript) {
887       parseJavaScriptEs6ImportExport();
888       return;
889     }
890     break;
891   case tok::identifier:
892     if (FormatTok->is(TT_ForEachMacro)) {
893       parseForOrWhileLoop();
894       return;
895     }
896     if (FormatTok->is(TT_MacroBlockBegin)) {
897       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
898                  /*MunchSemi=*/false);
899       return;
900     }
901     if (FormatTok->is(Keywords.kw_import)) {
902       if (Style.Language == FormatStyle::LK_JavaScript) {
903         parseJavaScriptEs6ImportExport();
904         return;
905       }
906       if (Style.Language == FormatStyle::LK_Proto) {
907         nextToken();
908         if (FormatTok->is(tok::kw_public))
909           nextToken();
910         if (!FormatTok->is(tok::string_literal))
911           return;
912         nextToken();
913         if (FormatTok->is(tok::semi))
914           nextToken();
915         addUnwrappedLine();
916         return;
917       }
918     }
919     if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
920                            Keywords.kw_slots, Keywords.kw_qslots)) {
921       nextToken();
922       if (FormatTok->is(tok::colon)) {
923         nextToken();
924         addUnwrappedLine();
925         return;
926       }
927     }
928     // In all other cases, parse the declaration.
929     break;
930   default:
931     break;
932   }
933   do {
934     const FormatToken *Previous = getPreviousToken();
935     switch (FormatTok->Tok.getKind()) {
936     case tok::at:
937       nextToken();
938       if (FormatTok->Tok.is(tok::l_brace))
939         parseBracedList();
940       break;
941     case tok::kw_enum:
942       // Ignore if this is part of "template <enum ...".
943       if (Previous && Previous->is(tok::less)) {
944         nextToken();
945         break;
946       }
947 
948       // parseEnum falls through and does not yet add an unwrapped line as an
949       // enum definition can start a structural element.
950       if (!parseEnum())
951         break;
952       // This only applies for C++.
953       if (!Style.IsCpp()) {
954         addUnwrappedLine();
955         return;
956       }
957       break;
958     case tok::kw_typedef:
959       nextToken();
960       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
961                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
962         parseEnum();
963       break;
964     case tok::kw_struct:
965     case tok::kw_union:
966     case tok::kw_class:
967       // parseRecord falls through and does not yet add an unwrapped line as a
968       // record declaration or definition can start a structural element.
969       parseRecord();
970       // This does not apply for Java and JavaScript.
971       if (Style.Language == FormatStyle::LK_Java ||
972           Style.Language == FormatStyle::LK_JavaScript) {
973         if (FormatTok->is(tok::semi))
974           nextToken();
975         addUnwrappedLine();
976         return;
977       }
978       break;
979     case tok::period:
980       nextToken();
981       // In Java, classes have an implicit static member "class".
982       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
983           FormatTok->is(tok::kw_class))
984         nextToken();
985       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
986           FormatTok->Tok.getIdentifierInfo())
987         // JavaScript only has pseudo keywords, all keywords are allowed to
988         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
989         nextToken();
990       break;
991     case tok::semi:
992       nextToken();
993       addUnwrappedLine();
994       return;
995     case tok::r_brace:
996       addUnwrappedLine();
997       return;
998     case tok::l_paren:
999       parseParens();
1000       break;
1001     case tok::kw_operator:
1002       nextToken();
1003       if (FormatTok->isBinaryOperator())
1004         nextToken();
1005       break;
1006     case tok::caret:
1007       nextToken();
1008       if (FormatTok->Tok.isAnyIdentifier() ||
1009           FormatTok->isSimpleTypeSpecifier())
1010         nextToken();
1011       if (FormatTok->is(tok::l_paren))
1012         parseParens();
1013       if (FormatTok->is(tok::l_brace))
1014         parseChildBlock();
1015       break;
1016     case tok::l_brace:
1017       if (!tryToParseBracedList()) {
1018         // A block outside of parentheses must be the last part of a
1019         // structural element.
1020         // FIXME: Figure out cases where this is not true, and add projections
1021         // for them (the one we know is missing are lambdas).
1022         if (Style.BraceWrapping.AfterFunction)
1023           addUnwrappedLine();
1024         FormatTok->Type = TT_FunctionLBrace;
1025         parseBlock(/*MustBeDeclaration=*/false);
1026         addUnwrappedLine();
1027         return;
1028       }
1029       // Otherwise this was a braced init list, and the structural
1030       // element continues.
1031       break;
1032     case tok::kw_try:
1033       // We arrive here when parsing function-try blocks.
1034       parseTryCatch();
1035       return;
1036     case tok::identifier: {
1037       if (FormatTok->is(TT_MacroBlockEnd)) {
1038         addUnwrappedLine();
1039         return;
1040       }
1041 
1042       // Parse function literal unless 'function' is the first token in a line
1043       // in which case this should be treated as a free-standing function.
1044       if (Style.Language == FormatStyle::LK_JavaScript &&
1045           (FormatTok->is(Keywords.kw_function) ||
1046            FormatTok->startsSequence(Keywords.kw_async,
1047                                      Keywords.kw_function)) &&
1048           Line->Tokens.size() > 0) {
1049         tryToParseJSFunction();
1050         break;
1051       }
1052       if ((Style.Language == FormatStyle::LK_JavaScript ||
1053            Style.Language == FormatStyle::LK_Java) &&
1054           FormatTok->is(Keywords.kw_interface)) {
1055         if (Style.Language == FormatStyle::LK_JavaScript) {
1056           // In JavaScript/TypeScript, "interface" can be used as a standalone
1057           // identifier, e.g. in `var interface = 1;`. If "interface" is
1058           // followed by another identifier, it is very like to be an actual
1059           // interface declaration.
1060           unsigned StoredPosition = Tokens->getPosition();
1061           FormatToken *Next = Tokens->getNextToken();
1062           FormatTok = Tokens->setPosition(StoredPosition);
1063           if (Next && !mustBeJSIdent(Keywords, Next)) {
1064             nextToken();
1065             break;
1066           }
1067         }
1068         parseRecord();
1069         addUnwrappedLine();
1070         return;
1071       }
1072 
1073       // See if the following token should start a new unwrapped line.
1074       StringRef Text = FormatTok->TokenText;
1075       nextToken();
1076       if (Line->Tokens.size() == 1 &&
1077           // JS doesn't have macros, and within classes colons indicate fields,
1078           // not labels.
1079           Style.Language != FormatStyle::LK_JavaScript) {
1080         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1081           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1082           parseLabel();
1083           return;
1084         }
1085         // Recognize function-like macro usages without trailing semicolon as
1086         // well as free-standing macros like Q_OBJECT.
1087         bool FunctionLike = FormatTok->is(tok::l_paren);
1088         if (FunctionLike)
1089           parseParens();
1090 
1091         bool FollowedByNewline =
1092             CommentsBeforeNextToken.empty()
1093                 ? FormatTok->NewlinesBefore > 0
1094                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1095 
1096         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1097             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1098           addUnwrappedLine();
1099           return;
1100         }
1101       }
1102       break;
1103     }
1104     case tok::equal:
1105       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1106       // TT_JsFatArrow. The always start an expression or a child block if
1107       // followed by a curly.
1108       if (FormatTok->is(TT_JsFatArrow)) {
1109         nextToken();
1110         if (FormatTok->is(tok::l_brace))
1111           parseChildBlock();
1112         break;
1113       }
1114 
1115       nextToken();
1116       if (FormatTok->Tok.is(tok::l_brace)) {
1117         parseBracedList();
1118       }
1119       break;
1120     case tok::l_square:
1121       parseSquare();
1122       break;
1123     case tok::kw_new:
1124       parseNew();
1125       break;
1126     default:
1127       nextToken();
1128       break;
1129     }
1130   } while (!eof());
1131 }
1132 
1133 bool UnwrappedLineParser::tryToParseLambda() {
1134   if (!Style.IsCpp()) {
1135     nextToken();
1136     return false;
1137   }
1138   const FormatToken* Previous = getPreviousToken();
1139   if (Previous &&
1140       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1141                          tok::kw_delete) ||
1142        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1143     nextToken();
1144     return false;
1145   }
1146   assert(FormatTok->is(tok::l_square));
1147   FormatToken &LSquare = *FormatTok;
1148   if (!tryToParseLambdaIntroducer())
1149     return false;
1150 
1151   while (FormatTok->isNot(tok::l_brace)) {
1152     if (FormatTok->isSimpleTypeSpecifier()) {
1153       nextToken();
1154       continue;
1155     }
1156     switch (FormatTok->Tok.getKind()) {
1157     case tok::l_brace:
1158       break;
1159     case tok::l_paren:
1160       parseParens();
1161       break;
1162     case tok::amp:
1163     case tok::star:
1164     case tok::kw_const:
1165     case tok::comma:
1166     case tok::less:
1167     case tok::greater:
1168     case tok::identifier:
1169     case tok::numeric_constant:
1170     case tok::coloncolon:
1171     case tok::kw_mutable:
1172       nextToken();
1173       break;
1174     case tok::arrow:
1175       FormatTok->Type = TT_LambdaArrow;
1176       nextToken();
1177       break;
1178     default:
1179       return true;
1180     }
1181   }
1182   LSquare.Type = TT_LambdaLSquare;
1183   parseChildBlock();
1184   return true;
1185 }
1186 
1187 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1188   nextToken();
1189   if (FormatTok->is(tok::equal)) {
1190     nextToken();
1191     if (FormatTok->is(tok::r_square)) {
1192       nextToken();
1193       return true;
1194     }
1195     if (FormatTok->isNot(tok::comma))
1196       return false;
1197     nextToken();
1198   } else if (FormatTok->is(tok::amp)) {
1199     nextToken();
1200     if (FormatTok->is(tok::r_square)) {
1201       nextToken();
1202       return true;
1203     }
1204     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1205       return false;
1206     }
1207     if (FormatTok->is(tok::comma))
1208       nextToken();
1209   } else if (FormatTok->is(tok::r_square)) {
1210     nextToken();
1211     return true;
1212   }
1213   do {
1214     if (FormatTok->is(tok::amp))
1215       nextToken();
1216     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1217       return false;
1218     nextToken();
1219     if (FormatTok->is(tok::ellipsis))
1220       nextToken();
1221     if (FormatTok->is(tok::comma)) {
1222       nextToken();
1223     } else if (FormatTok->is(tok::r_square)) {
1224       nextToken();
1225       return true;
1226     } else {
1227       return false;
1228     }
1229   } while (!eof());
1230   return false;
1231 }
1232 
1233 void UnwrappedLineParser::tryToParseJSFunction() {
1234   assert(FormatTok->is(Keywords.kw_function) ||
1235          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1236   if (FormatTok->is(Keywords.kw_async))
1237     nextToken();
1238   // Consume "function".
1239   nextToken();
1240 
1241   // Consume * (generator function). Treat it like C++'s overloaded operators.
1242   if (FormatTok->is(tok::star)) {
1243     FormatTok->Type = TT_OverloadedOperator;
1244     nextToken();
1245   }
1246 
1247   // Consume function name.
1248   if (FormatTok->is(tok::identifier))
1249     nextToken();
1250 
1251   if (FormatTok->isNot(tok::l_paren))
1252     return;
1253 
1254   // Parse formal parameter list.
1255   parseParens();
1256 
1257   if (FormatTok->is(tok::colon)) {
1258     // Parse a type definition.
1259     nextToken();
1260 
1261     // Eat the type declaration. For braced inline object types, balance braces,
1262     // otherwise just parse until finding an l_brace for the function body.
1263     if (FormatTok->is(tok::l_brace))
1264       tryToParseBracedList();
1265     else
1266       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1267         nextToken();
1268   }
1269 
1270   if (FormatTok->is(tok::semi))
1271     return;
1272 
1273   parseChildBlock();
1274 }
1275 
1276 bool UnwrappedLineParser::tryToParseBracedList() {
1277   if (FormatTok->BlockKind == BK_Unknown)
1278     calculateBraceTypes();
1279   assert(FormatTok->BlockKind != BK_Unknown);
1280   if (FormatTok->BlockKind == BK_Block)
1281     return false;
1282   parseBracedList();
1283   return true;
1284 }
1285 
1286 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1287   bool HasError = false;
1288   nextToken();
1289 
1290   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1291   // replace this by using parseAssigmentExpression() inside.
1292   do {
1293     if (Style.Language == FormatStyle::LK_JavaScript) {
1294       if (FormatTok->is(Keywords.kw_function) ||
1295           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1296         tryToParseJSFunction();
1297         continue;
1298       }
1299       if (FormatTok->is(TT_JsFatArrow)) {
1300         nextToken();
1301         // Fat arrows can be followed by simple expressions or by child blocks
1302         // in curly braces.
1303         if (FormatTok->is(tok::l_brace)) {
1304           parseChildBlock();
1305           continue;
1306         }
1307       }
1308       if (FormatTok->is(tok::l_brace)) {
1309         // Could be a method inside of a braced list `{a() { return 1; }}`.
1310         if (tryToParseBracedList())
1311           continue;
1312         parseChildBlock();
1313       }
1314     }
1315     switch (FormatTok->Tok.getKind()) {
1316     case tok::caret:
1317       nextToken();
1318       if (FormatTok->is(tok::l_brace)) {
1319         parseChildBlock();
1320       }
1321       break;
1322     case tok::l_square:
1323       tryToParseLambda();
1324       break;
1325     case tok::l_paren:
1326       parseParens();
1327       // JavaScript can just have free standing methods and getters/setters in
1328       // object literals. Detect them by a "{" following ")".
1329       if (Style.Language == FormatStyle::LK_JavaScript) {
1330         if (FormatTok->is(tok::l_brace))
1331           parseChildBlock();
1332         break;
1333       }
1334       break;
1335     case tok::l_brace:
1336       // Assume there are no blocks inside a braced init list apart
1337       // from the ones we explicitly parse out (like lambdas).
1338       FormatTok->BlockKind = BK_BracedInit;
1339       parseBracedList();
1340       break;
1341     case tok::r_brace:
1342       nextToken();
1343       return !HasError;
1344     case tok::semi:
1345       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1346       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1347       // used for error recovery if we have otherwise determined that this is
1348       // a braced list.
1349       if (Style.Language == FormatStyle::LK_JavaScript) {
1350         nextToken();
1351         break;
1352       }
1353       HasError = true;
1354       if (!ContinueOnSemicolons)
1355         return !HasError;
1356       nextToken();
1357       break;
1358     case tok::comma:
1359       nextToken();
1360       break;
1361     default:
1362       nextToken();
1363       break;
1364     }
1365   } while (!eof());
1366   return false;
1367 }
1368 
1369 void UnwrappedLineParser::parseParens() {
1370   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1371   nextToken();
1372   do {
1373     switch (FormatTok->Tok.getKind()) {
1374     case tok::l_paren:
1375       parseParens();
1376       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1377         parseChildBlock();
1378       break;
1379     case tok::r_paren:
1380       nextToken();
1381       return;
1382     case tok::r_brace:
1383       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1384       return;
1385     case tok::l_square:
1386       tryToParseLambda();
1387       break;
1388     case tok::l_brace:
1389       if (!tryToParseBracedList())
1390         parseChildBlock();
1391       break;
1392     case tok::at:
1393       nextToken();
1394       if (FormatTok->Tok.is(tok::l_brace))
1395         parseBracedList();
1396       break;
1397     case tok::kw_class:
1398       if (Style.Language == FormatStyle::LK_JavaScript)
1399         parseRecord(/*ParseAsExpr=*/true);
1400       else
1401         nextToken();
1402       break;
1403     case tok::identifier:
1404       if (Style.Language == FormatStyle::LK_JavaScript &&
1405           (FormatTok->is(Keywords.kw_function) ||
1406            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1407         tryToParseJSFunction();
1408       else
1409         nextToken();
1410       break;
1411     default:
1412       nextToken();
1413       break;
1414     }
1415   } while (!eof());
1416 }
1417 
1418 void UnwrappedLineParser::parseSquare() {
1419   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1420   if (tryToParseLambda())
1421     return;
1422   do {
1423     switch (FormatTok->Tok.getKind()) {
1424     case tok::l_paren:
1425       parseParens();
1426       break;
1427     case tok::r_square:
1428       nextToken();
1429       return;
1430     case tok::r_brace:
1431       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1432       return;
1433     case tok::l_square:
1434       parseSquare();
1435       break;
1436     case tok::l_brace: {
1437       if (!tryToParseBracedList())
1438         parseChildBlock();
1439       break;
1440     }
1441     case tok::at:
1442       nextToken();
1443       if (FormatTok->Tok.is(tok::l_brace))
1444         parseBracedList();
1445       break;
1446     default:
1447       nextToken();
1448       break;
1449     }
1450   } while (!eof());
1451 }
1452 
1453 void UnwrappedLineParser::parseIfThenElse() {
1454   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1455   nextToken();
1456   if (FormatTok->Tok.is(tok::l_paren))
1457     parseParens();
1458   bool NeedsUnwrappedLine = false;
1459   if (FormatTok->Tok.is(tok::l_brace)) {
1460     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1461     parseBlock(/*MustBeDeclaration=*/false);
1462     if (Style.BraceWrapping.BeforeElse)
1463       addUnwrappedLine();
1464     else
1465       NeedsUnwrappedLine = true;
1466   } else {
1467     addUnwrappedLine();
1468     ++Line->Level;
1469     parseStructuralElement();
1470     --Line->Level;
1471   }
1472   if (FormatTok->Tok.is(tok::kw_else)) {
1473     nextToken();
1474     if (FormatTok->Tok.is(tok::l_brace)) {
1475       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1476       parseBlock(/*MustBeDeclaration=*/false);
1477       addUnwrappedLine();
1478     } else if (FormatTok->Tok.is(tok::kw_if)) {
1479       parseIfThenElse();
1480     } else {
1481       addUnwrappedLine();
1482       ++Line->Level;
1483       parseStructuralElement();
1484       if (FormatTok->is(tok::eof))
1485         addUnwrappedLine();
1486       --Line->Level;
1487     }
1488   } else if (NeedsUnwrappedLine) {
1489     addUnwrappedLine();
1490   }
1491 }
1492 
1493 void UnwrappedLineParser::parseTryCatch() {
1494   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1495   nextToken();
1496   bool NeedsUnwrappedLine = false;
1497   if (FormatTok->is(tok::colon)) {
1498     // We are in a function try block, what comes is an initializer list.
1499     nextToken();
1500     while (FormatTok->is(tok::identifier)) {
1501       nextToken();
1502       if (FormatTok->is(tok::l_paren))
1503         parseParens();
1504       if (FormatTok->is(tok::comma))
1505         nextToken();
1506     }
1507   }
1508   // Parse try with resource.
1509   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1510     parseParens();
1511   }
1512   if (FormatTok->is(tok::l_brace)) {
1513     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1514     parseBlock(/*MustBeDeclaration=*/false);
1515     if (Style.BraceWrapping.BeforeCatch) {
1516       addUnwrappedLine();
1517     } else {
1518       NeedsUnwrappedLine = true;
1519     }
1520   } else if (!FormatTok->is(tok::kw_catch)) {
1521     // The C++ standard requires a compound-statement after a try.
1522     // If there's none, we try to assume there's a structuralElement
1523     // and try to continue.
1524     addUnwrappedLine();
1525     ++Line->Level;
1526     parseStructuralElement();
1527     --Line->Level;
1528   }
1529   while (1) {
1530     if (FormatTok->is(tok::at))
1531       nextToken();
1532     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1533                              tok::kw___finally) ||
1534           ((Style.Language == FormatStyle::LK_Java ||
1535             Style.Language == FormatStyle::LK_JavaScript) &&
1536            FormatTok->is(Keywords.kw_finally)) ||
1537           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1538            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1539       break;
1540     nextToken();
1541     while (FormatTok->isNot(tok::l_brace)) {
1542       if (FormatTok->is(tok::l_paren)) {
1543         parseParens();
1544         continue;
1545       }
1546       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1547         return;
1548       nextToken();
1549     }
1550     NeedsUnwrappedLine = false;
1551     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1552     parseBlock(/*MustBeDeclaration=*/false);
1553     if (Style.BraceWrapping.BeforeCatch)
1554       addUnwrappedLine();
1555     else
1556       NeedsUnwrappedLine = true;
1557   }
1558   if (NeedsUnwrappedLine)
1559     addUnwrappedLine();
1560 }
1561 
1562 void UnwrappedLineParser::parseNamespace() {
1563   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1564 
1565   const FormatToken &InitialToken = *FormatTok;
1566   nextToken();
1567   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1568     nextToken();
1569   if (FormatTok->Tok.is(tok::l_brace)) {
1570     if (ShouldBreakBeforeBrace(Style, InitialToken))
1571       addUnwrappedLine();
1572 
1573     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1574                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1575                      DeclarationScopeStack.size() > 1);
1576     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1577     // Munch the semicolon after a namespace. This is more common than one would
1578     // think. Puttin the semicolon into its own line is very ugly.
1579     if (FormatTok->Tok.is(tok::semi))
1580       nextToken();
1581     addUnwrappedLine();
1582   }
1583   // FIXME: Add error handling.
1584 }
1585 
1586 void UnwrappedLineParser::parseNew() {
1587   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1588   nextToken();
1589   if (Style.Language != FormatStyle::LK_Java)
1590     return;
1591 
1592   // In Java, we can parse everything up to the parens, which aren't optional.
1593   do {
1594     // There should not be a ;, { or } before the new's open paren.
1595     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1596       return;
1597 
1598     // Consume the parens.
1599     if (FormatTok->is(tok::l_paren)) {
1600       parseParens();
1601 
1602       // If there is a class body of an anonymous class, consume that as child.
1603       if (FormatTok->is(tok::l_brace))
1604         parseChildBlock();
1605       return;
1606     }
1607     nextToken();
1608   } while (!eof());
1609 }
1610 
1611 void UnwrappedLineParser::parseForOrWhileLoop() {
1612   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1613          "'for', 'while' or foreach macro expected");
1614   nextToken();
1615   if (FormatTok->Tok.is(tok::l_paren))
1616     parseParens();
1617   if (FormatTok->Tok.is(tok::l_brace)) {
1618     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1619     parseBlock(/*MustBeDeclaration=*/false);
1620     addUnwrappedLine();
1621   } else {
1622     addUnwrappedLine();
1623     ++Line->Level;
1624     parseStructuralElement();
1625     --Line->Level;
1626   }
1627 }
1628 
1629 void UnwrappedLineParser::parseDoWhile() {
1630   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1631   nextToken();
1632   if (FormatTok->Tok.is(tok::l_brace)) {
1633     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1634     parseBlock(/*MustBeDeclaration=*/false);
1635     if (Style.BraceWrapping.IndentBraces)
1636       addUnwrappedLine();
1637   } else {
1638     addUnwrappedLine();
1639     ++Line->Level;
1640     parseStructuralElement();
1641     --Line->Level;
1642   }
1643 
1644   // FIXME: Add error handling.
1645   if (!FormatTok->Tok.is(tok::kw_while)) {
1646     addUnwrappedLine();
1647     return;
1648   }
1649 
1650   nextToken();
1651   parseStructuralElement();
1652 }
1653 
1654 void UnwrappedLineParser::parseLabel() {
1655   nextToken();
1656   unsigned OldLineLevel = Line->Level;
1657   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1658     --Line->Level;
1659   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1660     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1661     parseBlock(/*MustBeDeclaration=*/false);
1662     if (FormatTok->Tok.is(tok::kw_break)) {
1663       if (Style.BraceWrapping.AfterControlStatement)
1664         addUnwrappedLine();
1665       parseStructuralElement();
1666     }
1667     addUnwrappedLine();
1668   } else {
1669     if (FormatTok->is(tok::semi))
1670       nextToken();
1671     addUnwrappedLine();
1672   }
1673   Line->Level = OldLineLevel;
1674   if (FormatTok->isNot(tok::l_brace)) {
1675     parseStructuralElement();
1676     addUnwrappedLine();
1677   }
1678 }
1679 
1680 void UnwrappedLineParser::parseCaseLabel() {
1681   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1682   // FIXME: fix handling of complex expressions here.
1683   do {
1684     nextToken();
1685   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1686   parseLabel();
1687 }
1688 
1689 void UnwrappedLineParser::parseSwitch() {
1690   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1691   nextToken();
1692   if (FormatTok->Tok.is(tok::l_paren))
1693     parseParens();
1694   if (FormatTok->Tok.is(tok::l_brace)) {
1695     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1696     parseBlock(/*MustBeDeclaration=*/false);
1697     addUnwrappedLine();
1698   } else {
1699     addUnwrappedLine();
1700     ++Line->Level;
1701     parseStructuralElement();
1702     --Line->Level;
1703   }
1704 }
1705 
1706 void UnwrappedLineParser::parseAccessSpecifier() {
1707   nextToken();
1708   // Understand Qt's slots.
1709   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1710     nextToken();
1711   // Otherwise, we don't know what it is, and we'd better keep the next token.
1712   if (FormatTok->Tok.is(tok::colon))
1713     nextToken();
1714   addUnwrappedLine();
1715 }
1716 
1717 bool UnwrappedLineParser::parseEnum() {
1718   // Won't be 'enum' for NS_ENUMs.
1719   if (FormatTok->Tok.is(tok::kw_enum))
1720     nextToken();
1721 
1722   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1723   // declarations. An "enum" keyword followed by a colon would be a syntax
1724   // error and thus assume it is just an identifier.
1725   if (Style.Language == FormatStyle::LK_JavaScript &&
1726       FormatTok->isOneOf(tok::colon, tok::question))
1727     return false;
1728 
1729   // Eat up enum class ...
1730   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1731     nextToken();
1732 
1733   while (FormatTok->Tok.getIdentifierInfo() ||
1734          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1735                             tok::greater, tok::comma, tok::question)) {
1736     nextToken();
1737     // We can have macros or attributes in between 'enum' and the enum name.
1738     if (FormatTok->is(tok::l_paren))
1739       parseParens();
1740     if (FormatTok->is(tok::identifier)) {
1741       nextToken();
1742       // If there are two identifiers in a row, this is likely an elaborate
1743       // return type. In Java, this can be "implements", etc.
1744       if (Style.IsCpp() && FormatTok->is(tok::identifier))
1745         return false;
1746     }
1747   }
1748 
1749   // Just a declaration or something is wrong.
1750   if (FormatTok->isNot(tok::l_brace))
1751     return true;
1752   FormatTok->BlockKind = BK_Block;
1753 
1754   if (Style.Language == FormatStyle::LK_Java) {
1755     // Java enums are different.
1756     parseJavaEnumBody();
1757     return true;
1758   }
1759   if (Style.Language == FormatStyle::LK_Proto) {
1760     parseBlock(/*MustBeDeclaration=*/true);
1761     return true;
1762   }
1763 
1764   // Parse enum body.
1765   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1766   if (HasError) {
1767     if (FormatTok->is(tok::semi))
1768       nextToken();
1769     addUnwrappedLine();
1770   }
1771   return true;
1772 
1773   // There is no addUnwrappedLine() here so that we fall through to parsing a
1774   // structural element afterwards. Thus, in "enum A {} n, m;",
1775   // "} n, m;" will end up in one unwrapped line.
1776 }
1777 
1778 void UnwrappedLineParser::parseJavaEnumBody() {
1779   // Determine whether the enum is simple, i.e. does not have a semicolon or
1780   // constants with class bodies. Simple enums can be formatted like braced
1781   // lists, contracted to a single line, etc.
1782   unsigned StoredPosition = Tokens->getPosition();
1783   bool IsSimple = true;
1784   FormatToken *Tok = Tokens->getNextToken();
1785   while (Tok) {
1786     if (Tok->is(tok::r_brace))
1787       break;
1788     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1789       IsSimple = false;
1790       break;
1791     }
1792     // FIXME: This will also mark enums with braces in the arguments to enum
1793     // constants as "not simple". This is probably fine in practice, though.
1794     Tok = Tokens->getNextToken();
1795   }
1796   FormatTok = Tokens->setPosition(StoredPosition);
1797 
1798   if (IsSimple) {
1799     parseBracedList();
1800     addUnwrappedLine();
1801     return;
1802   }
1803 
1804   // Parse the body of a more complex enum.
1805   // First add a line for everything up to the "{".
1806   nextToken();
1807   addUnwrappedLine();
1808   ++Line->Level;
1809 
1810   // Parse the enum constants.
1811   while (FormatTok) {
1812     if (FormatTok->is(tok::l_brace)) {
1813       // Parse the constant's class body.
1814       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1815                  /*MunchSemi=*/false);
1816     } else if (FormatTok->is(tok::l_paren)) {
1817       parseParens();
1818     } else if (FormatTok->is(tok::comma)) {
1819       nextToken();
1820       addUnwrappedLine();
1821     } else if (FormatTok->is(tok::semi)) {
1822       nextToken();
1823       addUnwrappedLine();
1824       break;
1825     } else if (FormatTok->is(tok::r_brace)) {
1826       addUnwrappedLine();
1827       break;
1828     } else {
1829       nextToken();
1830     }
1831   }
1832 
1833   // Parse the class body after the enum's ";" if any.
1834   parseLevel(/*HasOpeningBrace=*/true);
1835   nextToken();
1836   --Line->Level;
1837   addUnwrappedLine();
1838 }
1839 
1840 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
1841   const FormatToken &InitialToken = *FormatTok;
1842   nextToken();
1843 
1844   // The actual identifier can be a nested name specifier, and in macros
1845   // it is often token-pasted.
1846   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1847                             tok::kw___attribute, tok::kw___declspec,
1848                             tok::kw_alignas) ||
1849          ((Style.Language == FormatStyle::LK_Java ||
1850            Style.Language == FormatStyle::LK_JavaScript) &&
1851           FormatTok->isOneOf(tok::period, tok::comma))) {
1852     bool IsNonMacroIdentifier =
1853         FormatTok->is(tok::identifier) &&
1854         FormatTok->TokenText != FormatTok->TokenText.upper();
1855     nextToken();
1856     // We can have macros or attributes in between 'class' and the class name.
1857     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1858       parseParens();
1859   }
1860 
1861   // Note that parsing away template declarations here leads to incorrectly
1862   // accepting function declarations as record declarations.
1863   // In general, we cannot solve this problem. Consider:
1864   // class A<int> B() {}
1865   // which can be a function definition or a class definition when B() is a
1866   // macro. If we find enough real-world cases where this is a problem, we
1867   // can parse for the 'template' keyword in the beginning of the statement,
1868   // and thus rule out the record production in case there is no template
1869   // (this would still leave us with an ambiguity between template function
1870   // and class declarations).
1871   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1872     while (!eof()) {
1873       if (FormatTok->is(tok::l_brace)) {
1874         calculateBraceTypes(/*ExpectClassBody=*/true);
1875         if (!tryToParseBracedList())
1876           break;
1877       }
1878       if (FormatTok->Tok.is(tok::semi))
1879         return;
1880       nextToken();
1881     }
1882   }
1883   if (FormatTok->Tok.is(tok::l_brace)) {
1884     if (ParseAsExpr) {
1885       parseChildBlock();
1886     } else {
1887       if (ShouldBreakBeforeBrace(Style, InitialToken))
1888         addUnwrappedLine();
1889 
1890       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1891                  /*MunchSemi=*/false);
1892     }
1893   }
1894   // There is no addUnwrappedLine() here so that we fall through to parsing a
1895   // structural element afterwards. Thus, in "class A {} n, m;",
1896   // "} n, m;" will end up in one unwrapped line.
1897 }
1898 
1899 void UnwrappedLineParser::parseObjCProtocolList() {
1900   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1901   do
1902     nextToken();
1903   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1904   nextToken(); // Skip '>'.
1905 }
1906 
1907 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1908   do {
1909     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1910       nextToken();
1911       addUnwrappedLine();
1912       break;
1913     }
1914     if (FormatTok->is(tok::l_brace)) {
1915       parseBlock(/*MustBeDeclaration=*/false);
1916       // In ObjC interfaces, nothing should be following the "}".
1917       addUnwrappedLine();
1918     } else if (FormatTok->is(tok::r_brace)) {
1919       // Ignore stray "}". parseStructuralElement doesn't consume them.
1920       nextToken();
1921       addUnwrappedLine();
1922     } else {
1923       parseStructuralElement();
1924     }
1925   } while (!eof());
1926 }
1927 
1928 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1929   nextToken();
1930   nextToken(); // interface name
1931 
1932   // @interface can be followed by either a base class, or a category.
1933   if (FormatTok->Tok.is(tok::colon)) {
1934     nextToken();
1935     nextToken(); // base class name
1936   } else if (FormatTok->Tok.is(tok::l_paren))
1937     // Skip category, if present.
1938     parseParens();
1939 
1940   if (FormatTok->Tok.is(tok::less))
1941     parseObjCProtocolList();
1942 
1943   if (FormatTok->Tok.is(tok::l_brace)) {
1944     if (Style.BraceWrapping.AfterObjCDeclaration)
1945       addUnwrappedLine();
1946     parseBlock(/*MustBeDeclaration=*/true);
1947   }
1948 
1949   // With instance variables, this puts '}' on its own line.  Without instance
1950   // variables, this ends the @interface line.
1951   addUnwrappedLine();
1952 
1953   parseObjCUntilAtEnd();
1954 }
1955 
1956 void UnwrappedLineParser::parseObjCProtocol() {
1957   nextToken();
1958   nextToken(); // protocol name
1959 
1960   if (FormatTok->Tok.is(tok::less))
1961     parseObjCProtocolList();
1962 
1963   // Check for protocol declaration.
1964   if (FormatTok->Tok.is(tok::semi)) {
1965     nextToken();
1966     return addUnwrappedLine();
1967   }
1968 
1969   addUnwrappedLine();
1970   parseObjCUntilAtEnd();
1971 }
1972 
1973 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1974   bool IsImport = FormatTok->is(Keywords.kw_import);
1975   assert(IsImport || FormatTok->is(tok::kw_export));
1976   nextToken();
1977 
1978   // Consume the "default" in "export default class/function".
1979   if (FormatTok->is(tok::kw_default))
1980     nextToken();
1981 
1982   // Consume "async function", "function" and "default function", so that these
1983   // get parsed as free-standing JS functions, i.e. do not require a trailing
1984   // semicolon.
1985   if (FormatTok->is(Keywords.kw_async))
1986     nextToken();
1987   if (FormatTok->is(Keywords.kw_function)) {
1988     nextToken();
1989     return;
1990   }
1991 
1992   // For imports, `export *`, `export {...}`, consume the rest of the line up
1993   // to the terminating `;`. For everything else, just return and continue
1994   // parsing the structural element, i.e. the declaration or expression for
1995   // `export default`.
1996   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
1997       !FormatTok->isStringLiteral())
1998     return;
1999 
2000   while (!eof()) {
2001     if (FormatTok->is(tok::semi))
2002       return;
2003     if (Line->Tokens.size() == 0) {
2004       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2005       // import statement should terminate.
2006       return;
2007     }
2008     if (FormatTok->is(tok::l_brace)) {
2009       FormatTok->BlockKind = BK_Block;
2010       parseBracedList();
2011     } else {
2012       nextToken();
2013     }
2014   }
2015 }
2016 
2017 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2018                                                  StringRef Prefix = "") {
2019   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2020                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2021   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2022                                                     E = Line.Tokens.end();
2023        I != E; ++I) {
2024     llvm::dbgs() << I->Tok->Tok.getName() << "["
2025                  << "T=" << I->Tok->Type
2026                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2027   }
2028   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2029                                                     E = Line.Tokens.end();
2030        I != E; ++I) {
2031     const UnwrappedLineNode &Node = *I;
2032     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2033              I = Node.Children.begin(),
2034              E = Node.Children.end();
2035          I != E; ++I) {
2036       printDebugInfo(*I, "\nChild: ");
2037     }
2038   }
2039   llvm::dbgs() << "\n";
2040 }
2041 
2042 void UnwrappedLineParser::addUnwrappedLine() {
2043   if (Line->Tokens.empty())
2044     return;
2045   DEBUG({
2046     if (CurrentLines == &Lines)
2047       printDebugInfo(*Line);
2048   });
2049   CurrentLines->push_back(std::move(*Line));
2050   Line->Tokens.clear();
2051   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2052   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2053     CurrentLines->append(
2054         std::make_move_iterator(PreprocessorDirectives.begin()),
2055         std::make_move_iterator(PreprocessorDirectives.end()));
2056     PreprocessorDirectives.clear();
2057   }
2058 }
2059 
2060 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2061 
2062 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2063   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2064          FormatTok.NewlinesBefore > 0;
2065 }
2066 
2067 static bool isLineComment(const FormatToken &FormatTok) {
2068   return FormatTok.is(tok::comment) &&
2069          FormatTok.TokenText.startswith("//");
2070 }
2071 
2072 // Checks if \p FormatTok is a line comment that continues the line comment
2073 // section on \p Line.
2074 static bool continuesLineComment(const FormatToken &FormatTok,
2075                                  const UnwrappedLine &Line,
2076                                  llvm::Regex &CommentPragmasRegex) {
2077   if (Line.Tokens.empty())
2078     return false;
2079 
2080   StringRef IndentContent = FormatTok.TokenText;
2081   if (FormatTok.TokenText.startswith("//") ||
2082       FormatTok.TokenText.startswith("/*"))
2083     IndentContent = FormatTok.TokenText.substr(2);
2084   if (CommentPragmasRegex.match(IndentContent))
2085     return false;
2086 
2087   // If Line starts with a line comment, then FormatTok continues the comment
2088   // section if its original column is greater or equal to the original start
2089   // column of the line.
2090   //
2091   // Define the min column token of a line as follows: if a line ends in '{' or
2092   // contains a '{' followed by a line comment, then the min column token is
2093   // that '{'. Otherwise, the min column token of the line is the first token of
2094   // the line.
2095   //
2096   // If Line starts with a token other than a line comment, then FormatTok
2097   // continues the comment section if its original column is greater than the
2098   // original start column of the min column token of the line.
2099   //
2100   // For example, the second line comment continues the first in these cases:
2101   //
2102   // // first line
2103   // // second line
2104   //
2105   // and:
2106   //
2107   // // first line
2108   //  // second line
2109   //
2110   // and:
2111   //
2112   // int i; // first line
2113   //  // second line
2114   //
2115   // and:
2116   //
2117   // do { // first line
2118   //      // second line
2119   //   int i;
2120   // } while (true);
2121   //
2122   // and:
2123   //
2124   // enum {
2125   //   a, // first line
2126   //    // second line
2127   //   b
2128   // };
2129   //
2130   // The second line comment doesn't continue the first in these cases:
2131   //
2132   //   // first line
2133   //  // second line
2134   //
2135   // and:
2136   //
2137   // int i; // first line
2138   // // second line
2139   //
2140   // and:
2141   //
2142   // do { // first line
2143   //   // second line
2144   //   int i;
2145   // } while (true);
2146   //
2147   // and:
2148   //
2149   // enum {
2150   //   a, // first line
2151   //   // second line
2152   // };
2153   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2154 
2155   // Scan for '{//'. If found, use the column of '{' as a min column for line
2156   // comment section continuation.
2157   const FormatToken *PreviousToken = nullptr;
2158   for (const UnwrappedLineNode Node : Line.Tokens) {
2159     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2160         isLineComment(*Node.Tok)) {
2161       MinColumnToken = PreviousToken;
2162       break;
2163     }
2164     PreviousToken = Node.Tok;
2165 
2166     // Grab the last newline preceding a token in this unwrapped line.
2167     if (Node.Tok->NewlinesBefore > 0) {
2168       MinColumnToken = Node.Tok;
2169     }
2170   }
2171   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2172     MinColumnToken = PreviousToken;
2173   }
2174 
2175   unsigned MinContinueColumn =
2176       MinColumnToken->OriginalColumn +
2177       (isLineComment(*MinColumnToken) ? 0 : 1);
2178   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
2179          isLineComment(*(Line.Tokens.back().Tok)) &&
2180          FormatTok.OriginalColumn >= MinContinueColumn;
2181 }
2182 
2183 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2184   bool JustComments = Line->Tokens.empty();
2185   for (SmallVectorImpl<FormatToken *>::const_iterator
2186            I = CommentsBeforeNextToken.begin(),
2187            E = CommentsBeforeNextToken.end();
2188        I != E; ++I) {
2189     // Line comments that belong to the same line comment section are put on the
2190     // same line since later we might want to reflow content between them.
2191     // Additional fine-grained breaking of line comment sections is controlled
2192     // by the class BreakableLineCommentSection in case it is desirable to keep
2193     // several line comment sections in the same unwrapped line.
2194     //
2195     // FIXME: Consider putting separate line comment sections as children to the
2196     // unwrapped line instead.
2197     (*I)->ContinuesLineCommentSection =
2198         continuesLineComment(**I, *Line, CommentPragmasRegex);
2199     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2200       addUnwrappedLine();
2201     pushToken(*I);
2202   }
2203   if (NewlineBeforeNext && JustComments)
2204     addUnwrappedLine();
2205   CommentsBeforeNextToken.clear();
2206 }
2207 
2208 void UnwrappedLineParser::nextToken() {
2209   if (eof())
2210     return;
2211   flushComments(isOnNewLine(*FormatTok));
2212   pushToken(FormatTok);
2213   if (Style.Language != FormatStyle::LK_JavaScript)
2214     readToken();
2215   else
2216     readTokenWithJavaScriptASI();
2217 }
2218 
2219 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2220   // FIXME: This is a dirty way to access the previous token. Find a better
2221   // solution.
2222   if (!Line || Line->Tokens.empty())
2223     return nullptr;
2224   return Line->Tokens.back().Tok;
2225 }
2226 
2227 void UnwrappedLineParser::distributeComments(
2228     const SmallVectorImpl<FormatToken *> &Comments,
2229     const FormatToken *NextTok) {
2230   // Whether or not a line comment token continues a line is controlled by
2231   // the method continuesLineComment, with the following caveat:
2232   //
2233   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2234   // that each comment line from the trail is aligned with the next token, if
2235   // the next token exists. If a trail exists, the beginning of the maximal
2236   // trail is marked as a start of a new comment section.
2237   //
2238   // For example in this code:
2239   //
2240   // int a; // line about a
2241   //   // line 1 about b
2242   //   // line 2 about b
2243   //   int b;
2244   //
2245   // the two lines about b form a maximal trail, so there are two sections, the
2246   // first one consisting of the single comment "// line about a" and the
2247   // second one consisting of the next two comments.
2248   if (Comments.empty())
2249     return;
2250   bool ShouldPushCommentsInCurrentLine = true;
2251   bool HasTrailAlignedWithNextToken = false;
2252   unsigned StartOfTrailAlignedWithNextToken = 0;
2253   if (NextTok) {
2254     // We are skipping the first element intentionally.
2255     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2256       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2257         HasTrailAlignedWithNextToken = true;
2258         StartOfTrailAlignedWithNextToken = i;
2259       }
2260     }
2261   }
2262   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2263     FormatToken *FormatTok = Comments[i];
2264     if (HasTrailAlignedWithNextToken &&
2265         i == StartOfTrailAlignedWithNextToken) {
2266       FormatTok->ContinuesLineCommentSection = false;
2267     } else {
2268       FormatTok->ContinuesLineCommentSection =
2269           continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
2270     }
2271     if (!FormatTok->ContinuesLineCommentSection &&
2272         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2273       ShouldPushCommentsInCurrentLine = false;
2274     }
2275     if (ShouldPushCommentsInCurrentLine) {
2276       pushToken(FormatTok);
2277     } else {
2278       CommentsBeforeNextToken.push_back(FormatTok);
2279     }
2280   }
2281 }
2282 
// Reads tokens from the token source until FormatTok is a token that is not a
// comment. Comments met on the way are collected and handed to
// distributeComments(); preprocessor directives and conflict-marker tokens
// are processed as they are encountered.
void UnwrappedLineParser::readToken() {
  // Comments read since the last call to distributeComments().
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' that is the first token or follows an unescaped newline, seen
    // while not already inside a directive, starts a preprocessor directive.
    // This is a loop because FormatTok may again be such a '#' once
    // parsePPDirective() returns.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict-marker tokens are not kept as FormatTok: each one updates the
    // conditional-compilation state and is replaced by the token that follows
    // it, which gets MustBreakBefore set.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // While the innermost preprocessor state is PP_Unreachable, tokens outside
    // of a directive are skipped entirely: they are neither returned nor
    // collected as comments. The loop condition still ends the loop at eof.
    if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // The first token that is not a comment ends the read; the comments
    // collected so far are distributed relative to it.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // Reached when the loop ends at eof without returning above; there is no
  // following token to align the remaining comments with.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
2333 
2334 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2335   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2336   if (MustBreakBeforeNextToken) {
2337     Line->Tokens.back().Tok->MustBreakBefore = true;
2338     MustBreakBeforeNextToken = false;
2339   }
2340 }
2341 
2342 } // end namespace format
2343 } // end namespace clang
2344