1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         Token(nullptr) {
65     TokenSource = this;
66     Line.Level = 0;
67     Line.InPPDirective = true;
68   }
69 
70   ~ScopedMacroState() override {
71     TokenSource = PreviousTokenSource;
72     ResetToken = Token;
73     Line.InPPDirective = false;
74     Line.Level = PreviousLineLevel;
75   }
76 
77   FormatToken *getNextToken() override {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89   FormatToken *setPosition(unsigned Position) override {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113 
114   FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
119 class ScopedLineState {
120 public:
121   ScopedLineState(UnwrappedLineParser &Parser,
122                   bool SwitchToPreprocessorLines = false)
123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124     if (SwitchToPreprocessorLines)
125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
126     else if (!Parser.Line->Tokens.empty())
127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128     PreBlockLine = std::move(Parser.Line);
129     Parser.Line = llvm::make_unique<UnwrappedLine>();
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line = std::move(PreBlockLine);
140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141       Parser.MustBreakBeforeNextToken = true;
142     Parser.CurrentLines = OriginalLines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147 
148   std::unique_ptr<UnwrappedLine> PreBlockLine;
149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
152 class CompoundStatementIndenter {
153 public:
154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
155                             const FormatStyle &Style, unsigned &LineLevel)
156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
157     if (Style.BraceWrapping.AfterControlStatement)
158       Parser->addUnwrappedLine();
159     if (Style.BraceWrapping.IndentBraces)
160       ++LineLevel;
161   }
162   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
163 
164 private:
165   unsigned &LineLevel;
166   unsigned OldLineLevel;
167 };
168 
169 namespace {
170 
171 class IndexedTokenSource : public FormatTokenSource {
172 public:
173   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
174       : Tokens(Tokens), Position(-1) {}
175 
176   FormatToken *getNextToken() override {
177     ++Position;
178     return Tokens[Position];
179   }
180 
181   unsigned getPosition() override {
182     assert(Position >= 0);
183     return Position;
184   }
185 
186   FormatToken *setPosition(unsigned P) override {
187     Position = P;
188     return Tokens[Position];
189   }
190 
191   void reset() { Position = -1; }
192 
193 private:
194   ArrayRef<FormatToken *> Tokens;
195   int Position;
196 };
197 
198 } // end anonymous namespace
199 
// Creates a parser over \p Tokens that reports to \p Callback.
//
// Nothing is parsed here. The parser starts with an empty current line, with
// finished lines going to Lines, with a null token source (parse() installs
// one) and with PPBranchLevel at -1.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
208 
209 void UnwrappedLineParser::reset() {
210   PPBranchLevel = -1;
211   Line.reset(new UnwrappedLine);
212   CommentsBeforeNextToken.clear();
213   FormatTok = nullptr;
214   MustBreakBeforeNextToken = false;
215   PreprocessorDirectives.clear();
216   CurrentLines = &Lines;
217   DeclarationScopeStack.clear();
218   PPStack.clear();
219 }
220 
221 void UnwrappedLineParser::parse() {
222   IndexedTokenSource TokenSource(AllTokens);
223   do {
224     DEBUG(llvm::dbgs() << "----\n");
225     reset();
226     Tokens = &TokenSource;
227     TokenSource.reset();
228 
229     readToken();
230     parseFile();
231     // Create line with eof token.
232     pushToken(FormatTok);
233     addUnwrappedLine();
234 
235     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
236                                                   E = Lines.end();
237          I != E; ++I) {
238       Callback.consumeUnwrappedLine(*I);
239     }
240     Callback.finishRun();
241     Lines.clear();
242     while (!PPLevelBranchIndex.empty() &&
243            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
244       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
245       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
246     }
247     if (!PPLevelBranchIndex.empty()) {
248       ++PPLevelBranchIndex.back();
249       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
250       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
251     }
252   } while (!PPLevelBranchIndex.empty());
253 }
254 
255 void UnwrappedLineParser::parseFile() {
256   // The top-level context in a file always has declarations, except for pre-
257   // processor directives and JavaScript files.
258   bool MustBeDeclaration =
259       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
260   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
261                                           MustBeDeclaration);
262   parseLevel(/*HasOpeningBrace=*/false);
263   // Make sure to format the remaining tokens.
264   flushComments(true);
265   addUnwrappedLine();
266 }
267 
268 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
269   bool SwitchLabelEncountered = false;
270   do {
271     tok::TokenKind kind = FormatTok->Tok.getKind();
272     if (FormatTok->Type == TT_MacroBlockBegin) {
273       kind = tok::l_brace;
274     } else if (FormatTok->Type == TT_MacroBlockEnd) {
275       kind = tok::r_brace;
276     }
277 
278     switch (kind) {
279     case tok::comment:
280       nextToken();
281       addUnwrappedLine();
282       break;
283     case tok::l_brace:
284       // FIXME: Add parameter whether this can happen - if this happens, we must
285       // be in a non-declaration context.
286       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
287         continue;
288       parseBlock(/*MustBeDeclaration=*/false);
289       addUnwrappedLine();
290       break;
291     case tok::r_brace:
292       if (HasOpeningBrace)
293         return;
294       nextToken();
295       addUnwrappedLine();
296       break;
297     case tok::kw_default:
298     case tok::kw_case:
299       if (!SwitchLabelEncountered &&
300           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
301         ++Line->Level;
302       SwitchLabelEncountered = true;
303       parseStructuralElement();
304       break;
305     default:
306       parseStructuralElement();
307       break;
308     }
309   } while (!eof());
310 }
311 
312 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
313   // We'll parse forward through the tokens until we hit
314   // a closing brace or eof - note that getNextToken() will
315   // parse macros, so this will magically work inside macro
316   // definitions, too.
317   unsigned StoredPosition = Tokens->getPosition();
318   FormatToken *Tok = FormatTok;
319   const FormatToken *PrevTok = getPreviousToken();
320   // Keep a stack of positions of lbrace tokens. We will
321   // update information about whether an lbrace starts a
322   // braced init list or a different block during the loop.
323   SmallVector<FormatToken *, 8> LBraceStack;
324   assert(Tok->Tok.is(tok::l_brace));
325   do {
326     // Get next non-comment token.
327     FormatToken *NextTok;
328     unsigned ReadTokens = 0;
329     do {
330       NextTok = Tokens->getNextToken();
331       ++ReadTokens;
332     } while (NextTok->is(tok::comment));
333 
334     switch (Tok->Tok.getKind()) {
335     case tok::l_brace:
336       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
337           PrevTok->is(tok::colon))
338         // A colon indicates this code is in a type, or a braced list following
339         // a label in an object literal ({a: {b: 1}}).
340         // The code below could be confused by semicolons between the individual
341         // members in a type member list, which would normally trigger BK_Block.
342         // In both cases, this must be parsed as an inline braced init.
343         Tok->BlockKind = BK_BracedInit;
344       else
345         Tok->BlockKind = BK_Unknown;
346       LBraceStack.push_back(Tok);
347       break;
348     case tok::r_brace:
349       if (LBraceStack.empty())
350         break;
351       if (LBraceStack.back()->BlockKind == BK_Unknown) {
352         bool ProbablyBracedList = false;
353         if (Style.Language == FormatStyle::LK_Proto) {
354           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
355         } else {
356           // Using OriginalColumn to distinguish between ObjC methods and
357           // binary operators is a bit hacky.
358           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
359                                   NextTok->OriginalColumn == 0;
360 
361           // If there is a comma, semicolon or right paren after the closing
362           // brace, we assume this is a braced initializer list.  Note that
363           // regardless how we mark inner braces here, we will overwrite the
364           // BlockKind later if we parse a braced list (where all blocks
365           // inside are by default braced lists), or when we explicitly detect
366           // blocks (for example while parsing lambdas).
367           ProbablyBracedList =
368               (Style.Language == FormatStyle::LK_JavaScript &&
369                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
370                                 Keywords.kw_as)) ||
371               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
372                                tok::r_paren, tok::r_square, tok::l_brace,
373                                tok::l_square, tok::l_paren, tok::ellipsis) ||
374               (NextTok->is(tok::identifier) &&
375                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
376               (NextTok->is(tok::semi) &&
377                (!ExpectClassBody || LBraceStack.size() != 1)) ||
378               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
379         }
380         if (ProbablyBracedList) {
381           Tok->BlockKind = BK_BracedInit;
382           LBraceStack.back()->BlockKind = BK_BracedInit;
383         } else {
384           Tok->BlockKind = BK_Block;
385           LBraceStack.back()->BlockKind = BK_Block;
386         }
387       }
388       LBraceStack.pop_back();
389       break;
390     case tok::at:
391     case tok::semi:
392     case tok::kw_if:
393     case tok::kw_while:
394     case tok::kw_for:
395     case tok::kw_switch:
396     case tok::kw_try:
397     case tok::kw___try:
398       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
399         LBraceStack.back()->BlockKind = BK_Block;
400       break;
401     default:
402       break;
403     }
404     PrevTok = Tok;
405     Tok = NextTok;
406   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
407 
408   // Assume other blocks for all unclosed opening braces.
409   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
410     if (LBraceStack[i]->BlockKind == BK_Unknown)
411       LBraceStack[i]->BlockKind = BK_Block;
412   }
413 
414   FormatTok = Tokens->setPosition(StoredPosition);
415 }
416 
417 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
418                                      bool MunchSemi) {
419   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
420          "'{' or macro block token expected");
421   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
422   FormatTok->BlockKind = BK_Block;
423 
424   unsigned InitialLevel = Line->Level;
425   nextToken();
426 
427   if (MacroBlock && FormatTok->is(tok::l_paren))
428     parseParens();
429 
430   addUnwrappedLine();
431 
432   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
433                                           MustBeDeclaration);
434   if (AddLevel)
435     ++Line->Level;
436   parseLevel(/*HasOpeningBrace=*/true);
437 
438   if (eof())
439     return;
440 
441   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
442                  : !FormatTok->is(tok::r_brace)) {
443     Line->Level = InitialLevel;
444     FormatTok->BlockKind = BK_Block;
445     return;
446   }
447 
448   nextToken(); // Munch the closing brace.
449 
450   if (MacroBlock && FormatTok->is(tok::l_paren))
451     parseParens();
452 
453   if (MunchSemi && FormatTok->Tok.is(tok::semi))
454     nextToken();
455   Line->Level = InitialLevel;
456 }
457 
458 static bool isGoogScope(const UnwrappedLine &Line) {
459   // FIXME: Closure-library specific stuff should not be hard-coded but be
460   // configurable.
461   if (Line.Tokens.size() < 4)
462     return false;
463   auto I = Line.Tokens.begin();
464   if (I->Tok->TokenText != "goog")
465     return false;
466   ++I;
467   if (I->Tok->isNot(tok::period))
468     return false;
469   ++I;
470   if (I->Tok->TokenText != "scope")
471     return false;
472   ++I;
473   return I->Tok->is(tok::l_paren);
474 }
475 
476 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
477                                    const FormatToken &InitialToken) {
478   if (InitialToken.is(tok::kw_namespace))
479     return Style.BraceWrapping.AfterNamespace;
480   if (InitialToken.is(tok::kw_class))
481     return Style.BraceWrapping.AfterClass;
482   if (InitialToken.is(tok::kw_union))
483     return Style.BraceWrapping.AfterUnion;
484   if (InitialToken.is(tok::kw_struct))
485     return Style.BraceWrapping.AfterStruct;
486   return false;
487 }
488 
489 void UnwrappedLineParser::parseChildBlock() {
490   FormatTok->BlockKind = BK_Block;
491   nextToken();
492   {
493     bool GoogScope =
494         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
495     ScopedLineState LineState(*this);
496     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
497                                             /*MustBeDeclaration=*/false);
498     Line->Level += GoogScope ? 0 : 1;
499     parseLevel(/*HasOpeningBrace=*/true);
500     flushComments(isOnNewLine(*FormatTok));
501     Line->Level -= GoogScope ? 0 : 1;
502   }
503   nextToken();
504 }
505 
506 void UnwrappedLineParser::parsePPDirective() {
507   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
508   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
509   nextToken();
510 
511   if (!FormatTok->Tok.getIdentifierInfo()) {
512     parsePPUnknown();
513     return;
514   }
515 
516   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
517   case tok::pp_define:
518     parsePPDefine();
519     return;
520   case tok::pp_if:
521     parsePPIf(/*IfDef=*/false);
522     break;
523   case tok::pp_ifdef:
524   case tok::pp_ifndef:
525     parsePPIf(/*IfDef=*/true);
526     break;
527   case tok::pp_else:
528     parsePPElse();
529     break;
530   case tok::pp_elif:
531     parsePPElIf();
532     break;
533   case tok::pp_endif:
534     parsePPEndIf();
535     break;
536   default:
537     parsePPUnknown();
538     break;
539   }
540 }
541 
542 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
543   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
544     PPStack.push_back(PP_Unreachable);
545   else
546     PPStack.push_back(PP_Conditional);
547 }
548 
549 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
550   ++PPBranchLevel;
551   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
552   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
553     PPLevelBranchIndex.push_back(0);
554     PPLevelBranchCount.push_back(0);
555   }
556   PPChainBranchIndex.push(0);
557   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
558   conditionalCompilationCondition(Unreachable || Skip);
559 }
560 
561 void UnwrappedLineParser::conditionalCompilationAlternative() {
562   if (!PPStack.empty())
563     PPStack.pop_back();
564   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
565   if (!PPChainBranchIndex.empty())
566     ++PPChainBranchIndex.top();
567   conditionalCompilationCondition(
568       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
569       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
570 }
571 
572 void UnwrappedLineParser::conditionalCompilationEnd() {
573   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
574   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
575     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
576       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
577     }
578   }
579   // Guard against #endif's without #if.
580   if (PPBranchLevel > 0)
581     --PPBranchLevel;
582   if (!PPChainBranchIndex.empty())
583     PPChainBranchIndex.pop();
584   if (!PPStack.empty())
585     PPStack.pop_back();
586 }
587 
588 void UnwrappedLineParser::parsePPIf(bool IfDef) {
589   nextToken();
590   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
591                          FormatTok->Tok.getLiteralData() != nullptr &&
592                          StringRef(FormatTok->Tok.getLiteralData(),
593                                    FormatTok->Tok.getLength()) == "0") ||
594                         FormatTok->Tok.is(tok::kw_false);
595   conditionalCompilationStart(!IfDef && IsLiteralFalse);
596   parsePPUnknown();
597 }
598 
// Parses an #else: switches to the next branch of the current conditional and
// consumes the rest of the directive.
void UnwrappedLineParser::parsePPElse() {
  conditionalCompilationAlternative();
  parsePPUnknown();
}
603 
// Parses an #elif, which is handled exactly like an #else.
void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
605 
// Parses an #endif: closes the current conditional and consumes the rest of
// the directive.
void UnwrappedLineParser::parsePPEndIf() {
  conditionalCompilationEnd();
  parsePPUnknown();
}
610 
611 void UnwrappedLineParser::parsePPDefine() {
612   nextToken();
613 
614   if (FormatTok->Tok.getKind() != tok::identifier) {
615     parsePPUnknown();
616     return;
617   }
618   nextToken();
619   if (FormatTok->Tok.getKind() == tok::l_paren &&
620       FormatTok->WhitespaceRange.getBegin() ==
621           FormatTok->WhitespaceRange.getEnd()) {
622     parseParens();
623   }
624   addUnwrappedLine();
625   Line->Level = 1;
626 
627   // Errors during a preprocessor directive can only affect the layout of the
628   // preprocessor directive, and thus we ignore them. An alternative approach
629   // would be to use the same approach we use on the file level (no
630   // re-indentation if there was a structural error) within the macro
631   // definition.
632   parseFile();
633 }
634 
635 void UnwrappedLineParser::parsePPUnknown() {
636   do {
637     nextToken();
638   } while (!eof());
639   addUnwrappedLine();
640 }
641 
642 // Here we blacklist certain tokens that are not usually the first token in an
643 // unwrapped line. This is used in attempt to distinguish macro calls without
644 // trailing semicolons from other constructs split to several lines.
645 static bool tokenCanStartNewLine(const clang::Token &Tok) {
646   // Semicolon can be a null-statement, l_square can be a start of a macro or
647   // a C++11 attribute, but this doesn't seem to be common.
648   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
649          Tok.isNot(tok::l_square) &&
650          // Tokens that can only be used as binary operators and a part of
651          // overloaded operator names.
652          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
653          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
654          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
655          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
656          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
657          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
658          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
659          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
660          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
661          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
662          Tok.isNot(tok::lesslessequal) &&
663          // Colon is used in labels, base class lists, initializer lists,
664          // range-based for loops, ternary operator, but should never be the
665          // first token in an unwrapped line.
666          Tok.isNot(tok::colon) &&
667          // 'noexcept' is a trailing annotation.
668          Tok.isNot(tok::kw_noexcept);
669 }
670 
671 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
672                           const FormatToken *FormatTok) {
673   // FIXME: This returns true for C/C++ keywords like 'struct'.
674   return FormatTok->is(tok::identifier) &&
675          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
676           !FormatTok->isOneOf(
677               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
678               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
679               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
680               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
681               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
682               Keywords.kw_instanceof, Keywords.kw_interface,
683               Keywords.kw_throws));
684 }
685 
686 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
687                                  const FormatToken *FormatTok) {
688   return FormatTok->Tok.isLiteral() ||
689          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
690          mustBeJSIdent(Keywords, FormatTok);
691 }
692 
693 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
694 // when encountered after a value (see mustBeJSIdentOrValue).
695 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
696                            const FormatToken *FormatTok) {
697   return FormatTok->isOneOf(
698       tok::kw_return, Keywords.kw_yield,
699       // conditionals
700       tok::kw_if, tok::kw_else,
701       // loops
702       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
703       // switch/case
704       tok::kw_switch, tok::kw_case,
705       // exceptions
706       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
707       // declaration
708       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
709       Keywords.kw_async, Keywords.kw_function,
710       // import/export
711       Keywords.kw_import, tok::kw_export);
712 }
713 
714 // readTokenWithJavaScriptASI reads the next token and terminates the current
715 // line if JavaScript Automatic Semicolon Insertion must
716 // happen between the current token and the next token.
717 //
718 // This method is conservative - it cannot cover all edge cases of JavaScript,
719 // but only aims to correctly handle certain well known cases. It *must not*
720 // return true in speculative cases.
721 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
722   FormatToken *Previous = FormatTok;
723   readToken();
724   FormatToken *Next = FormatTok;
725 
726   bool IsOnSameLine =
727       CommentsBeforeNextToken.empty()
728           ? Next->NewlinesBefore == 0
729           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
730   if (IsOnSameLine)
731     return;
732 
733   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
734   bool PreviousStartsTemplateExpr =
735       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
736   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
737     // If the token before the previous one is an '@', the previous token is an
738     // annotation and can precede another identifier/value.
739     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
740     if (PrePrevious->is(tok::at))
741       return;
742   }
743   if (Next->is(tok::exclaim) && PreviousMustBeValue)
744     return addUnwrappedLine();
745   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
746   bool NextEndsTemplateExpr =
747       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
748   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
749       (PreviousMustBeValue ||
750        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
751                          tok::minusminus)))
752     return addUnwrappedLine();
753   if (PreviousMustBeValue && isJSDeclOrStmt(Keywords, Next))
754     return addUnwrappedLine();
755 }
756 
757 void UnwrappedLineParser::parseStructuralElement() {
758   assert(!FormatTok->is(tok::l_brace));
759   if (Style.Language == FormatStyle::LK_TableGen &&
760       FormatTok->is(tok::pp_include)) {
761     nextToken();
762     if (FormatTok->is(tok::string_literal))
763       nextToken();
764     addUnwrappedLine();
765     return;
766   }
767   switch (FormatTok->Tok.getKind()) {
768   case tok::at:
769     nextToken();
770     if (FormatTok->Tok.is(tok::l_brace)) {
771       parseBracedList();
772       break;
773     }
774     switch (FormatTok->Tok.getObjCKeywordID()) {
775     case tok::objc_public:
776     case tok::objc_protected:
777     case tok::objc_package:
778     case tok::objc_private:
779       return parseAccessSpecifier();
780     case tok::objc_interface:
781     case tok::objc_implementation:
782       return parseObjCInterfaceOrImplementation();
783     case tok::objc_protocol:
784       return parseObjCProtocol();
785     case tok::objc_end:
786       return; // Handled by the caller.
787     case tok::objc_optional:
788     case tok::objc_required:
789       nextToken();
790       addUnwrappedLine();
791       return;
792     case tok::objc_autoreleasepool:
793       nextToken();
794       if (FormatTok->Tok.is(tok::l_brace)) {
795         if (Style.BraceWrapping.AfterObjCDeclaration)
796           addUnwrappedLine();
797         parseBlock(/*MustBeDeclaration=*/false);
798       }
799       addUnwrappedLine();
800       return;
801     case tok::objc_try:
802       // This branch isn't strictly necessary (the kw_try case below would
803       // do this too after the tok::at is parsed above).  But be explicit.
804       parseTryCatch();
805       return;
806     default:
807       break;
808     }
809     break;
810   case tok::kw_asm:
811     nextToken();
812     if (FormatTok->is(tok::l_brace)) {
813       FormatTok->Type = TT_InlineASMBrace;
814       nextToken();
815       while (FormatTok && FormatTok->isNot(tok::eof)) {
816         if (FormatTok->is(tok::r_brace)) {
817           FormatTok->Type = TT_InlineASMBrace;
818           nextToken();
819           addUnwrappedLine();
820           break;
821         }
822         FormatTok->Finalized = true;
823         nextToken();
824       }
825     }
826     break;
827   case tok::kw_namespace:
828     parseNamespace();
829     return;
830   case tok::kw_inline:
831     nextToken();
832     if (FormatTok->Tok.is(tok::kw_namespace)) {
833       parseNamespace();
834       return;
835     }
836     break;
837   case tok::kw_public:
838   case tok::kw_protected:
839   case tok::kw_private:
840     if (Style.Language == FormatStyle::LK_Java ||
841         Style.Language == FormatStyle::LK_JavaScript)
842       nextToken();
843     else
844       parseAccessSpecifier();
845     return;
846   case tok::kw_if:
847     parseIfThenElse();
848     return;
849   case tok::kw_for:
850   case tok::kw_while:
851     parseForOrWhileLoop();
852     return;
853   case tok::kw_do:
854     parseDoWhile();
855     return;
856   case tok::kw_switch:
857     parseSwitch();
858     return;
859   case tok::kw_default:
860     nextToken();
861     parseLabel();
862     return;
863   case tok::kw_case:
864     parseCaseLabel();
865     return;
866   case tok::kw_try:
867   case tok::kw___try:
868     parseTryCatch();
869     return;
870   case tok::kw_extern:
871     nextToken();
872     if (FormatTok->Tok.is(tok::string_literal)) {
873       nextToken();
874       if (FormatTok->Tok.is(tok::l_brace)) {
875         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
876         addUnwrappedLine();
877         return;
878       }
879     }
880     break;
881   case tok::kw_export:
882     if (Style.Language == FormatStyle::LK_JavaScript) {
883       parseJavaScriptEs6ImportExport();
884       return;
885     }
886     break;
887   case tok::identifier:
888     if (FormatTok->is(TT_ForEachMacro)) {
889       parseForOrWhileLoop();
890       return;
891     }
892     if (FormatTok->is(TT_MacroBlockBegin)) {
893       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
894                  /*MunchSemi=*/false);
895       return;
896     }
897     if (FormatTok->is(Keywords.kw_import)) {
898       if (Style.Language == FormatStyle::LK_JavaScript) {
899         parseJavaScriptEs6ImportExport();
900         return;
901       }
902       if (Style.Language == FormatStyle::LK_Proto) {
903         nextToken();
904         if (FormatTok->is(tok::kw_public))
905           nextToken();
906         if (!FormatTok->is(tok::string_literal))
907           return;
908         nextToken();
909         if (FormatTok->is(tok::semi))
910           nextToken();
911         addUnwrappedLine();
912         return;
913       }
914     }
915     if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
916                            Keywords.kw_slots, Keywords.kw_qslots)) {
917       nextToken();
918       if (FormatTok->is(tok::colon)) {
919         nextToken();
920         addUnwrappedLine();
921         return;
922       }
923     }
924     // In all other cases, parse the declaration.
925     break;
926   default:
927     break;
928   }
929   do {
930     const FormatToken *Previous = getPreviousToken();
931     switch (FormatTok->Tok.getKind()) {
932     case tok::at:
933       nextToken();
934       if (FormatTok->Tok.is(tok::l_brace))
935         parseBracedList();
936       break;
937     case tok::kw_enum:
938       // Ignore if this is part of "template <enum ...".
939       if (Previous && Previous->is(tok::less)) {
940         nextToken();
941         break;
942       }
943 
944       // parseEnum falls through and does not yet add an unwrapped line as an
945       // enum definition can start a structural element.
946       if (!parseEnum())
947         break;
948       // This only applies for C++.
949       if (Style.Language != FormatStyle::LK_Cpp) {
950         addUnwrappedLine();
951         return;
952       }
953       break;
954     case tok::kw_typedef:
955       nextToken();
956       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
957                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
958         parseEnum();
959       break;
960     case tok::kw_struct:
961     case tok::kw_union:
962     case tok::kw_class:
963       // parseRecord falls through and does not yet add an unwrapped line as a
964       // record declaration or definition can start a structural element.
965       parseRecord();
966       // This does not apply for Java and JavaScript.
967       if (Style.Language == FormatStyle::LK_Java ||
968           Style.Language == FormatStyle::LK_JavaScript) {
969         if (FormatTok->is(tok::semi))
970           nextToken();
971         addUnwrappedLine();
972         return;
973       }
974       break;
975     case tok::period:
976       nextToken();
977       // In Java, classes have an implicit static member "class".
978       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
979           FormatTok->is(tok::kw_class))
980         nextToken();
981       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
982           FormatTok->Tok.getIdentifierInfo())
983         // JavaScript only has pseudo keywords, all keywords are allowed to
984         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
985         nextToken();
986       break;
987     case tok::semi:
988       nextToken();
989       addUnwrappedLine();
990       return;
991     case tok::r_brace:
992       addUnwrappedLine();
993       return;
994     case tok::l_paren:
995       parseParens();
996       break;
997     case tok::kw_operator:
998       nextToken();
999       if (FormatTok->isBinaryOperator())
1000         nextToken();
1001       break;
1002     case tok::caret:
1003       nextToken();
1004       if (FormatTok->Tok.isAnyIdentifier() ||
1005           FormatTok->isSimpleTypeSpecifier())
1006         nextToken();
1007       if (FormatTok->is(tok::l_paren))
1008         parseParens();
1009       if (FormatTok->is(tok::l_brace))
1010         parseChildBlock();
1011       break;
1012     case tok::l_brace:
1013       if (!tryToParseBracedList()) {
1014         // A block outside of parentheses must be the last part of a
1015         // structural element.
1016         // FIXME: Figure out cases where this is not true, and add projections
1017         // for them (the one we know is missing are lambdas).
1018         if (Style.BraceWrapping.AfterFunction)
1019           addUnwrappedLine();
1020         FormatTok->Type = TT_FunctionLBrace;
1021         parseBlock(/*MustBeDeclaration=*/false);
1022         addUnwrappedLine();
1023         return;
1024       }
1025       // Otherwise this was a braced init list, and the structural
1026       // element continues.
1027       break;
1028     case tok::kw_try:
1029       // We arrive here when parsing function-try blocks.
1030       parseTryCatch();
1031       return;
1032     case tok::identifier: {
1033       if (FormatTok->is(TT_MacroBlockEnd)) {
1034         addUnwrappedLine();
1035         return;
1036       }
1037 
1038       // Parse function literal unless 'function' is the first token in a line
1039       // in which case this should be treated as a free-standing function.
1040       if (Style.Language == FormatStyle::LK_JavaScript &&
1041           (FormatTok->is(Keywords.kw_function) ||
1042            FormatTok->startsSequence(Keywords.kw_async,
1043                                      Keywords.kw_function)) &&
1044           Line->Tokens.size() > 0) {
1045         tryToParseJSFunction();
1046         break;
1047       }
1048       if ((Style.Language == FormatStyle::LK_JavaScript ||
1049            Style.Language == FormatStyle::LK_Java) &&
1050           FormatTok->is(Keywords.kw_interface)) {
1051         if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
1056           unsigned StoredPosition = Tokens->getPosition();
1057           FormatToken *Next = Tokens->getNextToken();
1058           FormatTok = Tokens->setPosition(StoredPosition);
1059           if (Next && !mustBeJSIdent(Keywords, Next)) {
1060             nextToken();
1061             break;
1062           }
1063         }
1064         parseRecord();
1065         addUnwrappedLine();
1066         return;
1067       }
1068 
1069       // See if the following token should start a new unwrapped line.
1070       StringRef Text = FormatTok->TokenText;
1071       nextToken();
1072       if (Line->Tokens.size() == 1 &&
1073           // JS doesn't have macros, and within classes colons indicate fields,
1074           // not labels.
1075           Style.Language != FormatStyle::LK_JavaScript) {
1076         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1077           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1078           parseLabel();
1079           return;
1080         }
1081         // Recognize function-like macro usages without trailing semicolon as
1082         // well as free-standing macros like Q_OBJECT.
1083         bool FunctionLike = FormatTok->is(tok::l_paren);
1084         if (FunctionLike)
1085           parseParens();
1086 
1087         bool FollowedByNewline =
1088             CommentsBeforeNextToken.empty()
1089                 ? FormatTok->NewlinesBefore > 0
1090                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1091 
1092         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1093             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
1094           addUnwrappedLine();
1095           return;
1096         }
1097       }
1098       break;
1099     }
1100     case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
1104       if (FormatTok->is(TT_JsFatArrow)) {
1105         nextToken();
1106         if (FormatTok->is(tok::l_brace))
1107           parseChildBlock();
1108         break;
1109       }
1110 
1111       nextToken();
1112       if (FormatTok->Tok.is(tok::l_brace)) {
1113         parseBracedList();
1114       }
1115       break;
1116     case tok::l_square:
1117       parseSquare();
1118       break;
1119     case tok::kw_new:
1120       parseNew();
1121       break;
1122     default:
1123       nextToken();
1124       break;
1125     }
1126   } while (!eof());
1127 }
1128 
1129 bool UnwrappedLineParser::tryToParseLambda() {
1130   if (Style.Language != FormatStyle::LK_Cpp) {
1131     nextToken();
1132     return false;
1133   }
1134   const FormatToken* Previous = getPreviousToken();
1135   if (Previous &&
1136       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1137                          tok::kw_delete) ||
1138        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1139     nextToken();
1140     return false;
1141   }
1142   assert(FormatTok->is(tok::l_square));
1143   FormatToken &LSquare = *FormatTok;
1144   if (!tryToParseLambdaIntroducer())
1145     return false;
1146 
1147   while (FormatTok->isNot(tok::l_brace)) {
1148     if (FormatTok->isSimpleTypeSpecifier()) {
1149       nextToken();
1150       continue;
1151     }
1152     switch (FormatTok->Tok.getKind()) {
1153     case tok::l_brace:
1154       break;
1155     case tok::l_paren:
1156       parseParens();
1157       break;
1158     case tok::amp:
1159     case tok::star:
1160     case tok::kw_const:
1161     case tok::comma:
1162     case tok::less:
1163     case tok::greater:
1164     case tok::identifier:
1165     case tok::numeric_constant:
1166     case tok::coloncolon:
1167     case tok::kw_mutable:
1168       nextToken();
1169       break;
1170     case tok::arrow:
1171       FormatTok->Type = TT_LambdaArrow;
1172       nextToken();
1173       break;
1174     default:
1175       return true;
1176     }
1177   }
1178   LSquare.Type = TT_LambdaLSquare;
1179   parseChildBlock();
1180   return true;
1181 }
1182 
1183 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1184   nextToken();
1185   if (FormatTok->is(tok::equal)) {
1186     nextToken();
1187     if (FormatTok->is(tok::r_square)) {
1188       nextToken();
1189       return true;
1190     }
1191     if (FormatTok->isNot(tok::comma))
1192       return false;
1193     nextToken();
1194   } else if (FormatTok->is(tok::amp)) {
1195     nextToken();
1196     if (FormatTok->is(tok::r_square)) {
1197       nextToken();
1198       return true;
1199     }
1200     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1201       return false;
1202     }
1203     if (FormatTok->is(tok::comma))
1204       nextToken();
1205   } else if (FormatTok->is(tok::r_square)) {
1206     nextToken();
1207     return true;
1208   }
1209   do {
1210     if (FormatTok->is(tok::amp))
1211       nextToken();
1212     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1213       return false;
1214     nextToken();
1215     if (FormatTok->is(tok::ellipsis))
1216       nextToken();
1217     if (FormatTok->is(tok::comma)) {
1218       nextToken();
1219     } else if (FormatTok->is(tok::r_square)) {
1220       nextToken();
1221       return true;
1222     } else {
1223       return false;
1224     }
1225   } while (!eof());
1226   return false;
1227 }
1228 
1229 void UnwrappedLineParser::tryToParseJSFunction() {
1230   assert(FormatTok->is(Keywords.kw_function) ||
1231          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1232   if (FormatTok->is(Keywords.kw_async))
1233     nextToken();
1234   // Consume "function".
1235   nextToken();
1236 
1237   // Consume * (generator function). Treat it like C++'s overloaded operators.
1238   if (FormatTok->is(tok::star)) {
1239     FormatTok->Type = TT_OverloadedOperator;
1240     nextToken();
1241   }
1242 
1243   // Consume function name.
1244   if (FormatTok->is(tok::identifier))
1245     nextToken();
1246 
1247   if (FormatTok->isNot(tok::l_paren))
1248     return;
1249 
1250   // Parse formal parameter list.
1251   parseParens();
1252 
1253   if (FormatTok->is(tok::colon)) {
1254     // Parse a type definition.
1255     nextToken();
1256 
1257     // Eat the type declaration. For braced inline object types, balance braces,
1258     // otherwise just parse until finding an l_brace for the function body.
1259     if (FormatTok->is(tok::l_brace))
1260       tryToParseBracedList();
1261     else
1262       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1263         nextToken();
1264   }
1265 
1266   if (FormatTok->is(tok::semi))
1267     return;
1268 
1269   parseChildBlock();
1270 }
1271 
1272 bool UnwrappedLineParser::tryToParseBracedList() {
1273   if (FormatTok->BlockKind == BK_Unknown)
1274     calculateBraceTypes();
1275   assert(FormatTok->BlockKind != BK_Unknown);
1276   if (FormatTok->BlockKind == BK_Block)
1277     return false;
1278   parseBracedList();
1279   return true;
1280 }
1281 
1282 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1283   bool HasError = false;
1284   nextToken();
1285 
1286   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1287   // replace this by using parseAssigmentExpression() inside.
1288   do {
1289     if (Style.Language == FormatStyle::LK_JavaScript) {
1290       if (FormatTok->is(Keywords.kw_function) ||
1291           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1292         tryToParseJSFunction();
1293         continue;
1294       }
1295       if (FormatTok->is(TT_JsFatArrow)) {
1296         nextToken();
1297         // Fat arrows can be followed by simple expressions or by child blocks
1298         // in curly braces.
1299         if (FormatTok->is(tok::l_brace)) {
1300           parseChildBlock();
1301           continue;
1302         }
1303       }
1304       if (FormatTok->is(tok::l_brace)) {
1305         // Could be a method inside of a braced list `{a() { return 1; }}`.
1306         if (tryToParseBracedList())
1307           continue;
1308         parseChildBlock();
1309       }
1310     }
1311     switch (FormatTok->Tok.getKind()) {
1312     case tok::caret:
1313       nextToken();
1314       if (FormatTok->is(tok::l_brace)) {
1315         parseChildBlock();
1316       }
1317       break;
1318     case tok::l_square:
1319       tryToParseLambda();
1320       break;
1321     case tok::l_paren:
1322       parseParens();
1323       // JavaScript can just have free standing methods and getters/setters in
1324       // object literals. Detect them by a "{" following ")".
1325       if (Style.Language == FormatStyle::LK_JavaScript) {
1326         if (FormatTok->is(tok::l_brace))
1327           parseChildBlock();
1328         break;
1329       }
1330       break;
1331     case tok::l_brace:
1332       // Assume there are no blocks inside a braced init list apart
1333       // from the ones we explicitly parse out (like lambdas).
1334       FormatTok->BlockKind = BK_BracedInit;
1335       parseBracedList();
1336       break;
1337     case tok::r_brace:
1338       nextToken();
1339       return !HasError;
1340     case tok::semi:
1341       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1342       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1343       // used for error recovery if we have otherwise determined that this is
1344       // a braced list.
1345       if (Style.Language == FormatStyle::LK_JavaScript) {
1346         nextToken();
1347         break;
1348       }
1349       HasError = true;
1350       if (!ContinueOnSemicolons)
1351         return !HasError;
1352       nextToken();
1353       break;
1354     case tok::comma:
1355       nextToken();
1356       break;
1357     default:
1358       nextToken();
1359       break;
1360     }
1361   } while (!eof());
1362   return false;
1363 }
1364 
1365 void UnwrappedLineParser::parseParens() {
1366   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1367   nextToken();
1368   do {
1369     switch (FormatTok->Tok.getKind()) {
1370     case tok::l_paren:
1371       parseParens();
1372       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1373         parseChildBlock();
1374       break;
1375     case tok::r_paren:
1376       nextToken();
1377       return;
1378     case tok::r_brace:
1379       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1380       return;
1381     case tok::l_square:
1382       tryToParseLambda();
1383       break;
1384     case tok::l_brace:
1385       if (!tryToParseBracedList())
1386         parseChildBlock();
1387       break;
1388     case tok::at:
1389       nextToken();
1390       if (FormatTok->Tok.is(tok::l_brace))
1391         parseBracedList();
1392       break;
1393     case tok::kw_class:
1394       if (Style.Language == FormatStyle::LK_JavaScript)
1395         parseRecord(/*ParseAsExpr=*/true);
1396       else
1397         nextToken();
1398       break;
1399     case tok::identifier:
1400       if (Style.Language == FormatStyle::LK_JavaScript &&
1401           (FormatTok->is(Keywords.kw_function) ||
1402            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1403         tryToParseJSFunction();
1404       else
1405         nextToken();
1406       break;
1407     default:
1408       nextToken();
1409       break;
1410     }
1411   } while (!eof());
1412 }
1413 
1414 void UnwrappedLineParser::parseSquare() {
1415   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1416   if (tryToParseLambda())
1417     return;
1418   do {
1419     switch (FormatTok->Tok.getKind()) {
1420     case tok::l_paren:
1421       parseParens();
1422       break;
1423     case tok::r_square:
1424       nextToken();
1425       return;
1426     case tok::r_brace:
1427       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1428       return;
1429     case tok::l_square:
1430       parseSquare();
1431       break;
1432     case tok::l_brace: {
1433       if (!tryToParseBracedList())
1434         parseChildBlock();
1435       break;
1436     }
1437     case tok::at:
1438       nextToken();
1439       if (FormatTok->Tok.is(tok::l_brace))
1440         parseBracedList();
1441       break;
1442     default:
1443       nextToken();
1444       break;
1445     }
1446   } while (!eof());
1447 }
1448 
1449 void UnwrappedLineParser::parseIfThenElse() {
1450   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1451   nextToken();
1452   if (FormatTok->Tok.is(tok::l_paren))
1453     parseParens();
1454   bool NeedsUnwrappedLine = false;
1455   if (FormatTok->Tok.is(tok::l_brace)) {
1456     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1457     parseBlock(/*MustBeDeclaration=*/false);
1458     if (Style.BraceWrapping.BeforeElse)
1459       addUnwrappedLine();
1460     else
1461       NeedsUnwrappedLine = true;
1462   } else {
1463     addUnwrappedLine();
1464     ++Line->Level;
1465     parseStructuralElement();
1466     --Line->Level;
1467   }
1468   if (FormatTok->Tok.is(tok::kw_else)) {
1469     nextToken();
1470     if (FormatTok->Tok.is(tok::l_brace)) {
1471       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1472       parseBlock(/*MustBeDeclaration=*/false);
1473       addUnwrappedLine();
1474     } else if (FormatTok->Tok.is(tok::kw_if)) {
1475       parseIfThenElse();
1476     } else {
1477       addUnwrappedLine();
1478       ++Line->Level;
1479       parseStructuralElement();
1480       if (FormatTok->is(tok::eof))
1481         addUnwrappedLine();
1482       --Line->Level;
1483     }
1484   } else if (NeedsUnwrappedLine) {
1485     addUnwrappedLine();
1486   }
1487 }
1488 
1489 void UnwrappedLineParser::parseTryCatch() {
1490   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1491   nextToken();
1492   bool NeedsUnwrappedLine = false;
1493   if (FormatTok->is(tok::colon)) {
1494     // We are in a function try block, what comes is an initializer list.
1495     nextToken();
1496     while (FormatTok->is(tok::identifier)) {
1497       nextToken();
1498       if (FormatTok->is(tok::l_paren))
1499         parseParens();
1500       if (FormatTok->is(tok::comma))
1501         nextToken();
1502     }
1503   }
1504   // Parse try with resource.
1505   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1506     parseParens();
1507   }
1508   if (FormatTok->is(tok::l_brace)) {
1509     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1510     parseBlock(/*MustBeDeclaration=*/false);
1511     if (Style.BraceWrapping.BeforeCatch) {
1512       addUnwrappedLine();
1513     } else {
1514       NeedsUnwrappedLine = true;
1515     }
1516   } else if (!FormatTok->is(tok::kw_catch)) {
1517     // The C++ standard requires a compound-statement after a try.
1518     // If there's none, we try to assume there's a structuralElement
1519     // and try to continue.
1520     addUnwrappedLine();
1521     ++Line->Level;
1522     parseStructuralElement();
1523     --Line->Level;
1524   }
1525   while (1) {
1526     if (FormatTok->is(tok::at))
1527       nextToken();
1528     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1529                              tok::kw___finally) ||
1530           ((Style.Language == FormatStyle::LK_Java ||
1531             Style.Language == FormatStyle::LK_JavaScript) &&
1532            FormatTok->is(Keywords.kw_finally)) ||
1533           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1534            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1535       break;
1536     nextToken();
1537     while (FormatTok->isNot(tok::l_brace)) {
1538       if (FormatTok->is(tok::l_paren)) {
1539         parseParens();
1540         continue;
1541       }
1542       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1543         return;
1544       nextToken();
1545     }
1546     NeedsUnwrappedLine = false;
1547     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1548     parseBlock(/*MustBeDeclaration=*/false);
1549     if (Style.BraceWrapping.BeforeCatch)
1550       addUnwrappedLine();
1551     else
1552       NeedsUnwrappedLine = true;
1553   }
1554   if (NeedsUnwrappedLine)
1555     addUnwrappedLine();
1556 }
1557 
1558 void UnwrappedLineParser::parseNamespace() {
1559   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1560 
1561   const FormatToken &InitialToken = *FormatTok;
1562   nextToken();
1563   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1564     nextToken();
1565   if (FormatTok->Tok.is(tok::l_brace)) {
1566     if (ShouldBreakBeforeBrace(Style, InitialToken))
1567       addUnwrappedLine();
1568 
1569     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1570                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1571                      DeclarationScopeStack.size() > 1);
1572     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1573     // Munch the semicolon after a namespace. This is more common than one would
1574     // think. Puttin the semicolon into its own line is very ugly.
1575     if (FormatTok->Tok.is(tok::semi))
1576       nextToken();
1577     addUnwrappedLine();
1578   }
1579   // FIXME: Add error handling.
1580 }
1581 
1582 void UnwrappedLineParser::parseNew() {
1583   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1584   nextToken();
1585   if (Style.Language != FormatStyle::LK_Java)
1586     return;
1587 
1588   // In Java, we can parse everything up to the parens, which aren't optional.
1589   do {
1590     // There should not be a ;, { or } before the new's open paren.
1591     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1592       return;
1593 
1594     // Consume the parens.
1595     if (FormatTok->is(tok::l_paren)) {
1596       parseParens();
1597 
1598       // If there is a class body of an anonymous class, consume that as child.
1599       if (FormatTok->is(tok::l_brace))
1600         parseChildBlock();
1601       return;
1602     }
1603     nextToken();
1604   } while (!eof());
1605 }
1606 
1607 void UnwrappedLineParser::parseForOrWhileLoop() {
1608   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1609          "'for', 'while' or foreach macro expected");
1610   nextToken();
1611   if (FormatTok->Tok.is(tok::l_paren))
1612     parseParens();
1613   if (FormatTok->Tok.is(tok::l_brace)) {
1614     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1615     parseBlock(/*MustBeDeclaration=*/false);
1616     addUnwrappedLine();
1617   } else {
1618     addUnwrappedLine();
1619     ++Line->Level;
1620     parseStructuralElement();
1621     --Line->Level;
1622   }
1623 }
1624 
1625 void UnwrappedLineParser::parseDoWhile() {
1626   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1627   nextToken();
1628   if (FormatTok->Tok.is(tok::l_brace)) {
1629     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1630     parseBlock(/*MustBeDeclaration=*/false);
1631     if (Style.BraceWrapping.IndentBraces)
1632       addUnwrappedLine();
1633   } else {
1634     addUnwrappedLine();
1635     ++Line->Level;
1636     parseStructuralElement();
1637     --Line->Level;
1638   }
1639 
1640   // FIXME: Add error handling.
1641   if (!FormatTok->Tok.is(tok::kw_while)) {
1642     addUnwrappedLine();
1643     return;
1644   }
1645 
1646   nextToken();
1647   parseStructuralElement();
1648 }
1649 
1650 void UnwrappedLineParser::parseLabel() {
1651   nextToken();
1652   unsigned OldLineLevel = Line->Level;
1653   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1654     --Line->Level;
1655   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1656     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1657     parseBlock(/*MustBeDeclaration=*/false);
1658     if (FormatTok->Tok.is(tok::kw_break)) {
1659       if (Style.BraceWrapping.AfterControlStatement)
1660         addUnwrappedLine();
1661       parseStructuralElement();
1662     }
1663     addUnwrappedLine();
1664   } else {
1665     if (FormatTok->is(tok::semi))
1666       nextToken();
1667     addUnwrappedLine();
1668   }
1669   Line->Level = OldLineLevel;
1670   if (FormatTok->isNot(tok::l_brace)) {
1671     parseStructuralElement();
1672     addUnwrappedLine();
1673   }
1674 }
1675 
1676 void UnwrappedLineParser::parseCaseLabel() {
1677   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1678   // FIXME: fix handling of complex expressions here.
1679   do {
1680     nextToken();
1681   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1682   parseLabel();
1683 }
1684 
1685 void UnwrappedLineParser::parseSwitch() {
1686   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1687   nextToken();
1688   if (FormatTok->Tok.is(tok::l_paren))
1689     parseParens();
1690   if (FormatTok->Tok.is(tok::l_brace)) {
1691     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1692     parseBlock(/*MustBeDeclaration=*/false);
1693     addUnwrappedLine();
1694   } else {
1695     addUnwrappedLine();
1696     ++Line->Level;
1697     parseStructuralElement();
1698     --Line->Level;
1699   }
1700 }
1701 
1702 void UnwrappedLineParser::parseAccessSpecifier() {
1703   nextToken();
1704   // Understand Qt's slots.
1705   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1706     nextToken();
1707   // Otherwise, we don't know what it is, and we'd better keep the next token.
1708   if (FormatTok->Tok.is(tok::colon))
1709     nextToken();
1710   addUnwrappedLine();
1711 }
1712 
1713 bool UnwrappedLineParser::parseEnum() {
1714   // Won't be 'enum' for NS_ENUMs.
1715   if (FormatTok->Tok.is(tok::kw_enum))
1716     nextToken();
1717 
1718   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1719   // declarations. An "enum" keyword followed by a colon would be a syntax
1720   // error and thus assume it is just an identifier.
1721   if (Style.Language == FormatStyle::LK_JavaScript &&
1722       FormatTok->isOneOf(tok::colon, tok::question))
1723     return false;
1724 
1725   // Eat up enum class ...
1726   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1727     nextToken();
1728 
1729   while (FormatTok->Tok.getIdentifierInfo() ||
1730          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1731                             tok::greater, tok::comma, tok::question)) {
1732     nextToken();
1733     // We can have macros or attributes in between 'enum' and the enum name.
1734     if (FormatTok->is(tok::l_paren))
1735       parseParens();
1736     if (FormatTok->is(tok::identifier)) {
1737       nextToken();
1738       // If there are two identifiers in a row, this is likely an elaborate
1739       // return type. In Java, this can be "implements", etc.
1740       if (Style.Language == FormatStyle::LK_Cpp &&
1741           FormatTok->is(tok::identifier))
1742         return false;
1743     }
1744   }
1745 
1746   // Just a declaration or something is wrong.
1747   if (FormatTok->isNot(tok::l_brace))
1748     return true;
1749   FormatTok->BlockKind = BK_Block;
1750 
1751   if (Style.Language == FormatStyle::LK_Java) {
1752     // Java enums are different.
1753     parseJavaEnumBody();
1754     return true;
1755   }
1756   if (Style.Language == FormatStyle::LK_Proto) {
1757     parseBlock(/*MustBeDeclaration=*/true);
1758     return true;
1759   }
1760 
1761   // Parse enum body.
1762   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1763   if (HasError) {
1764     if (FormatTok->is(tok::semi))
1765       nextToken();
1766     addUnwrappedLine();
1767   }
1768   return true;
1769 
1770   // There is no addUnwrappedLine() here so that we fall through to parsing a
1771   // structural element afterwards. Thus, in "enum A {} n, m;",
1772   // "} n, m;" will end up in one unwrapped line.
1773 }
1774 
1775 void UnwrappedLineParser::parseJavaEnumBody() {
1776   // Determine whether the enum is simple, i.e. does not have a semicolon or
1777   // constants with class bodies. Simple enums can be formatted like braced
1778   // lists, contracted to a single line, etc.
1779   unsigned StoredPosition = Tokens->getPosition();
1780   bool IsSimple = true;
1781   FormatToken *Tok = Tokens->getNextToken();
1782   while (Tok) {
1783     if (Tok->is(tok::r_brace))
1784       break;
1785     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1786       IsSimple = false;
1787       break;
1788     }
1789     // FIXME: This will also mark enums with braces in the arguments to enum
1790     // constants as "not simple". This is probably fine in practice, though.
1791     Tok = Tokens->getNextToken();
1792   }
1793   FormatTok = Tokens->setPosition(StoredPosition);
1794 
1795   if (IsSimple) {
1796     parseBracedList();
1797     addUnwrappedLine();
1798     return;
1799   }
1800 
1801   // Parse the body of a more complex enum.
1802   // First add a line for everything up to the "{".
1803   nextToken();
1804   addUnwrappedLine();
1805   ++Line->Level;
1806 
1807   // Parse the enum constants.
1808   while (FormatTok) {
1809     if (FormatTok->is(tok::l_brace)) {
1810       // Parse the constant's class body.
1811       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1812                  /*MunchSemi=*/false);
1813     } else if (FormatTok->is(tok::l_paren)) {
1814       parseParens();
1815     } else if (FormatTok->is(tok::comma)) {
1816       nextToken();
1817       addUnwrappedLine();
1818     } else if (FormatTok->is(tok::semi)) {
1819       nextToken();
1820       addUnwrappedLine();
1821       break;
1822     } else if (FormatTok->is(tok::r_brace)) {
1823       addUnwrappedLine();
1824       break;
1825     } else {
1826       nextToken();
1827     }
1828   }
1829 
1830   // Parse the class body after the enum's ";" if any.
1831   parseLevel(/*HasOpeningBrace=*/true);
1832   nextToken();
1833   --Line->Level;
1834   addUnwrappedLine();
1835 }
1836 
1837 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
1838   const FormatToken &InitialToken = *FormatTok;
1839   nextToken();
1840 
1841   // The actual identifier can be a nested name specifier, and in macros
1842   // it is often token-pasted.
1843   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1844                             tok::kw___attribute, tok::kw___declspec,
1845                             tok::kw_alignas) ||
1846          ((Style.Language == FormatStyle::LK_Java ||
1847            Style.Language == FormatStyle::LK_JavaScript) &&
1848           FormatTok->isOneOf(tok::period, tok::comma))) {
1849     bool IsNonMacroIdentifier =
1850         FormatTok->is(tok::identifier) &&
1851         FormatTok->TokenText != FormatTok->TokenText.upper();
1852     nextToken();
1853     // We can have macros or attributes in between 'class' and the class name.
1854     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1855       parseParens();
1856   }
1857 
1858   // Note that parsing away template declarations here leads to incorrectly
1859   // accepting function declarations as record declarations.
1860   // In general, we cannot solve this problem. Consider:
1861   // class A<int> B() {}
1862   // which can be a function definition or a class definition when B() is a
1863   // macro. If we find enough real-world cases where this is a problem, we
1864   // can parse for the 'template' keyword in the beginning of the statement,
1865   // and thus rule out the record production in case there is no template
1866   // (this would still leave us with an ambiguity between template function
1867   // and class declarations).
1868   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1869     while (!eof()) {
1870       if (FormatTok->is(tok::l_brace)) {
1871         calculateBraceTypes(/*ExpectClassBody=*/true);
1872         if (!tryToParseBracedList())
1873           break;
1874       }
1875       if (FormatTok->Tok.is(tok::semi))
1876         return;
1877       nextToken();
1878     }
1879   }
1880   if (FormatTok->Tok.is(tok::l_brace)) {
1881     if (ParseAsExpr) {
1882       parseChildBlock();
1883     } else {
1884       if (ShouldBreakBeforeBrace(Style, InitialToken))
1885         addUnwrappedLine();
1886 
1887       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1888                  /*MunchSemi=*/false);
1889     }
1890   }
1891   // There is no addUnwrappedLine() here so that we fall through to parsing a
1892   // structural element afterwards. Thus, in "class A {} n, m;",
1893   // "} n, m;" will end up in one unwrapped line.
1894 }
1895 
1896 void UnwrappedLineParser::parseObjCProtocolList() {
1897   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1898   do
1899     nextToken();
1900   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1901   nextToken(); // Skip '>'.
1902 }
1903 
1904 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1905   do {
1906     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1907       nextToken();
1908       addUnwrappedLine();
1909       break;
1910     }
1911     if (FormatTok->is(tok::l_brace)) {
1912       parseBlock(/*MustBeDeclaration=*/false);
1913       // In ObjC interfaces, nothing should be following the "}".
1914       addUnwrappedLine();
1915     } else if (FormatTok->is(tok::r_brace)) {
1916       // Ignore stray "}". parseStructuralElement doesn't consume them.
1917       nextToken();
1918       addUnwrappedLine();
1919     } else {
1920       parseStructuralElement();
1921     }
1922   } while (!eof());
1923 }
1924 
1925 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1926   nextToken();
1927   nextToken(); // interface name
1928 
1929   // @interface can be followed by either a base class, or a category.
1930   if (FormatTok->Tok.is(tok::colon)) {
1931     nextToken();
1932     nextToken(); // base class name
1933   } else if (FormatTok->Tok.is(tok::l_paren))
1934     // Skip category, if present.
1935     parseParens();
1936 
1937   if (FormatTok->Tok.is(tok::less))
1938     parseObjCProtocolList();
1939 
1940   if (FormatTok->Tok.is(tok::l_brace)) {
1941     if (Style.BraceWrapping.AfterObjCDeclaration)
1942       addUnwrappedLine();
1943     parseBlock(/*MustBeDeclaration=*/true);
1944   }
1945 
1946   // With instance variables, this puts '}' on its own line.  Without instance
1947   // variables, this ends the @interface line.
1948   addUnwrappedLine();
1949 
1950   parseObjCUntilAtEnd();
1951 }
1952 
1953 void UnwrappedLineParser::parseObjCProtocol() {
1954   nextToken();
1955   nextToken(); // protocol name
1956 
1957   if (FormatTok->Tok.is(tok::less))
1958     parseObjCProtocolList();
1959 
1960   // Check for protocol declaration.
1961   if (FormatTok->Tok.is(tok::semi)) {
1962     nextToken();
1963     return addUnwrappedLine();
1964   }
1965 
1966   addUnwrappedLine();
1967   parseObjCUntilAtEnd();
1968 }
1969 
1970 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1971   bool IsImport = FormatTok->is(Keywords.kw_import);
1972   assert(IsImport || FormatTok->is(tok::kw_export));
1973   nextToken();
1974 
1975   // Consume the "default" in "export default class/function".
1976   if (FormatTok->is(tok::kw_default))
1977     nextToken();
1978 
1979   // Consume "async function", "function" and "default function", so that these
1980   // get parsed as free-standing JS functions, i.e. do not require a trailing
1981   // semicolon.
1982   if (FormatTok->is(Keywords.kw_async))
1983     nextToken();
1984   if (FormatTok->is(Keywords.kw_function)) {
1985     nextToken();
1986     return;
1987   }
1988 
1989   // For imports, `export *`, `export {...}`, consume the rest of the line up
1990   // to the terminating `;`. For everything else, just return and continue
1991   // parsing the structural element, i.e. the declaration or expression for
1992   // `export default`.
1993   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
1994       !FormatTok->isStringLiteral())
1995     return;
1996 
1997   while (!eof()) {
1998     if (FormatTok->is(tok::semi))
1999       return;
2000     if (Line->Tokens.size() == 0) {
2001       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2002       // import statement should terminate.
2003       return;
2004     }
2005     if (FormatTok->is(tok::l_brace)) {
2006       FormatTok->BlockKind = BK_Block;
2007       parseBracedList();
2008     } else {
2009       nextToken();
2010     }
2011   }
2012 }
2013 
2014 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2015                                                  StringRef Prefix = "") {
2016   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2017                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2018   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2019                                                     E = Line.Tokens.end();
2020        I != E; ++I) {
2021     llvm::dbgs() << I->Tok->Tok.getName() << "["
2022                  << "T=" << I->Tok->Type
2023                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2024   }
2025   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2026                                                     E = Line.Tokens.end();
2027        I != E; ++I) {
2028     const UnwrappedLineNode &Node = *I;
2029     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2030              I = Node.Children.begin(),
2031              E = Node.Children.end();
2032          I != E; ++I) {
2033       printDebugInfo(*I, "\nChild: ");
2034     }
2035   }
2036   llvm::dbgs() << "\n";
2037 }
2038 
2039 void UnwrappedLineParser::addUnwrappedLine() {
2040   if (Line->Tokens.empty())
2041     return;
2042   DEBUG({
2043     if (CurrentLines == &Lines)
2044       printDebugInfo(*Line);
2045   });
2046   CurrentLines->push_back(std::move(*Line));
2047   Line->Tokens.clear();
2048   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2049     CurrentLines->append(
2050         std::make_move_iterator(PreprocessorDirectives.begin()),
2051         std::make_move_iterator(PreprocessorDirectives.end()));
2052     PreprocessorDirectives.clear();
2053   }
2054 }
2055 
2056 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2057 
2058 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2059   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2060          FormatTok.NewlinesBefore > 0;
2061 }
2062 
2063 static bool isLineComment(const FormatToken &FormatTok) {
2064   return FormatTok.is(tok::comment) &&
2065          FormatTok.TokenText.startswith("//");
2066 }
2067 
2068 // Checks if \p FormatTok is a line comment that continues the line comment
2069 // section on \p Line.
2070 static bool continuesLineComment(const FormatToken &FormatTok,
2071                                  const UnwrappedLine &Line,
2072                                  llvm::Regex &CommentPragmasRegex) {
2073   if (Line.Tokens.empty())
2074     return false;
2075 
2076   StringRef IndentContent = FormatTok.TokenText;
2077   if (FormatTok.TokenText.startswith("//") ||
2078       FormatTok.TokenText.startswith("/*"))
2079     IndentContent = FormatTok.TokenText.substr(2);
2080   if (CommentPragmasRegex.match(IndentContent))
2081     return false;
2082 
2083   // If Line starts with a line comment, then FormatTok continues the comment
2084   // section if its original column is greater or equal to the original start
2085   // column of the line.
2086   //
2087   // Define the min column token of a line as follows: if a line ends in '{' or
2088   // contains a '{' followed by a line comment, then the min column token is
2089   // that '{'. Otherwise, the min column token of the line is the first token of
2090   // the line.
2091   //
2092   // If Line starts with a token other than a line comment, then FormatTok
2093   // continues the comment section if its original column is greater than the
2094   // original start column of the min column token of the line.
2095   //
2096   // For example, the second line comment continues the first in these cases:
2097   //
2098   // // first line
2099   // // second line
2100   //
2101   // and:
2102   //
2103   // // first line
2104   //  // second line
2105   //
2106   // and:
2107   //
2108   // int i; // first line
2109   //  // second line
2110   //
2111   // and:
2112   //
2113   // do { // first line
2114   //      // second line
2115   //   int i;
2116   // } while (true);
2117   //
2118   // and:
2119   //
2120   // enum {
2121   //   a, // first line
2122   //    // second line
2123   //   b
2124   // };
2125   //
2126   // The second line comment doesn't continue the first in these cases:
2127   //
2128   //   // first line
2129   //  // second line
2130   //
2131   // and:
2132   //
2133   // int i; // first line
2134   // // second line
2135   //
2136   // and:
2137   //
2138   // do { // first line
2139   //   // second line
2140   //   int i;
2141   // } while (true);
2142   //
2143   // and:
2144   //
2145   // enum {
2146   //   a, // first line
2147   //   // second line
2148   // };
2149   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2150 
2151   // Scan for '{//'. If found, use the column of '{' as a min column for line
2152   // comment section continuation.
2153   const FormatToken *PreviousToken = nullptr;
2154   for (const UnwrappedLineNode Node : Line.Tokens) {
2155     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2156         isLineComment(*Node.Tok)) {
2157       MinColumnToken = PreviousToken;
2158       break;
2159     }
2160     PreviousToken = Node.Tok;
2161 
2162     // Grab the last newline preceding a token in this unwrapped line.
2163     if (Node.Tok->NewlinesBefore > 0) {
2164       MinColumnToken = Node.Tok;
2165     }
2166   }
2167   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2168     MinColumnToken = PreviousToken;
2169   }
2170 
2171   unsigned MinContinueColumn =
2172       MinColumnToken->OriginalColumn +
2173       (isLineComment(*MinColumnToken) ? 0 : 1);
2174   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
2175          isLineComment(*(Line.Tokens.back().Tok)) &&
2176          FormatTok.OriginalColumn >= MinContinueColumn;
2177 }
2178 
2179 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2180   bool JustComments = Line->Tokens.empty();
2181   for (SmallVectorImpl<FormatToken *>::const_iterator
2182            I = CommentsBeforeNextToken.begin(),
2183            E = CommentsBeforeNextToken.end();
2184        I != E; ++I) {
2185     // Line comments that belong to the same line comment section are put on the
2186     // same line since later we might want to reflow content between them.
2187     // Additional fine-grained breaking of line comment sections is controlled
2188     // by the class BreakableLineCommentSection in case it is desirable to keep
2189     // several line comment sections in the same unwrapped line.
2190     //
2191     // FIXME: Consider putting separate line comment sections as children to the
2192     // unwrapped line instead.
2193     (*I)->ContinuesLineCommentSection =
2194         continuesLineComment(**I, *Line, CommentPragmasRegex);
2195     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2196       addUnwrappedLine();
2197     pushToken(*I);
2198   }
2199   if (NewlineBeforeNext && JustComments)
2200     addUnwrappedLine();
2201   CommentsBeforeNextToken.clear();
2202 }
2203 
2204 void UnwrappedLineParser::nextToken() {
2205   if (eof())
2206     return;
2207   flushComments(isOnNewLine(*FormatTok));
2208   pushToken(FormatTok);
2209   if (Style.Language != FormatStyle::LK_JavaScript)
2210     readToken();
2211   else
2212     readTokenWithJavaScriptASI();
2213 }
2214 
2215 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2216   // FIXME: This is a dirty way to access the previous token. Find a better
2217   // solution.
2218   if (!Line || Line->Tokens.empty())
2219     return nullptr;
2220   return Line->Tokens.back().Tok;
2221 }
2222 
2223 void UnwrappedLineParser::distributeComments(
2224     const SmallVectorImpl<FormatToken *> &Comments,
2225     const FormatToken *NextTok) {
2226   // Whether or not a line comment token continues a line is controlled by
2227   // the method continuesLineComment, with the following caveat:
2228   //
2229   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2230   // that each comment line from the trail is aligned with the next token, if
2231   // the next token exists. If a trail exists, the beginning of the maximal
2232   // trail is marked as a start of a new comment section.
2233   //
2234   // For example in this code:
2235   //
2236   // int a; // line about a
2237   //   // line 1 about b
2238   //   // line 2 about b
2239   //   int b;
2240   //
2241   // the two lines about b form a maximal trail, so there are two sections, the
2242   // first one consisting of the single comment "// line about a" and the
2243   // second one consisting of the next two comments.
2244   if (Comments.empty())
2245     return;
2246   bool ShouldPushCommentsInCurrentLine = true;
2247   bool HasTrailAlignedWithNextToken = false;
2248   unsigned StartOfTrailAlignedWithNextToken = 0;
2249   if (NextTok) {
2250     // We are skipping the first element intentionally.
2251     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2252       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2253         HasTrailAlignedWithNextToken = true;
2254         StartOfTrailAlignedWithNextToken = i;
2255       }
2256     }
2257   }
2258   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2259     FormatToken *FormatTok = Comments[i];
2260     if (HasTrailAlignedWithNextToken &&
2261         i == StartOfTrailAlignedWithNextToken) {
2262       FormatTok->ContinuesLineCommentSection = false;
2263     } else {
2264       FormatTok->ContinuesLineCommentSection =
2265           continuesLineComment(*FormatTok, *Line, CommentPragmasRegex);
2266     }
2267     if (!FormatTok->ContinuesLineCommentSection &&
2268         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2269       ShouldPushCommentsInCurrentLine = false;
2270     }
2271     if (ShouldPushCommentsInCurrentLine) {
2272       pushToken(FormatTok);
2273     } else {
2274       CommentsBeforeNextToken.push_back(FormatTok);
2275     }
2276   }
2277 }
2278 
2279 void UnwrappedLineParser::readToken() {
2280   SmallVector<FormatToken *, 1> Comments;
2281   do {
2282     FormatTok = Tokens->getNextToken();
2283     assert(FormatTok);
2284     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2285            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2286       distributeComments(Comments, FormatTok);
2287       Comments.clear();
2288       // If there is an unfinished unwrapped line, we flush the preprocessor
2289       // directives only after that unwrapped line was finished later.
2290       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2291       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2292       // Comments stored before the preprocessor directive need to be output
2293       // before the preprocessor directive, at the same level as the
2294       // preprocessor directive, as we consider them to apply to the directive.
2295       flushComments(isOnNewLine(*FormatTok));
2296       parsePPDirective();
2297     }
2298     while (FormatTok->Type == TT_ConflictStart ||
2299            FormatTok->Type == TT_ConflictEnd ||
2300            FormatTok->Type == TT_ConflictAlternative) {
2301       if (FormatTok->Type == TT_ConflictStart) {
2302         conditionalCompilationStart(/*Unreachable=*/false);
2303       } else if (FormatTok->Type == TT_ConflictAlternative) {
2304         conditionalCompilationAlternative();
2305       } else if (FormatTok->Type == TT_ConflictEnd) {
2306         conditionalCompilationEnd();
2307       }
2308       FormatTok = Tokens->getNextToken();
2309       FormatTok->MustBreakBefore = true;
2310     }
2311 
2312     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
2313         !Line->InPPDirective) {
2314       continue;
2315     }
2316 
2317     if (!FormatTok->Tok.is(tok::comment)) {
2318       distributeComments(Comments, FormatTok);
2319       Comments.clear();
2320       return;
2321     }
2322 
2323     Comments.push_back(FormatTok);
2324   } while (!eof());
2325 
2326   distributeComments(Comments, nullptr);
2327   Comments.clear();
2328 }
2329 
2330 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2331   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2332   if (MustBreakBeforeNextToken) {
2333     Line->Tokens.back().Tok->MustBreakBefore = true;
2334     MustBreakBeforeNextToken = false;
2335   }
2336 }
2337 
2338 } // end namespace format
2339 } // end namespace clang
2340