1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 static bool isLineComment(const FormatToken &FormatTok) {
59   return FormatTok.is(tok::comment) &&
60          FormatTok.TokenText.startswith("//");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     TokenSource = this;
86     Line.Level = 0;
87     Line.InPPDirective = true;
88   }
89 
90   ~ScopedMacroState() override {
91     TokenSource = PreviousTokenSource;
92     ResetToken = Token;
93     Line.InPPDirective = false;
94     Line.Level = PreviousLineLevel;
95   }
96 
97   FormatToken *getNextToken() override {
98     // The \c UnwrappedLineParser guards against this by never calling
99     // \c getNextToken() after it has encountered the first eof token.
100     assert(!eof());
101     PreviousToken = Token;
102     Token = PreviousTokenSource->getNextToken();
103     if (eof())
104       return getFakeEOF();
105     return Token;
106   }
107 
108   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
109 
110   FormatToken *setPosition(unsigned Position) override {
111     PreviousToken = nullptr;
112     Token = PreviousTokenSource->setPosition(Position);
113     return Token;
114   }
115 
116 private:
117   bool eof() {
118     return Token && Token->HasUnescapedNewline &&
119            !continuesLineComment(*Token, PreviousToken,
120                                  /*MinColumnToken=*/PreviousToken);
121   }
122 
123   FormatToken *getFakeEOF() {
124     static bool EOFInitialized = false;
125     static FormatToken FormatTok;
126     if (!EOFInitialized) {
127       FormatTok.Tok.startToken();
128       FormatTok.Tok.setKind(tok::eof);
129       EOFInitialized = true;
130     }
131     return &FormatTok;
132   }
133 
134   UnwrappedLine &Line;
135   FormatTokenSource *&TokenSource;
136   FormatToken *&ResetToken;
137   unsigned PreviousLineLevel;
138   FormatTokenSource *PreviousTokenSource;
139 
140   FormatToken *Token;
141   FormatToken *PreviousToken;
142 };
143 
144 } // end anonymous namespace
145 
146 class ScopedLineState {
147 public:
148   ScopedLineState(UnwrappedLineParser &Parser,
149                   bool SwitchToPreprocessorLines = false)
150       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
151     if (SwitchToPreprocessorLines)
152       Parser.CurrentLines = &Parser.PreprocessorDirectives;
153     else if (!Parser.Line->Tokens.empty())
154       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
155     PreBlockLine = std::move(Parser.Line);
156     Parser.Line = llvm::make_unique<UnwrappedLine>();
157     Parser.Line->Level = PreBlockLine->Level;
158     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
159   }
160 
161   ~ScopedLineState() {
162     if (!Parser.Line->Tokens.empty()) {
163       Parser.addUnwrappedLine();
164     }
165     assert(Parser.Line->Tokens.empty());
166     Parser.Line = std::move(PreBlockLine);
167     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
168       Parser.MustBreakBeforeNextToken = true;
169     Parser.CurrentLines = OriginalLines;
170   }
171 
172 private:
173   UnwrappedLineParser &Parser;
174 
175   std::unique_ptr<UnwrappedLine> PreBlockLine;
176   SmallVectorImpl<UnwrappedLine> *OriginalLines;
177 };
178 
179 class CompoundStatementIndenter {
180 public:
181   CompoundStatementIndenter(UnwrappedLineParser *Parser,
182                             const FormatStyle &Style, unsigned &LineLevel)
183       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
184     if (Style.BraceWrapping.AfterControlStatement)
185       Parser->addUnwrappedLine();
186     if (Style.BraceWrapping.IndentBraces)
187       ++LineLevel;
188   }
189   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
190 
191 private:
192   unsigned &LineLevel;
193   unsigned OldLineLevel;
194 };
195 
196 namespace {
197 
198 class IndexedTokenSource : public FormatTokenSource {
199 public:
200   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
201       : Tokens(Tokens), Position(-1) {}
202 
203   FormatToken *getNextToken() override {
204     ++Position;
205     return Tokens[Position];
206   }
207 
208   unsigned getPosition() override {
209     assert(Position >= 0);
210     return Position;
211   }
212 
213   FormatToken *setPosition(unsigned P) override {
214     Position = P;
215     return Tokens[Position];
216   }
217 
218   void reset() { Position = -1; }
219 
220 private:
221   ArrayRef<FormatToken *> Tokens;
222   int Position;
223 };
224 
225 } // end anonymous namespace
226 
227 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
228                                          const AdditionalKeywords &Keywords,
229                                          ArrayRef<FormatToken *> Tokens,
230                                          UnwrappedLineConsumer &Callback)
231     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
232       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
233       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
234       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
235       IfNdefCondition(nullptr), FoundIncludeGuardStart(false),
236       IncludeGuardRejected(false) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IfNdefCondition = nullptr;
241   FoundIncludeGuardStart = false;
242   IncludeGuardRejected = false;
243   Line.reset(new UnwrappedLine);
244   CommentsBeforeNextToken.clear();
245   FormatTok = nullptr;
246   MustBreakBeforeNextToken = false;
247   PreprocessorDirectives.clear();
248   CurrentLines = &Lines;
249   DeclarationScopeStack.clear();
250   PPStack.clear();
251 }
252 
253 void UnwrappedLineParser::parse() {
254   IndexedTokenSource TokenSource(AllTokens);
255   do {
256     DEBUG(llvm::dbgs() << "----\n");
257     reset();
258     Tokens = &TokenSource;
259     TokenSource.reset();
260 
261     readToken();
262     parseFile();
263     // Create line with eof token.
264     pushToken(FormatTok);
265     addUnwrappedLine();
266 
267     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
268                                                   E = Lines.end();
269          I != E; ++I) {
270       Callback.consumeUnwrappedLine(*I);
271     }
272     Callback.finishRun();
273     Lines.clear();
274     while (!PPLevelBranchIndex.empty() &&
275            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
276       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
277       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
278     }
279     if (!PPLevelBranchIndex.empty()) {
280       ++PPLevelBranchIndex.back();
281       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
282       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
283     }
284   } while (!PPLevelBranchIndex.empty());
285 }
286 
287 void UnwrappedLineParser::parseFile() {
288   // The top-level context in a file always has declarations, except for pre-
289   // processor directives and JavaScript files.
290   bool MustBeDeclaration =
291       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
292   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
293                                           MustBeDeclaration);
294   if (Style.Language == FormatStyle::LK_TextProto)
295     parseBracedList();
296   else
297     parseLevel(/*HasOpeningBrace=*/false);
298   // Make sure to format the remaining tokens.
299   flushComments(true);
300   addUnwrappedLine();
301 }
302 
303 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
304   bool SwitchLabelEncountered = false;
305   do {
306     tok::TokenKind kind = FormatTok->Tok.getKind();
307     if (FormatTok->Type == TT_MacroBlockBegin) {
308       kind = tok::l_brace;
309     } else if (FormatTok->Type == TT_MacroBlockEnd) {
310       kind = tok::r_brace;
311     }
312 
313     switch (kind) {
314     case tok::comment:
315       nextToken();
316       addUnwrappedLine();
317       break;
318     case tok::l_brace:
319       // FIXME: Add parameter whether this can happen - if this happens, we must
320       // be in a non-declaration context.
321       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
322         continue;
323       parseBlock(/*MustBeDeclaration=*/false);
324       addUnwrappedLine();
325       break;
326     case tok::r_brace:
327       if (HasOpeningBrace)
328         return;
329       nextToken();
330       addUnwrappedLine();
331       break;
332     case tok::kw_default:
333     case tok::kw_case:
334       if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) {
335         // A 'case: string' style field declaration.
336         parseStructuralElement();
337         break;
338       }
339       if (!SwitchLabelEncountered &&
340           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
341         ++Line->Level;
342       SwitchLabelEncountered = true;
343       parseStructuralElement();
344       break;
345     default:
346       parseStructuralElement();
347       break;
348     }
349   } while (!eof());
350 }
351 
352 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
353   // We'll parse forward through the tokens until we hit
354   // a closing brace or eof - note that getNextToken() will
355   // parse macros, so this will magically work inside macro
356   // definitions, too.
357   unsigned StoredPosition = Tokens->getPosition();
358   FormatToken *Tok = FormatTok;
359   const FormatToken *PrevTok = getPreviousToken();
360   // Keep a stack of positions of lbrace tokens. We will
361   // update information about whether an lbrace starts a
362   // braced init list or a different block during the loop.
363   SmallVector<FormatToken *, 8> LBraceStack;
364   assert(Tok->Tok.is(tok::l_brace));
365   do {
366     // Get next non-comment token.
367     FormatToken *NextTok;
368     unsigned ReadTokens = 0;
369     do {
370       NextTok = Tokens->getNextToken();
371       ++ReadTokens;
372     } while (NextTok->is(tok::comment));
373 
374     switch (Tok->Tok.getKind()) {
375     case tok::l_brace:
376       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
377         if (PrevTok->is(tok::colon))
378           // A colon indicates this code is in a type, or a braced list
379           // following a label in an object literal ({a: {b: 1}}). The code
380           // below could be confused by semicolons between the individual
381           // members in a type member list, which would normally trigger
382           // BK_Block. In both cases, this must be parsed as an inline braced
383           // init.
384           Tok->BlockKind = BK_BracedInit;
385         else if (PrevTok->is(tok::r_paren))
386           // `) { }` can only occur in function or method declarations in JS.
387           Tok->BlockKind = BK_Block;
388       } else {
389         Tok->BlockKind = BK_Unknown;
390       }
391       LBraceStack.push_back(Tok);
392       break;
393     case tok::r_brace:
394       if (LBraceStack.empty())
395         break;
396       if (LBraceStack.back()->BlockKind == BK_Unknown) {
397         bool ProbablyBracedList = false;
398         if (Style.Language == FormatStyle::LK_Proto) {
399           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
400         } else {
401           // Using OriginalColumn to distinguish between ObjC methods and
402           // binary operators is a bit hacky.
403           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
404                                   NextTok->OriginalColumn == 0;
405 
406           // If there is a comma, semicolon or right paren after the closing
407           // brace, we assume this is a braced initializer list.  Note that
408           // regardless how we mark inner braces here, we will overwrite the
409           // BlockKind later if we parse a braced list (where all blocks
410           // inside are by default braced lists), or when we explicitly detect
411           // blocks (for example while parsing lambdas).
412           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
413           // braced list in JS.
414           ProbablyBracedList =
415               (Style.Language == FormatStyle::LK_JavaScript &&
416                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
417                                 Keywords.kw_as)) ||
418               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
419               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
420                                tok::r_paren, tok::r_square, tok::l_brace,
421                                tok::l_square, tok::ellipsis) ||
422               (NextTok->is(tok::identifier) &&
423                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
424               (NextTok->is(tok::semi) &&
425                (!ExpectClassBody || LBraceStack.size() != 1)) ||
426               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
427         }
428         if (ProbablyBracedList) {
429           Tok->BlockKind = BK_BracedInit;
430           LBraceStack.back()->BlockKind = BK_BracedInit;
431         } else {
432           Tok->BlockKind = BK_Block;
433           LBraceStack.back()->BlockKind = BK_Block;
434         }
435       }
436       LBraceStack.pop_back();
437       break;
438     case tok::at:
439     case tok::semi:
440     case tok::kw_if:
441     case tok::kw_while:
442     case tok::kw_for:
443     case tok::kw_switch:
444     case tok::kw_try:
445     case tok::kw___try:
446       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
447         LBraceStack.back()->BlockKind = BK_Block;
448       break;
449     default:
450       break;
451     }
452     PrevTok = Tok;
453     Tok = NextTok;
454   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
455 
456   // Assume other blocks for all unclosed opening braces.
457   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
458     if (LBraceStack[i]->BlockKind == BK_Unknown)
459       LBraceStack[i]->BlockKind = BK_Block;
460   }
461 
462   FormatTok = Tokens->setPosition(StoredPosition);
463 }
464 
/// Mixes the \c std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>()(v);
  const std::size_t Mixed = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed = seed ^ Mixed;
}
470 
471 size_t UnwrappedLineParser::computePPHash() const {
472   size_t h = 0;
473   for (const auto &i : PPStack) {
474     hash_combine(h, size_t(i.Kind));
475     hash_combine(h, i.Line);
476   }
477   return h;
478 }
479 
480 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
481                                      bool MunchSemi) {
482   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
483          "'{' or macro block token expected");
484   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
485   FormatTok->BlockKind = BK_Block;
486 
487   size_t PPStartHash = computePPHash();
488 
489   unsigned InitialLevel = Line->Level;
490   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
491 
492   if (MacroBlock && FormatTok->is(tok::l_paren))
493     parseParens();
494 
495   size_t NbPreprocessorDirectives =
496       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
497   addUnwrappedLine();
498   size_t OpeningLineIndex =
499       CurrentLines->empty()
500           ? (UnwrappedLine::kInvalidIndex)
501           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
502 
503   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
504                                           MustBeDeclaration);
505   if (AddLevel)
506     ++Line->Level;
507   parseLevel(/*HasOpeningBrace=*/true);
508 
509   if (eof())
510     return;
511 
512   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
513                  : !FormatTok->is(tok::r_brace)) {
514     Line->Level = InitialLevel;
515     FormatTok->BlockKind = BK_Block;
516     return;
517   }
518 
519   size_t PPEndHash = computePPHash();
520 
521   // Munch the closing brace.
522   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
523 
524   if (MacroBlock && FormatTok->is(tok::l_paren))
525     parseParens();
526 
527   if (MunchSemi && FormatTok->Tok.is(tok::semi))
528     nextToken();
529   Line->Level = InitialLevel;
530 
531   if (PPStartHash == PPEndHash) {
532     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
533     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
534       // Update the opening line to add the forward reference as well
535       (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
536           CurrentLines->size() - 1;
537     }
538   }
539 }
540 
541 static bool isGoogScope(const UnwrappedLine &Line) {
542   // FIXME: Closure-library specific stuff should not be hard-coded but be
543   // configurable.
544   if (Line.Tokens.size() < 4)
545     return false;
546   auto I = Line.Tokens.begin();
547   if (I->Tok->TokenText != "goog")
548     return false;
549   ++I;
550   if (I->Tok->isNot(tok::period))
551     return false;
552   ++I;
553   if (I->Tok->TokenText != "scope")
554     return false;
555   ++I;
556   return I->Tok->is(tok::l_paren);
557 }
558 
559 static bool isIIFE(const UnwrappedLine &Line,
560                    const AdditionalKeywords &Keywords) {
561   // Look for the start of an immediately invoked anonymous function.
562   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
563   // This is commonly done in JavaScript to create a new, anonymous scope.
564   // Example: (function() { ... })()
565   if (Line.Tokens.size() < 3)
566     return false;
567   auto I = Line.Tokens.begin();
568   if (I->Tok->isNot(tok::l_paren))
569     return false;
570   ++I;
571   if (I->Tok->isNot(Keywords.kw_function))
572     return false;
573   ++I;
574   return I->Tok->is(tok::l_paren);
575 }
576 
577 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
578                                    const FormatToken &InitialToken) {
579   if (InitialToken.is(tok::kw_namespace))
580     return Style.BraceWrapping.AfterNamespace;
581   if (InitialToken.is(tok::kw_class))
582     return Style.BraceWrapping.AfterClass;
583   if (InitialToken.is(tok::kw_union))
584     return Style.BraceWrapping.AfterUnion;
585   if (InitialToken.is(tok::kw_struct))
586     return Style.BraceWrapping.AfterStruct;
587   return false;
588 }
589 
590 void UnwrappedLineParser::parseChildBlock() {
591   FormatTok->BlockKind = BK_Block;
592   nextToken();
593   {
594     bool SkipIndent =
595         (Style.Language == FormatStyle::LK_JavaScript &&
596          (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
597     ScopedLineState LineState(*this);
598     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
599                                             /*MustBeDeclaration=*/false);
600     Line->Level += SkipIndent ? 0 : 1;
601     parseLevel(/*HasOpeningBrace=*/true);
602     flushComments(isOnNewLine(*FormatTok));
603     Line->Level -= SkipIndent ? 0 : 1;
604   }
605   nextToken();
606 }
607 
608 void UnwrappedLineParser::parsePPDirective() {
609   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
610   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
611   nextToken();
612 
613   if (!FormatTok->Tok.getIdentifierInfo()) {
614     parsePPUnknown();
615     return;
616   }
617 
618   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
619   case tok::pp_define:
620     parsePPDefine();
621     return;
622   case tok::pp_if:
623     parsePPIf(/*IfDef=*/false);
624     break;
625   case tok::pp_ifdef:
626   case tok::pp_ifndef:
627     parsePPIf(/*IfDef=*/true);
628     break;
629   case tok::pp_else:
630     parsePPElse();
631     break;
632   case tok::pp_elif:
633     parsePPElIf();
634     break;
635   case tok::pp_endif:
636     parsePPEndIf();
637     break;
638   default:
639     parsePPUnknown();
640     break;
641   }
642 }
643 
644 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
645   size_t Line = CurrentLines->size();
646   if (CurrentLines == &PreprocessorDirectives)
647     Line += Lines.size();
648 
649   if (Unreachable ||
650       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
651     PPStack.push_back({PP_Unreachable, Line});
652   else
653     PPStack.push_back({PP_Conditional, Line});
654 }
655 
656 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
657   ++PPBranchLevel;
658   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
659   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
660     PPLevelBranchIndex.push_back(0);
661     PPLevelBranchCount.push_back(0);
662   }
663   PPChainBranchIndex.push(0);
664   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
665   conditionalCompilationCondition(Unreachable || Skip);
666 }
667 
668 void UnwrappedLineParser::conditionalCompilationAlternative() {
669   if (!PPStack.empty())
670     PPStack.pop_back();
671   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
672   if (!PPChainBranchIndex.empty())
673     ++PPChainBranchIndex.top();
674   conditionalCompilationCondition(
675       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
676       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
677 }
678 
679 void UnwrappedLineParser::conditionalCompilationEnd() {
680   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
681   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
682     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
683       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
684     }
685   }
686   // Guard against #endif's without #if.
687   if (PPBranchLevel > -1)
688     --PPBranchLevel;
689   if (!PPChainBranchIndex.empty())
690     PPChainBranchIndex.pop();
691   if (!PPStack.empty())
692     PPStack.pop_back();
693 }
694 
695 void UnwrappedLineParser::parsePPIf(bool IfDef) {
696   bool IfNDef = FormatTok->is(tok::pp_ifndef);
697   nextToken();
698   bool Unreachable = false;
699   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
700     Unreachable = true;
701   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
702     Unreachable = true;
703   conditionalCompilationStart(Unreachable);
704   FormatToken *IfCondition = FormatTok;
705   // If there's a #ifndef on the first line, and the only lines before it are
706   // comments, it could be an include guard.
707   bool MaybeIncludeGuard = IfNDef;
708   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) {
709     for (auto &Line : Lines) {
710       if (!Line.Tokens.front().Tok->is(tok::comment)) {
711         MaybeIncludeGuard = false;
712         IncludeGuardRejected = true;
713         break;
714       }
715     }
716   }
717   --PPBranchLevel;
718   parsePPUnknown();
719   ++PPBranchLevel;
720   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard)
721     IfNdefCondition = IfCondition;
722 }
723 
724 void UnwrappedLineParser::parsePPElse() {
725   // If a potential include guard has an #else, it's not an include guard.
726   if (FoundIncludeGuardStart && PPBranchLevel == 0)
727     FoundIncludeGuardStart = false;
728   conditionalCompilationAlternative();
729   if (PPBranchLevel > -1)
730     --PPBranchLevel;
731   parsePPUnknown();
732   ++PPBranchLevel;
733 }
734 
735 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
736 
737 void UnwrappedLineParser::parsePPEndIf() {
738   conditionalCompilationEnd();
739   parsePPUnknown();
740   // If the #endif of a potential include guard is the last thing in the file,
741   // then we count it as a real include guard and subtract one from every
742   // preprocessor indent.
743   unsigned TokenPosition = Tokens->getPosition();
744   FormatToken *PeekNext = AllTokens[TokenPosition];
745   if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) &&
746       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
747     for (auto &Line : Lines)
748       if (Line.InPPDirective && Line.Level > 0)
749         --Line.Level;
750 }
751 
752 void UnwrappedLineParser::parsePPDefine() {
753   nextToken();
754 
755   if (FormatTok->Tok.getKind() != tok::identifier) {
756     parsePPUnknown();
757     return;
758   }
759   if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) {
760     FoundIncludeGuardStart = true;
761     for (auto &Line : Lines) {
762       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
763         FoundIncludeGuardStart = false;
764         break;
765       }
766     }
767   }
768   IfNdefCondition = nullptr;
769   nextToken();
770   if (FormatTok->Tok.getKind() == tok::l_paren &&
771       FormatTok->WhitespaceRange.getBegin() ==
772           FormatTok->WhitespaceRange.getEnd()) {
773     parseParens();
774   }
775   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
776     Line->Level += PPBranchLevel + 1;
777   addUnwrappedLine();
778   ++Line->Level;
779 
780   // Errors during a preprocessor directive can only affect the layout of the
781   // preprocessor directive, and thus we ignore them. An alternative approach
782   // would be to use the same approach we use on the file level (no
783   // re-indentation if there was a structural error) within the macro
784   // definition.
785   parseFile();
786 }
787 
788 void UnwrappedLineParser::parsePPUnknown() {
789   do {
790     nextToken();
791   } while (!eof());
792   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
793     Line->Level += PPBranchLevel + 1;
794   addUnwrappedLine();
795   IfNdefCondition = nullptr;
796 }
797 
798 // Here we blacklist certain tokens that are not usually the first token in an
799 // unwrapped line. This is used in attempt to distinguish macro calls without
800 // trailing semicolons from other constructs split to several lines.
801 static bool tokenCanStartNewLine(const clang::Token &Tok) {
802   // Semicolon can be a null-statement, l_square can be a start of a macro or
803   // a C++11 attribute, but this doesn't seem to be common.
804   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
805          Tok.isNot(tok::l_square) &&
806          // Tokens that can only be used as binary operators and a part of
807          // overloaded operator names.
808          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
809          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
810          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
811          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
812          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
813          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
814          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
815          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
816          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
817          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
818          Tok.isNot(tok::lesslessequal) &&
819          // Colon is used in labels, base class lists, initializer lists,
820          // range-based for loops, ternary operator, but should never be the
821          // first token in an unwrapped line.
822          Tok.isNot(tok::colon) &&
823          // 'noexcept' is a trailing annotation.
824          Tok.isNot(tok::kw_noexcept);
825 }
826 
827 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
828                           const FormatToken *FormatTok) {
829   // FIXME: This returns true for C/C++ keywords like 'struct'.
830   return FormatTok->is(tok::identifier) &&
831          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
832           !FormatTok->isOneOf(
833               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
834               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
835               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
836               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
837               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
838               Keywords.kw_instanceof, Keywords.kw_interface,
839               Keywords.kw_throws, Keywords.kw_from));
840 }
841 
842 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
843                                  const FormatToken *FormatTok) {
844   return FormatTok->Tok.isLiteral() ||
845          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
846          mustBeJSIdent(Keywords, FormatTok);
847 }
848 
849 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
850 // when encountered after a value (see mustBeJSIdentOrValue).
851 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
852                            const FormatToken *FormatTok) {
853   return FormatTok->isOneOf(
854       tok::kw_return, Keywords.kw_yield,
855       // conditionals
856       tok::kw_if, tok::kw_else,
857       // loops
858       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
859       // switch/case
860       tok::kw_switch, tok::kw_case,
861       // exceptions
862       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
863       // declaration
864       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
865       Keywords.kw_async, Keywords.kw_function,
866       // import/export
867       Keywords.kw_import, tok::kw_export);
868 }
869 
870 // readTokenWithJavaScriptASI reads the next token and terminates the current
871 // line if JavaScript Automatic Semicolon Insertion must
872 // happen between the current token and the next token.
873 //
874 // This method is conservative - it cannot cover all edge cases of JavaScript,
875 // but only aims to correctly handle certain well known cases. It *must not*
876 // return true in speculative cases.
877 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
878   FormatToken *Previous = FormatTok;
879   readToken();
880   FormatToken *Next = FormatTok;
881 
882   bool IsOnSameLine =
883       CommentsBeforeNextToken.empty()
884           ? Next->NewlinesBefore == 0
885           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
886   if (IsOnSameLine)
887     return;
888 
889   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
890   bool PreviousStartsTemplateExpr =
891       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
892   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
893     // If the token before the previous one is an '@', the previous token is an
894     // annotation and can precede another identifier/value.
895     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
896     if (PrePrevious->is(tok::at))
897       return;
898   }
899   if (Next->is(tok::exclaim) && PreviousMustBeValue)
900     return addUnwrappedLine();
901   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
902   bool NextEndsTemplateExpr =
903       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
904   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
905       (PreviousMustBeValue ||
906        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
907                          tok::minusminus)))
908     return addUnwrappedLine();
909   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
910       isJSDeclOrStmt(Keywords, Next))
911     return addUnwrappedLine();
912 }
913 
// Parses one structural element starting at the current token (which must not
// be a '{').
//
// The first switch dispatches constructs that are recognized by their leading
// token (Objective-C '@' keywords, 'asm', 'namespace', access specifiers,
// control flow keywords, 'extern "..." {', import/export, foreach and block
// macros, signals/slots keywords) to dedicated parsers. Cases that 'return'
// have fully handled the element; cases that 'break' fall through to the loop
// below, which keeps consuming tokens until the element is terminated (e.g. by
// ';', '}', or a '{' that does not start a braced list) or the end of input is
// reached.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include directive optionally followed by a string literal
  // forms its own line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    // '@{' is parsed as a braced list.
    if (FormatTok->Tok.is(tok::l_brace)) {
      nextToken();
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::objc_autoreleasepool:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterObjCDeclaration)
          addUnwrappedLine();
        parseBlock(/*MustBeDeclaration=*/false);
      }
      addUnwrappedLine();
      return;
    case tok::objc_try:
      // This branch isn't strictly necessary (the kw_try case below would
      // do this too after the tok::at is parsed above).  But be explicit.
      parseTryCatch();
      return;
    default:
      break;
    }
    break;
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Every token up to the closing brace is marked as finalized; the
      // closing brace itself ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    // 'inline namespace' is handled like a regular namespace.
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java and JavaScript the keyword is only consumed here; in all other
    // languages it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "<string>" {' is handled here; any other use of 'extern'
    // continues in the generic loop below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import' ['public'] "<string>" [';'].
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // A signals/slots keyword directly followed by ':' forms its own line.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic part: consume tokens until something terminates the element.
  do {
    const FormatToken *Previous = getPreviousToken();
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' is not consumed here; only the current line is finished.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by an identifier or simple type, a
      // parenthesized list and a '{' block that is parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token and then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier is all upper case, is either function-like
        // or at least five characters long, and is followed by a newline and
        // a token that may start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list that is
      // closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
               FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1311 
1312 bool UnwrappedLineParser::tryToParseLambda() {
1313   if (!Style.isCpp()) {
1314     nextToken();
1315     return false;
1316   }
1317   assert(FormatTok->is(tok::l_square));
1318   FormatToken &LSquare = *FormatTok;
1319   if (!tryToParseLambdaIntroducer())
1320     return false;
1321 
1322   while (FormatTok->isNot(tok::l_brace)) {
1323     if (FormatTok->isSimpleTypeSpecifier()) {
1324       nextToken();
1325       continue;
1326     }
1327     switch (FormatTok->Tok.getKind()) {
1328     case tok::l_brace:
1329       break;
1330     case tok::l_paren:
1331       parseParens();
1332       break;
1333     case tok::amp:
1334     case tok::star:
1335     case tok::kw_const:
1336     case tok::comma:
1337     case tok::less:
1338     case tok::greater:
1339     case tok::identifier:
1340     case tok::numeric_constant:
1341     case tok::coloncolon:
1342     case tok::kw_mutable:
1343       nextToken();
1344       break;
1345     case tok::arrow:
1346       FormatTok->Type = TT_LambdaArrow;
1347       nextToken();
1348       break;
1349     default:
1350       return true;
1351     }
1352   }
1353   LSquare.Type = TT_LambdaLSquare;
1354   parseChildBlock();
1355   return true;
1356 }
1357 
1358 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1359   const FormatToken* Previous = getPreviousToken();
1360   if (Previous &&
1361       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1362                          tok::kw_delete) ||
1363        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1364     nextToken();
1365     return false;
1366   }
1367   nextToken();
1368   parseSquare(/*LambdaIntroducer=*/true);
1369   return true;
1370 }
1371 
1372 void UnwrappedLineParser::tryToParseJSFunction() {
1373   assert(FormatTok->is(Keywords.kw_function) ||
1374          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1375   if (FormatTok->is(Keywords.kw_async))
1376     nextToken();
1377   // Consume "function".
1378   nextToken();
1379 
1380   // Consume * (generator function). Treat it like C++'s overloaded operators.
1381   if (FormatTok->is(tok::star)) {
1382     FormatTok->Type = TT_OverloadedOperator;
1383     nextToken();
1384   }
1385 
1386   // Consume function name.
1387   if (FormatTok->is(tok::identifier))
1388     nextToken();
1389 
1390   if (FormatTok->isNot(tok::l_paren))
1391     return;
1392 
1393   // Parse formal parameter list.
1394   parseParens();
1395 
1396   if (FormatTok->is(tok::colon)) {
1397     // Parse a type definition.
1398     nextToken();
1399 
1400     // Eat the type declaration. For braced inline object types, balance braces,
1401     // otherwise just parse until finding an l_brace for the function body.
1402     if (FormatTok->is(tok::l_brace))
1403       tryToParseBracedList();
1404     else
1405       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1406         nextToken();
1407   }
1408 
1409   if (FormatTok->is(tok::semi))
1410     return;
1411 
1412   parseChildBlock();
1413 }
1414 
1415 bool UnwrappedLineParser::tryToParseBracedList() {
1416   if (FormatTok->BlockKind == BK_Unknown)
1417     calculateBraceTypes();
1418   assert(FormatTok->BlockKind != BK_Unknown);
1419   if (FormatTok->BlockKind == BK_Block)
1420     return false;
1421   nextToken();
1422   parseBracedList();
1423   return true;
1424 }
1425 
// Parses the contents of a braced list up to and including the token of kind
// \p ClosingBraceKind. The callers visible in this file consume the opening
// token before calling this function.
//
// Returns true if the closing token was found and no stray ';' was seen.
// Returns false if a ';' was encountered outside of JavaScript (parsing stops
// right there unless \p ContinueOnSemicolons is set) or if the end of input
// was reached before the closing token.
// NOTE(review): the default arguments are declared in the header and are not
// visible here.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    // JavaScript-specific constructs are handled first. A 'continue' here
    // jumps straight to the '!eof()' check of the loop; otherwise control
    // falls through to the generic handling below.
    if (Style.Language == FormatStyle::LK_JavaScript) {
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        // Fat arrows can be followed by simple expressions or by child blocks
        // in curly braces.
        if (FormatTok->is(tok::l_brace)) {
          parseChildBlock();
          continue;
        }
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    // The closing token ends the list; it is consumed before returning.
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::caret:
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        parseChildBlock();
      }
      break;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->BlockKind = BK_BracedInit;
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // In Proto, '<' opens a nested list that is closed by '>'.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.Language == FormatStyle::LK_JavaScript) {
        nextToken();
        break;
      }
      HasError = true;
      // HasError was just set, so this returns false without consuming the
      // ';'.
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // End of input was reached without finding the closing token.
  return false;
}
1519 
1520 void UnwrappedLineParser::parseParens() {
1521   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1522   nextToken();
1523   do {
1524     switch (FormatTok->Tok.getKind()) {
1525     case tok::l_paren:
1526       parseParens();
1527       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1528         parseChildBlock();
1529       break;
1530     case tok::r_paren:
1531       nextToken();
1532       return;
1533     case tok::r_brace:
1534       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1535       return;
1536     case tok::l_square:
1537       tryToParseLambda();
1538       break;
1539     case tok::l_brace:
1540       if (!tryToParseBracedList())
1541         parseChildBlock();
1542       break;
1543     case tok::at:
1544       nextToken();
1545       if (FormatTok->Tok.is(tok::l_brace)) {
1546         nextToken();
1547         parseBracedList();
1548       }
1549       break;
1550     case tok::kw_class:
1551       if (Style.Language == FormatStyle::LK_JavaScript)
1552         parseRecord(/*ParseAsExpr=*/true);
1553       else
1554         nextToken();
1555       break;
1556     case tok::identifier:
1557       if (Style.Language == FormatStyle::LK_JavaScript &&
1558           (FormatTok->is(Keywords.kw_function) ||
1559            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1560         tryToParseJSFunction();
1561       else
1562         nextToken();
1563       break;
1564     default:
1565       nextToken();
1566       break;
1567     }
1568   } while (!eof());
1569 }
1570 
1571 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1572   if (!LambdaIntroducer) {
1573     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1574     if (tryToParseLambda())
1575       return;
1576   }
1577   do {
1578     switch (FormatTok->Tok.getKind()) {
1579     case tok::l_paren:
1580       parseParens();
1581       break;
1582     case tok::r_square:
1583       nextToken();
1584       return;
1585     case tok::r_brace:
1586       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1587       return;
1588     case tok::l_square:
1589       parseSquare();
1590       break;
1591     case tok::l_brace: {
1592       if (!tryToParseBracedList())
1593         parseChildBlock();
1594       break;
1595     }
1596     case tok::at:
1597       nextToken();
1598       if (FormatTok->Tok.is(tok::l_brace)) {
1599         nextToken();
1600         parseBracedList();
1601       }
1602       break;
1603     default:
1604       nextToken();
1605       break;
1606     }
1607   } while (!eof());
1608 }
1609 
1610 void UnwrappedLineParser::parseIfThenElse() {
1611   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1612   nextToken();
1613   if (FormatTok->Tok.is(tok::kw_constexpr))
1614     nextToken();
1615   if (FormatTok->Tok.is(tok::l_paren))
1616     parseParens();
1617   bool NeedsUnwrappedLine = false;
1618   if (FormatTok->Tok.is(tok::l_brace)) {
1619     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1620     parseBlock(/*MustBeDeclaration=*/false);
1621     if (Style.BraceWrapping.BeforeElse)
1622       addUnwrappedLine();
1623     else
1624       NeedsUnwrappedLine = true;
1625   } else {
1626     addUnwrappedLine();
1627     ++Line->Level;
1628     parseStructuralElement();
1629     --Line->Level;
1630   }
1631   if (FormatTok->Tok.is(tok::kw_else)) {
1632     nextToken();
1633     if (FormatTok->Tok.is(tok::l_brace)) {
1634       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1635       parseBlock(/*MustBeDeclaration=*/false);
1636       addUnwrappedLine();
1637     } else if (FormatTok->Tok.is(tok::kw_if)) {
1638       parseIfThenElse();
1639     } else {
1640       addUnwrappedLine();
1641       ++Line->Level;
1642       parseStructuralElement();
1643       if (FormatTok->is(tok::eof))
1644         addUnwrappedLine();
1645       --Line->Level;
1646     }
1647   } else if (NeedsUnwrappedLine) {
1648     addUnwrappedLine();
1649   }
1650 }
1651 
1652 void UnwrappedLineParser::parseTryCatch() {
1653   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1654   nextToken();
1655   bool NeedsUnwrappedLine = false;
1656   if (FormatTok->is(tok::colon)) {
1657     // We are in a function try block, what comes is an initializer list.
1658     nextToken();
1659     while (FormatTok->is(tok::identifier)) {
1660       nextToken();
1661       if (FormatTok->is(tok::l_paren))
1662         parseParens();
1663       if (FormatTok->is(tok::comma))
1664         nextToken();
1665     }
1666   }
1667   // Parse try with resource.
1668   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1669     parseParens();
1670   }
1671   if (FormatTok->is(tok::l_brace)) {
1672     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1673     parseBlock(/*MustBeDeclaration=*/false);
1674     if (Style.BraceWrapping.BeforeCatch) {
1675       addUnwrappedLine();
1676     } else {
1677       NeedsUnwrappedLine = true;
1678     }
1679   } else if (!FormatTok->is(tok::kw_catch)) {
1680     // The C++ standard requires a compound-statement after a try.
1681     // If there's none, we try to assume there's a structuralElement
1682     // and try to continue.
1683     addUnwrappedLine();
1684     ++Line->Level;
1685     parseStructuralElement();
1686     --Line->Level;
1687   }
1688   while (1) {
1689     if (FormatTok->is(tok::at))
1690       nextToken();
1691     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1692                              tok::kw___finally) ||
1693           ((Style.Language == FormatStyle::LK_Java ||
1694             Style.Language == FormatStyle::LK_JavaScript) &&
1695            FormatTok->is(Keywords.kw_finally)) ||
1696           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1697            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1698       break;
1699     nextToken();
1700     while (FormatTok->isNot(tok::l_brace)) {
1701       if (FormatTok->is(tok::l_paren)) {
1702         parseParens();
1703         continue;
1704       }
1705       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1706         return;
1707       nextToken();
1708     }
1709     NeedsUnwrappedLine = false;
1710     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1711     parseBlock(/*MustBeDeclaration=*/false);
1712     if (Style.BraceWrapping.BeforeCatch)
1713       addUnwrappedLine();
1714     else
1715       NeedsUnwrappedLine = true;
1716   }
1717   if (NeedsUnwrappedLine)
1718     addUnwrappedLine();
1719 }
1720 
1721 void UnwrappedLineParser::parseNamespace() {
1722   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1723 
1724   const FormatToken &InitialToken = *FormatTok;
1725   nextToken();
1726   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1727     nextToken();
1728   if (FormatTok->Tok.is(tok::l_brace)) {
1729     if (ShouldBreakBeforeBrace(Style, InitialToken))
1730       addUnwrappedLine();
1731 
1732     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1733                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1734                      DeclarationScopeStack.size() > 1);
1735     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1736     // Munch the semicolon after a namespace. This is more common than one would
1737     // think. Puttin the semicolon into its own line is very ugly.
1738     if (FormatTok->Tok.is(tok::semi))
1739       nextToken();
1740     addUnwrappedLine();
1741   }
1742   // FIXME: Add error handling.
1743 }
1744 
1745 void UnwrappedLineParser::parseNew() {
1746   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1747   nextToken();
1748   if (Style.Language != FormatStyle::LK_Java)
1749     return;
1750 
1751   // In Java, we can parse everything up to the parens, which aren't optional.
1752   do {
1753     // There should not be a ;, { or } before the new's open paren.
1754     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1755       return;
1756 
1757     // Consume the parens.
1758     if (FormatTok->is(tok::l_paren)) {
1759       parseParens();
1760 
1761       // If there is a class body of an anonymous class, consume that as child.
1762       if (FormatTok->is(tok::l_brace))
1763         parseChildBlock();
1764       return;
1765     }
1766     nextToken();
1767   } while (!eof());
1768 }
1769 
1770 void UnwrappedLineParser::parseForOrWhileLoop() {
1771   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1772          "'for', 'while' or foreach macro expected");
1773   nextToken();
1774   // JS' for await ( ...
1775   if (Style.Language == FormatStyle::LK_JavaScript &&
1776       FormatTok->is(Keywords.kw_await))
1777     nextToken();
1778   if (FormatTok->Tok.is(tok::l_paren))
1779     parseParens();
1780   if (FormatTok->Tok.is(tok::l_brace)) {
1781     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1782     parseBlock(/*MustBeDeclaration=*/false);
1783     addUnwrappedLine();
1784   } else {
1785     addUnwrappedLine();
1786     ++Line->Level;
1787     parseStructuralElement();
1788     --Line->Level;
1789   }
1790 }
1791 
1792 void UnwrappedLineParser::parseDoWhile() {
1793   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1794   nextToken();
1795   if (FormatTok->Tok.is(tok::l_brace)) {
1796     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1797     parseBlock(/*MustBeDeclaration=*/false);
1798     if (Style.BraceWrapping.IndentBraces)
1799       addUnwrappedLine();
1800   } else {
1801     addUnwrappedLine();
1802     ++Line->Level;
1803     parseStructuralElement();
1804     --Line->Level;
1805   }
1806 
1807   // FIXME: Add error handling.
1808   if (!FormatTok->Tok.is(tok::kw_while)) {
1809     addUnwrappedLine();
1810     return;
1811   }
1812 
1813   nextToken();
1814   parseStructuralElement();
1815 }
1816 
1817 void UnwrappedLineParser::parseLabel() {
1818   nextToken();
1819   unsigned OldLineLevel = Line->Level;
1820   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1821     --Line->Level;
1822   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1823     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1824     parseBlock(/*MustBeDeclaration=*/false);
1825     if (FormatTok->Tok.is(tok::kw_break)) {
1826       if (Style.BraceWrapping.AfterControlStatement)
1827         addUnwrappedLine();
1828       parseStructuralElement();
1829     }
1830     addUnwrappedLine();
1831   } else {
1832     if (FormatTok->is(tok::semi))
1833       nextToken();
1834     addUnwrappedLine();
1835   }
1836   Line->Level = OldLineLevel;
1837   if (FormatTok->isNot(tok::l_brace)) {
1838     parseStructuralElement();
1839     addUnwrappedLine();
1840   }
1841 }
1842 
1843 void UnwrappedLineParser::parseCaseLabel() {
1844   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1845   // FIXME: fix handling of complex expressions here.
1846   do {
1847     nextToken();
1848   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1849   parseLabel();
1850 }
1851 
1852 void UnwrappedLineParser::parseSwitch() {
1853   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1854   nextToken();
1855   if (FormatTok->Tok.is(tok::l_paren))
1856     parseParens();
1857   if (FormatTok->Tok.is(tok::l_brace)) {
1858     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1859     parseBlock(/*MustBeDeclaration=*/false);
1860     addUnwrappedLine();
1861   } else {
1862     addUnwrappedLine();
1863     ++Line->Level;
1864     parseStructuralElement();
1865     --Line->Level;
1866   }
1867 }
1868 
1869 void UnwrappedLineParser::parseAccessSpecifier() {
1870   nextToken();
1871   // Understand Qt's slots.
1872   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1873     nextToken();
1874   // Otherwise, we don't know what it is, and we'd better keep the next token.
1875   if (FormatTok->Tok.is(tok::colon))
1876     nextToken();
1877   addUnwrappedLine();
1878 }
1879 
1880 bool UnwrappedLineParser::parseEnum() {
1881   // Won't be 'enum' for NS_ENUMs.
1882   if (FormatTok->Tok.is(tok::kw_enum))
1883     nextToken();
1884 
1885   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1886   // declarations. An "enum" keyword followed by a colon would be a syntax
1887   // error and thus assume it is just an identifier.
1888   if (Style.Language == FormatStyle::LK_JavaScript &&
1889       FormatTok->isOneOf(tok::colon, tok::question))
1890     return false;
1891 
1892   // Eat up enum class ...
1893   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1894     nextToken();
1895 
1896   while (FormatTok->Tok.getIdentifierInfo() ||
1897          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1898                             tok::greater, tok::comma, tok::question)) {
1899     nextToken();
1900     // We can have macros or attributes in between 'enum' and the enum name.
1901     if (FormatTok->is(tok::l_paren))
1902       parseParens();
1903     if (FormatTok->is(tok::identifier)) {
1904       nextToken();
1905       // If there are two identifiers in a row, this is likely an elaborate
1906       // return type. In Java, this can be "implements", etc.
1907       if (Style.isCpp() && FormatTok->is(tok::identifier))
1908         return false;
1909     }
1910   }
1911 
1912   // Just a declaration or something is wrong.
1913   if (FormatTok->isNot(tok::l_brace))
1914     return true;
1915   FormatTok->BlockKind = BK_Block;
1916 
1917   if (Style.Language == FormatStyle::LK_Java) {
1918     // Java enums are different.
1919     parseJavaEnumBody();
1920     return true;
1921   }
1922   if (Style.Language == FormatStyle::LK_Proto) {
1923     parseBlock(/*MustBeDeclaration=*/true);
1924     return true;
1925   }
1926 
1927   // Parse enum body.
1928   nextToken();
1929   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1930   if (HasError) {
1931     if (FormatTok->is(tok::semi))
1932       nextToken();
1933     addUnwrappedLine();
1934   }
1935   return true;
1936 
1937   // There is no addUnwrappedLine() here so that we fall through to parsing a
1938   // structural element afterwards. Thus, in "enum A {} n, m;",
1939   // "} n, m;" will end up in one unwrapped line.
1940 }
1941 
1942 void UnwrappedLineParser::parseJavaEnumBody() {
1943   // Determine whether the enum is simple, i.e. does not have a semicolon or
1944   // constants with class bodies. Simple enums can be formatted like braced
1945   // lists, contracted to a single line, etc.
1946   unsigned StoredPosition = Tokens->getPosition();
1947   bool IsSimple = true;
1948   FormatToken *Tok = Tokens->getNextToken();
1949   while (Tok) {
1950     if (Tok->is(tok::r_brace))
1951       break;
1952     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1953       IsSimple = false;
1954       break;
1955     }
1956     // FIXME: This will also mark enums with braces in the arguments to enum
1957     // constants as "not simple". This is probably fine in practice, though.
1958     Tok = Tokens->getNextToken();
1959   }
1960   FormatTok = Tokens->setPosition(StoredPosition);
1961 
1962   if (IsSimple) {
1963     nextToken();
1964     parseBracedList();
1965     addUnwrappedLine();
1966     return;
1967   }
1968 
1969   // Parse the body of a more complex enum.
1970   // First add a line for everything up to the "{".
1971   nextToken();
1972   addUnwrappedLine();
1973   ++Line->Level;
1974 
1975   // Parse the enum constants.
1976   while (FormatTok) {
1977     if (FormatTok->is(tok::l_brace)) {
1978       // Parse the constant's class body.
1979       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1980                  /*MunchSemi=*/false);
1981     } else if (FormatTok->is(tok::l_paren)) {
1982       parseParens();
1983     } else if (FormatTok->is(tok::comma)) {
1984       nextToken();
1985       addUnwrappedLine();
1986     } else if (FormatTok->is(tok::semi)) {
1987       nextToken();
1988       addUnwrappedLine();
1989       break;
1990     } else if (FormatTok->is(tok::r_brace)) {
1991       addUnwrappedLine();
1992       break;
1993     } else {
1994       nextToken();
1995     }
1996   }
1997 
1998   // Parse the class body after the enum's ";" if any.
1999   parseLevel(/*HasOpeningBrace=*/true);
2000   nextToken();
2001   --Line->Level;
2002   addUnwrappedLine();
2003 }
2004 
2005 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2006   const FormatToken &InitialToken = *FormatTok;
2007   nextToken();
2008 
2009   // The actual identifier can be a nested name specifier, and in macros
2010   // it is often token-pasted.
2011   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2012                             tok::kw___attribute, tok::kw___declspec,
2013                             tok::kw_alignas) ||
2014          ((Style.Language == FormatStyle::LK_Java ||
2015            Style.Language == FormatStyle::LK_JavaScript) &&
2016           FormatTok->isOneOf(tok::period, tok::comma))) {
2017     if (Style.Language == FormatStyle::LK_JavaScript &&
2018         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2019       // JavaScript/TypeScript supports inline object types in
2020       // extends/implements positions:
2021       //     class Foo implements {bar: number} { }
2022       nextToken();
2023       if (FormatTok->is(tok::l_brace)) {
2024         tryToParseBracedList();
2025         continue;
2026       }
2027     }
2028     bool IsNonMacroIdentifier =
2029         FormatTok->is(tok::identifier) &&
2030         FormatTok->TokenText != FormatTok->TokenText.upper();
2031     nextToken();
2032     // We can have macros or attributes in between 'class' and the class name.
2033     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2034       parseParens();
2035   }
2036 
2037   // Note that parsing away template declarations here leads to incorrectly
2038   // accepting function declarations as record declarations.
2039   // In general, we cannot solve this problem. Consider:
2040   // class A<int> B() {}
2041   // which can be a function definition or a class definition when B() is a
2042   // macro. If we find enough real-world cases where this is a problem, we
2043   // can parse for the 'template' keyword in the beginning of the statement,
2044   // and thus rule out the record production in case there is no template
2045   // (this would still leave us with an ambiguity between template function
2046   // and class declarations).
2047   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2048     while (!eof()) {
2049       if (FormatTok->is(tok::l_brace)) {
2050         calculateBraceTypes(/*ExpectClassBody=*/true);
2051         if (!tryToParseBracedList())
2052           break;
2053       }
2054       if (FormatTok->Tok.is(tok::semi))
2055         return;
2056       nextToken();
2057     }
2058   }
2059   if (FormatTok->Tok.is(tok::l_brace)) {
2060     if (ParseAsExpr) {
2061       parseChildBlock();
2062     } else {
2063       if (ShouldBreakBeforeBrace(Style, InitialToken))
2064         addUnwrappedLine();
2065 
2066       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2067                  /*MunchSemi=*/false);
2068     }
2069   }
2070   // There is no addUnwrappedLine() here so that we fall through to parsing a
2071   // structural element afterwards. Thus, in "class A {} n, m;",
2072   // "} n, m;" will end up in one unwrapped line.
2073 }
2074 
2075 void UnwrappedLineParser::parseObjCProtocolList() {
2076   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2077   do
2078     nextToken();
2079   while (!eof() && FormatTok->Tok.isNot(tok::greater));
2080   nextToken(); // Skip '>'.
2081 }
2082 
2083 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2084   do {
2085     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2086       nextToken();
2087       addUnwrappedLine();
2088       break;
2089     }
2090     if (FormatTok->is(tok::l_brace)) {
2091       parseBlock(/*MustBeDeclaration=*/false);
2092       // In ObjC interfaces, nothing should be following the "}".
2093       addUnwrappedLine();
2094     } else if (FormatTok->is(tok::r_brace)) {
2095       // Ignore stray "}". parseStructuralElement doesn't consume them.
2096       nextToken();
2097       addUnwrappedLine();
2098     } else {
2099       parseStructuralElement();
2100     }
2101   } while (!eof());
2102 }
2103 
2104 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2105   nextToken();
2106   nextToken(); // interface name
2107 
2108   // @interface can be followed by either a base class, or a category.
2109   if (FormatTok->Tok.is(tok::colon)) {
2110     nextToken();
2111     nextToken(); // base class name
2112   } else if (FormatTok->Tok.is(tok::l_paren))
2113     // Skip category, if present.
2114     parseParens();
2115 
2116   if (FormatTok->Tok.is(tok::less))
2117     parseObjCProtocolList();
2118 
2119   if (FormatTok->Tok.is(tok::l_brace)) {
2120     if (Style.BraceWrapping.AfterObjCDeclaration)
2121       addUnwrappedLine();
2122     parseBlock(/*MustBeDeclaration=*/true);
2123   }
2124 
2125   // With instance variables, this puts '}' on its own line.  Without instance
2126   // variables, this ends the @interface line.
2127   addUnwrappedLine();
2128 
2129   parseObjCUntilAtEnd();
2130 }
2131 
2132 void UnwrappedLineParser::parseObjCProtocol() {
2133   nextToken();
2134   nextToken(); // protocol name
2135 
2136   if (FormatTok->Tok.is(tok::less))
2137     parseObjCProtocolList();
2138 
2139   // Check for protocol declaration.
2140   if (FormatTok->Tok.is(tok::semi)) {
2141     nextToken();
2142     return addUnwrappedLine();
2143   }
2144 
2145   addUnwrappedLine();
2146   parseObjCUntilAtEnd();
2147 }
2148 
2149 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2150   bool IsImport = FormatTok->is(Keywords.kw_import);
2151   assert(IsImport || FormatTok->is(tok::kw_export));
2152   nextToken();
2153 
2154   // Consume the "default" in "export default class/function".
2155   if (FormatTok->is(tok::kw_default))
2156     nextToken();
2157 
2158   // Consume "async function", "function" and "default function", so that these
2159   // get parsed as free-standing JS functions, i.e. do not require a trailing
2160   // semicolon.
2161   if (FormatTok->is(Keywords.kw_async))
2162     nextToken();
2163   if (FormatTok->is(Keywords.kw_function)) {
2164     nextToken();
2165     return;
2166   }
2167 
2168   // For imports, `export *`, `export {...}`, consume the rest of the line up
2169   // to the terminating `;`. For everything else, just return and continue
2170   // parsing the structural element, i.e. the declaration or expression for
2171   // `export default`.
2172   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2173       !FormatTok->isStringLiteral())
2174     return;
2175 
2176   while (!eof()) {
2177     if (FormatTok->is(tok::semi))
2178       return;
2179     if (Line->Tokens.size() == 0) {
2180       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2181       // import statement should terminate.
2182       return;
2183     }
2184     if (FormatTok->is(tok::l_brace)) {
2185       FormatTok->BlockKind = BK_Block;
2186       nextToken();
2187       parseBracedList();
2188     } else {
2189       nextToken();
2190     }
2191   }
2192 }
2193 
2194 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2195                                                  StringRef Prefix = "") {
2196   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2197                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2198   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2199                                                     E = Line.Tokens.end();
2200        I != E; ++I) {
2201     llvm::dbgs() << I->Tok->Tok.getName() << "["
2202                  << "T=" << I->Tok->Type
2203                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2204   }
2205   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2206                                                     E = Line.Tokens.end();
2207        I != E; ++I) {
2208     const UnwrappedLineNode &Node = *I;
2209     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2210              I = Node.Children.begin(),
2211              E = Node.Children.end();
2212          I != E; ++I) {
2213       printDebugInfo(*I, "\nChild: ");
2214     }
2215   }
2216   llvm::dbgs() << "\n";
2217 }
2218 
2219 void UnwrappedLineParser::addUnwrappedLine() {
2220   if (Line->Tokens.empty())
2221     return;
2222   DEBUG({
2223     if (CurrentLines == &Lines)
2224       printDebugInfo(*Line);
2225   });
2226   CurrentLines->push_back(std::move(*Line));
2227   Line->Tokens.clear();
2228   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2229   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2230     CurrentLines->append(
2231         std::make_move_iterator(PreprocessorDirectives.begin()),
2232         std::make_move_iterator(PreprocessorDirectives.end()));
2233     PreprocessorDirectives.clear();
2234   }
2235 }
2236 
2237 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2238 
2239 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2240   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2241          FormatTok.NewlinesBefore > 0;
2242 }
2243 
2244 // Checks if \p FormatTok is a line comment that continues the line comment
2245 // section on \p Line.
2246 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2247                                         const UnwrappedLine &Line,
2248                                         llvm::Regex &CommentPragmasRegex) {
2249   if (Line.Tokens.empty())
2250     return false;
2251 
2252   StringRef IndentContent = FormatTok.TokenText;
2253   if (FormatTok.TokenText.startswith("//") ||
2254       FormatTok.TokenText.startswith("/*"))
2255     IndentContent = FormatTok.TokenText.substr(2);
2256   if (CommentPragmasRegex.match(IndentContent))
2257     return false;
2258 
2259   // If Line starts with a line comment, then FormatTok continues the comment
2260   // section if its original column is greater or equal to the original start
2261   // column of the line.
2262   //
2263   // Define the min column token of a line as follows: if a line ends in '{' or
2264   // contains a '{' followed by a line comment, then the min column token is
2265   // that '{'. Otherwise, the min column token of the line is the first token of
2266   // the line.
2267   //
2268   // If Line starts with a token other than a line comment, then FormatTok
2269   // continues the comment section if its original column is greater than the
2270   // original start column of the min column token of the line.
2271   //
2272   // For example, the second line comment continues the first in these cases:
2273   //
2274   // // first line
2275   // // second line
2276   //
2277   // and:
2278   //
2279   // // first line
2280   //  // second line
2281   //
2282   // and:
2283   //
2284   // int i; // first line
2285   //  // second line
2286   //
2287   // and:
2288   //
2289   // do { // first line
2290   //      // second line
2291   //   int i;
2292   // } while (true);
2293   //
2294   // and:
2295   //
2296   // enum {
2297   //   a, // first line
2298   //    // second line
2299   //   b
2300   // };
2301   //
2302   // The second line comment doesn't continue the first in these cases:
2303   //
2304   //   // first line
2305   //  // second line
2306   //
2307   // and:
2308   //
2309   // int i; // first line
2310   // // second line
2311   //
2312   // and:
2313   //
2314   // do { // first line
2315   //   // second line
2316   //   int i;
2317   // } while (true);
2318   //
2319   // and:
2320   //
2321   // enum {
2322   //   a, // first line
2323   //   // second line
2324   // };
2325   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2326 
2327   // Scan for '{//'. If found, use the column of '{' as a min column for line
2328   // comment section continuation.
2329   const FormatToken *PreviousToken = nullptr;
2330   for (const UnwrappedLineNode &Node : Line.Tokens) {
2331     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2332         isLineComment(*Node.Tok)) {
2333       MinColumnToken = PreviousToken;
2334       break;
2335     }
2336     PreviousToken = Node.Tok;
2337 
2338     // Grab the last newline preceding a token in this unwrapped line.
2339     if (Node.Tok->NewlinesBefore > 0) {
2340       MinColumnToken = Node.Tok;
2341     }
2342   }
2343   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2344     MinColumnToken = PreviousToken;
2345   }
2346 
2347   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2348                               MinColumnToken);
2349 }
2350 
2351 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2352   bool JustComments = Line->Tokens.empty();
2353   for (SmallVectorImpl<FormatToken *>::const_iterator
2354            I = CommentsBeforeNextToken.begin(),
2355            E = CommentsBeforeNextToken.end();
2356        I != E; ++I) {
2357     // Line comments that belong to the same line comment section are put on the
2358     // same line since later we might want to reflow content between them.
2359     // Additional fine-grained breaking of line comment sections is controlled
2360     // by the class BreakableLineCommentSection in case it is desirable to keep
2361     // several line comment sections in the same unwrapped line.
2362     //
2363     // FIXME: Consider putting separate line comment sections as children to the
2364     // unwrapped line instead.
2365     (*I)->ContinuesLineCommentSection =
2366         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2367     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2368       addUnwrappedLine();
2369     pushToken(*I);
2370   }
2371   if (NewlineBeforeNext && JustComments)
2372     addUnwrappedLine();
2373   CommentsBeforeNextToken.clear();
2374 }
2375 
2376 void UnwrappedLineParser::nextToken(int LevelDifference) {
2377   if (eof())
2378     return;
2379   flushComments(isOnNewLine(*FormatTok));
2380   pushToken(FormatTok);
2381   if (Style.Language != FormatStyle::LK_JavaScript)
2382     readToken(LevelDifference);
2383   else
2384     readTokenWithJavaScriptASI();
2385 }
2386 
2387 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2388   // FIXME: This is a dirty way to access the previous token. Find a better
2389   // solution.
2390   if (!Line || Line->Tokens.empty())
2391     return nullptr;
2392   return Line->Tokens.back().Tok;
2393 }
2394 
2395 void UnwrappedLineParser::distributeComments(
2396     const SmallVectorImpl<FormatToken *> &Comments,
2397     const FormatToken *NextTok) {
2398   // Whether or not a line comment token continues a line is controlled by
2399   // the method continuesLineCommentSection, with the following caveat:
2400   //
2401   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2402   // that each comment line from the trail is aligned with the next token, if
2403   // the next token exists. If a trail exists, the beginning of the maximal
2404   // trail is marked as a start of a new comment section.
2405   //
2406   // For example in this code:
2407   //
2408   // int a; // line about a
2409   //   // line 1 about b
2410   //   // line 2 about b
2411   //   int b;
2412   //
2413   // the two lines about b form a maximal trail, so there are two sections, the
2414   // first one consisting of the single comment "// line about a" and the
2415   // second one consisting of the next two comments.
2416   if (Comments.empty())
2417     return;
2418   bool ShouldPushCommentsInCurrentLine = true;
2419   bool HasTrailAlignedWithNextToken = false;
2420   unsigned StartOfTrailAlignedWithNextToken = 0;
2421   if (NextTok) {
2422     // We are skipping the first element intentionally.
2423     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2424       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2425         HasTrailAlignedWithNextToken = true;
2426         StartOfTrailAlignedWithNextToken = i;
2427       }
2428     }
2429   }
2430   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2431     FormatToken *FormatTok = Comments[i];
2432     if (HasTrailAlignedWithNextToken &&
2433         i == StartOfTrailAlignedWithNextToken) {
2434       FormatTok->ContinuesLineCommentSection = false;
2435     } else {
2436       FormatTok->ContinuesLineCommentSection =
2437           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2438     }
2439     if (!FormatTok->ContinuesLineCommentSection &&
2440         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2441       ShouldPushCommentsInCurrentLine = false;
2442     }
2443     if (ShouldPushCommentsInCurrentLine) {
2444       pushToken(FormatTok);
2445     } else {
2446       CommentsBeforeNextToken.push_back(FormatTok);
2447     }
2448   }
2449 }
2450 
2451 void UnwrappedLineParser::readToken(int LevelDifference) {
2452   SmallVector<FormatToken *, 1> Comments;
2453   do {
2454     FormatTok = Tokens->getNextToken();
2455     assert(FormatTok);
2456     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2457            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2458       distributeComments(Comments, FormatTok);
2459       Comments.clear();
2460       // If there is an unfinished unwrapped line, we flush the preprocessor
2461       // directives only after that unwrapped line was finished later.
2462       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2463       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2464       assert((LevelDifference >= 0 ||
2465               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2466              "LevelDifference makes Line->Level negative");
2467       Line->Level += LevelDifference;
2468       // Comments stored before the preprocessor directive need to be output
2469       // before the preprocessor directive, at the same level as the
2470       // preprocessor directive, as we consider them to apply to the directive.
2471       flushComments(isOnNewLine(*FormatTok));
2472       parsePPDirective();
2473     }
2474     while (FormatTok->Type == TT_ConflictStart ||
2475            FormatTok->Type == TT_ConflictEnd ||
2476            FormatTok->Type == TT_ConflictAlternative) {
2477       if (FormatTok->Type == TT_ConflictStart) {
2478         conditionalCompilationStart(/*Unreachable=*/false);
2479       } else if (FormatTok->Type == TT_ConflictAlternative) {
2480         conditionalCompilationAlternative();
2481       } else if (FormatTok->Type == TT_ConflictEnd) {
2482         conditionalCompilationEnd();
2483       }
2484       FormatTok = Tokens->getNextToken();
2485       FormatTok->MustBreakBefore = true;
2486     }
2487 
2488     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2489         !Line->InPPDirective) {
2490       continue;
2491     }
2492 
2493     if (!FormatTok->Tok.is(tok::comment)) {
2494       distributeComments(Comments, FormatTok);
2495       Comments.clear();
2496       return;
2497     }
2498 
2499     Comments.push_back(FormatTok);
2500   } while (!eof());
2501 
2502   distributeComments(Comments, nullptr);
2503   Comments.clear();
2504 }
2505 
2506 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2507   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2508   if (MustBreakBeforeNextToken) {
2509     Line->Tokens.back().Tok->MustBreakBefore = true;
2510     MustBreakBeforeNextToken = false;
2511   }
2512 }
2513 
2514 } // end namespace format
2515 } // end namespace clang
2516