1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 static bool isLineComment(const FormatToken &FormatTok) {
59   return FormatTok.is(tok::comment) &&
60          FormatTok.TokenText.startswith("//");
61 }
62 
63 // Checks if \p FormatTok is a line comment that continues the line comment
64 // \p Previous. The original column of \p MinColumnToken is used to determine
65 // whether \p FormatTok is indented enough to the right to continue \p Previous.
66 static bool continuesLineComment(const FormatToken &FormatTok,
67                                  const FormatToken *Previous,
68                                  const FormatToken *MinColumnToken) {
69   if (!Previous || !MinColumnToken)
70     return false;
71   unsigned MinContinueColumn =
72       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
73   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
74          isLineComment(*Previous) &&
75          FormatTok.OriginalColumn >= MinContinueColumn;
76 }
77 
78 class ScopedMacroState : public FormatTokenSource {
79 public:
80   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
81                    FormatToken *&ResetToken)
82       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
83         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
84         Token(nullptr), PreviousToken(nullptr) {
85     TokenSource = this;
86     Line.Level = 0;
87     Line.InPPDirective = true;
88   }
89 
90   ~ScopedMacroState() override {
91     TokenSource = PreviousTokenSource;
92     ResetToken = Token;
93     Line.InPPDirective = false;
94     Line.Level = PreviousLineLevel;
95   }
96 
97   FormatToken *getNextToken() override {
98     // The \c UnwrappedLineParser guards against this by never calling
99     // \c getNextToken() after it has encountered the first eof token.
100     assert(!eof());
101     PreviousToken = Token;
102     Token = PreviousTokenSource->getNextToken();
103     if (eof())
104       return getFakeEOF();
105     return Token;
106   }
107 
108   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
109 
110   FormatToken *setPosition(unsigned Position) override {
111     PreviousToken = nullptr;
112     Token = PreviousTokenSource->setPosition(Position);
113     return Token;
114   }
115 
116 private:
117   bool eof() {
118     return Token && Token->HasUnescapedNewline &&
119            !continuesLineComment(*Token, PreviousToken,
120                                  /*MinColumnToken=*/PreviousToken);
121   }
122 
123   FormatToken *getFakeEOF() {
124     static bool EOFInitialized = false;
125     static FormatToken FormatTok;
126     if (!EOFInitialized) {
127       FormatTok.Tok.startToken();
128       FormatTok.Tok.setKind(tok::eof);
129       EOFInitialized = true;
130     }
131     return &FormatTok;
132   }
133 
134   UnwrappedLine &Line;
135   FormatTokenSource *&TokenSource;
136   FormatToken *&ResetToken;
137   unsigned PreviousLineLevel;
138   FormatTokenSource *PreviousTokenSource;
139 
140   FormatToken *Token;
141   FormatToken *PreviousToken;
142 };
143 
144 } // end anonymous namespace
145 
146 class ScopedLineState {
147 public:
148   ScopedLineState(UnwrappedLineParser &Parser,
149                   bool SwitchToPreprocessorLines = false)
150       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
151     if (SwitchToPreprocessorLines)
152       Parser.CurrentLines = &Parser.PreprocessorDirectives;
153     else if (!Parser.Line->Tokens.empty())
154       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
155     PreBlockLine = std::move(Parser.Line);
156     Parser.Line = llvm::make_unique<UnwrappedLine>();
157     Parser.Line->Level = PreBlockLine->Level;
158     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
159   }
160 
161   ~ScopedLineState() {
162     if (!Parser.Line->Tokens.empty()) {
163       Parser.addUnwrappedLine();
164     }
165     assert(Parser.Line->Tokens.empty());
166     Parser.Line = std::move(PreBlockLine);
167     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
168       Parser.MustBreakBeforeNextToken = true;
169     Parser.CurrentLines = OriginalLines;
170   }
171 
172 private:
173   UnwrappedLineParser &Parser;
174 
175   std::unique_ptr<UnwrappedLine> PreBlockLine;
176   SmallVectorImpl<UnwrappedLine> *OriginalLines;
177 };
178 
179 class CompoundStatementIndenter {
180 public:
181   CompoundStatementIndenter(UnwrappedLineParser *Parser,
182                             const FormatStyle &Style, unsigned &LineLevel)
183       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
184     if (Style.BraceWrapping.AfterControlStatement)
185       Parser->addUnwrappedLine();
186     if (Style.BraceWrapping.IndentBraces)
187       ++LineLevel;
188   }
189   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
190 
191 private:
192   unsigned &LineLevel;
193   unsigned OldLineLevel;
194 };
195 
196 namespace {
197 
198 class IndexedTokenSource : public FormatTokenSource {
199 public:
200   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
201       : Tokens(Tokens), Position(-1) {}
202 
203   FormatToken *getNextToken() override {
204     ++Position;
205     return Tokens[Position];
206   }
207 
208   unsigned getPosition() override {
209     assert(Position >= 0);
210     return Position;
211   }
212 
213   FormatToken *setPosition(unsigned P) override {
214     Position = P;
215     return Tokens[Position];
216   }
217 
218   void reset() { Position = -1; }
219 
220 private:
221   ArrayRef<FormatToken *> Tokens;
222   int Position;
223 };
224 
225 } // end anonymous namespace
226 
227 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
228                                          const AdditionalKeywords &Keywords,
229                                          ArrayRef<FormatToken *> Tokens,
230                                          UnwrappedLineConsumer &Callback)
231     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
232       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
233       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
234       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
235       IfNdefCondition(nullptr), FoundIncludeGuardStart(false),
236       IncludeGuardRejected(false) {}
237 
238 void UnwrappedLineParser::reset() {
239   PPBranchLevel = -1;
240   IfNdefCondition = nullptr;
241   FoundIncludeGuardStart = false;
242   IncludeGuardRejected = false;
243   Line.reset(new UnwrappedLine);
244   CommentsBeforeNextToken.clear();
245   FormatTok = nullptr;
246   MustBreakBeforeNextToken = false;
247   PreprocessorDirectives.clear();
248   CurrentLines = &Lines;
249   DeclarationScopeStack.clear();
250   PPStack.clear();
251 }
252 
253 void UnwrappedLineParser::parse() {
254   IndexedTokenSource TokenSource(AllTokens);
255   do {
256     DEBUG(llvm::dbgs() << "----\n");
257     reset();
258     Tokens = &TokenSource;
259     TokenSource.reset();
260 
261     readToken();
262     parseFile();
263     // Create line with eof token.
264     pushToken(FormatTok);
265     addUnwrappedLine();
266 
267     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
268                                                   E = Lines.end();
269          I != E; ++I) {
270       Callback.consumeUnwrappedLine(*I);
271     }
272     Callback.finishRun();
273     Lines.clear();
274     while (!PPLevelBranchIndex.empty() &&
275            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
276       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
277       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
278     }
279     if (!PPLevelBranchIndex.empty()) {
280       ++PPLevelBranchIndex.back();
281       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
282       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
283     }
284   } while (!PPLevelBranchIndex.empty());
285 }
286 
287 void UnwrappedLineParser::parseFile() {
288   // The top-level context in a file always has declarations, except for pre-
289   // processor directives and JavaScript files.
290   bool MustBeDeclaration =
291       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
292   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
293                                           MustBeDeclaration);
294   if (Style.Language == FormatStyle::LK_TextProto)
295     parseBracedList();
296   else
297     parseLevel(/*HasOpeningBrace=*/false);
298   // Make sure to format the remaining tokens.
299   flushComments(true);
300   addUnwrappedLine();
301 }
302 
303 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
304   bool SwitchLabelEncountered = false;
305   do {
306     tok::TokenKind kind = FormatTok->Tok.getKind();
307     if (FormatTok->Type == TT_MacroBlockBegin) {
308       kind = tok::l_brace;
309     } else if (FormatTok->Type == TT_MacroBlockEnd) {
310       kind = tok::r_brace;
311     }
312 
313     switch (kind) {
314     case tok::comment:
315       nextToken();
316       addUnwrappedLine();
317       break;
318     case tok::l_brace:
319       // FIXME: Add parameter whether this can happen - if this happens, we must
320       // be in a non-declaration context.
321       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
322         continue;
323       parseBlock(/*MustBeDeclaration=*/false);
324       addUnwrappedLine();
325       break;
326     case tok::r_brace:
327       if (HasOpeningBrace)
328         return;
329       nextToken();
330       addUnwrappedLine();
331       break;
332     case tok::kw_default:
333     case tok::kw_case:
334       if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration) {
335         // A 'case: string' style field declaration.
336         parseStructuralElement();
337         break;
338       }
339       if (!SwitchLabelEncountered &&
340           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
341         ++Line->Level;
342       SwitchLabelEncountered = true;
343       parseStructuralElement();
344       break;
345     default:
346       parseStructuralElement();
347       break;
348     }
349   } while (!eof());
350 }
351 
352 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
353   // We'll parse forward through the tokens until we hit
354   // a closing brace or eof - note that getNextToken() will
355   // parse macros, so this will magically work inside macro
356   // definitions, too.
357   unsigned StoredPosition = Tokens->getPosition();
358   FormatToken *Tok = FormatTok;
359   const FormatToken *PrevTok = getPreviousToken();
360   // Keep a stack of positions of lbrace tokens. We will
361   // update information about whether an lbrace starts a
362   // braced init list or a different block during the loop.
363   SmallVector<FormatToken *, 8> LBraceStack;
364   assert(Tok->Tok.is(tok::l_brace));
365   do {
366     // Get next non-comment token.
367     FormatToken *NextTok;
368     unsigned ReadTokens = 0;
369     do {
370       NextTok = Tokens->getNextToken();
371       ++ReadTokens;
372     } while (NextTok->is(tok::comment));
373 
374     switch (Tok->Tok.getKind()) {
375     case tok::l_brace:
376       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
377         if (PrevTok->is(tok::colon))
378           // A colon indicates this code is in a type, or a braced list
379           // following a label in an object literal ({a: {b: 1}}). The code
380           // below could be confused by semicolons between the individual
381           // members in a type member list, which would normally trigger
382           // BK_Block. In both cases, this must be parsed as an inline braced
383           // init.
384           Tok->BlockKind = BK_BracedInit;
385         else if (PrevTok->is(tok::r_paren))
386           // `) { }` can only occur in function or method declarations in JS.
387           Tok->BlockKind = BK_Block;
388       } else {
389         Tok->BlockKind = BK_Unknown;
390       }
391       LBraceStack.push_back(Tok);
392       break;
393     case tok::r_brace:
394       if (LBraceStack.empty())
395         break;
396       if (LBraceStack.back()->BlockKind == BK_Unknown) {
397         bool ProbablyBracedList = false;
398         if (Style.Language == FormatStyle::LK_Proto) {
399           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
400         } else {
401           // Using OriginalColumn to distinguish between ObjC methods and
402           // binary operators is a bit hacky.
403           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
404                                   NextTok->OriginalColumn == 0;
405 
406           // If there is a comma, semicolon or right paren after the closing
407           // brace, we assume this is a braced initializer list.  Note that
408           // regardless how we mark inner braces here, we will overwrite the
409           // BlockKind later if we parse a braced list (where all blocks
410           // inside are by default braced lists), or when we explicitly detect
411           // blocks (for example while parsing lambdas).
412           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
413           // braced list in JS.
414           ProbablyBracedList =
415               (Style.Language == FormatStyle::LK_JavaScript &&
416                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
417                                 Keywords.kw_as)) ||
418               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
419               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
420                                tok::r_paren, tok::r_square, tok::l_brace,
421                                tok::l_square, tok::ellipsis) ||
422               (NextTok->is(tok::identifier) &&
423                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
424               (NextTok->is(tok::semi) &&
425                (!ExpectClassBody || LBraceStack.size() != 1)) ||
426               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
427         }
428         if (ProbablyBracedList) {
429           Tok->BlockKind = BK_BracedInit;
430           LBraceStack.back()->BlockKind = BK_BracedInit;
431         } else {
432           Tok->BlockKind = BK_Block;
433           LBraceStack.back()->BlockKind = BK_Block;
434         }
435       }
436       LBraceStack.pop_back();
437       break;
438     case tok::at:
439     case tok::semi:
440     case tok::kw_if:
441     case tok::kw_while:
442     case tok::kw_for:
443     case tok::kw_switch:
444     case tok::kw_try:
445     case tok::kw___try:
446       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
447         LBraceStack.back()->BlockKind = BK_Block;
448       break;
449     default:
450       break;
451     }
452     PrevTok = Tok;
453     Tok = NextTok;
454   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
455 
456   // Assume other blocks for all unclosed opening braces.
457   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
458     if (LBraceStack[i]->BlockKind == BK_Unknown)
459       LBraceStack[i]->BlockKind = BK_Block;
460   }
461 
462   FormatTok = Tokens->setPosition(StoredPosition);
463 }
464 
// Folds the std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
470 
471 size_t UnwrappedLineParser::computePPHash() const {
472   size_t h = 0;
473   for (const auto &i : PPStack) {
474     hash_combine(h, size_t(i.Kind));
475     hash_combine(h, i.Line);
476   }
477   return h;
478 }
479 
480 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
481                                      bool MunchSemi) {
482   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
483          "'{' or macro block token expected");
484   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
485   FormatTok->BlockKind = BK_Block;
486 
487   size_t PPStartHash = computePPHash();
488 
489   unsigned InitialLevel = Line->Level;
490   nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);
491 
492   if (MacroBlock && FormatTok->is(tok::l_paren))
493     parseParens();
494 
495   size_t NbPreprocessorDirectives =
496       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
497   addUnwrappedLine();
498   size_t OpeningLineIndex =
499       CurrentLines->empty()
500           ? (UnwrappedLine::kInvalidIndex)
501           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
502 
503   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
504                                           MustBeDeclaration);
505   if (AddLevel)
506     ++Line->Level;
507   parseLevel(/*HasOpeningBrace=*/true);
508 
509   if (eof())
510     return;
511 
512   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
513                  : !FormatTok->is(tok::r_brace)) {
514     Line->Level = InitialLevel;
515     FormatTok->BlockKind = BK_Block;
516     return;
517   }
518 
519   size_t PPEndHash = computePPHash();
520 
521   // Munch the closing brace.
522   nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);
523 
524   if (MacroBlock && FormatTok->is(tok::l_paren))
525     parseParens();
526 
527   if (MunchSemi && FormatTok->Tok.is(tok::semi))
528     nextToken();
529   Line->Level = InitialLevel;
530 
531   if (PPStartHash == PPEndHash) {
532     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
533     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
534       // Update the opening line to add the forward reference as well
535       (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
536           CurrentLines->size() - 1;
537     }
538   }
539 }
540 
541 static bool isGoogScope(const UnwrappedLine &Line) {
542   // FIXME: Closure-library specific stuff should not be hard-coded but be
543   // configurable.
544   if (Line.Tokens.size() < 4)
545     return false;
546   auto I = Line.Tokens.begin();
547   if (I->Tok->TokenText != "goog")
548     return false;
549   ++I;
550   if (I->Tok->isNot(tok::period))
551     return false;
552   ++I;
553   if (I->Tok->TokenText != "scope")
554     return false;
555   ++I;
556   return I->Tok->is(tok::l_paren);
557 }
558 
559 static bool isIIFE(const UnwrappedLine &Line,
560                    const AdditionalKeywords &Keywords) {
561   // Look for the start of an immediately invoked anonymous function.
562   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
563   // This is commonly done in JavaScript to create a new, anonymous scope.
564   // Example: (function() { ... })()
565   if (Line.Tokens.size() < 3)
566     return false;
567   auto I = Line.Tokens.begin();
568   if (I->Tok->isNot(tok::l_paren))
569     return false;
570   ++I;
571   if (I->Tok->isNot(Keywords.kw_function))
572     return false;
573   ++I;
574   return I->Tok->is(tok::l_paren);
575 }
576 
577 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
578                                    const FormatToken &InitialToken) {
579   if (InitialToken.is(tok::kw_namespace))
580     return Style.BraceWrapping.AfterNamespace;
581   if (InitialToken.is(tok::kw_class))
582     return Style.BraceWrapping.AfterClass;
583   if (InitialToken.is(tok::kw_union))
584     return Style.BraceWrapping.AfterUnion;
585   if (InitialToken.is(tok::kw_struct))
586     return Style.BraceWrapping.AfterStruct;
587   return false;
588 }
589 
590 void UnwrappedLineParser::parseChildBlock() {
591   FormatTok->BlockKind = BK_Block;
592   nextToken();
593   {
594     bool SkipIndent =
595         (Style.Language == FormatStyle::LK_JavaScript &&
596          (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
597     ScopedLineState LineState(*this);
598     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
599                                             /*MustBeDeclaration=*/false);
600     Line->Level += SkipIndent ? 0 : 1;
601     parseLevel(/*HasOpeningBrace=*/true);
602     flushComments(isOnNewLine(*FormatTok));
603     Line->Level -= SkipIndent ? 0 : 1;
604   }
605   nextToken();
606 }
607 
608 void UnwrappedLineParser::parsePPDirective() {
609   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
610   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
611   nextToken();
612 
613   if (!FormatTok->Tok.getIdentifierInfo()) {
614     parsePPUnknown();
615     return;
616   }
617 
618   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
619   case tok::pp_define:
620     parsePPDefine();
621     return;
622   case tok::pp_if:
623     parsePPIf(/*IfDef=*/false);
624     break;
625   case tok::pp_ifdef:
626   case tok::pp_ifndef:
627     parsePPIf(/*IfDef=*/true);
628     break;
629   case tok::pp_else:
630     parsePPElse();
631     break;
632   case tok::pp_elif:
633     parsePPElIf();
634     break;
635   case tok::pp_endif:
636     parsePPEndIf();
637     break;
638   default:
639     parsePPUnknown();
640     break;
641   }
642 }
643 
644 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
645   size_t Line = CurrentLines->size();
646   if (CurrentLines == &PreprocessorDirectives)
647     Line += Lines.size();
648 
649   if (Unreachable ||
650       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
651     PPStack.push_back({PP_Unreachable, Line});
652   else
653     PPStack.push_back({PP_Conditional, Line});
654 }
655 
656 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
657   ++PPBranchLevel;
658   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
659   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
660     PPLevelBranchIndex.push_back(0);
661     PPLevelBranchCount.push_back(0);
662   }
663   PPChainBranchIndex.push(0);
664   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
665   conditionalCompilationCondition(Unreachable || Skip);
666 }
667 
668 void UnwrappedLineParser::conditionalCompilationAlternative() {
669   if (!PPStack.empty())
670     PPStack.pop_back();
671   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
672   if (!PPChainBranchIndex.empty())
673     ++PPChainBranchIndex.top();
674   conditionalCompilationCondition(
675       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
676       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
677 }
678 
679 void UnwrappedLineParser::conditionalCompilationEnd() {
680   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
681   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
682     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
683       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
684     }
685   }
686   // Guard against #endif's without #if.
687   if (PPBranchLevel > -1)
688     --PPBranchLevel;
689   if (!PPChainBranchIndex.empty())
690     PPChainBranchIndex.pop();
691   if (!PPStack.empty())
692     PPStack.pop_back();
693 }
694 
695 void UnwrappedLineParser::parsePPIf(bool IfDef) {
696   bool IfNDef = FormatTok->is(tok::pp_ifndef);
697   nextToken();
698   bool Unreachable = false;
699   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
700     Unreachable = true;
701   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
702     Unreachable = true;
703   conditionalCompilationStart(Unreachable);
704   FormatToken *IfCondition = FormatTok;
705   // If there's a #ifndef on the first line, and the only lines before it are
706   // comments, it could be an include guard.
707   bool MaybeIncludeGuard = IfNDef;
708   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) {
709     for (auto &Line : Lines) {
710       if (!Line.Tokens.front().Tok->is(tok::comment)) {
711         MaybeIncludeGuard = false;
712         IncludeGuardRejected = true;
713         break;
714       }
715     }
716   }
717   --PPBranchLevel;
718   parsePPUnknown();
719   ++PPBranchLevel;
720   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard)
721     IfNdefCondition = IfCondition;
722 }
723 
724 void UnwrappedLineParser::parsePPElse() {
725   // If a potential include guard has an #else, it's not an include guard.
726   if (FoundIncludeGuardStart && PPBranchLevel == 0)
727     FoundIncludeGuardStart = false;
728   conditionalCompilationAlternative();
729   if (PPBranchLevel > -1)
730     --PPBranchLevel;
731   parsePPUnknown();
732   ++PPBranchLevel;
733 }
734 
735 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
736 
737 void UnwrappedLineParser::parsePPEndIf() {
738   conditionalCompilationEnd();
739   parsePPUnknown();
740   // If the #endif of a potential include guard is the last thing in the file,
741   // then we count it as a real include guard and subtract one from every
742   // preprocessor indent.
743   unsigned TokenPosition = Tokens->getPosition();
744   FormatToken *PeekNext = AllTokens[TokenPosition];
745   if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) &&
746       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
747     for (auto &Line : Lines)
748       if (Line.InPPDirective && Line.Level > 0)
749         --Line.Level;
750 }
751 
752 void UnwrappedLineParser::parsePPDefine() {
753   nextToken();
754 
755   if (FormatTok->Tok.getKind() != tok::identifier) {
756     parsePPUnknown();
757     return;
758   }
759   if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) {
760     FoundIncludeGuardStart = true;
761     for (auto &Line : Lines) {
762       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
763         FoundIncludeGuardStart = false;
764         break;
765       }
766     }
767   }
768   IfNdefCondition = nullptr;
769   nextToken();
770   if (FormatTok->Tok.getKind() == tok::l_paren &&
771       FormatTok->WhitespaceRange.getBegin() ==
772           FormatTok->WhitespaceRange.getEnd()) {
773     parseParens();
774   }
775   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
776     Line->Level += PPBranchLevel + 1;
777   addUnwrappedLine();
778   ++Line->Level;
779 
780   // Errors during a preprocessor directive can only affect the layout of the
781   // preprocessor directive, and thus we ignore them. An alternative approach
782   // would be to use the same approach we use on the file level (no
783   // re-indentation if there was a structural error) within the macro
784   // definition.
785   parseFile();
786 }
787 
788 void UnwrappedLineParser::parsePPUnknown() {
789   do {
790     nextToken();
791   } while (!eof());
792   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
793     Line->Level += PPBranchLevel + 1;
794   addUnwrappedLine();
795   IfNdefCondition = nullptr;
796 }
797 
798 // Here we blacklist certain tokens that are not usually the first token in an
799 // unwrapped line. This is used in attempt to distinguish macro calls without
800 // trailing semicolons from other constructs split to several lines.
801 static bool tokenCanStartNewLine(const clang::Token &Tok) {
802   // Semicolon can be a null-statement, l_square can be a start of a macro or
803   // a C++11 attribute, but this doesn't seem to be common.
804   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
805          Tok.isNot(tok::l_square) &&
806          // Tokens that can only be used as binary operators and a part of
807          // overloaded operator names.
808          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
809          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
810          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
811          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
812          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
813          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
814          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
815          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
816          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
817          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
818          Tok.isNot(tok::lesslessequal) &&
819          // Colon is used in labels, base class lists, initializer lists,
820          // range-based for loops, ternary operator, but should never be the
821          // first token in an unwrapped line.
822          Tok.isNot(tok::colon) &&
823          // 'noexcept' is a trailing annotation.
824          Tok.isNot(tok::kw_noexcept);
825 }
826 
827 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
828                           const FormatToken *FormatTok) {
829   // FIXME: This returns true for C/C++ keywords like 'struct'.
830   return FormatTok->is(tok::identifier) &&
831          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
832           !FormatTok->isOneOf(
833               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
834               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
835               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
836               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
837               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
838               Keywords.kw_instanceof, Keywords.kw_interface,
839               Keywords.kw_throws, Keywords.kw_from));
840 }
841 
842 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
843                                  const FormatToken *FormatTok) {
844   return FormatTok->Tok.isLiteral() ||
845          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
846          mustBeJSIdent(Keywords, FormatTok);
847 }
848 
849 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
850 // when encountered after a value (see mustBeJSIdentOrValue).
851 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
852                            const FormatToken *FormatTok) {
853   return FormatTok->isOneOf(
854       tok::kw_return, Keywords.kw_yield,
855       // conditionals
856       tok::kw_if, tok::kw_else,
857       // loops
858       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
859       // switch/case
860       tok::kw_switch, tok::kw_case,
861       // exceptions
862       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
863       // declaration
864       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
865       Keywords.kw_async, Keywords.kw_function,
866       // import/export
867       Keywords.kw_import, tok::kw_export);
868 }
869 
870 // readTokenWithJavaScriptASI reads the next token and terminates the current
871 // line if JavaScript Automatic Semicolon Insertion must
872 // happen between the current token and the next token.
873 //
874 // This method is conservative - it cannot cover all edge cases of JavaScript,
875 // but only aims to correctly handle certain well known cases. It *must not*
876 // return true in speculative cases.
877 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
878   FormatToken *Previous = FormatTok;
879   readToken();
880   FormatToken *Next = FormatTok;
881 
882   bool IsOnSameLine =
883       CommentsBeforeNextToken.empty()
884           ? Next->NewlinesBefore == 0
885           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
886   if (IsOnSameLine)
887     return;
888 
889   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
890   bool PreviousStartsTemplateExpr =
891       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
892   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
893     // If the token before the previous one is an '@', the previous token is an
894     // annotation and can precede another identifier/value.
895     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
896     if (PrePrevious->is(tok::at))
897       return;
898   }
899   if (Next->is(tok::exclaim) && PreviousMustBeValue)
900     return addUnwrappedLine();
901   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
902   bool NextEndsTemplateExpr =
903       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
904   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
905       (PreviousMustBeValue ||
906        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
907                          tok::minusminus)))
908     return addUnwrappedLine();
909   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
910       isJSDeclOrStmt(Keywords, Next))
911     return addUnwrappedLine();
912 }
913 
// Parses one structural element starting at the current token.
//
// The first switch dispatches on tokens that begin a dedicated construct
// (Objective-C '@' keywords, inline asm, namespaces, access specifiers,
// control flow statements, extern blocks, JavaScript import/export and
// macro-like identifiers). Cases that 'return' have fully handled the
// element; cases that 'break' fall into the generic loop below, which consumes
// tokens one by one until one of its cases finishes the element or eof() is
// reached.
//
// The current token must not be '{' (asserted).
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen include: consume the directive, an optional string literal, and
  // end the line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    // '@{' is parsed as a braced list.
    if (FormatTok->Tok.is(tok::l_brace)) {
      nextToken();
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::objc_autoreleasepool:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterObjCDeclaration)
          addUnwrappedLine();
        parseBlock(/*MustBeDeclaration=*/false);
      }
      addUnwrappedLine();
      return;
    case tok::objc_try:
      // This branch isn't strictly necessary (the kw_try case below would
      // do this too after the tok::at is parsed above).  But be explicit.
      parseTryCatch();
      return;
    default:
      break;
    }
    break;
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      // Every token up to the closing '}' is marked as Finalized; the closing
      // brace itself is tagged TT_InlineASMBrace and ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    // 'inline namespace' is handled like a plain namespace.
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // For Java and JavaScript the keyword is only consumed; for all other
    // languages it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // extern "..." { ... }: the block is parsed as a declaration scope. An
    // extra indentation level is only added when the brace is wrapped.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import' ['public'] string-literal [';'].
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        // Not followed by a string literal: return without ending the line.
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals:', 'Q_SIGNALS:', 'slots:' and 'Q_SLOTS:' get their own line in
    // C++ code when followed by a colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic token loop: runs until a case ends the element or eof().
  do {
    const FormatToken *Previous = getPreviousToken();
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef NS_ENUM(...)' and friends are parsed as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' is not consumed here; only the current line is ended.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by an identifier or simple type specifier, a
      // parenthesized list and a '{' that is parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then restore the token position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase identifier (at least 5 characters long,
        // or followed by parentheses) that is followed by a line break and by
        // a token that can start a new line is put on its own line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list that is
      // closed by '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
               FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1311 
1312 bool UnwrappedLineParser::tryToParseLambda() {
1313   if (!Style.isCpp()) {
1314     nextToken();
1315     return false;
1316   }
1317   const FormatToken* Previous = getPreviousToken();
1318   if (Previous &&
1319       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1320                          tok::kw_delete) ||
1321        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1322     nextToken();
1323     return false;
1324   }
1325   assert(FormatTok->is(tok::l_square));
1326   FormatToken &LSquare = *FormatTok;
1327   if (!tryToParseLambdaIntroducer())
1328     return false;
1329 
1330   while (FormatTok->isNot(tok::l_brace)) {
1331     if (FormatTok->isSimpleTypeSpecifier()) {
1332       nextToken();
1333       continue;
1334     }
1335     switch (FormatTok->Tok.getKind()) {
1336     case tok::l_brace:
1337       break;
1338     case tok::l_paren:
1339       parseParens();
1340       break;
1341     case tok::amp:
1342     case tok::star:
1343     case tok::kw_const:
1344     case tok::comma:
1345     case tok::less:
1346     case tok::greater:
1347     case tok::identifier:
1348     case tok::numeric_constant:
1349     case tok::coloncolon:
1350     case tok::kw_mutable:
1351       nextToken();
1352       break;
1353     case tok::arrow:
1354       FormatTok->Type = TT_LambdaArrow;
1355       nextToken();
1356       break;
1357     default:
1358       return true;
1359     }
1360   }
1361   LSquare.Type = TT_LambdaLSquare;
1362   parseChildBlock();
1363   return true;
1364 }
1365 
1366 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1367   nextToken();
1368   if (FormatTok->is(tok::equal)) {
1369     nextToken();
1370     if (FormatTok->is(tok::r_square)) {
1371       nextToken();
1372       return true;
1373     }
1374     if (FormatTok->isNot(tok::comma))
1375       return false;
1376     nextToken();
1377   } else if (FormatTok->is(tok::amp)) {
1378     nextToken();
1379     if (FormatTok->is(tok::r_square)) {
1380       nextToken();
1381       return true;
1382     }
1383     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1384       return false;
1385     }
1386     if (FormatTok->is(tok::comma))
1387       nextToken();
1388   } else if (FormatTok->is(tok::r_square)) {
1389     nextToken();
1390     return true;
1391   }
1392   do {
1393     if (FormatTok->is(tok::amp))
1394       nextToken();
1395     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1396       return false;
1397     nextToken();
1398     if (FormatTok->is(tok::ellipsis))
1399       nextToken();
1400     if (FormatTok->is(tok::comma)) {
1401       nextToken();
1402     } else if (FormatTok->is(tok::r_square)) {
1403       nextToken();
1404       return true;
1405     } else {
1406       return false;
1407     }
1408   } while (!eof());
1409   return false;
1410 }
1411 
1412 void UnwrappedLineParser::tryToParseJSFunction() {
1413   assert(FormatTok->is(Keywords.kw_function) ||
1414          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1415   if (FormatTok->is(Keywords.kw_async))
1416     nextToken();
1417   // Consume "function".
1418   nextToken();
1419 
1420   // Consume * (generator function). Treat it like C++'s overloaded operators.
1421   if (FormatTok->is(tok::star)) {
1422     FormatTok->Type = TT_OverloadedOperator;
1423     nextToken();
1424   }
1425 
1426   // Consume function name.
1427   if (FormatTok->is(tok::identifier))
1428     nextToken();
1429 
1430   if (FormatTok->isNot(tok::l_paren))
1431     return;
1432 
1433   // Parse formal parameter list.
1434   parseParens();
1435 
1436   if (FormatTok->is(tok::colon)) {
1437     // Parse a type definition.
1438     nextToken();
1439 
1440     // Eat the type declaration. For braced inline object types, balance braces,
1441     // otherwise just parse until finding an l_brace for the function body.
1442     if (FormatTok->is(tok::l_brace))
1443       tryToParseBracedList();
1444     else
1445       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1446         nextToken();
1447   }
1448 
1449   if (FormatTok->is(tok::semi))
1450     return;
1451 
1452   parseChildBlock();
1453 }
1454 
1455 bool UnwrappedLineParser::tryToParseBracedList() {
1456   if (FormatTok->BlockKind == BK_Unknown)
1457     calculateBraceTypes();
1458   assert(FormatTok->BlockKind != BK_Unknown);
1459   if (FormatTok->BlockKind == BK_Block)
1460     return false;
1461   nextToken();
1462   parseBracedList();
1463   return true;
1464 }
1465 
1466 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1467                                           tok::TokenKind ClosingBraceKind) {
1468   bool HasError = false;
1469 
1470   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1471   // replace this by using parseAssigmentExpression() inside.
1472   do {
1473     if (Style.Language == FormatStyle::LK_JavaScript) {
1474       if (FormatTok->is(Keywords.kw_function) ||
1475           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1476         tryToParseJSFunction();
1477         continue;
1478       }
1479       if (FormatTok->is(TT_JsFatArrow)) {
1480         nextToken();
1481         // Fat arrows can be followed by simple expressions or by child blocks
1482         // in curly braces.
1483         if (FormatTok->is(tok::l_brace)) {
1484           parseChildBlock();
1485           continue;
1486         }
1487       }
1488       if (FormatTok->is(tok::l_brace)) {
1489         // Could be a method inside of a braced list `{a() { return 1; }}`.
1490         if (tryToParseBracedList())
1491           continue;
1492         parseChildBlock();
1493       }
1494     }
1495     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1496       nextToken();
1497       return !HasError;
1498     }
1499     switch (FormatTok->Tok.getKind()) {
1500     case tok::caret:
1501       nextToken();
1502       if (FormatTok->is(tok::l_brace)) {
1503         parseChildBlock();
1504       }
1505       break;
1506     case tok::l_square:
1507       tryToParseLambda();
1508       break;
1509     case tok::l_paren:
1510       parseParens();
1511       // JavaScript can just have free standing methods and getters/setters in
1512       // object literals. Detect them by a "{" following ")".
1513       if (Style.Language == FormatStyle::LK_JavaScript) {
1514         if (FormatTok->is(tok::l_brace))
1515           parseChildBlock();
1516         break;
1517       }
1518       break;
1519     case tok::l_brace:
1520       // Assume there are no blocks inside a braced init list apart
1521       // from the ones we explicitly parse out (like lambdas).
1522       FormatTok->BlockKind = BK_BracedInit;
1523       nextToken();
1524       parseBracedList();
1525       break;
1526     case tok::less:
1527       if (Style.Language == FormatStyle::LK_Proto) {
1528         nextToken();
1529         parseBracedList(/*ContinueOnSemicolons=*/false,
1530                         /*ClosingBraceKind=*/tok::greater);
1531       } else {
1532         nextToken();
1533       }
1534       break;
1535     case tok::semi:
1536       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1537       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1538       // used for error recovery if we have otherwise determined that this is
1539       // a braced list.
1540       if (Style.Language == FormatStyle::LK_JavaScript) {
1541         nextToken();
1542         break;
1543       }
1544       HasError = true;
1545       if (!ContinueOnSemicolons)
1546         return !HasError;
1547       nextToken();
1548       break;
1549     case tok::comma:
1550       nextToken();
1551       break;
1552     default:
1553       nextToken();
1554       break;
1555     }
1556   } while (!eof());
1557   return false;
1558 }
1559 
1560 void UnwrappedLineParser::parseParens() {
1561   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1562   nextToken();
1563   do {
1564     switch (FormatTok->Tok.getKind()) {
1565     case tok::l_paren:
1566       parseParens();
1567       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1568         parseChildBlock();
1569       break;
1570     case tok::r_paren:
1571       nextToken();
1572       return;
1573     case tok::r_brace:
1574       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1575       return;
1576     case tok::l_square:
1577       tryToParseLambda();
1578       break;
1579     case tok::l_brace:
1580       if (!tryToParseBracedList())
1581         parseChildBlock();
1582       break;
1583     case tok::at:
1584       nextToken();
1585       if (FormatTok->Tok.is(tok::l_brace)) {
1586         nextToken();
1587         parseBracedList();
1588       }
1589       break;
1590     case tok::kw_class:
1591       if (Style.Language == FormatStyle::LK_JavaScript)
1592         parseRecord(/*ParseAsExpr=*/true);
1593       else
1594         nextToken();
1595       break;
1596     case tok::identifier:
1597       if (Style.Language == FormatStyle::LK_JavaScript &&
1598           (FormatTok->is(Keywords.kw_function) ||
1599            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1600         tryToParseJSFunction();
1601       else
1602         nextToken();
1603       break;
1604     default:
1605       nextToken();
1606       break;
1607     }
1608   } while (!eof());
1609 }
1610 
1611 void UnwrappedLineParser::parseSquare() {
1612   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1613   if (tryToParseLambda())
1614     return;
1615   do {
1616     switch (FormatTok->Tok.getKind()) {
1617     case tok::l_paren:
1618       parseParens();
1619       break;
1620     case tok::r_square:
1621       nextToken();
1622       return;
1623     case tok::r_brace:
1624       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1625       return;
1626     case tok::l_square:
1627       parseSquare();
1628       break;
1629     case tok::l_brace: {
1630       if (!tryToParseBracedList())
1631         parseChildBlock();
1632       break;
1633     }
1634     case tok::at:
1635       nextToken();
1636       if (FormatTok->Tok.is(tok::l_brace)) {
1637         nextToken();
1638         parseBracedList();
1639       }
1640       break;
1641     default:
1642       nextToken();
1643       break;
1644     }
1645   } while (!eof());
1646 }
1647 
1648 void UnwrappedLineParser::parseIfThenElse() {
1649   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1650   nextToken();
1651   if (FormatTok->Tok.is(tok::kw_constexpr))
1652     nextToken();
1653   if (FormatTok->Tok.is(tok::l_paren))
1654     parseParens();
1655   bool NeedsUnwrappedLine = false;
1656   if (FormatTok->Tok.is(tok::l_brace)) {
1657     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1658     parseBlock(/*MustBeDeclaration=*/false);
1659     if (Style.BraceWrapping.BeforeElse)
1660       addUnwrappedLine();
1661     else
1662       NeedsUnwrappedLine = true;
1663   } else {
1664     addUnwrappedLine();
1665     ++Line->Level;
1666     parseStructuralElement();
1667     --Line->Level;
1668   }
1669   if (FormatTok->Tok.is(tok::kw_else)) {
1670     nextToken();
1671     if (FormatTok->Tok.is(tok::l_brace)) {
1672       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1673       parseBlock(/*MustBeDeclaration=*/false);
1674       addUnwrappedLine();
1675     } else if (FormatTok->Tok.is(tok::kw_if)) {
1676       parseIfThenElse();
1677     } else {
1678       addUnwrappedLine();
1679       ++Line->Level;
1680       parseStructuralElement();
1681       if (FormatTok->is(tok::eof))
1682         addUnwrappedLine();
1683       --Line->Level;
1684     }
1685   } else if (NeedsUnwrappedLine) {
1686     addUnwrappedLine();
1687   }
1688 }
1689 
1690 void UnwrappedLineParser::parseTryCatch() {
1691   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1692   nextToken();
1693   bool NeedsUnwrappedLine = false;
1694   if (FormatTok->is(tok::colon)) {
1695     // We are in a function try block, what comes is an initializer list.
1696     nextToken();
1697     while (FormatTok->is(tok::identifier)) {
1698       nextToken();
1699       if (FormatTok->is(tok::l_paren))
1700         parseParens();
1701       if (FormatTok->is(tok::comma))
1702         nextToken();
1703     }
1704   }
1705   // Parse try with resource.
1706   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1707     parseParens();
1708   }
1709   if (FormatTok->is(tok::l_brace)) {
1710     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1711     parseBlock(/*MustBeDeclaration=*/false);
1712     if (Style.BraceWrapping.BeforeCatch) {
1713       addUnwrappedLine();
1714     } else {
1715       NeedsUnwrappedLine = true;
1716     }
1717   } else if (!FormatTok->is(tok::kw_catch)) {
1718     // The C++ standard requires a compound-statement after a try.
1719     // If there's none, we try to assume there's a structuralElement
1720     // and try to continue.
1721     addUnwrappedLine();
1722     ++Line->Level;
1723     parseStructuralElement();
1724     --Line->Level;
1725   }
1726   while (1) {
1727     if (FormatTok->is(tok::at))
1728       nextToken();
1729     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1730                              tok::kw___finally) ||
1731           ((Style.Language == FormatStyle::LK_Java ||
1732             Style.Language == FormatStyle::LK_JavaScript) &&
1733            FormatTok->is(Keywords.kw_finally)) ||
1734           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1735            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1736       break;
1737     nextToken();
1738     while (FormatTok->isNot(tok::l_brace)) {
1739       if (FormatTok->is(tok::l_paren)) {
1740         parseParens();
1741         continue;
1742       }
1743       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1744         return;
1745       nextToken();
1746     }
1747     NeedsUnwrappedLine = false;
1748     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1749     parseBlock(/*MustBeDeclaration=*/false);
1750     if (Style.BraceWrapping.BeforeCatch)
1751       addUnwrappedLine();
1752     else
1753       NeedsUnwrappedLine = true;
1754   }
1755   if (NeedsUnwrappedLine)
1756     addUnwrappedLine();
1757 }
1758 
1759 void UnwrappedLineParser::parseNamespace() {
1760   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1761 
1762   const FormatToken &InitialToken = *FormatTok;
1763   nextToken();
1764   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1765     nextToken();
1766   if (FormatTok->Tok.is(tok::l_brace)) {
1767     if (ShouldBreakBeforeBrace(Style, InitialToken))
1768       addUnwrappedLine();
1769 
1770     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1771                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1772                      DeclarationScopeStack.size() > 1);
1773     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1774     // Munch the semicolon after a namespace. This is more common than one would
1775     // think. Puttin the semicolon into its own line is very ugly.
1776     if (FormatTok->Tok.is(tok::semi))
1777       nextToken();
1778     addUnwrappedLine();
1779   }
1780   // FIXME: Add error handling.
1781 }
1782 
1783 void UnwrappedLineParser::parseNew() {
1784   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1785   nextToken();
1786   if (Style.Language != FormatStyle::LK_Java)
1787     return;
1788 
1789   // In Java, we can parse everything up to the parens, which aren't optional.
1790   do {
1791     // There should not be a ;, { or } before the new's open paren.
1792     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1793       return;
1794 
1795     // Consume the parens.
1796     if (FormatTok->is(tok::l_paren)) {
1797       parseParens();
1798 
1799       // If there is a class body of an anonymous class, consume that as child.
1800       if (FormatTok->is(tok::l_brace))
1801         parseChildBlock();
1802       return;
1803     }
1804     nextToken();
1805   } while (!eof());
1806 }
1807 
1808 void UnwrappedLineParser::parseForOrWhileLoop() {
1809   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1810          "'for', 'while' or foreach macro expected");
1811   nextToken();
1812   // JS' for await ( ...
1813   if (Style.Language == FormatStyle::LK_JavaScript &&
1814       FormatTok->is(Keywords.kw_await))
1815     nextToken();
1816   if (FormatTok->Tok.is(tok::l_paren))
1817     parseParens();
1818   if (FormatTok->Tok.is(tok::l_brace)) {
1819     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1820     parseBlock(/*MustBeDeclaration=*/false);
1821     addUnwrappedLine();
1822   } else {
1823     addUnwrappedLine();
1824     ++Line->Level;
1825     parseStructuralElement();
1826     --Line->Level;
1827   }
1828 }
1829 
1830 void UnwrappedLineParser::parseDoWhile() {
1831   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1832   nextToken();
1833   if (FormatTok->Tok.is(tok::l_brace)) {
1834     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1835     parseBlock(/*MustBeDeclaration=*/false);
1836     if (Style.BraceWrapping.IndentBraces)
1837       addUnwrappedLine();
1838   } else {
1839     addUnwrappedLine();
1840     ++Line->Level;
1841     parseStructuralElement();
1842     --Line->Level;
1843   }
1844 
1845   // FIXME: Add error handling.
1846   if (!FormatTok->Tok.is(tok::kw_while)) {
1847     addUnwrappedLine();
1848     return;
1849   }
1850 
1851   nextToken();
1852   parseStructuralElement();
1853 }
1854 
1855 void UnwrappedLineParser::parseLabel() {
1856   nextToken();
1857   unsigned OldLineLevel = Line->Level;
1858   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1859     --Line->Level;
1860   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1861     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1862     parseBlock(/*MustBeDeclaration=*/false);
1863     if (FormatTok->Tok.is(tok::kw_break)) {
1864       if (Style.BraceWrapping.AfterControlStatement)
1865         addUnwrappedLine();
1866       parseStructuralElement();
1867     }
1868     addUnwrappedLine();
1869   } else {
1870     if (FormatTok->is(tok::semi))
1871       nextToken();
1872     addUnwrappedLine();
1873   }
1874   Line->Level = OldLineLevel;
1875   if (FormatTok->isNot(tok::l_brace)) {
1876     parseStructuralElement();
1877     addUnwrappedLine();
1878   }
1879 }
1880 
1881 void UnwrappedLineParser::parseCaseLabel() {
1882   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1883   // FIXME: fix handling of complex expressions here.
1884   do {
1885     nextToken();
1886   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1887   parseLabel();
1888 }
1889 
1890 void UnwrappedLineParser::parseSwitch() {
1891   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1892   nextToken();
1893   if (FormatTok->Tok.is(tok::l_paren))
1894     parseParens();
1895   if (FormatTok->Tok.is(tok::l_brace)) {
1896     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1897     parseBlock(/*MustBeDeclaration=*/false);
1898     addUnwrappedLine();
1899   } else {
1900     addUnwrappedLine();
1901     ++Line->Level;
1902     parseStructuralElement();
1903     --Line->Level;
1904   }
1905 }
1906 
1907 void UnwrappedLineParser::parseAccessSpecifier() {
1908   nextToken();
1909   // Understand Qt's slots.
1910   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1911     nextToken();
1912   // Otherwise, we don't know what it is, and we'd better keep the next token.
1913   if (FormatTok->Tok.is(tok::colon))
1914     nextToken();
1915   addUnwrappedLine();
1916 }
1917 
1918 bool UnwrappedLineParser::parseEnum() {
1919   // Won't be 'enum' for NS_ENUMs.
1920   if (FormatTok->Tok.is(tok::kw_enum))
1921     nextToken();
1922 
1923   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1924   // declarations. An "enum" keyword followed by a colon would be a syntax
1925   // error and thus assume it is just an identifier.
1926   if (Style.Language == FormatStyle::LK_JavaScript &&
1927       FormatTok->isOneOf(tok::colon, tok::question))
1928     return false;
1929 
1930   // Eat up enum class ...
1931   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1932     nextToken();
1933 
1934   while (FormatTok->Tok.getIdentifierInfo() ||
1935          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1936                             tok::greater, tok::comma, tok::question)) {
1937     nextToken();
1938     // We can have macros or attributes in between 'enum' and the enum name.
1939     if (FormatTok->is(tok::l_paren))
1940       parseParens();
1941     if (FormatTok->is(tok::identifier)) {
1942       nextToken();
1943       // If there are two identifiers in a row, this is likely an elaborate
1944       // return type. In Java, this can be "implements", etc.
1945       if (Style.isCpp() && FormatTok->is(tok::identifier))
1946         return false;
1947     }
1948   }
1949 
1950   // Just a declaration or something is wrong.
1951   if (FormatTok->isNot(tok::l_brace))
1952     return true;
1953   FormatTok->BlockKind = BK_Block;
1954 
1955   if (Style.Language == FormatStyle::LK_Java) {
1956     // Java enums are different.
1957     parseJavaEnumBody();
1958     return true;
1959   }
1960   if (Style.Language == FormatStyle::LK_Proto) {
1961     parseBlock(/*MustBeDeclaration=*/true);
1962     return true;
1963   }
1964 
1965   // Parse enum body.
1966   nextToken();
1967   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1968   if (HasError) {
1969     if (FormatTok->is(tok::semi))
1970       nextToken();
1971     addUnwrappedLine();
1972   }
1973   return true;
1974 
1975   // There is no addUnwrappedLine() here so that we fall through to parsing a
1976   // structural element afterwards. Thus, in "enum A {} n, m;",
1977   // "} n, m;" will end up in one unwrapped line.
1978 }
1979 
1980 void UnwrappedLineParser::parseJavaEnumBody() {
1981   // Determine whether the enum is simple, i.e. does not have a semicolon or
1982   // constants with class bodies. Simple enums can be formatted like braced
1983   // lists, contracted to a single line, etc.
1984   unsigned StoredPosition = Tokens->getPosition();
1985   bool IsSimple = true;
1986   FormatToken *Tok = Tokens->getNextToken();
1987   while (Tok) {
1988     if (Tok->is(tok::r_brace))
1989       break;
1990     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1991       IsSimple = false;
1992       break;
1993     }
1994     // FIXME: This will also mark enums with braces in the arguments to enum
1995     // constants as "not simple". This is probably fine in practice, though.
1996     Tok = Tokens->getNextToken();
1997   }
1998   FormatTok = Tokens->setPosition(StoredPosition);
1999 
2000   if (IsSimple) {
2001     nextToken();
2002     parseBracedList();
2003     addUnwrappedLine();
2004     return;
2005   }
2006 
2007   // Parse the body of a more complex enum.
2008   // First add a line for everything up to the "{".
2009   nextToken();
2010   addUnwrappedLine();
2011   ++Line->Level;
2012 
2013   // Parse the enum constants.
2014   while (FormatTok) {
2015     if (FormatTok->is(tok::l_brace)) {
2016       // Parse the constant's class body.
2017       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2018                  /*MunchSemi=*/false);
2019     } else if (FormatTok->is(tok::l_paren)) {
2020       parseParens();
2021     } else if (FormatTok->is(tok::comma)) {
2022       nextToken();
2023       addUnwrappedLine();
2024     } else if (FormatTok->is(tok::semi)) {
2025       nextToken();
2026       addUnwrappedLine();
2027       break;
2028     } else if (FormatTok->is(tok::r_brace)) {
2029       addUnwrappedLine();
2030       break;
2031     } else {
2032       nextToken();
2033     }
2034   }
2035 
2036   // Parse the class body after the enum's ";" if any.
2037   parseLevel(/*HasOpeningBrace=*/true);
2038   nextToken();
2039   --Line->Level;
2040   addUnwrappedLine();
2041 }
2042 
// Parses a record declaration or definition. The current token (the one that
// introduces the record) is consumed first.
//
// \p ParseAsExpr selects how a '{' body is handled: when true it is parsed
// with parseChildBlock(), otherwise as a declaration block via parseBlock(),
// optionally preceded by a line break before the brace.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas) ||
         ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.Language == FormatStyle::LK_JavaScript &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier whose text differs from its upper-cased form is treated
    // as a regular name; anything else may be a macro or attribute that takes
    // a parenthesized argument list.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip ahead until the record body, a ';' or the end of input.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A '{' that does not parse as a braced list ends the skipping; it is
        // handled as the record body below.
        if (!tryToParseBracedList())
          break;
      }
      // A ';' ends the statement without a body.
      if (FormatTok->Tok.is(tok::semi))
        return;
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
2112 
2113 void UnwrappedLineParser::parseObjCProtocolList() {
2114   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2115   do
2116     nextToken();
2117   while (!eof() && FormatTok->Tok.isNot(tok::greater));
2118   nextToken(); // Skip '>'.
2119 }
2120 
2121 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2122   do {
2123     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2124       nextToken();
2125       addUnwrappedLine();
2126       break;
2127     }
2128     if (FormatTok->is(tok::l_brace)) {
2129       parseBlock(/*MustBeDeclaration=*/false);
2130       // In ObjC interfaces, nothing should be following the "}".
2131       addUnwrappedLine();
2132     } else if (FormatTok->is(tok::r_brace)) {
2133       // Ignore stray "}". parseStructuralElement doesn't consume them.
2134       nextToken();
2135       addUnwrappedLine();
2136     } else {
2137       parseStructuralElement();
2138     }
2139   } while (!eof());
2140 }
2141 
2142 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2143   nextToken();
2144   nextToken(); // interface name
2145 
2146   // @interface can be followed by either a base class, or a category.
2147   if (FormatTok->Tok.is(tok::colon)) {
2148     nextToken();
2149     nextToken(); // base class name
2150   } else if (FormatTok->Tok.is(tok::l_paren))
2151     // Skip category, if present.
2152     parseParens();
2153 
2154   if (FormatTok->Tok.is(tok::less))
2155     parseObjCProtocolList();
2156 
2157   if (FormatTok->Tok.is(tok::l_brace)) {
2158     if (Style.BraceWrapping.AfterObjCDeclaration)
2159       addUnwrappedLine();
2160     parseBlock(/*MustBeDeclaration=*/true);
2161   }
2162 
2163   // With instance variables, this puts '}' on its own line.  Without instance
2164   // variables, this ends the @interface line.
2165   addUnwrappedLine();
2166 
2167   parseObjCUntilAtEnd();
2168 }
2169 
2170 void UnwrappedLineParser::parseObjCProtocol() {
2171   nextToken();
2172   nextToken(); // protocol name
2173 
2174   if (FormatTok->Tok.is(tok::less))
2175     parseObjCProtocolList();
2176 
2177   // Check for protocol declaration.
2178   if (FormatTok->Tok.is(tok::semi)) {
2179     nextToken();
2180     return addUnwrappedLine();
2181   }
2182 
2183   addUnwrappedLine();
2184   parseObjCUntilAtEnd();
2185 }
2186 
2187 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2188   bool IsImport = FormatTok->is(Keywords.kw_import);
2189   assert(IsImport || FormatTok->is(tok::kw_export));
2190   nextToken();
2191 
2192   // Consume the "default" in "export default class/function".
2193   if (FormatTok->is(tok::kw_default))
2194     nextToken();
2195 
2196   // Consume "async function", "function" and "default function", so that these
2197   // get parsed as free-standing JS functions, i.e. do not require a trailing
2198   // semicolon.
2199   if (FormatTok->is(Keywords.kw_async))
2200     nextToken();
2201   if (FormatTok->is(Keywords.kw_function)) {
2202     nextToken();
2203     return;
2204   }
2205 
2206   // For imports, `export *`, `export {...}`, consume the rest of the line up
2207   // to the terminating `;`. For everything else, just return and continue
2208   // parsing the structural element, i.e. the declaration or expression for
2209   // `export default`.
2210   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2211       !FormatTok->isStringLiteral())
2212     return;
2213 
2214   while (!eof()) {
2215     if (FormatTok->is(tok::semi))
2216       return;
2217     if (Line->Tokens.size() == 0) {
2218       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2219       // import statement should terminate.
2220       return;
2221     }
2222     if (FormatTok->is(tok::l_brace)) {
2223       FormatTok->BlockKind = BK_Block;
2224       nextToken();
2225       parseBracedList();
2226     } else {
2227       nextToken();
2228     }
2229   }
2230 }
2231 
2232 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2233                                                  StringRef Prefix = "") {
2234   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2235                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2236   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2237                                                     E = Line.Tokens.end();
2238        I != E; ++I) {
2239     llvm::dbgs() << I->Tok->Tok.getName() << "["
2240                  << "T=" << I->Tok->Type
2241                  << ", OC=" << I->Tok->OriginalColumn << "] ";
2242   }
2243   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2244                                                     E = Line.Tokens.end();
2245        I != E; ++I) {
2246     const UnwrappedLineNode &Node = *I;
2247     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2248              I = Node.Children.begin(),
2249              E = Node.Children.end();
2250          I != E; ++I) {
2251       printDebugInfo(*I, "\nChild: ");
2252     }
2253   }
2254   llvm::dbgs() << "\n";
2255 }
2256 
2257 void UnwrappedLineParser::addUnwrappedLine() {
2258   if (Line->Tokens.empty())
2259     return;
2260   DEBUG({
2261     if (CurrentLines == &Lines)
2262       printDebugInfo(*Line);
2263   });
2264   CurrentLines->push_back(std::move(*Line));
2265   Line->Tokens.clear();
2266   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2267   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2268     CurrentLines->append(
2269         std::make_move_iterator(PreprocessorDirectives.begin()),
2270         std::make_move_iterator(PreprocessorDirectives.end()));
2271     PreprocessorDirectives.clear();
2272   }
2273 }
2274 
2275 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2276 
2277 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2278   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2279          FormatTok.NewlinesBefore > 0;
2280 }
2281 
// Checks if \p FormatTok is a line comment that continues the line comment
// section on \p Line.
//
// Returns false when \p Line has no tokens or when the comment text (without
// its leading "//" or "/*") matches \p CommentPragmasRegex. Otherwise the
// decision is delegated to continuesLineComment() with the last token of
// \p Line as the previous token and the min column token computed below.
static bool continuesLineCommentSection(const FormatToken &FormatTok,
                                        const UnwrappedLine &Line,
                                        llvm::Regex &CommentPragmasRegex) {
  if (Line.Tokens.empty())
    return false;

  // Strip the two-character comment opener before matching the pragma regex.
  StringRef IndentContent = FormatTok.TokenText;
  if (FormatTok.TokenText.startswith("//") ||
      FormatTok.TokenText.startswith("/*"))
    IndentContent = FormatTok.TokenText.substr(2);
  if (CommentPragmasRegex.match(IndentContent))
    return false;

  // If Line starts with a line comment, then FormatTok continues the comment
  // section if its original column is greater or equal to the original start
  // column of the line.
  //
  // Define the min column token of a line as follows: if a line ends in '{' or
  // contains a '{' followed by a line comment, then the min column token is
  // that '{'. Otherwise, the min column token of the line is the first token of
  // the line.
  //
  // If Line starts with a token other than a line comment, then FormatTok
  // continues the comment section if its original column is greater than the
  // original start column of the min column token of the line.
  //
  // For example, the second line comment continues the first in these cases:
  //
  // // first line
  // // second line
  //
  // and:
  //
  // // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  //  // second line
  //
  // and:
  //
  // do { // first line
  //      // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //    // second line
  //   b
  // };
  //
  // The second line comment doesn't continue the first in these cases:
  //
  //   // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  // // second line
  //
  // and:
  //
  // do { // first line
  //   // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //   // second line
  // };
  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;

  // Scan for '{//'. If found, use the column of '{' as a min column for line
  // comment section continuation.
  const FormatToken *PreviousToken = nullptr;
  for (const UnwrappedLineNode &Node : Line.Tokens) {
    if (PreviousToken && PreviousToken->is(tok::l_brace) &&
        isLineComment(*Node.Tok)) {
      MinColumnToken = PreviousToken;
      break;
    }
    PreviousToken = Node.Tok;

    // Grab the last newline preceding a token in this unwrapped line.
    if (Node.Tok->NewlinesBefore > 0) {
      MinColumnToken = Node.Tok;
    }
  }
  // If the scan stopped on a '{' (either through the break above or because
  // the last token of the line is a '{'), that brace is the min column token.
  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
    MinColumnToken = PreviousToken;
  }

  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
                              MinColumnToken);
}
2388 
2389 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2390   bool JustComments = Line->Tokens.empty();
2391   for (SmallVectorImpl<FormatToken *>::const_iterator
2392            I = CommentsBeforeNextToken.begin(),
2393            E = CommentsBeforeNextToken.end();
2394        I != E; ++I) {
2395     // Line comments that belong to the same line comment section are put on the
2396     // same line since later we might want to reflow content between them.
2397     // Additional fine-grained breaking of line comment sections is controlled
2398     // by the class BreakableLineCommentSection in case it is desirable to keep
2399     // several line comment sections in the same unwrapped line.
2400     //
2401     // FIXME: Consider putting separate line comment sections as children to the
2402     // unwrapped line instead.
2403     (*I)->ContinuesLineCommentSection =
2404         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2405     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2406       addUnwrappedLine();
2407     pushToken(*I);
2408   }
2409   if (NewlineBeforeNext && JustComments)
2410     addUnwrappedLine();
2411   CommentsBeforeNextToken.clear();
2412 }
2413 
2414 void UnwrappedLineParser::nextToken(int LevelDifference) {
2415   if (eof())
2416     return;
2417   flushComments(isOnNewLine(*FormatTok));
2418   pushToken(FormatTok);
2419   if (Style.Language != FormatStyle::LK_JavaScript)
2420     readToken(LevelDifference);
2421   else
2422     readTokenWithJavaScriptASI();
2423 }
2424 
2425 const FormatToken *UnwrappedLineParser::getPreviousToken() {
2426   // FIXME: This is a dirty way to access the previous token. Find a better
2427   // solution.
2428   if (!Line || Line->Tokens.empty())
2429     return nullptr;
2430   return Line->Tokens.back().Tok;
2431 }
2432 
2433 void UnwrappedLineParser::distributeComments(
2434     const SmallVectorImpl<FormatToken *> &Comments,
2435     const FormatToken *NextTok) {
2436   // Whether or not a line comment token continues a line is controlled by
2437   // the method continuesLineCommentSection, with the following caveat:
2438   //
2439   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2440   // that each comment line from the trail is aligned with the next token, if
2441   // the next token exists. If a trail exists, the beginning of the maximal
2442   // trail is marked as a start of a new comment section.
2443   //
2444   // For example in this code:
2445   //
2446   // int a; // line about a
2447   //   // line 1 about b
2448   //   // line 2 about b
2449   //   int b;
2450   //
2451   // the two lines about b form a maximal trail, so there are two sections, the
2452   // first one consisting of the single comment "// line about a" and the
2453   // second one consisting of the next two comments.
2454   if (Comments.empty())
2455     return;
2456   bool ShouldPushCommentsInCurrentLine = true;
2457   bool HasTrailAlignedWithNextToken = false;
2458   unsigned StartOfTrailAlignedWithNextToken = 0;
2459   if (NextTok) {
2460     // We are skipping the first element intentionally.
2461     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2462       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2463         HasTrailAlignedWithNextToken = true;
2464         StartOfTrailAlignedWithNextToken = i;
2465       }
2466     }
2467   }
2468   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2469     FormatToken *FormatTok = Comments[i];
2470     if (HasTrailAlignedWithNextToken &&
2471         i == StartOfTrailAlignedWithNextToken) {
2472       FormatTok->ContinuesLineCommentSection = false;
2473     } else {
2474       FormatTok->ContinuesLineCommentSection =
2475           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2476     }
2477     if (!FormatTok->ContinuesLineCommentSection &&
2478         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2479       ShouldPushCommentsInCurrentLine = false;
2480     }
2481     if (ShouldPushCommentsInCurrentLine) {
2482       pushToken(FormatTok);
2483     } else {
2484       CommentsBeforeNextToken.push_back(FormatTok);
2485     }
2486   }
2487 }
2488 
// Reads tokens from the token source into FormatTok until a non-comment token
// is found or the end of input is reached.
//
// Preprocessor directives and conflict markers met on the way are processed
// here, and the comments that were skipped are handed to distributeComments().
// \p LevelDifference is added to Line->Level for the duration of a
// preprocessor directive that is parsed from here.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments read since the last non-comment token.
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' that starts a line (or the file) outside of a directive begins a
    // preprocessor directive.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict marker tokens are handled through the conditional compilation
    // hooks and are then skipped; the token following a marker is forced onto
    // a new line.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Tokens in a preprocessor branch marked unreachable are skipped, unless
    // we are currently inside a directive.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // The first non-comment token ends the read: distribute the comments
    // collected so far relative to it.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // The loop ended at eof: distribute the remaining comments without a next
  // token.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
2543 
2544 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2545   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2546   if (MustBreakBeforeNextToken) {
2547     Line->Tokens.back().Tok->MustBreakBefore = true;
2548     MustBreakBeforeNextToken = false;
2549   }
2550 }
2551 
2552 } // end namespace format
2553 } // end namespace clang
2554