1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 static bool isLineComment(const FormatToken &FormatTok) {
59   return FormatTok.is(tok::comment) && FormatTok.TokenText.startswith("//");
60 }
61 
62 // Checks if \p FormatTok is a line comment that continues the line comment
63 // \p Previous. The original column of \p MinColumnToken is used to determine
64 // whether \p FormatTok is indented enough to the right to continue \p Previous.
65 static bool continuesLineComment(const FormatToken &FormatTok,
66                                  const FormatToken *Previous,
67                                  const FormatToken *MinColumnToken) {
68   if (!Previous || !MinColumnToken)
69     return false;
70   unsigned MinContinueColumn =
71       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
72   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
73          isLineComment(*Previous) &&
74          FormatTok.OriginalColumn >= MinContinueColumn;
75 }
76 
77 class ScopedMacroState : public FormatTokenSource {
78 public:
79   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
80                    FormatToken *&ResetToken)
81       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
82         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
83         Token(nullptr), PreviousToken(nullptr) {
84     TokenSource = this;
85     Line.Level = 0;
86     Line.InPPDirective = true;
87   }
88 
89   ~ScopedMacroState() override {
90     TokenSource = PreviousTokenSource;
91     ResetToken = Token;
92     Line.InPPDirective = false;
93     Line.Level = PreviousLineLevel;
94   }
95 
96   FormatToken *getNextToken() override {
97     // The \c UnwrappedLineParser guards against this by never calling
98     // \c getNextToken() after it has encountered the first eof token.
99     assert(!eof());
100     PreviousToken = Token;
101     Token = PreviousTokenSource->getNextToken();
102     if (eof())
103       return getFakeEOF();
104     return Token;
105   }
106 
107   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
108 
109   FormatToken *setPosition(unsigned Position) override {
110     PreviousToken = nullptr;
111     Token = PreviousTokenSource->setPosition(Position);
112     return Token;
113   }
114 
115 private:
116   bool eof() {
117     return Token && Token->HasUnescapedNewline &&
118            !continuesLineComment(*Token, PreviousToken,
119                                  /*MinColumnToken=*/PreviousToken);
120   }
121 
122   FormatToken *getFakeEOF() {
123     static bool EOFInitialized = false;
124     static FormatToken FormatTok;
125     if (!EOFInitialized) {
126       FormatTok.Tok.startToken();
127       FormatTok.Tok.setKind(tok::eof);
128       EOFInitialized = true;
129     }
130     return &FormatTok;
131   }
132 
133   UnwrappedLine &Line;
134   FormatTokenSource *&TokenSource;
135   FormatToken *&ResetToken;
136   unsigned PreviousLineLevel;
137   FormatTokenSource *PreviousTokenSource;
138 
139   FormatToken *Token;
140   FormatToken *PreviousToken;
141 };
142 
143 } // end anonymous namespace
144 
145 class ScopedLineState {
146 public:
147   ScopedLineState(UnwrappedLineParser &Parser,
148                   bool SwitchToPreprocessorLines = false)
149       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
150     if (SwitchToPreprocessorLines)
151       Parser.CurrentLines = &Parser.PreprocessorDirectives;
152     else if (!Parser.Line->Tokens.empty())
153       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
154     PreBlockLine = std::move(Parser.Line);
155     Parser.Line = llvm::make_unique<UnwrappedLine>();
156     Parser.Line->Level = PreBlockLine->Level;
157     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
158   }
159 
160   ~ScopedLineState() {
161     if (!Parser.Line->Tokens.empty()) {
162       Parser.addUnwrappedLine();
163     }
164     assert(Parser.Line->Tokens.empty());
165     Parser.Line = std::move(PreBlockLine);
166     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
167       Parser.MustBreakBeforeNextToken = true;
168     Parser.CurrentLines = OriginalLines;
169   }
170 
171 private:
172   UnwrappedLineParser &Parser;
173 
174   std::unique_ptr<UnwrappedLine> PreBlockLine;
175   SmallVectorImpl<UnwrappedLine> *OriginalLines;
176 };
177 
178 class CompoundStatementIndenter {
179 public:
180   CompoundStatementIndenter(UnwrappedLineParser *Parser,
181                             const FormatStyle &Style, unsigned &LineLevel)
182       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
183     if (Style.BraceWrapping.AfterControlStatement)
184       Parser->addUnwrappedLine();
185     if (Style.BraceWrapping.IndentBraces)
186       ++LineLevel;
187   }
188   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
189 
190 private:
191   unsigned &LineLevel;
192   unsigned OldLineLevel;
193 };
194 
195 namespace {
196 
197 class IndexedTokenSource : public FormatTokenSource {
198 public:
199   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
200       : Tokens(Tokens), Position(-1) {}
201 
202   FormatToken *getNextToken() override {
203     ++Position;
204     return Tokens[Position];
205   }
206 
207   unsigned getPosition() override {
208     assert(Position >= 0);
209     return Position;
210   }
211 
212   FormatToken *setPosition(unsigned P) override {
213     Position = P;
214     return Tokens[Position];
215   }
216 
217   void reset() { Position = -1; }
218 
219 private:
220   ArrayRef<FormatToken *> Tokens;
221   int Position;
222 };
223 
224 } // end anonymous namespace
225 
/// Constructs a parser over \p Tokens that reports its results to
/// \p Callback.
///
/// The parser starts with a fresh, empty line, collects lines into \c Lines,
/// has no token source installed yet (\c Tokens member is null until
/// \c parse() sets it), a preprocessor branch level of -1, and no include
/// guard candidate.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IfNdefCondition(nullptr), FoundIncludeGuardStart(false),
      IncludeGuardRejected(false) {}
236 
237 void UnwrappedLineParser::reset() {
238   PPBranchLevel = -1;
239   IfNdefCondition = nullptr;
240   FoundIncludeGuardStart = false;
241   IncludeGuardRejected = false;
242   Line.reset(new UnwrappedLine);
243   CommentsBeforeNextToken.clear();
244   FormatTok = nullptr;
245   MustBreakBeforeNextToken = false;
246   PreprocessorDirectives.clear();
247   CurrentLines = &Lines;
248   DeclarationScopeStack.clear();
249   PPStack.clear();
250 }
251 
252 void UnwrappedLineParser::parse() {
253   IndexedTokenSource TokenSource(AllTokens);
254   do {
255     DEBUG(llvm::dbgs() << "----\n");
256     reset();
257     Tokens = &TokenSource;
258     TokenSource.reset();
259 
260     readToken();
261     parseFile();
262     // Create line with eof token.
263     pushToken(FormatTok);
264     addUnwrappedLine();
265 
266     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
267                                                   E = Lines.end();
268          I != E; ++I) {
269       Callback.consumeUnwrappedLine(*I);
270     }
271     Callback.finishRun();
272     Lines.clear();
273     while (!PPLevelBranchIndex.empty() &&
274            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
275       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
276       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
277     }
278     if (!PPLevelBranchIndex.empty()) {
279       ++PPLevelBranchIndex.back();
280       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
281       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
282     }
283   } while (!PPLevelBranchIndex.empty());
284 }
285 
286 void UnwrappedLineParser::parseFile() {
287   // The top-level context in a file always has declarations, except for pre-
288   // processor directives and JavaScript files.
289   bool MustBeDeclaration =
290       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
291   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
292                                           MustBeDeclaration);
293   if (Style.Language == FormatStyle::LK_TextProto)
294     parseBracedList();
295   else
296     parseLevel(/*HasOpeningBrace=*/false);
297   // Make sure to format the remaining tokens.
298   flushComments(true);
299   addUnwrappedLine();
300 }
301 
302 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
303   bool SwitchLabelEncountered = false;
304   do {
305     tok::TokenKind kind = FormatTok->Tok.getKind();
306     if (FormatTok->Type == TT_MacroBlockBegin) {
307       kind = tok::l_brace;
308     } else if (FormatTok->Type == TT_MacroBlockEnd) {
309       kind = tok::r_brace;
310     }
311 
312     switch (kind) {
313     case tok::comment:
314       nextToken();
315       addUnwrappedLine();
316       break;
317     case tok::l_brace:
318       // FIXME: Add parameter whether this can happen - if this happens, we must
319       // be in a non-declaration context.
320       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
321         continue;
322       parseBlock(/*MustBeDeclaration=*/false);
323       addUnwrappedLine();
324       break;
325     case tok::r_brace:
326       if (HasOpeningBrace)
327         return;
328       nextToken();
329       addUnwrappedLine();
330       break;
331     case tok::kw_default:
332     case tok::kw_case:
333       if (Style.Language == FormatStyle::LK_JavaScript &&
334           Line->MustBeDeclaration) {
335         // A 'case: string' style field declaration.
336         parseStructuralElement();
337         break;
338       }
339       if (!SwitchLabelEncountered &&
340           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
341         ++Line->Level;
342       SwitchLabelEncountered = true;
343       parseStructuralElement();
344       break;
345     default:
346       parseStructuralElement();
347       break;
348     }
349   } while (!eof());
350 }
351 
352 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
353   // We'll parse forward through the tokens until we hit
354   // a closing brace or eof - note that getNextToken() will
355   // parse macros, so this will magically work inside macro
356   // definitions, too.
357   unsigned StoredPosition = Tokens->getPosition();
358   FormatToken *Tok = FormatTok;
359   const FormatToken *PrevTok = Tok->Previous;
360   // Keep a stack of positions of lbrace tokens. We will
361   // update information about whether an lbrace starts a
362   // braced init list or a different block during the loop.
363   SmallVector<FormatToken *, 8> LBraceStack;
364   assert(Tok->Tok.is(tok::l_brace));
365   do {
366     // Get next non-comment token.
367     FormatToken *NextTok;
368     unsigned ReadTokens = 0;
369     do {
370       NextTok = Tokens->getNextToken();
371       ++ReadTokens;
372     } while (NextTok->is(tok::comment));
373 
374     switch (Tok->Tok.getKind()) {
375     case tok::l_brace:
376       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
377         if (PrevTok->is(tok::colon))
378           // A colon indicates this code is in a type, or a braced list
379           // following a label in an object literal ({a: {b: 1}}). The code
380           // below could be confused by semicolons between the individual
381           // members in a type member list, which would normally trigger
382           // BK_Block. In both cases, this must be parsed as an inline braced
383           // init.
384           Tok->BlockKind = BK_BracedInit;
385         else if (PrevTok->is(tok::r_paren))
386           // `) { }` can only occur in function or method declarations in JS.
387           Tok->BlockKind = BK_Block;
388       } else {
389         Tok->BlockKind = BK_Unknown;
390       }
391       LBraceStack.push_back(Tok);
392       break;
393     case tok::r_brace:
394       if (LBraceStack.empty())
395         break;
396       if (LBraceStack.back()->BlockKind == BK_Unknown) {
397         bool ProbablyBracedList = false;
398         if (Style.Language == FormatStyle::LK_Proto) {
399           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
400         } else {
401           // Using OriginalColumn to distinguish between ObjC methods and
402           // binary operators is a bit hacky.
403           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
404                                   NextTok->OriginalColumn == 0;
405 
406           // If there is a comma, semicolon or right paren after the closing
407           // brace, we assume this is a braced initializer list.  Note that
408           // regardless how we mark inner braces here, we will overwrite the
409           // BlockKind later if we parse a braced list (where all blocks
410           // inside are by default braced lists), or when we explicitly detect
411           // blocks (for example while parsing lambdas).
412           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
413           // braced list in JS.
414           ProbablyBracedList =
415               (Style.Language == FormatStyle::LK_JavaScript &&
416                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
417                                 Keywords.kw_as)) ||
418               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
419               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
420                                tok::r_paren, tok::r_square, tok::l_brace,
421                                tok::l_square, tok::ellipsis) ||
422               (NextTok->is(tok::identifier) &&
423                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
424               (NextTok->is(tok::semi) &&
425                (!ExpectClassBody || LBraceStack.size() != 1)) ||
426               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
427         }
428         if (ProbablyBracedList) {
429           Tok->BlockKind = BK_BracedInit;
430           LBraceStack.back()->BlockKind = BK_BracedInit;
431         } else {
432           Tok->BlockKind = BK_Block;
433           LBraceStack.back()->BlockKind = BK_Block;
434         }
435       }
436       LBraceStack.pop_back();
437       break;
438     case tok::at:
439     case tok::semi:
440     case tok::kw_if:
441     case tok::kw_while:
442     case tok::kw_for:
443     case tok::kw_switch:
444     case tok::kw_try:
445     case tok::kw___try:
446       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
447         LBraceStack.back()->BlockKind = BK_Block;
448       break;
449     default:
450       break;
451     }
452     PrevTok = Tok;
453     Tok = NextTok;
454   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
455 
456   // Assume other blocks for all unclosed opening braces.
457   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
458     if (LBraceStack[i]->BlockKind == BK_Unknown)
459       LBraceStack[i]->BlockKind = BK_Block;
460   }
461 
462   FormatTok = Tokens->setPosition(StoredPosition);
463 }
464 
/// Mixes the \c std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
470 
471 size_t UnwrappedLineParser::computePPHash() const {
472   size_t h = 0;
473   for (const auto &i : PPStack) {
474     hash_combine(h, size_t(i.Kind));
475     hash_combine(h, i.Line);
476   }
477   return h;
478 }
479 
/// Parses a block that starts at the current '{' or macro-block-begin token
/// and, if present, consumes the matching closing token.
///
/// \param MustBeDeclaration declaration state installed while the block's
/// contents are parsed.
/// \param AddLevel if true, the contents are parsed one level deeper.
/// \param MunchSemi if true, a ';' directly after the closing token is
/// consumed as well.
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
                                     bool MunchSemi) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->BlockKind = BK_Block;

  // Snapshot of the preprocessor branch stack at the opening token; compared
  // against the state at the closing token below.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevel ? 1 : 0);

  // A macro block opener may be followed by an argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Compute the index of the line that holds the opening token. The number of
  // collected preprocessor directives is subtracted when adding to 'Lines'.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevel)
    ++Line->Level;
  parseLevel(/*HasOpeningBrace=*/true);

  // Unterminated block: nothing left to consume.
  if (eof())
    return;

  // The level ended on something other than the expected closing token;
  // restore the level, mark the token as a block and leave it unconsumed.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->BlockKind = BK_Block;
    return;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/AddLevel ? -1 : 0);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();
  Line->Level = InitialLevel;

  // Only link the opening and closing lines when both tokens were seen with
  // the same preprocessor branch stack.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingOpeningBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }
}
540 
541 static bool isGoogScope(const UnwrappedLine &Line) {
542   // FIXME: Closure-library specific stuff should not be hard-coded but be
543   // configurable.
544   if (Line.Tokens.size() < 4)
545     return false;
546   auto I = Line.Tokens.begin();
547   if (I->Tok->TokenText != "goog")
548     return false;
549   ++I;
550   if (I->Tok->isNot(tok::period))
551     return false;
552   ++I;
553   if (I->Tok->TokenText != "scope")
554     return false;
555   ++I;
556   return I->Tok->is(tok::l_paren);
557 }
558 
559 static bool isIIFE(const UnwrappedLine &Line,
560                    const AdditionalKeywords &Keywords) {
561   // Look for the start of an immediately invoked anonymous function.
562   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
563   // This is commonly done in JavaScript to create a new, anonymous scope.
564   // Example: (function() { ... })()
565   if (Line.Tokens.size() < 3)
566     return false;
567   auto I = Line.Tokens.begin();
568   if (I->Tok->isNot(tok::l_paren))
569     return false;
570   ++I;
571   if (I->Tok->isNot(Keywords.kw_function))
572     return false;
573   ++I;
574   return I->Tok->is(tok::l_paren);
575 }
576 
577 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
578                                    const FormatToken &InitialToken) {
579   if (InitialToken.is(tok::kw_namespace))
580     return Style.BraceWrapping.AfterNamespace;
581   if (InitialToken.is(tok::kw_class))
582     return Style.BraceWrapping.AfterClass;
583   if (InitialToken.is(tok::kw_union))
584     return Style.BraceWrapping.AfterUnion;
585   if (InitialToken.is(tok::kw_struct))
586     return Style.BraceWrapping.AfterStruct;
587   return false;
588 }
589 
590 void UnwrappedLineParser::parseChildBlock() {
591   FormatTok->BlockKind = BK_Block;
592   nextToken();
593   {
594     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
595                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
596     ScopedLineState LineState(*this);
597     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
598                                             /*MustBeDeclaration=*/false);
599     Line->Level += SkipIndent ? 0 : 1;
600     parseLevel(/*HasOpeningBrace=*/true);
601     flushComments(isOnNewLine(*FormatTok));
602     Line->Level -= SkipIndent ? 0 : 1;
603   }
604   nextToken();
605 }
606 
607 void UnwrappedLineParser::parsePPDirective() {
608   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
609   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
610   nextToken();
611 
612   if (!FormatTok->Tok.getIdentifierInfo()) {
613     parsePPUnknown();
614     return;
615   }
616 
617   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
618   case tok::pp_define:
619     parsePPDefine();
620     return;
621   case tok::pp_if:
622     parsePPIf(/*IfDef=*/false);
623     break;
624   case tok::pp_ifdef:
625   case tok::pp_ifndef:
626     parsePPIf(/*IfDef=*/true);
627     break;
628   case tok::pp_else:
629     parsePPElse();
630     break;
631   case tok::pp_elif:
632     parsePPElIf();
633     break;
634   case tok::pp_endif:
635     parsePPEndIf();
636     break;
637   default:
638     parsePPUnknown();
639     break;
640   }
641 }
642 
643 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
644   size_t Line = CurrentLines->size();
645   if (CurrentLines == &PreprocessorDirectives)
646     Line += Lines.size();
647 
648   if (Unreachable ||
649       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
650     PPStack.push_back({PP_Unreachable, Line});
651   else
652     PPStack.push_back({PP_Conditional, Line});
653 }
654 
655 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
656   ++PPBranchLevel;
657   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
658   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
659     PPLevelBranchIndex.push_back(0);
660     PPLevelBranchCount.push_back(0);
661   }
662   PPChainBranchIndex.push(0);
663   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
664   conditionalCompilationCondition(Unreachable || Skip);
665 }
666 
667 void UnwrappedLineParser::conditionalCompilationAlternative() {
668   if (!PPStack.empty())
669     PPStack.pop_back();
670   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
671   if (!PPChainBranchIndex.empty())
672     ++PPChainBranchIndex.top();
673   conditionalCompilationCondition(
674       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
675       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
676 }
677 
678 void UnwrappedLineParser::conditionalCompilationEnd() {
679   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
680   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
681     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
682       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
683     }
684   }
685   // Guard against #endif's without #if.
686   if (PPBranchLevel > -1)
687     --PPBranchLevel;
688   if (!PPChainBranchIndex.empty())
689     PPChainBranchIndex.pop();
690   if (!PPStack.empty())
691     PPStack.pop_back();
692 }
693 
694 void UnwrappedLineParser::parsePPIf(bool IfDef) {
695   bool IfNDef = FormatTok->is(tok::pp_ifndef);
696   nextToken();
697   bool Unreachable = false;
698   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
699     Unreachable = true;
700   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
701     Unreachable = true;
702   conditionalCompilationStart(Unreachable);
703   FormatToken *IfCondition = FormatTok;
704   // If there's a #ifndef on the first line, and the only lines before it are
705   // comments, it could be an include guard.
706   bool MaybeIncludeGuard = IfNDef;
707   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard) {
708     for (auto &Line : Lines) {
709       if (!Line.Tokens.front().Tok->is(tok::comment)) {
710         MaybeIncludeGuard = false;
711         IncludeGuardRejected = true;
712         break;
713       }
714     }
715   }
716   --PPBranchLevel;
717   parsePPUnknown();
718   ++PPBranchLevel;
719   if (!IncludeGuardRejected && !FoundIncludeGuardStart && MaybeIncludeGuard)
720     IfNdefCondition = IfCondition;
721 }
722 
723 void UnwrappedLineParser::parsePPElse() {
724   // If a potential include guard has an #else, it's not an include guard.
725   if (FoundIncludeGuardStart && PPBranchLevel == 0)
726     FoundIncludeGuardStart = false;
727   conditionalCompilationAlternative();
728   if (PPBranchLevel > -1)
729     --PPBranchLevel;
730   parsePPUnknown();
731   ++PPBranchLevel;
732 }
733 
734 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
735 
736 void UnwrappedLineParser::parsePPEndIf() {
737   conditionalCompilationEnd();
738   parsePPUnknown();
739   // If the #endif of a potential include guard is the last thing in the file,
740   // then we count it as a real include guard and subtract one from every
741   // preprocessor indent.
742   unsigned TokenPosition = Tokens->getPosition();
743   FormatToken *PeekNext = AllTokens[TokenPosition];
744   if (FoundIncludeGuardStart && PPBranchLevel == -1 && PeekNext->is(tok::eof) &&
745       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
746     for (auto &Line : Lines)
747       if (Line.InPPDirective && Line.Level > 0)
748         --Line.Level;
749 }
750 
751 void UnwrappedLineParser::parsePPDefine() {
752   nextToken();
753 
754   if (FormatTok->Tok.getKind() != tok::identifier) {
755     parsePPUnknown();
756     return;
757   }
758   if (IfNdefCondition && IfNdefCondition->TokenText == FormatTok->TokenText) {
759     FoundIncludeGuardStart = true;
760     for (auto &Line : Lines) {
761       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
762         FoundIncludeGuardStart = false;
763         break;
764       }
765     }
766   }
767   IfNdefCondition = nullptr;
768   nextToken();
769   if (FormatTok->Tok.getKind() == tok::l_paren &&
770       FormatTok->WhitespaceRange.getBegin() ==
771           FormatTok->WhitespaceRange.getEnd()) {
772     parseParens();
773   }
774   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
775     Line->Level += PPBranchLevel + 1;
776   addUnwrappedLine();
777   ++Line->Level;
778 
779   // Errors during a preprocessor directive can only affect the layout of the
780   // preprocessor directive, and thus we ignore them. An alternative approach
781   // would be to use the same approach we use on the file level (no
782   // re-indentation if there was a structural error) within the macro
783   // definition.
784   parseFile();
785 }
786 
787 void UnwrappedLineParser::parsePPUnknown() {
788   do {
789     nextToken();
790   } while (!eof());
791   if (Style.IndentPPDirectives == FormatStyle::PPDIS_AfterHash)
792     Line->Level += PPBranchLevel + 1;
793   addUnwrappedLine();
794   IfNdefCondition = nullptr;
795 }
796 
797 // Here we blacklist certain tokens that are not usually the first token in an
798 // unwrapped line. This is used in attempt to distinguish macro calls without
799 // trailing semicolons from other constructs split to several lines.
800 static bool tokenCanStartNewLine(const clang::Token &Tok) {
801   // Semicolon can be a null-statement, l_square can be a start of a macro or
802   // a C++11 attribute, but this doesn't seem to be common.
803   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
804          Tok.isNot(tok::l_square) &&
805          // Tokens that can only be used as binary operators and a part of
806          // overloaded operator names.
807          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
808          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
809          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
810          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
811          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
812          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
813          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
814          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
815          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
816          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
817          Tok.isNot(tok::lesslessequal) &&
818          // Colon is used in labels, base class lists, initializer lists,
819          // range-based for loops, ternary operator, but should never be the
820          // first token in an unwrapped line.
821          Tok.isNot(tok::colon) &&
822          // 'noexcept' is a trailing annotation.
823          Tok.isNot(tok::kw_noexcept);
824 }
825 
826 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
827                           const FormatToken *FormatTok) {
828   // FIXME: This returns true for C/C++ keywords like 'struct'.
829   return FormatTok->is(tok::identifier) &&
830          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
831           !FormatTok->isOneOf(
832               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
833               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
834               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
835               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
836               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
837               Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
838               Keywords.kw_from));
839 }
840 
841 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
842                                  const FormatToken *FormatTok) {
843   return FormatTok->Tok.isLiteral() ||
844          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
845          mustBeJSIdent(Keywords, FormatTok);
846 }
847 
848 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
849 // when encountered after a value (see mustBeJSIdentOrValue).
850 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
851                            const FormatToken *FormatTok) {
852   return FormatTok->isOneOf(
853       tok::kw_return, Keywords.kw_yield,
854       // conditionals
855       tok::kw_if, tok::kw_else,
856       // loops
857       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
858       // switch/case
859       tok::kw_switch, tok::kw_case,
860       // exceptions
861       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
862       // declaration
863       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
864       Keywords.kw_async, Keywords.kw_function,
865       // import/export
866       Keywords.kw_import, tok::kw_export);
867 }
868 
869 // readTokenWithJavaScriptASI reads the next token and terminates the current
870 // line if JavaScript Automatic Semicolon Insertion must
871 // happen between the current token and the next token.
872 //
873 // This method is conservative - it cannot cover all edge cases of JavaScript,
874 // but only aims to correctly handle certain well known cases. It *must not*
875 // return true in speculative cases.
876 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
877   FormatToken *Previous = FormatTok;
878   readToken();
879   FormatToken *Next = FormatTok;
880 
881   bool IsOnSameLine =
882       CommentsBeforeNextToken.empty()
883           ? Next->NewlinesBefore == 0
884           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
885   if (IsOnSameLine)
886     return;
887 
888   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
889   bool PreviousStartsTemplateExpr =
890       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
891   if (PreviousMustBeValue && Line && Line->Tokens.size() > 1) {
892     // If the token before the previous one is an '@', the previous token is an
893     // annotation and can precede another identifier/value.
894     const FormatToken *PrePrevious = std::prev(Line->Tokens.end(), 2)->Tok;
895     if (PrePrevious->is(tok::at))
896       return;
897   }
898   if (Next->is(tok::exclaim) && PreviousMustBeValue)
899     return addUnwrappedLine();
900   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
901   bool NextEndsTemplateExpr =
902       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
903   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
904       (PreviousMustBeValue ||
905        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
906                          tok::minusminus)))
907     return addUnwrappedLine();
908   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
909       isJSDeclOrStmt(Keywords, Next))
910     return addUnwrappedLine();
911 }
912 
// Parses one structural element starting at the current token, which must not
// be a '{'.
//
// The function works in two phases:
//  1. A switch on the first token hands constructs with a dedicated parser
//     (if, for/while, do, switch, case/default labels, try, namespace, access
//     specifiers, Objective-C '@' keywords, JS import/export, ...) to that
//     parser and returns. Cases that 'break' instead fall through to phase 2.
//  2. A loop consumes tokens one by one, descending into parentheses, square
//     brackets, braced lists, records and enums, until the element is ended by
//     a ';', a '}', a trailing block, a detected macro invocation, or end of
//     file.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: 'include "file"' forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    // '@{' starts a braced list; the element then continues in phase 2.
    if (FormatTok->Tok.is(tok::l_brace)) {
      nextToken();
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::objc_autoreleasepool:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterObjCDeclaration)
          addUnwrappedLine();
        parseBlock(/*MustBeDeclaration=*/false);
      }
      addUnwrappedLine();
      return;
    case tok::objc_try:
      // This branch isn't strictly necessary (the kw_try case below would
      // do this too after the tok::at is parsed above).  But be explicit.
      parseTryCatch();
      return;
    default:
      break;
    }
    break;
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      // Both braces of an 'asm { ... }' block are tagged TT_InlineASMBrace and
      // every token in between is marked Finalized.
      FormatTok->Type = TT_InlineASMBrace;
      nextToken();
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->Type = TT_InlineASMBrace;
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    // 'inline namespace' is parsed like a namespace; any other use of 'inline'
    // continues in phase 2.
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // For Java and JavaScript the keyword is only consumed here; for the
    // other languages it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript)
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // 'extern "..." {' opens a declaration block. Depending on
    // BraceWrapping.AfterExternBlock the brace is put on its own line and the
    // block adds an indentation level, or neither happens.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock) {
          addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/true);
        } else {
          parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import [public] "file" [;]' forms a line of its own. If no
      // string literal follows, return without adding a line.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // C++: the signals/slots keywords followed by ':' form a line of their
    // own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume tokens until the structural element ends.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef NS_ENUM(...)' and its NS_OPTIONS/CF_ENUM/CF_OPTIONS siblings
      // are parsed as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      // parseRecord falls through and does not yet add an unwrapped line as a
      // record declaration or definition can start a structural element.
      parseRecord();
      // This does not apply for Java and JavaScript.
      if (Style.Language == FormatStyle::LK_Java ||
          Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A ';' ends the element and belongs to its line.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // A '}' ends the element but is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::kw_operator:
      nextToken();
      // Consume the operator symbol so it is not dispatched as an ordinary
      // token by this loop.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by an identifier or simple type specifier, a
      // parenthesized list and a '{' block that is parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->Type = TT_FunctionLBrace;
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      // We arrive here when parsing function-try blocks.
      parseTryCatch();
      return;
    case tok::identifier: {
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();
      if (Line->Tokens.size() == 1 &&
          // JS doesn't have macros, and within classes colons indicate fields,
          // not labels.
          Style.Language != FormatStyle::LK_JavaScript) {
        // 'identifier:' at the start of a non-declaration line is a label.
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is at least five characters
        // long or function-like, followed by a newline and by a token that can
        // start a new line, ends the element here.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
      if (FormatTok->is(TT_JsFatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }

      nextToken();
      // '= {' starts a braced list; in Proto, '= <' starts a list closed by
      // '>'.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1310 
1311 bool UnwrappedLineParser::tryToParseLambda() {
1312   if (!Style.isCpp()) {
1313     nextToken();
1314     return false;
1315   }
1316   assert(FormatTok->is(tok::l_square));
1317   FormatToken &LSquare = *FormatTok;
1318   if (!tryToParseLambdaIntroducer())
1319     return false;
1320 
1321   while (FormatTok->isNot(tok::l_brace)) {
1322     if (FormatTok->isSimpleTypeSpecifier()) {
1323       nextToken();
1324       continue;
1325     }
1326     switch (FormatTok->Tok.getKind()) {
1327     case tok::l_brace:
1328       break;
1329     case tok::l_paren:
1330       parseParens();
1331       break;
1332     case tok::amp:
1333     case tok::star:
1334     case tok::kw_const:
1335     case tok::comma:
1336     case tok::less:
1337     case tok::greater:
1338     case tok::identifier:
1339     case tok::numeric_constant:
1340     case tok::coloncolon:
1341     case tok::kw_mutable:
1342       nextToken();
1343       break;
1344     case tok::arrow:
1345       FormatTok->Type = TT_LambdaArrow;
1346       nextToken();
1347       break;
1348     default:
1349       return true;
1350     }
1351   }
1352   LSquare.Type = TT_LambdaLSquare;
1353   parseChildBlock();
1354   return true;
1355 }
1356 
1357 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1358   const FormatToken *Previous = FormatTok->Previous;
1359   if (Previous &&
1360       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1361                          tok::kw_delete) ||
1362        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1363        Previous->isSimpleTypeSpecifier())) {
1364     nextToken();
1365     return false;
1366   }
1367   nextToken();
1368   parseSquare(/*LambdaIntroducer=*/true);
1369   return true;
1370 }
1371 
1372 void UnwrappedLineParser::tryToParseJSFunction() {
1373   assert(FormatTok->is(Keywords.kw_function) ||
1374          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1375   if (FormatTok->is(Keywords.kw_async))
1376     nextToken();
1377   // Consume "function".
1378   nextToken();
1379 
1380   // Consume * (generator function). Treat it like C++'s overloaded operators.
1381   if (FormatTok->is(tok::star)) {
1382     FormatTok->Type = TT_OverloadedOperator;
1383     nextToken();
1384   }
1385 
1386   // Consume function name.
1387   if (FormatTok->is(tok::identifier))
1388     nextToken();
1389 
1390   if (FormatTok->isNot(tok::l_paren))
1391     return;
1392 
1393   // Parse formal parameter list.
1394   parseParens();
1395 
1396   if (FormatTok->is(tok::colon)) {
1397     // Parse a type definition.
1398     nextToken();
1399 
1400     // Eat the type declaration. For braced inline object types, balance braces,
1401     // otherwise just parse until finding an l_brace for the function body.
1402     if (FormatTok->is(tok::l_brace))
1403       tryToParseBracedList();
1404     else
1405       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1406         nextToken();
1407   }
1408 
1409   if (FormatTok->is(tok::semi))
1410     return;
1411 
1412   parseChildBlock();
1413 }
1414 
1415 bool UnwrappedLineParser::tryToParseBracedList() {
1416   if (FormatTok->BlockKind == BK_Unknown)
1417     calculateBraceTypes();
1418   assert(FormatTok->BlockKind != BK_Unknown);
1419   if (FormatTok->BlockKind == BK_Block)
1420     return false;
1421   nextToken();
1422   parseBracedList();
1423   return true;
1424 }
1425 
1426 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1427                                           tok::TokenKind ClosingBraceKind) {
1428   bool HasError = false;
1429 
1430   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1431   // replace this by using parseAssigmentExpression() inside.
1432   do {
1433     if (Style.Language == FormatStyle::LK_JavaScript) {
1434       if (FormatTok->is(Keywords.kw_function) ||
1435           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1436         tryToParseJSFunction();
1437         continue;
1438       }
1439       if (FormatTok->is(TT_JsFatArrow)) {
1440         nextToken();
1441         // Fat arrows can be followed by simple expressions or by child blocks
1442         // in curly braces.
1443         if (FormatTok->is(tok::l_brace)) {
1444           parseChildBlock();
1445           continue;
1446         }
1447       }
1448       if (FormatTok->is(tok::l_brace)) {
1449         // Could be a method inside of a braced list `{a() { return 1; }}`.
1450         if (tryToParseBracedList())
1451           continue;
1452         parseChildBlock();
1453       }
1454     }
1455     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1456       nextToken();
1457       return !HasError;
1458     }
1459     switch (FormatTok->Tok.getKind()) {
1460     case tok::caret:
1461       nextToken();
1462       if (FormatTok->is(tok::l_brace)) {
1463         parseChildBlock();
1464       }
1465       break;
1466     case tok::l_square:
1467       tryToParseLambda();
1468       break;
1469     case tok::l_paren:
1470       parseParens();
1471       // JavaScript can just have free standing methods and getters/setters in
1472       // object literals. Detect them by a "{" following ")".
1473       if (Style.Language == FormatStyle::LK_JavaScript) {
1474         if (FormatTok->is(tok::l_brace))
1475           parseChildBlock();
1476         break;
1477       }
1478       break;
1479     case tok::l_brace:
1480       // Assume there are no blocks inside a braced init list apart
1481       // from the ones we explicitly parse out (like lambdas).
1482       FormatTok->BlockKind = BK_BracedInit;
1483       nextToken();
1484       parseBracedList();
1485       break;
1486     case tok::less:
1487       if (Style.Language == FormatStyle::LK_Proto) {
1488         nextToken();
1489         parseBracedList(/*ContinueOnSemicolons=*/false,
1490                         /*ClosingBraceKind=*/tok::greater);
1491       } else {
1492         nextToken();
1493       }
1494       break;
1495     case tok::semi:
1496       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1497       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1498       // used for error recovery if we have otherwise determined that this is
1499       // a braced list.
1500       if (Style.Language == FormatStyle::LK_JavaScript) {
1501         nextToken();
1502         break;
1503       }
1504       HasError = true;
1505       if (!ContinueOnSemicolons)
1506         return !HasError;
1507       nextToken();
1508       break;
1509     case tok::comma:
1510       nextToken();
1511       break;
1512     default:
1513       nextToken();
1514       break;
1515     }
1516   } while (!eof());
1517   return false;
1518 }
1519 
1520 void UnwrappedLineParser::parseParens() {
1521   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1522   nextToken();
1523   do {
1524     switch (FormatTok->Tok.getKind()) {
1525     case tok::l_paren:
1526       parseParens();
1527       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1528         parseChildBlock();
1529       break;
1530     case tok::r_paren:
1531       nextToken();
1532       return;
1533     case tok::r_brace:
1534       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1535       return;
1536     case tok::l_square:
1537       tryToParseLambda();
1538       break;
1539     case tok::l_brace:
1540       if (!tryToParseBracedList())
1541         parseChildBlock();
1542       break;
1543     case tok::at:
1544       nextToken();
1545       if (FormatTok->Tok.is(tok::l_brace)) {
1546         nextToken();
1547         parseBracedList();
1548       }
1549       break;
1550     case tok::kw_class:
1551       if (Style.Language == FormatStyle::LK_JavaScript)
1552         parseRecord(/*ParseAsExpr=*/true);
1553       else
1554         nextToken();
1555       break;
1556     case tok::identifier:
1557       if (Style.Language == FormatStyle::LK_JavaScript &&
1558           (FormatTok->is(Keywords.kw_function) ||
1559            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
1560         tryToParseJSFunction();
1561       else
1562         nextToken();
1563       break;
1564     default:
1565       nextToken();
1566       break;
1567     }
1568   } while (!eof());
1569 }
1570 
1571 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
1572   if (!LambdaIntroducer) {
1573     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1574     if (tryToParseLambda())
1575       return;
1576   }
1577   do {
1578     switch (FormatTok->Tok.getKind()) {
1579     case tok::l_paren:
1580       parseParens();
1581       break;
1582     case tok::r_square:
1583       nextToken();
1584       return;
1585     case tok::r_brace:
1586       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1587       return;
1588     case tok::l_square:
1589       parseSquare();
1590       break;
1591     case tok::l_brace: {
1592       if (!tryToParseBracedList())
1593         parseChildBlock();
1594       break;
1595     }
1596     case tok::at:
1597       nextToken();
1598       if (FormatTok->Tok.is(tok::l_brace)) {
1599         nextToken();
1600         parseBracedList();
1601       }
1602       break;
1603     default:
1604       nextToken();
1605       break;
1606     }
1607   } while (!eof());
1608 }
1609 
1610 void UnwrappedLineParser::parseIfThenElse() {
1611   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1612   nextToken();
1613   if (FormatTok->Tok.is(tok::kw_constexpr))
1614     nextToken();
1615   if (FormatTok->Tok.is(tok::l_paren))
1616     parseParens();
1617   bool NeedsUnwrappedLine = false;
1618   if (FormatTok->Tok.is(tok::l_brace)) {
1619     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1620     parseBlock(/*MustBeDeclaration=*/false);
1621     if (Style.BraceWrapping.BeforeElse)
1622       addUnwrappedLine();
1623     else
1624       NeedsUnwrappedLine = true;
1625   } else {
1626     addUnwrappedLine();
1627     ++Line->Level;
1628     parseStructuralElement();
1629     --Line->Level;
1630   }
1631   if (FormatTok->Tok.is(tok::kw_else)) {
1632     nextToken();
1633     if (FormatTok->Tok.is(tok::l_brace)) {
1634       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1635       parseBlock(/*MustBeDeclaration=*/false);
1636       addUnwrappedLine();
1637     } else if (FormatTok->Tok.is(tok::kw_if)) {
1638       parseIfThenElse();
1639     } else {
1640       addUnwrappedLine();
1641       ++Line->Level;
1642       parseStructuralElement();
1643       if (FormatTok->is(tok::eof))
1644         addUnwrappedLine();
1645       --Line->Level;
1646     }
1647   } else if (NeedsUnwrappedLine) {
1648     addUnwrappedLine();
1649   }
1650 }
1651 
1652 void UnwrappedLineParser::parseTryCatch() {
1653   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1654   nextToken();
1655   bool NeedsUnwrappedLine = false;
1656   if (FormatTok->is(tok::colon)) {
1657     // We are in a function try block, what comes is an initializer list.
1658     nextToken();
1659     while (FormatTok->is(tok::identifier)) {
1660       nextToken();
1661       if (FormatTok->is(tok::l_paren))
1662         parseParens();
1663       if (FormatTok->is(tok::comma))
1664         nextToken();
1665     }
1666   }
1667   // Parse try with resource.
1668   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1669     parseParens();
1670   }
1671   if (FormatTok->is(tok::l_brace)) {
1672     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1673     parseBlock(/*MustBeDeclaration=*/false);
1674     if (Style.BraceWrapping.BeforeCatch) {
1675       addUnwrappedLine();
1676     } else {
1677       NeedsUnwrappedLine = true;
1678     }
1679   } else if (!FormatTok->is(tok::kw_catch)) {
1680     // The C++ standard requires a compound-statement after a try.
1681     // If there's none, we try to assume there's a structuralElement
1682     // and try to continue.
1683     addUnwrappedLine();
1684     ++Line->Level;
1685     parseStructuralElement();
1686     --Line->Level;
1687   }
1688   while (1) {
1689     if (FormatTok->is(tok::at))
1690       nextToken();
1691     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1692                              tok::kw___finally) ||
1693           ((Style.Language == FormatStyle::LK_Java ||
1694             Style.Language == FormatStyle::LK_JavaScript) &&
1695            FormatTok->is(Keywords.kw_finally)) ||
1696           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1697            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1698       break;
1699     nextToken();
1700     while (FormatTok->isNot(tok::l_brace)) {
1701       if (FormatTok->is(tok::l_paren)) {
1702         parseParens();
1703         continue;
1704       }
1705       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1706         return;
1707       nextToken();
1708     }
1709     NeedsUnwrappedLine = false;
1710     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1711     parseBlock(/*MustBeDeclaration=*/false);
1712     if (Style.BraceWrapping.BeforeCatch)
1713       addUnwrappedLine();
1714     else
1715       NeedsUnwrappedLine = true;
1716   }
1717   if (NeedsUnwrappedLine)
1718     addUnwrappedLine();
1719 }
1720 
1721 void UnwrappedLineParser::parseNamespace() {
1722   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1723 
1724   const FormatToken &InitialToken = *FormatTok;
1725   nextToken();
1726   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1727     nextToken();
1728   if (FormatTok->Tok.is(tok::l_brace)) {
1729     if (ShouldBreakBeforeBrace(Style, InitialToken))
1730       addUnwrappedLine();
1731 
1732     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1733                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1734                      DeclarationScopeStack.size() > 1);
1735     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1736     // Munch the semicolon after a namespace. This is more common than one would
1737     // think. Puttin the semicolon into its own line is very ugly.
1738     if (FormatTok->Tok.is(tok::semi))
1739       nextToken();
1740     addUnwrappedLine();
1741   }
1742   // FIXME: Add error handling.
1743 }
1744 
1745 void UnwrappedLineParser::parseNew() {
1746   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1747   nextToken();
1748   if (Style.Language != FormatStyle::LK_Java)
1749     return;
1750 
1751   // In Java, we can parse everything up to the parens, which aren't optional.
1752   do {
1753     // There should not be a ;, { or } before the new's open paren.
1754     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1755       return;
1756 
1757     // Consume the parens.
1758     if (FormatTok->is(tok::l_paren)) {
1759       parseParens();
1760 
1761       // If there is a class body of an anonymous class, consume that as child.
1762       if (FormatTok->is(tok::l_brace))
1763         parseChildBlock();
1764       return;
1765     }
1766     nextToken();
1767   } while (!eof());
1768 }
1769 
1770 void UnwrappedLineParser::parseForOrWhileLoop() {
1771   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1772          "'for', 'while' or foreach macro expected");
1773   nextToken();
1774   // JS' for await ( ...
1775   if (Style.Language == FormatStyle::LK_JavaScript &&
1776       FormatTok->is(Keywords.kw_await))
1777     nextToken();
1778   if (FormatTok->Tok.is(tok::l_paren))
1779     parseParens();
1780   if (FormatTok->Tok.is(tok::l_brace)) {
1781     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1782     parseBlock(/*MustBeDeclaration=*/false);
1783     addUnwrappedLine();
1784   } else {
1785     addUnwrappedLine();
1786     ++Line->Level;
1787     parseStructuralElement();
1788     --Line->Level;
1789   }
1790 }
1791 
1792 void UnwrappedLineParser::parseDoWhile() {
1793   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1794   nextToken();
1795   if (FormatTok->Tok.is(tok::l_brace)) {
1796     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1797     parseBlock(/*MustBeDeclaration=*/false);
1798     if (Style.BraceWrapping.IndentBraces)
1799       addUnwrappedLine();
1800   } else {
1801     addUnwrappedLine();
1802     ++Line->Level;
1803     parseStructuralElement();
1804     --Line->Level;
1805   }
1806 
1807   // FIXME: Add error handling.
1808   if (!FormatTok->Tok.is(tok::kw_while)) {
1809     addUnwrappedLine();
1810     return;
1811   }
1812 
1813   nextToken();
1814   parseStructuralElement();
1815 }
1816 
1817 void UnwrappedLineParser::parseLabel() {
1818   nextToken();
1819   unsigned OldLineLevel = Line->Level;
1820   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1821     --Line->Level;
1822   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1823     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1824     parseBlock(/*MustBeDeclaration=*/false);
1825     if (FormatTok->Tok.is(tok::kw_break)) {
1826       if (Style.BraceWrapping.AfterControlStatement)
1827         addUnwrappedLine();
1828       parseStructuralElement();
1829     }
1830     addUnwrappedLine();
1831   } else {
1832     if (FormatTok->is(tok::semi))
1833       nextToken();
1834     addUnwrappedLine();
1835   }
1836   Line->Level = OldLineLevel;
1837   if (FormatTok->isNot(tok::l_brace)) {
1838     parseStructuralElement();
1839     addUnwrappedLine();
1840   }
1841 }
1842 
1843 void UnwrappedLineParser::parseCaseLabel() {
1844   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1845   // FIXME: fix handling of complex expressions here.
1846   do {
1847     nextToken();
1848   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1849   parseLabel();
1850 }
1851 
1852 void UnwrappedLineParser::parseSwitch() {
1853   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1854   nextToken();
1855   if (FormatTok->Tok.is(tok::l_paren))
1856     parseParens();
1857   if (FormatTok->Tok.is(tok::l_brace)) {
1858     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1859     parseBlock(/*MustBeDeclaration=*/false);
1860     addUnwrappedLine();
1861   } else {
1862     addUnwrappedLine();
1863     ++Line->Level;
1864     parseStructuralElement();
1865     --Line->Level;
1866   }
1867 }
1868 
1869 void UnwrappedLineParser::parseAccessSpecifier() {
1870   nextToken();
1871   // Understand Qt's slots.
1872   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1873     nextToken();
1874   // Otherwise, we don't know what it is, and we'd better keep the next token.
1875   if (FormatTok->Tok.is(tok::colon))
1876     nextToken();
1877   addUnwrappedLine();
1878 }
1879 
1880 bool UnwrappedLineParser::parseEnum() {
1881   // Won't be 'enum' for NS_ENUMs.
1882   if (FormatTok->Tok.is(tok::kw_enum))
1883     nextToken();
1884 
1885   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1886   // declarations. An "enum" keyword followed by a colon would be a syntax
1887   // error and thus assume it is just an identifier.
1888   if (Style.Language == FormatStyle::LK_JavaScript &&
1889       FormatTok->isOneOf(tok::colon, tok::question))
1890     return false;
1891 
1892   // Eat up enum class ...
1893   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1894     nextToken();
1895 
1896   while (FormatTok->Tok.getIdentifierInfo() ||
1897          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1898                             tok::greater, tok::comma, tok::question)) {
1899     nextToken();
1900     // We can have macros or attributes in between 'enum' and the enum name.
1901     if (FormatTok->is(tok::l_paren))
1902       parseParens();
1903     if (FormatTok->is(tok::identifier)) {
1904       nextToken();
1905       // If there are two identifiers in a row, this is likely an elaborate
1906       // return type. In Java, this can be "implements", etc.
1907       if (Style.isCpp() && FormatTok->is(tok::identifier))
1908         return false;
1909     }
1910   }
1911 
1912   // Just a declaration or something is wrong.
1913   if (FormatTok->isNot(tok::l_brace))
1914     return true;
1915   FormatTok->BlockKind = BK_Block;
1916 
1917   if (Style.Language == FormatStyle::LK_Java) {
1918     // Java enums are different.
1919     parseJavaEnumBody();
1920     return true;
1921   }
1922   if (Style.Language == FormatStyle::LK_Proto) {
1923     parseBlock(/*MustBeDeclaration=*/true);
1924     return true;
1925   }
1926 
1927   // Parse enum body.
1928   nextToken();
1929   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1930   if (HasError) {
1931     if (FormatTok->is(tok::semi))
1932       nextToken();
1933     addUnwrappedLine();
1934   }
1935   return true;
1936 
1937   // There is no addUnwrappedLine() here so that we fall through to parsing a
1938   // structural element afterwards. Thus, in "enum A {} n, m;",
1939   // "} n, m;" will end up in one unwrapped line.
1940 }
1941 
1942 void UnwrappedLineParser::parseJavaEnumBody() {
1943   // Determine whether the enum is simple, i.e. does not have a semicolon or
1944   // constants with class bodies. Simple enums can be formatted like braced
1945   // lists, contracted to a single line, etc.
1946   unsigned StoredPosition = Tokens->getPosition();
1947   bool IsSimple = true;
1948   FormatToken *Tok = Tokens->getNextToken();
1949   while (Tok) {
1950     if (Tok->is(tok::r_brace))
1951       break;
1952     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1953       IsSimple = false;
1954       break;
1955     }
1956     // FIXME: This will also mark enums with braces in the arguments to enum
1957     // constants as "not simple". This is probably fine in practice, though.
1958     Tok = Tokens->getNextToken();
1959   }
1960   FormatTok = Tokens->setPosition(StoredPosition);
1961 
1962   if (IsSimple) {
1963     nextToken();
1964     parseBracedList();
1965     addUnwrappedLine();
1966     return;
1967   }
1968 
1969   // Parse the body of a more complex enum.
1970   // First add a line for everything up to the "{".
1971   nextToken();
1972   addUnwrappedLine();
1973   ++Line->Level;
1974 
1975   // Parse the enum constants.
1976   while (FormatTok) {
1977     if (FormatTok->is(tok::l_brace)) {
1978       // Parse the constant's class body.
1979       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1980                  /*MunchSemi=*/false);
1981     } else if (FormatTok->is(tok::l_paren)) {
1982       parseParens();
1983     } else if (FormatTok->is(tok::comma)) {
1984       nextToken();
1985       addUnwrappedLine();
1986     } else if (FormatTok->is(tok::semi)) {
1987       nextToken();
1988       addUnwrappedLine();
1989       break;
1990     } else if (FormatTok->is(tok::r_brace)) {
1991       addUnwrappedLine();
1992       break;
1993     } else {
1994       nextToken();
1995     }
1996   }
1997 
1998   // Parse the class body after the enum's ";" if any.
1999   parseLevel(/*HasOpeningBrace=*/true);
2000   nextToken();
2001   --Line->Level;
2002   addUnwrappedLine();
2003 }
2004 
2005 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2006   const FormatToken &InitialToken = *FormatTok;
2007   nextToken();
2008 
2009   // The actual identifier can be a nested name specifier, and in macros
2010   // it is often token-pasted.
2011   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2012                             tok::kw___attribute, tok::kw___declspec,
2013                             tok::kw_alignas) ||
2014          ((Style.Language == FormatStyle::LK_Java ||
2015            Style.Language == FormatStyle::LK_JavaScript) &&
2016           FormatTok->isOneOf(tok::period, tok::comma))) {
2017     if (Style.Language == FormatStyle::LK_JavaScript &&
2018         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2019       // JavaScript/TypeScript supports inline object types in
2020       // extends/implements positions:
2021       //     class Foo implements {bar: number} { }
2022       nextToken();
2023       if (FormatTok->is(tok::l_brace)) {
2024         tryToParseBracedList();
2025         continue;
2026       }
2027     }
2028     bool IsNonMacroIdentifier =
2029         FormatTok->is(tok::identifier) &&
2030         FormatTok->TokenText != FormatTok->TokenText.upper();
2031     nextToken();
2032     // We can have macros or attributes in between 'class' and the class name.
2033     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
2034       parseParens();
2035   }
2036 
2037   // Note that parsing away template declarations here leads to incorrectly
2038   // accepting function declarations as record declarations.
2039   // In general, we cannot solve this problem. Consider:
2040   // class A<int> B() {}
2041   // which can be a function definition or a class definition when B() is a
2042   // macro. If we find enough real-world cases where this is a problem, we
2043   // can parse for the 'template' keyword in the beginning of the statement,
2044   // and thus rule out the record production in case there is no template
2045   // (this would still leave us with an ambiguity between template function
2046   // and class declarations).
2047   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2048     while (!eof()) {
2049       if (FormatTok->is(tok::l_brace)) {
2050         calculateBraceTypes(/*ExpectClassBody=*/true);
2051         if (!tryToParseBracedList())
2052           break;
2053       }
2054       if (FormatTok->Tok.is(tok::semi))
2055         return;
2056       nextToken();
2057     }
2058   }
2059   if (FormatTok->Tok.is(tok::l_brace)) {
2060     if (ParseAsExpr) {
2061       parseChildBlock();
2062     } else {
2063       if (ShouldBreakBeforeBrace(Style, InitialToken))
2064         addUnwrappedLine();
2065 
2066       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
2067                  /*MunchSemi=*/false);
2068     }
2069   }
2070   // There is no addUnwrappedLine() here so that we fall through to parsing a
2071   // structural element afterwards. Thus, in "class A {} n, m;",
2072   // "} n, m;" will end up in one unwrapped line.
2073 }
2074 
2075 void UnwrappedLineParser::parseObjCProtocolList() {
2076   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2077   do
2078     nextToken();
2079   while (!eof() && FormatTok->Tok.isNot(tok::greater));
2080   nextToken(); // Skip '>'.
2081 }
2082 
2083 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2084   do {
2085     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2086       nextToken();
2087       addUnwrappedLine();
2088       break;
2089     }
2090     if (FormatTok->is(tok::l_brace)) {
2091       parseBlock(/*MustBeDeclaration=*/false);
2092       // In ObjC interfaces, nothing should be following the "}".
2093       addUnwrappedLine();
2094     } else if (FormatTok->is(tok::r_brace)) {
2095       // Ignore stray "}". parseStructuralElement doesn't consume them.
2096       nextToken();
2097       addUnwrappedLine();
2098     } else {
2099       parseStructuralElement();
2100     }
2101   } while (!eof());
2102 }
2103 
2104 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2105   nextToken();
2106   nextToken(); // interface name
2107 
2108   // @interface can be followed by either a base class, or a category.
2109   if (FormatTok->Tok.is(tok::colon)) {
2110     nextToken();
2111     nextToken(); // base class name
2112   } else if (FormatTok->Tok.is(tok::l_paren))
2113     // Skip category, if present.
2114     parseParens();
2115 
2116   if (FormatTok->Tok.is(tok::less))
2117     parseObjCProtocolList();
2118 
2119   if (FormatTok->Tok.is(tok::l_brace)) {
2120     if (Style.BraceWrapping.AfterObjCDeclaration)
2121       addUnwrappedLine();
2122     parseBlock(/*MustBeDeclaration=*/true);
2123   }
2124 
2125   // With instance variables, this puts '}' on its own line.  Without instance
2126   // variables, this ends the @interface line.
2127   addUnwrappedLine();
2128 
2129   parseObjCUntilAtEnd();
2130 }
2131 
2132 void UnwrappedLineParser::parseObjCProtocol() {
2133   nextToken();
2134   nextToken(); // protocol name
2135 
2136   if (FormatTok->Tok.is(tok::less))
2137     parseObjCProtocolList();
2138 
2139   // Check for protocol declaration.
2140   if (FormatTok->Tok.is(tok::semi)) {
2141     nextToken();
2142     return addUnwrappedLine();
2143   }
2144 
2145   addUnwrappedLine();
2146   parseObjCUntilAtEnd();
2147 }
2148 
2149 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2150   bool IsImport = FormatTok->is(Keywords.kw_import);
2151   assert(IsImport || FormatTok->is(tok::kw_export));
2152   nextToken();
2153 
2154   // Consume the "default" in "export default class/function".
2155   if (FormatTok->is(tok::kw_default))
2156     nextToken();
2157 
2158   // Consume "async function", "function" and "default function", so that these
2159   // get parsed as free-standing JS functions, i.e. do not require a trailing
2160   // semicolon.
2161   if (FormatTok->is(Keywords.kw_async))
2162     nextToken();
2163   if (FormatTok->is(Keywords.kw_function)) {
2164     nextToken();
2165     return;
2166   }
2167 
2168   // For imports, `export *`, `export {...}`, consume the rest of the line up
2169   // to the terminating `;`. For everything else, just return and continue
2170   // parsing the structural element, i.e. the declaration or expression for
2171   // `export default`.
2172   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2173       !FormatTok->isStringLiteral())
2174     return;
2175 
2176   while (!eof()) {
2177     if (FormatTok->is(tok::semi))
2178       return;
2179     if (Line->Tokens.size() == 0) {
2180       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2181       // import statement should terminate.
2182       return;
2183     }
2184     if (FormatTok->is(tok::l_brace)) {
2185       FormatTok->BlockKind = BK_Block;
2186       nextToken();
2187       parseBracedList();
2188     } else {
2189       nextToken();
2190     }
2191   }
2192 }
2193 
2194 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
2195                                                  StringRef Prefix = "") {
2196   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
2197                << (Line.InPPDirective ? " MACRO" : "") << ": ";
2198   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2199                                                     E = Line.Tokens.end();
2200        I != E; ++I) {
2201     llvm::dbgs() << I->Tok->Tok.getName() << "["
2202                  << "T=" << I->Tok->Type << ", OC=" << I->Tok->OriginalColumn
2203                  << "] ";
2204   }
2205   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
2206                                                     E = Line.Tokens.end();
2207        I != E; ++I) {
2208     const UnwrappedLineNode &Node = *I;
2209     for (SmallVectorImpl<UnwrappedLine>::const_iterator
2210              I = Node.Children.begin(),
2211              E = Node.Children.end();
2212          I != E; ++I) {
2213       printDebugInfo(*I, "\nChild: ");
2214     }
2215   }
2216   llvm::dbgs() << "\n";
2217 }
2218 
2219 void UnwrappedLineParser::addUnwrappedLine() {
2220   if (Line->Tokens.empty())
2221     return;
2222   DEBUG({
2223     if (CurrentLines == &Lines)
2224       printDebugInfo(*Line);
2225   });
2226   CurrentLines->push_back(std::move(*Line));
2227   Line->Tokens.clear();
2228   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
2229   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
2230     CurrentLines->append(
2231         std::make_move_iterator(PreprocessorDirectives.begin()),
2232         std::make_move_iterator(PreprocessorDirectives.end()));
2233     PreprocessorDirectives.clear();
2234   }
2235   // Disconnect the current token from the last token on the previous line.
2236   FormatTok->Previous = nullptr;
2237 }
2238 
2239 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
2240 
2241 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
2242   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
2243          FormatTok.NewlinesBefore > 0;
2244 }
2245 
2246 // Checks if \p FormatTok is a line comment that continues the line comment
2247 // section on \p Line.
2248 static bool continuesLineCommentSection(const FormatToken &FormatTok,
2249                                         const UnwrappedLine &Line,
2250                                         llvm::Regex &CommentPragmasRegex) {
2251   if (Line.Tokens.empty())
2252     return false;
2253 
2254   StringRef IndentContent = FormatTok.TokenText;
2255   if (FormatTok.TokenText.startswith("//") ||
2256       FormatTok.TokenText.startswith("/*"))
2257     IndentContent = FormatTok.TokenText.substr(2);
2258   if (CommentPragmasRegex.match(IndentContent))
2259     return false;
2260 
2261   // If Line starts with a line comment, then FormatTok continues the comment
2262   // section if its original column is greater or equal to the original start
2263   // column of the line.
2264   //
2265   // Define the min column token of a line as follows: if a line ends in '{' or
2266   // contains a '{' followed by a line comment, then the min column token is
2267   // that '{'. Otherwise, the min column token of the line is the first token of
2268   // the line.
2269   //
2270   // If Line starts with a token other than a line comment, then FormatTok
2271   // continues the comment section if its original column is greater than the
2272   // original start column of the min column token of the line.
2273   //
2274   // For example, the second line comment continues the first in these cases:
2275   //
2276   // // first line
2277   // // second line
2278   //
2279   // and:
2280   //
2281   // // first line
2282   //  // second line
2283   //
2284   // and:
2285   //
2286   // int i; // first line
2287   //  // second line
2288   //
2289   // and:
2290   //
2291   // do { // first line
2292   //      // second line
2293   //   int i;
2294   // } while (true);
2295   //
2296   // and:
2297   //
2298   // enum {
2299   //   a, // first line
2300   //    // second line
2301   //   b
2302   // };
2303   //
2304   // The second line comment doesn't continue the first in these cases:
2305   //
2306   //   // first line
2307   //  // second line
2308   //
2309   // and:
2310   //
2311   // int i; // first line
2312   // // second line
2313   //
2314   // and:
2315   //
2316   // do { // first line
2317   //   // second line
2318   //   int i;
2319   // } while (true);
2320   //
2321   // and:
2322   //
2323   // enum {
2324   //   a, // first line
2325   //   // second line
2326   // };
2327   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
2328 
2329   // Scan for '{//'. If found, use the column of '{' as a min column for line
2330   // comment section continuation.
2331   const FormatToken *PreviousToken = nullptr;
2332   for (const UnwrappedLineNode &Node : Line.Tokens) {
2333     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
2334         isLineComment(*Node.Tok)) {
2335       MinColumnToken = PreviousToken;
2336       break;
2337     }
2338     PreviousToken = Node.Tok;
2339 
2340     // Grab the last newline preceding a token in this unwrapped line.
2341     if (Node.Tok->NewlinesBefore > 0) {
2342       MinColumnToken = Node.Tok;
2343     }
2344   }
2345   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
2346     MinColumnToken = PreviousToken;
2347   }
2348 
2349   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
2350                               MinColumnToken);
2351 }
2352 
2353 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
2354   bool JustComments = Line->Tokens.empty();
2355   for (SmallVectorImpl<FormatToken *>::const_iterator
2356            I = CommentsBeforeNextToken.begin(),
2357            E = CommentsBeforeNextToken.end();
2358        I != E; ++I) {
2359     // Line comments that belong to the same line comment section are put on the
2360     // same line since later we might want to reflow content between them.
2361     // Additional fine-grained breaking of line comment sections is controlled
2362     // by the class BreakableLineCommentSection in case it is desirable to keep
2363     // several line comment sections in the same unwrapped line.
2364     //
2365     // FIXME: Consider putting separate line comment sections as children to the
2366     // unwrapped line instead.
2367     (*I)->ContinuesLineCommentSection =
2368         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
2369     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
2370       addUnwrappedLine();
2371     pushToken(*I);
2372   }
2373   if (NewlineBeforeNext && JustComments)
2374     addUnwrappedLine();
2375   CommentsBeforeNextToken.clear();
2376 }
2377 
2378 void UnwrappedLineParser::nextToken(int LevelDifference) {
2379   if (eof())
2380     return;
2381   flushComments(isOnNewLine(*FormatTok));
2382   pushToken(FormatTok);
2383   FormatToken *Previous = FormatTok;
2384   if (Style.Language != FormatStyle::LK_JavaScript)
2385     readToken(LevelDifference);
2386   else
2387     readTokenWithJavaScriptASI();
2388   FormatTok->Previous = Previous;
2389 }
2390 
2391 void UnwrappedLineParser::distributeComments(
2392     const SmallVectorImpl<FormatToken *> &Comments,
2393     const FormatToken *NextTok) {
2394   // Whether or not a line comment token continues a line is controlled by
2395   // the method continuesLineCommentSection, with the following caveat:
2396   //
2397   // Define a trail of Comments to be a nonempty proper postfix of Comments such
2398   // that each comment line from the trail is aligned with the next token, if
2399   // the next token exists. If a trail exists, the beginning of the maximal
2400   // trail is marked as a start of a new comment section.
2401   //
2402   // For example in this code:
2403   //
2404   // int a; // line about a
2405   //   // line 1 about b
2406   //   // line 2 about b
2407   //   int b;
2408   //
2409   // the two lines about b form a maximal trail, so there are two sections, the
2410   // first one consisting of the single comment "// line about a" and the
2411   // second one consisting of the next two comments.
2412   if (Comments.empty())
2413     return;
2414   bool ShouldPushCommentsInCurrentLine = true;
2415   bool HasTrailAlignedWithNextToken = false;
2416   unsigned StartOfTrailAlignedWithNextToken = 0;
2417   if (NextTok) {
2418     // We are skipping the first element intentionally.
2419     for (unsigned i = Comments.size() - 1; i > 0; --i) {
2420       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
2421         HasTrailAlignedWithNextToken = true;
2422         StartOfTrailAlignedWithNextToken = i;
2423       }
2424     }
2425   }
2426   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
2427     FormatToken *FormatTok = Comments[i];
2428     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
2429       FormatTok->ContinuesLineCommentSection = false;
2430     } else {
2431       FormatTok->ContinuesLineCommentSection =
2432           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
2433     }
2434     if (!FormatTok->ContinuesLineCommentSection &&
2435         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
2436       ShouldPushCommentsInCurrentLine = false;
2437     }
2438     if (ShouldPushCommentsInCurrentLine) {
2439       pushToken(FormatTok);
2440     } else {
2441       CommentsBeforeNextToken.push_back(FormatTok);
2442     }
2443   }
2444 }
2445 
2446 void UnwrappedLineParser::readToken(int LevelDifference) {
2447   SmallVector<FormatToken *, 1> Comments;
2448   do {
2449     FormatTok = Tokens->getNextToken();
2450     assert(FormatTok);
2451     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
2452            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
2453       distributeComments(Comments, FormatTok);
2454       Comments.clear();
2455       // If there is an unfinished unwrapped line, we flush the preprocessor
2456       // directives only after that unwrapped line was finished later.
2457       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
2458       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
2459       assert((LevelDifference >= 0 ||
2460               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
2461              "LevelDifference makes Line->Level negative");
2462       Line->Level += LevelDifference;
2463       // Comments stored before the preprocessor directive need to be output
2464       // before the preprocessor directive, at the same level as the
2465       // preprocessor directive, as we consider them to apply to the directive.
2466       flushComments(isOnNewLine(*FormatTok));
2467       parsePPDirective();
2468     }
2469     while (FormatTok->Type == TT_ConflictStart ||
2470            FormatTok->Type == TT_ConflictEnd ||
2471            FormatTok->Type == TT_ConflictAlternative) {
2472       if (FormatTok->Type == TT_ConflictStart) {
2473         conditionalCompilationStart(/*Unreachable=*/false);
2474       } else if (FormatTok->Type == TT_ConflictAlternative) {
2475         conditionalCompilationAlternative();
2476       } else if (FormatTok->Type == TT_ConflictEnd) {
2477         conditionalCompilationEnd();
2478       }
2479       FormatTok = Tokens->getNextToken();
2480       FormatTok->MustBreakBefore = true;
2481     }
2482 
2483     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
2484         !Line->InPPDirective) {
2485       continue;
2486     }
2487 
2488     if (!FormatTok->Tok.is(tok::comment)) {
2489       distributeComments(Comments, FormatTok);
2490       Comments.clear();
2491       return;
2492     }
2493 
2494     Comments.push_back(FormatTok);
2495   } while (!eof());
2496 
2497   distributeComments(Comments, nullptr);
2498   Comments.clear();
2499 }
2500 
2501 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
2502   Line->Tokens.push_back(UnwrappedLineNode(Tok));
2503   if (MustBreakBeforeNextToken) {
2504     Line->Tokens.back().Tok->MustBreakBefore = true;
2505     MustBreakBeforeNextToken = false;
2506   }
2507 }
2508 
2509 } // end namespace format
2510 } // end namespace clang
2511