1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31 
32   // Returns the next token in the token stream.
33   virtual FormatToken *getNextToken() = 0;
34 
35   // Returns the token preceding the token returned by the last call to
36   // getNextToken() in the token stream, or nullptr if no such token exists.
37   virtual FormatToken *getPreviousToken() = 0;
38 
39   // Returns the token that would be returned by the next call to
40   // getNextToken().
41   virtual FormatToken *peekNextToken() = 0;
42 
43   // Returns whether we are at the end of the file.
44   // This can be different from whether getNextToken() returned an eof token
45   // when the FormatTokenSource is a view on a part of the token stream.
46   virtual bool isEOF() = 0;
47 
48   // Gets the current position in the token stream, to be used by setPosition().
49   virtual unsigned getPosition() = 0;
50 
51   // Resets the token stream to the state it was in when getPosition() returned
52   // Position, and return the token at that position in the stream.
53   virtual FormatToken *setPosition(unsigned Position) = 0;
54 };
55 
56 namespace {
57 
58 class ScopedDeclarationState {
59 public:
60   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
61                          bool MustBeDeclaration)
62       : Line(Line), Stack(Stack) {
63     Line.MustBeDeclaration = MustBeDeclaration;
64     Stack.push_back(MustBeDeclaration);
65   }
66   ~ScopedDeclarationState() {
67     Stack.pop_back();
68     if (!Stack.empty())
69       Line.MustBeDeclaration = Stack.back();
70     else
71       Line.MustBeDeclaration = true;
72   }
73 
74 private:
75   UnwrappedLine &Line;
76   llvm::BitVector &Stack;
77 };
78 
79 static bool isLineComment(const FormatToken &FormatTok) {
80   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
81 }
82 
83 // Checks if \p FormatTok is a line comment that continues the line comment
84 // \p Previous. The original column of \p MinColumnToken is used to determine
85 // whether \p FormatTok is indented enough to the right to continue \p Previous.
86 static bool continuesLineComment(const FormatToken &FormatTok,
87                                  const FormatToken *Previous,
88                                  const FormatToken *MinColumnToken) {
89   if (!Previous || !MinColumnToken)
90     return false;
91   unsigned MinContinueColumn =
92       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
93   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
94          isLineComment(*Previous) &&
95          FormatTok.OriginalColumn >= MinContinueColumn;
96 }
97 
98 class ScopedMacroState : public FormatTokenSource {
99 public:
100   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
101                    FormatToken *&ResetToken)
102       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
103         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
104         Token(nullptr), PreviousToken(nullptr) {
105     FakeEOF.Tok.startToken();
106     FakeEOF.Tok.setKind(tok::eof);
107     TokenSource = this;
108     Line.Level = 0;
109     Line.InPPDirective = true;
110   }
111 
112   ~ScopedMacroState() override {
113     TokenSource = PreviousTokenSource;
114     ResetToken = Token;
115     Line.InPPDirective = false;
116     Line.Level = PreviousLineLevel;
117   }
118 
119   FormatToken *getNextToken() override {
120     // The \c UnwrappedLineParser guards against this by never calling
121     // \c getNextToken() after it has encountered the first eof token.
122     assert(!eof());
123     PreviousToken = Token;
124     Token = PreviousTokenSource->getNextToken();
125     if (eof())
126       return &FakeEOF;
127     return Token;
128   }
129 
130   FormatToken *getPreviousToken() override {
131     return PreviousTokenSource->getPreviousToken();
132   }
133 
134   FormatToken *peekNextToken() override {
135     if (eof())
136       return &FakeEOF;
137     return PreviousTokenSource->peekNextToken();
138   }
139 
140   bool isEOF() override { return PreviousTokenSource->isEOF(); }
141 
142   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
143 
144   FormatToken *setPosition(unsigned Position) override {
145     PreviousToken = nullptr;
146     Token = PreviousTokenSource->setPosition(Position);
147     return Token;
148   }
149 
150 private:
151   bool eof() {
152     return Token && Token->HasUnescapedNewline &&
153            !continuesLineComment(*Token, PreviousToken,
154                                  /*MinColumnToken=*/PreviousToken);
155   }
156 
157   FormatToken FakeEOF;
158   UnwrappedLine &Line;
159   FormatTokenSource *&TokenSource;
160   FormatToken *&ResetToken;
161   unsigned PreviousLineLevel;
162   FormatTokenSource *PreviousTokenSource;
163 
164   FormatToken *Token;
165   FormatToken *PreviousToken;
166 };
167 
168 } // end anonymous namespace
169 
170 class ScopedLineState {
171 public:
172   ScopedLineState(UnwrappedLineParser &Parser,
173                   bool SwitchToPreprocessorLines = false)
174       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
175     if (SwitchToPreprocessorLines)
176       Parser.CurrentLines = &Parser.PreprocessorDirectives;
177     else if (!Parser.Line->Tokens.empty())
178       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
179     PreBlockLine = std::move(Parser.Line);
180     Parser.Line = std::make_unique<UnwrappedLine>();
181     Parser.Line->Level = PreBlockLine->Level;
182     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
183   }
184 
185   ~ScopedLineState() {
186     if (!Parser.Line->Tokens.empty())
187       Parser.addUnwrappedLine();
188     assert(Parser.Line->Tokens.empty());
189     Parser.Line = std::move(PreBlockLine);
190     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
191       Parser.MustBreakBeforeNextToken = true;
192     Parser.CurrentLines = OriginalLines;
193   }
194 
195 private:
196   UnwrappedLineParser &Parser;
197 
198   std::unique_ptr<UnwrappedLine> PreBlockLine;
199   SmallVectorImpl<UnwrappedLine> *OriginalLines;
200 };
201 
202 class CompoundStatementIndenter {
203 public:
204   CompoundStatementIndenter(UnwrappedLineParser *Parser,
205                             const FormatStyle &Style, unsigned &LineLevel)
206       : CompoundStatementIndenter(Parser, LineLevel,
207                                   Style.BraceWrapping.AfterControlStatement,
208                                   Style.BraceWrapping.IndentBraces) {}
209   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
210                             bool WrapBrace, bool IndentBrace)
211       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
212     if (WrapBrace)
213       Parser->addUnwrappedLine();
214     if (IndentBrace)
215       ++LineLevel;
216   }
217   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
218 
219 private:
220   unsigned &LineLevel;
221   unsigned OldLineLevel;
222 };
223 
224 namespace {
225 
226 class IndexedTokenSource : public FormatTokenSource {
227 public:
228   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
229       : Tokens(Tokens), Position(-1) {}
230 
231   FormatToken *getNextToken() override {
232     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
233       LLVM_DEBUG({
234         llvm::dbgs() << "Next ";
235         dbgToken(Position);
236       });
237       return Tokens[Position];
238     }
239     ++Position;
240     LLVM_DEBUG({
241       llvm::dbgs() << "Next ";
242       dbgToken(Position);
243     });
244     return Tokens[Position];
245   }
246 
247   FormatToken *getPreviousToken() override {
248     return Position > 0 ? Tokens[Position - 1] : nullptr;
249   }
250 
251   FormatToken *peekNextToken() override {
252     int Next = Position + 1;
253     LLVM_DEBUG({
254       llvm::dbgs() << "Peeking ";
255       dbgToken(Next);
256     });
257     return Tokens[Next];
258   }
259 
260   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
261 
262   unsigned getPosition() override {
263     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
264     assert(Position >= 0);
265     return Position;
266   }
267 
268   FormatToken *setPosition(unsigned P) override {
269     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
270     Position = P;
271     return Tokens[Position];
272   }
273 
274   void reset() { Position = -1; }
275 
276 private:
277   void dbgToken(int Position, llvm::StringRef Indent = "") {
278     FormatToken *Tok = Tokens[Position];
279     llvm::dbgs() << Indent << "[" << Position
280                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
281                  << ", Macro: " << !!Tok->MacroCtx << "\n";
282   }
283 
284   ArrayRef<FormatToken *> Tokens;
285   int Position;
286 };
287 
288 } // end anonymous namespace
289 
290 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
291                                          const AdditionalKeywords &Keywords,
292                                          unsigned FirstStartColumn,
293                                          ArrayRef<FormatToken *> Tokens,
294                                          UnwrappedLineConsumer &Callback)
295     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
296       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
297       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
298       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
299       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
300                        ? IG_Rejected
301                        : IG_Inited),
302       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
303 
304 void UnwrappedLineParser::reset() {
305   PPBranchLevel = -1;
306   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
307                      ? IG_Rejected
308                      : IG_Inited;
309   IncludeGuardToken = nullptr;
310   Line.reset(new UnwrappedLine);
311   CommentsBeforeNextToken.clear();
312   FormatTok = nullptr;
313   MustBreakBeforeNextToken = false;
314   PreprocessorDirectives.clear();
315   CurrentLines = &Lines;
316   DeclarationScopeStack.clear();
317   NestedTooDeep.clear();
318   PPStack.clear();
319   Line->FirstStartColumn = FirstStartColumn;
320 }
321 
322 void UnwrappedLineParser::parse() {
323   IndexedTokenSource TokenSource(AllTokens);
324   Line->FirstStartColumn = FirstStartColumn;
325   do {
326     LLVM_DEBUG(llvm::dbgs() << "----\n");
327     reset();
328     Tokens = &TokenSource;
329     TokenSource.reset();
330 
331     readToken();
332     parseFile();
333 
334     // If we found an include guard then all preprocessor directives (other than
335     // the guard) are over-indented by one.
336     if (IncludeGuard == IG_Found)
337       for (auto &Line : Lines)
338         if (Line.InPPDirective && Line.Level > 0)
339           --Line.Level;
340 
341     // Create line with eof token.
342     pushToken(FormatTok);
343     addUnwrappedLine();
344 
345     for (const UnwrappedLine &Line : Lines)
346       Callback.consumeUnwrappedLine(Line);
347 
348     Callback.finishRun();
349     Lines.clear();
350     while (!PPLevelBranchIndex.empty() &&
351            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
352       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
353       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
354     }
355     if (!PPLevelBranchIndex.empty()) {
356       ++PPLevelBranchIndex.back();
357       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
358       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
359     }
360   } while (!PPLevelBranchIndex.empty());
361 }
362 
363 void UnwrappedLineParser::parseFile() {
364   // The top-level context in a file always has declarations, except for pre-
365   // processor directives and JavaScript files.
366   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
367   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
368                                           MustBeDeclaration);
369   if (Style.Language == FormatStyle::LK_TextProto)
370     parseBracedList();
371   else
372     parseLevel(/*HasOpeningBrace=*/false);
373   // Make sure to format the remaining tokens.
374   //
375   // LK_TextProto is special since its top-level is parsed as the body of a
376   // braced list, which does not necessarily have natural line separators such
377   // as a semicolon. Comments after the last entry that have been determined to
378   // not belong to that line, as in:
379   //   key: value
380   //   // endfile comment
381   // do not have a chance to be put on a line of their own until this point.
382   // Here we add this newline before end-of-file comments.
383   if (Style.Language == FormatStyle::LK_TextProto &&
384       !CommentsBeforeNextToken.empty())
385     addUnwrappedLine();
386   flushComments(true);
387   addUnwrappedLine();
388 }
389 
390 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
391   do {
392     switch (FormatTok->Tok.getKind()) {
393     case tok::l_brace:
394       return;
395     default:
396       if (FormatTok->is(Keywords.kw_where)) {
397         addUnwrappedLine();
398         nextToken();
399         parseCSharpGenericTypeConstraint();
400         break;
401       }
402       nextToken();
403       break;
404     }
405   } while (!eof());
406 }
407 
408 void UnwrappedLineParser::parseCSharpAttribute() {
409   int UnpairedSquareBrackets = 1;
410   do {
411     switch (FormatTok->Tok.getKind()) {
412     case tok::r_square:
413       nextToken();
414       --UnpairedSquareBrackets;
415       if (UnpairedSquareBrackets == 0) {
416         addUnwrappedLine();
417         return;
418       }
419       break;
420     case tok::l_square:
421       ++UnpairedSquareBrackets;
422       nextToken();
423       break;
424     default:
425       nextToken();
426       break;
427     }
428   } while (!eof());
429 }
430 
431 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
432   if (!Lines.empty() && Lines.back().InPPDirective)
433     return true;
434 
435   const FormatToken *Previous = Tokens->getPreviousToken();
436   return Previous && Previous->is(tok::comment) &&
437          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
438 }
439 
// Parses the elements of one nesting level until eof or, if \p HasOpeningBrace
// is set, until the closing brace of that level is reached (the closing brace
// itself is not consumed in that case).
//
// Returns true if a simple block, or false otherwise. (A simple block has a
// single statement.) True is only returned when Style.RemoveBracesLLVM is set,
// the level was closed by a real '}' token, exactly one statement was counted,
// no comment or preprocessor directive precedes the start or the end of the
// level, and the token after the '}' is not a comment on the same line.
//
// \p IfKind is passed through to parseStructuralElement() for ordinary
// statements.
bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) {
  // When brace removal is disabled this is simply true, which makes the
  // simple-block test below fail without querying the token stream.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    // Macro block begin/end tokens are dispatched like '{' and '}'.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace) {
        if (!Style.RemoveBracesLLVM)
          return false;
        // A TT_MacroBlockEnd token was mapped to r_brace above but is not a
        // real '}', so it never counts as closing a simple block.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective())
          return false;
        // A comment directly after the '}' on the same line disqualifies the
        // block as well.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // Closing brace without a matching opening brace at this level: consume
      // it and put it on its own line.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments to see whether 'default' is followed by ':',
      // then rewind the token stream.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Only the first label seen at this level may bump the indentation.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      LLVM_FALLTHROUGH;
    default:
      parseStructuralElement(IfKind, !HasOpeningBrace);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      break;
    }
  } while (!eof());
  return false;
}
528 
529 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
530   // We'll parse forward through the tokens until we hit
531   // a closing brace or eof - note that getNextToken() will
532   // parse macros, so this will magically work inside macro
533   // definitions, too.
534   unsigned StoredPosition = Tokens->getPosition();
535   FormatToken *Tok = FormatTok;
536   const FormatToken *PrevTok = Tok->Previous;
537   // Keep a stack of positions of lbrace tokens. We will
538   // update information about whether an lbrace starts a
539   // braced init list or a different block during the loop.
540   SmallVector<FormatToken *, 8> LBraceStack;
541   assert(Tok->Tok.is(tok::l_brace));
542   do {
543     // Get next non-comment token.
544     FormatToken *NextTok;
545     unsigned ReadTokens = 0;
546     do {
547       NextTok = Tokens->getNextToken();
548       ++ReadTokens;
549     } while (NextTok->is(tok::comment));
550 
551     switch (Tok->Tok.getKind()) {
552     case tok::l_brace:
553       if (Style.isJavaScript() && PrevTok) {
554         if (PrevTok->isOneOf(tok::colon, tok::less))
555           // A ':' indicates this code is in a type, or a braced list
556           // following a label in an object literal ({a: {b: 1}}).
557           // A '<' could be an object used in a comparison, but that is nonsense
558           // code (can never return true), so more likely it is a generic type
559           // argument (`X<{a: string; b: number}>`).
560           // The code below could be confused by semicolons between the
561           // individual members in a type member list, which would normally
562           // trigger BK_Block. In both cases, this must be parsed as an inline
563           // braced init.
564           Tok->setBlockKind(BK_BracedInit);
565         else if (PrevTok->is(tok::r_paren))
566           // `) { }` can only occur in function or method declarations in JS.
567           Tok->setBlockKind(BK_Block);
568       } else {
569         Tok->setBlockKind(BK_Unknown);
570       }
571       LBraceStack.push_back(Tok);
572       break;
573     case tok::r_brace:
574       if (LBraceStack.empty())
575         break;
576       if (LBraceStack.back()->is(BK_Unknown)) {
577         bool ProbablyBracedList = false;
578         if (Style.Language == FormatStyle::LK_Proto) {
579           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
580         } else {
581           // Skip NextTok over preprocessor lines, otherwise we may not
582           // properly diagnose the block as a braced intializer
583           // if the comma separator appears after the pp directive.
584           while (NextTok->is(tok::hash)) {
585             ScopedMacroState MacroState(*Line, Tokens, NextTok);
586             do {
587               NextTok = Tokens->getNextToken();
588               ++ReadTokens;
589             } while (NextTok->isNot(tok::eof));
590           }
591 
592           // Using OriginalColumn to distinguish between ObjC methods and
593           // binary operators is a bit hacky.
594           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
595                                   NextTok->OriginalColumn == 0;
596 
597           // If there is a comma, semicolon or right paren after the closing
598           // brace, we assume this is a braced initializer list.  Note that
599           // regardless how we mark inner braces here, we will overwrite the
600           // BlockKind later if we parse a braced list (where all blocks
601           // inside are by default braced lists), or when we explicitly detect
602           // blocks (for example while parsing lambdas).
603           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
604           // braced list in JS.
605           ProbablyBracedList =
606               (Style.isJavaScript() &&
607                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
608                                 Keywords.kw_as)) ||
609               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
610               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
611                                tok::r_paren, tok::r_square, tok::l_brace,
612                                tok::ellipsis) ||
613               (NextTok->is(tok::identifier) &&
614                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
615               (NextTok->is(tok::semi) &&
616                (!ExpectClassBody || LBraceStack.size() != 1)) ||
617               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
618           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
619             // We can have an array subscript after a braced init
620             // list, but C++11 attributes are expected after blocks.
621             NextTok = Tokens->getNextToken();
622             ++ReadTokens;
623             ProbablyBracedList = NextTok->isNot(tok::l_square);
624           }
625         }
626         if (ProbablyBracedList) {
627           Tok->setBlockKind(BK_BracedInit);
628           LBraceStack.back()->setBlockKind(BK_BracedInit);
629         } else {
630           Tok->setBlockKind(BK_Block);
631           LBraceStack.back()->setBlockKind(BK_Block);
632         }
633       }
634       LBraceStack.pop_back();
635       break;
636     case tok::identifier:
637       if (!Tok->is(TT_StatementMacro))
638         break;
639       LLVM_FALLTHROUGH;
640     case tok::at:
641     case tok::semi:
642     case tok::kw_if:
643     case tok::kw_while:
644     case tok::kw_for:
645     case tok::kw_switch:
646     case tok::kw_try:
647     case tok::kw___try:
648       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
649         LBraceStack.back()->setBlockKind(BK_Block);
650       break;
651     default:
652       break;
653     }
654     PrevTok = Tok;
655     Tok = NextTok;
656   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
657 
658   // Assume other blocks for all unclosed opening braces.
659   for (FormatToken *LBrace : LBraceStack)
660     if (LBrace->is(BK_Unknown))
661       LBrace->setBlockKind(BK_Block);
662 
663   FormatTok = Tokens->setPosition(StoredPosition);
664 }
665 
// Folds the std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
671 
672 size_t UnwrappedLineParser::computePPHash() const {
673   size_t h = 0;
674   for (const auto &i : PPStack) {
675     hash_combine(h, size_t(i.Kind));
676     hash_combine(h, i.Line);
677   }
678   return h;
679 }
680 
// Parses a block that starts at the current '{' (or TT_MacroBlockBegin token)
// up to and including its closing token, and links the opening and closing
// unwrapped lines when the preprocessor state is the same at both ends.
//
// \p MustBeDeclaration is the declaration-context flag installed for the
// block body.
// \p AddLevels is the number of indentation levels added for the body.
// \p MunchSemi: also consume a ';' that directly follows the block.
// \p UnindentWhitesmithsBraces: decrease the level by one after the opening
// line has been emitted.
//
// Returns the IfStmtKind reported by parseLevel() for the body (NotIf unless
// parseLevel() changes it).
UnwrappedLineParser::IfStmtKind
UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                bool MunchSemi,
                                bool UnindentWhitesmithsBraces) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Remember the opening token so it can be paired with the closing one.
  FormatToken *Tok = FormatTok;
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Hash of the preprocessor stack at the opening brace; compared with the
  // hash at the closing brace further down.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // When adding to the main Lines container, the number of pending
  // preprocessor directive lines is subtracted from the opening line index
  // below.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind);

  // The input ended before the block was closed.
  if (eof())
    return IfKind;

  // parseLevel() stopped on something other than the expected closing token:
  // restore the level and leave the token for the caller.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // For a simple block, cross-link the braces unless the token before the
  // closing brace is itself a non-optional '}'.
  if (SimpleBlock && Tok->is(tok::l_brace)) {
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      Tok->MatchingParen = FormatTok;
      FormatTok->MatchingParen = Tok;
    }
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::arrow)) {
    // Following the } we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Only link the opening and closing lines when the preprocessor stack
  // hashes to the same value at both braces.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}
777 
778 static bool isGoogScope(const UnwrappedLine &Line) {
779   // FIXME: Closure-library specific stuff should not be hard-coded but be
780   // configurable.
781   if (Line.Tokens.size() < 4)
782     return false;
783   auto I = Line.Tokens.begin();
784   if (I->Tok->TokenText != "goog")
785     return false;
786   ++I;
787   if (I->Tok->isNot(tok::period))
788     return false;
789   ++I;
790   if (I->Tok->TokenText != "scope")
791     return false;
792   ++I;
793   return I->Tok->is(tok::l_paren);
794 }
795 
796 static bool isIIFE(const UnwrappedLine &Line,
797                    const AdditionalKeywords &Keywords) {
798   // Look for the start of an immediately invoked anonymous function.
799   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
800   // This is commonly done in JavaScript to create a new, anonymous scope.
801   // Example: (function() { ... })()
802   if (Line.Tokens.size() < 3)
803     return false;
804   auto I = Line.Tokens.begin();
805   if (I->Tok->isNot(tok::l_paren))
806     return false;
807   ++I;
808   if (I->Tok->isNot(Keywords.kw_function))
809     return false;
810   ++I;
811   return I->Tok->is(tok::l_paren);
812 }
813 
814 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
815                                    const FormatToken &InitialToken) {
816   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
817     return Style.BraceWrapping.AfterNamespace;
818   if (InitialToken.is(tok::kw_class))
819     return Style.BraceWrapping.AfterClass;
820   if (InitialToken.is(tok::kw_union))
821     return Style.BraceWrapping.AfterUnion;
822   if (InitialToken.is(tok::kw_struct))
823     return Style.BraceWrapping.AfterStruct;
824   if (InitialToken.is(tok::kw_enum))
825     return Style.BraceWrapping.AfterEnum;
826   return false;
827 }
828 
829 void UnwrappedLineParser::parseChildBlock() {
830   FormatTok->setBlockKind(BK_Block);
831   nextToken();
832   {
833     bool SkipIndent = (Style.isJavaScript() &&
834                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
835     ScopedLineState LineState(*this);
836     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
837                                             /*MustBeDeclaration=*/false);
838     Line->Level += SkipIndent ? 0 : 1;
839     parseLevel(/*HasOpeningBrace=*/true);
840     flushComments(isOnNewLine(*FormatTok));
841     Line->Level -= SkipIndent ? 0 : 1;
842   }
843   nextToken();
844 }
845 
846 void UnwrappedLineParser::parsePPDirective() {
847   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
848   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
849 
850   nextToken();
851 
852   if (!FormatTok->Tok.getIdentifierInfo()) {
853     parsePPUnknown();
854     return;
855   }
856 
857   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
858   case tok::pp_define:
859     parsePPDefine();
860     return;
861   case tok::pp_if:
862     parsePPIf(/*IfDef=*/false);
863     break;
864   case tok::pp_ifdef:
865   case tok::pp_ifndef:
866     parsePPIf(/*IfDef=*/true);
867     break;
868   case tok::pp_else:
869     parsePPElse();
870     break;
871   case tok::pp_elifdef:
872   case tok::pp_elifndef:
873   case tok::pp_elif:
874     parsePPElIf();
875     break;
876   case tok::pp_endif:
877     parsePPEndIf();
878     break;
879   default:
880     parsePPUnknown();
881     break;
882   }
883 }
884 
885 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
886   size_t Line = CurrentLines->size();
887   if (CurrentLines == &PreprocessorDirectives)
888     Line += Lines.size();
889 
890   if (Unreachable ||
891       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
892     PPStack.push_back({PP_Unreachable, Line});
893   else
894     PPStack.push_back({PP_Conditional, Line});
895 }
896 
897 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
898   ++PPBranchLevel;
899   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
900   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
901     PPLevelBranchIndex.push_back(0);
902     PPLevelBranchCount.push_back(0);
903   }
904   PPChainBranchIndex.push(0);
905   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
906   conditionalCompilationCondition(Unreachable || Skip);
907 }
908 
909 void UnwrappedLineParser::conditionalCompilationAlternative() {
910   if (!PPStack.empty())
911     PPStack.pop_back();
912   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
913   if (!PPChainBranchIndex.empty())
914     ++PPChainBranchIndex.top();
915   conditionalCompilationCondition(
916       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
917       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
918 }
919 
920 void UnwrappedLineParser::conditionalCompilationEnd() {
921   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
922   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
923     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
924       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
925   }
926   // Guard against #endif's without #if.
927   if (PPBranchLevel > -1)
928     --PPBranchLevel;
929   if (!PPChainBranchIndex.empty())
930     PPChainBranchIndex.pop();
931   if (!PPStack.empty())
932     PPStack.pop_back();
933 }
934 
935 void UnwrappedLineParser::parsePPIf(bool IfDef) {
936   bool IfNDef = FormatTok->is(tok::pp_ifndef);
937   nextToken();
938   bool Unreachable = false;
939   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
940     Unreachable = true;
941   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
942     Unreachable = true;
943   conditionalCompilationStart(Unreachable);
944   FormatToken *IfCondition = FormatTok;
945   // If there's a #ifndef on the first line, and the only lines before it are
946   // comments, it could be an include guard.
947   bool MaybeIncludeGuard = IfNDef;
948   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
949     for (auto &Line : Lines) {
950       if (!Line.Tokens.front().Tok->is(tok::comment)) {
951         MaybeIncludeGuard = false;
952         IncludeGuard = IG_Rejected;
953         break;
954       }
955     }
956   --PPBranchLevel;
957   parsePPUnknown();
958   ++PPBranchLevel;
959   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
960     IncludeGuard = IG_IfNdefed;
961     IncludeGuardToken = IfCondition;
962   }
963 }
964 
965 void UnwrappedLineParser::parsePPElse() {
966   // If a potential include guard has an #else, it's not an include guard.
967   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
968     IncludeGuard = IG_Rejected;
969   conditionalCompilationAlternative();
970   if (PPBranchLevel > -1)
971     --PPBranchLevel;
972   parsePPUnknown();
973   ++PPBranchLevel;
974 }
975 
976 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
977 
978 void UnwrappedLineParser::parsePPEndIf() {
979   conditionalCompilationEnd();
980   parsePPUnknown();
981   // If the #endif of a potential include guard is the last thing in the file,
982   // then we found an include guard.
983   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
984       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
985     IncludeGuard = IG_Found;
986 }
987 
988 void UnwrappedLineParser::parsePPDefine() {
989   nextToken();
990 
991   if (!FormatTok->Tok.getIdentifierInfo()) {
992     IncludeGuard = IG_Rejected;
993     IncludeGuardToken = nullptr;
994     parsePPUnknown();
995     return;
996   }
997 
998   if (IncludeGuard == IG_IfNdefed &&
999       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1000     IncludeGuard = IG_Defined;
1001     IncludeGuardToken = nullptr;
1002     for (auto &Line : Lines) {
1003       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1004         IncludeGuard = IG_Rejected;
1005         break;
1006       }
1007     }
1008   }
1009 
1010   // In the context of a define, even keywords should be treated as normal
1011   // identifiers. Setting the kind to identifier is not enough, because we need
1012   // to treat additional keywords like __except as well, which are already
1013   // identifiers.
1014   FormatTok->Tok.setKind(tok::identifier);
1015   FormatTok->Tok.setIdentifierInfo(nullptr);
1016   nextToken();
1017   if (FormatTok->Tok.getKind() == tok::l_paren &&
1018       !FormatTok->hasWhitespaceBefore())
1019     parseParens();
1020   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1021     Line->Level += PPBranchLevel + 1;
1022   addUnwrappedLine();
1023   ++Line->Level;
1024 
1025   // Errors during a preprocessor directive can only affect the layout of the
1026   // preprocessor directive, and thus we ignore them. An alternative approach
1027   // would be to use the same approach we use on the file level (no
1028   // re-indentation if there was a structural error) within the macro
1029   // definition.
1030   parseFile();
1031 }
1032 
1033 void UnwrappedLineParser::parsePPUnknown() {
1034   do {
1035     nextToken();
1036   } while (!eof());
1037   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1038     Line->Level += PPBranchLevel + 1;
1039   addUnwrappedLine();
1040 }
1041 
1042 // Here we exclude certain tokens that are not usually the first token in an
1043 // unwrapped line. This is used in attempt to distinguish macro calls without
1044 // trailing semicolons from other constructs split to several lines.
1045 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1046   // Semicolon can be a null-statement, l_square can be a start of a macro or
1047   // a C++11 attribute, but this doesn't seem to be common.
1048   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1049          Tok.isNot(TT_AttributeSquare) &&
1050          // Tokens that can only be used as binary operators and a part of
1051          // overloaded operator names.
1052          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1053          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1054          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1055          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1056          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1057          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1058          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1059          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1060          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1061          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1062          Tok.isNot(tok::lesslessequal) &&
1063          // Colon is used in labels, base class lists, initializer lists,
1064          // range-based for loops, ternary operator, but should never be the
1065          // first token in an unwrapped line.
1066          Tok.isNot(tok::colon) &&
1067          // 'noexcept' is a trailing annotation.
1068          Tok.isNot(tok::kw_noexcept);
1069 }
1070 
1071 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1072                           const FormatToken *FormatTok) {
1073   // FIXME: This returns true for C/C++ keywords like 'struct'.
1074   return FormatTok->is(tok::identifier) &&
1075          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1076           !FormatTok->isOneOf(
1077               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1078               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1079               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1080               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1081               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1082               Keywords.kw_instanceof, Keywords.kw_interface,
1083               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1084 }
1085 
1086 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1087                                  const FormatToken *FormatTok) {
1088   return FormatTok->Tok.isLiteral() ||
1089          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1090          mustBeJSIdent(Keywords, FormatTok);
1091 }
1092 
1093 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1094 // when encountered after a value (see mustBeJSIdentOrValue).
1095 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1096                            const FormatToken *FormatTok) {
1097   return FormatTok->isOneOf(
1098       tok::kw_return, Keywords.kw_yield,
1099       // conditionals
1100       tok::kw_if, tok::kw_else,
1101       // loops
1102       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1103       // switch/case
1104       tok::kw_switch, tok::kw_case,
1105       // exceptions
1106       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1107       // declaration
1108       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1109       Keywords.kw_async, Keywords.kw_function,
1110       // import/export
1111       Keywords.kw_import, tok::kw_export);
1112 }
1113 
1114 // Checks whether a token is a type in K&R C (aka C78).
1115 static bool isC78Type(const FormatToken &Tok) {
1116   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1117                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1118                      tok::identifier);
1119 }
1120 
1121 // This function checks whether a token starts the first parameter declaration
1122 // in a K&R C (aka C78) function definition, e.g.:
1123 //   int f(a, b)
1124 //   short a, b;
1125 //   {
1126 //      return a + b;
1127 //   }
1128 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1129                                const FormatToken *FuncName) {
1130   assert(Tok);
1131   assert(Next);
1132   assert(FuncName);
1133 
1134   if (FuncName->isNot(tok::identifier))
1135     return false;
1136 
1137   const FormatToken *Prev = FuncName->Previous;
1138   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1139     return false;
1140 
1141   if (!isC78Type(*Tok) &&
1142       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1143     return false;
1144 
1145   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1146     return false;
1147 
1148   Tok = Tok->Previous;
1149   if (!Tok || Tok->isNot(tok::r_paren))
1150     return false;
1151 
1152   Tok = Tok->Previous;
1153   if (!Tok || Tok->isNot(tok::identifier))
1154     return false;
1155 
1156   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1157 }
1158 
1159 void UnwrappedLineParser::parseModuleImport() {
1160   nextToken();
1161   while (!eof()) {
1162     if (FormatTok->is(tok::colon)) {
1163       FormatTok->setType(TT_ModulePartitionColon);
1164     }
1165     // Handle import <foo/bar.h> as we would an include statement.
1166     else if (FormatTok->is(tok::less)) {
1167       nextToken();
1168       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1169         // Mark tokens up to the trailing line comments as implicit string
1170         // literals.
1171         if (FormatTok->isNot(tok::comment) &&
1172             !FormatTok->TokenText.startswith("//"))
1173           FormatTok->setType(TT_ImplicitStringLiteral);
1174         nextToken();
1175       }
1176     }
1177     if (FormatTok->is(tok::semi)) {
1178       nextToken();
1179       break;
1180     }
1181     nextToken();
1182   }
1183 
1184   addUnwrappedLine();
1185 }
1186 
1187 // readTokenWithJavaScriptASI reads the next token and terminates the current
1188 // line if JavaScript Automatic Semicolon Insertion must
1189 // happen between the current token and the next token.
1190 //
1191 // This method is conservative - it cannot cover all edge cases of JavaScript,
1192 // but only aims to correctly handle certain well known cases. It *must not*
1193 // return true in speculative cases.
1194 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1195   FormatToken *Previous = FormatTok;
1196   readToken();
1197   FormatToken *Next = FormatTok;
1198 
1199   bool IsOnSameLine =
1200       CommentsBeforeNextToken.empty()
1201           ? Next->NewlinesBefore == 0
1202           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1203   if (IsOnSameLine)
1204     return;
1205 
1206   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1207   bool PreviousStartsTemplateExpr =
1208       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1209   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1210     // If the line contains an '@' sign, the previous token might be an
1211     // annotation, which can precede another identifier/value.
1212     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1213       return LineNode.Tok->is(tok::at);
1214     });
1215     if (HasAt)
1216       return;
1217   }
1218   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1219     return addUnwrappedLine();
1220   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1221   bool NextEndsTemplateExpr =
1222       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1223   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1224       (PreviousMustBeValue ||
1225        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1226                          tok::minusminus)))
1227     return addUnwrappedLine();
1228   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1229       isJSDeclOrStmt(Keywords, Next))
1230     return addUnwrappedLine();
1231 }
1232 
// Parses one structural element starting at the current token and emits the
// unwrapped line(s) it consists of.
//
// The function works in two phases:
//  1. A switch on the leading token hands constructs that have a dedicated
//     parser (namespace, if, loops, switch, try, extern "C" blocks, ...) to
//     that parser and returns.
//  2. Otherwise tokens are consumed one by one in a loop until a construct
//     that ends the element is found (the function returns) or eof() is
//     reached.
//
// IfKind is forwarded to parseIfThenElse() when the element starts with 'if'.
// IsTopLevel enables the check for K&R C parameter declarations that follow a
// parenthesized list.
void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
                                                 bool IsTopLevel) {
  // TableGen: an include followed by an optional string literal forms a line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Every token up to the closing brace is marked as finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the keyword is simply consumed; otherwise it
    // is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse(IfKind);
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "<string>" {' is handled here; everything else falls
    // through to the generic token loop below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    // 'import' is handled per language: JavaScript, Proto and C++ modules.
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // 'signals:' / 'Q_SIGNALS:' / 'slots:' / 'Q_SLOTS:' followed by a colon
    // form a line of their own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume the element token by token. Each case either advances
  // past the construct it recognizes and continues the loop (break), or
  // finishes the structural element (return).
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      return;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef' followed by one of the Apple enum macros is parsed as an
      // enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike())
        return;
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the structural element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here; only the line is finished.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      // Consume the operator symbol together with the 'operator' keyword.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type/identifier, a parenthesized list and
      // a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very like to be an actual
          // interface declaration.
          // Peek at the following token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // 'interface' in any other language is parsed like a struct.
      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // The checks below only apply when the identifier was the first token of
      // the line (optionally preceded by a single comment token).
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is function-like or at least
        // five characters long, followed by a line break and a token that can
        // start a new line, is treated as a macro invocation.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: '= <' starts a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1727 
1728 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1729   assert(FormatTok->is(tok::l_brace));
1730   if (!Style.isCSharp())
1731     return false;
1732   // See if it's a property accessor.
1733   if (FormatTok->Previous->isNot(tok::identifier))
1734     return false;
1735 
1736   // See if we are inside a property accessor.
1737   //
1738   // Record the current tokenPosition so that we can advance and
1739   // reset the current token. `Next` is not set yet so we need
1740   // another way to advance along the token stream.
1741   unsigned int StoredPosition = Tokens->getPosition();
1742   FormatToken *Tok = Tokens->getNextToken();
1743 
1744   // A trivial property accessor is of the form:
1745   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1746   // Track these as they do not require line breaks to be introduced.
1747   bool HasGetOrSet = false;
1748   bool IsTrivialPropertyAccessor = true;
1749   while (!eof()) {
1750     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1751                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1752                      Keywords.kw_set)) {
1753       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1754         HasGetOrSet = true;
1755       Tok = Tokens->getNextToken();
1756       continue;
1757     }
1758     if (Tok->isNot(tok::r_brace))
1759       IsTrivialPropertyAccessor = false;
1760     break;
1761   }
1762 
1763   if (!HasGetOrSet) {
1764     Tokens->setPosition(StoredPosition);
1765     return false;
1766   }
1767 
1768   // Try to parse the property accessor:
1769   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1770   Tokens->setPosition(StoredPosition);
1771   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1772     addUnwrappedLine();
1773   nextToken();
1774   do {
1775     switch (FormatTok->Tok.getKind()) {
1776     case tok::r_brace:
1777       nextToken();
1778       if (FormatTok->is(tok::equal)) {
1779         while (!eof() && FormatTok->isNot(tok::semi))
1780           nextToken();
1781         nextToken();
1782       }
1783       addUnwrappedLine();
1784       return true;
1785     case tok::l_brace:
1786       ++Line->Level;
1787       parseBlock(/*MustBeDeclaration=*/true);
1788       addUnwrappedLine();
1789       --Line->Level;
1790       break;
1791     case tok::equal:
1792       if (FormatTok->is(TT_FatArrow)) {
1793         ++Line->Level;
1794         do {
1795           nextToken();
1796         } while (!eof() && FormatTok->isNot(tok::semi));
1797         nextToken();
1798         addUnwrappedLine();
1799         --Line->Level;
1800         break;
1801       }
1802       nextToken();
1803       break;
1804     default:
1805       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1806           !IsTrivialPropertyAccessor) {
1807         // Non-trivial get/set needs to be on its own line.
1808         addUnwrappedLine();
1809       }
1810       nextToken();
1811     }
1812   } while (!eof());
1813 
1814   // Unreachable for well-formed code (paired '{' and '}').
1815   return true;
1816 }
1817 
// Tries to parse a C++ lambda expression starting at the current '['.
//
// For non-C++ styles the current token is consumed and false is returned.
// False is also returned when tryToParseLambdaIntroducer() rejects the
// bracket. In every other case the function returns true, including when the
// scan between the introducer and the body gives up on an unexpected token;
// only when a '{' is actually reached are the '[' and '{' typed as
// TT_LambdaLSquare / TT_LambdaLBrace and the body parsed as a child block.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  // Remember the '[' so that it can be typed once the lambda is confirmed.
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // Both flags relax the operator check below: once either is set, operator
  // tokens are accepted as part of the lambda's declarator.
  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  // Scan the tokens between the introducer and the opening brace of the body.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
      assert(FormatTok->Previous);
      // One of these keywords directly after '<' marks a template parameter
      // list.
      if (FormatTok->Previous->is(tok::less))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      // Not treated as a lambda: stop scanning without typing any token.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      // Any other token ends the attempt; nothing is typed as a lambda.
      return true;
    }
  }
  // The loop only exits normally on '{': this is the lambda body.
  FormatTok->setType(TT_LambdaLBrace);
  LSquare.setType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}
1922 
1923 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1924   const FormatToken *Previous = FormatTok->Previous;
1925   if (Previous &&
1926       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1927                          tok::kw_delete, tok::l_square) ||
1928        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1929        Previous->isSimpleTypeSpecifier())) {
1930     nextToken();
1931     return false;
1932   }
1933   nextToken();
1934   if (FormatTok->is(tok::l_square))
1935     return false;
1936   parseSquare(/*LambdaIntroducer=*/true);
1937   return true;
1938 }
1939 
1940 void UnwrappedLineParser::tryToParseJSFunction() {
1941   assert(FormatTok->is(Keywords.kw_function) ||
1942          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1943   if (FormatTok->is(Keywords.kw_async))
1944     nextToken();
1945   // Consume "function".
1946   nextToken();
1947 
1948   // Consume * (generator function). Treat it like C++'s overloaded operators.
1949   if (FormatTok->is(tok::star)) {
1950     FormatTok->setType(TT_OverloadedOperator);
1951     nextToken();
1952   }
1953 
1954   // Consume function name.
1955   if (FormatTok->is(tok::identifier))
1956     nextToken();
1957 
1958   if (FormatTok->isNot(tok::l_paren))
1959     return;
1960 
1961   // Parse formal parameter list.
1962   parseParens();
1963 
1964   if (FormatTok->is(tok::colon)) {
1965     // Parse a type definition.
1966     nextToken();
1967 
1968     // Eat the type declaration. For braced inline object types, balance braces,
1969     // otherwise just parse until finding an l_brace for the function body.
1970     if (FormatTok->is(tok::l_brace))
1971       tryToParseBracedList();
1972     else
1973       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1974         nextToken();
1975   }
1976 
1977   if (FormatTok->is(tok::semi))
1978     return;
1979 
1980   parseChildBlock();
1981 }
1982 
1983 bool UnwrappedLineParser::tryToParseBracedList() {
1984   if (FormatTok->is(BK_Unknown))
1985     calculateBraceTypes();
1986   assert(FormatTok->isNot(BK_Unknown));
1987   if (FormatTok->is(BK_Block))
1988     return false;
1989   nextToken();
1990   parseBracedList();
1991   return true;
1992 }
1993 
1994 bool UnwrappedLineParser::tryToParseChildBlock() {
1995   assert(Style.isJavaScript() || Style.isCSharp());
1996   assert(FormatTok->is(TT_FatArrow));
1997   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
1998   // They always start an expression or a child block if followed by a curly
1999   // brace.
2000   nextToken();
2001   if (FormatTok->isNot(tok::l_brace))
2002     return false;
2003   parseChildBlock();
2004   return true;
2005 }
2006 
// Parses the elements of a braced list, starting at the current token, up to
// and including the token of kind ClosingBraceKind.
//
// \param ContinueOnSemicolons if false, parsing stops (without consuming it)
//        at the first ';' that is treated as an error, i.e. outside
//        JavaScript.
// \param IsEnum when set and Style.AllowShortEnumsOnASingleLine is off, an
//        unwrapped line is added after every ',' and before the closing
//        token.
// \param ClosingBraceKind kind of the token that terminates the list.
// \returns true only if the closing token was reached and no error-marking
//          ';' was seen on the way; false otherwise, including when the end
//          of the input is reached first.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          bool IsEnum,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    // C# lambdas with a block body: `x => { ... }`.
    if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
        tryToParseChildBlock())
      continue;
    if (Style.isJavaScript()) {
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    // Checked before the switch so that a custom closing kind (e.g. '>')
    // takes precedence over the generic handling of that token.
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_square:
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.isJavaScript()) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->setBlockKind(BK_BracedInit);
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // In proto files '<' ... '>' delimits a nested list.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.isJavaScript()) {
        nextToken();
        break;
      }
      // Elsewhere a ';' means this was not really a braced list.
      HasError = true;
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      // Put each enumerator on its own line when short enums are disallowed.
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // Ran out of input before seeing the closing token.
  return false;
}
2096 
2097 void UnwrappedLineParser::parseParens() {
2098   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2099   nextToken();
2100   do {
2101     switch (FormatTok->Tok.getKind()) {
2102     case tok::l_paren:
2103       parseParens();
2104       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2105         parseChildBlock();
2106       break;
2107     case tok::r_paren:
2108       nextToken();
2109       return;
2110     case tok::r_brace:
2111       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2112       return;
2113     case tok::l_square:
2114       tryToParseLambda();
2115       break;
2116     case tok::l_brace:
2117       if (!tryToParseBracedList())
2118         parseChildBlock();
2119       break;
2120     case tok::at:
2121       nextToken();
2122       if (FormatTok->Tok.is(tok::l_brace)) {
2123         nextToken();
2124         parseBracedList();
2125       }
2126       break;
2127     case tok::equal:
2128       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2129         tryToParseChildBlock();
2130       else
2131         nextToken();
2132       break;
2133     case tok::kw_class:
2134       if (Style.isJavaScript())
2135         parseRecord(/*ParseAsExpr=*/true);
2136       else
2137         nextToken();
2138       break;
2139     case tok::identifier:
2140       if (Style.isJavaScript() &&
2141           (FormatTok->is(Keywords.kw_function) ||
2142            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2143         tryToParseJSFunction();
2144       else
2145         nextToken();
2146       break;
2147     default:
2148       nextToken();
2149       break;
2150     }
2151   } while (!eof());
2152 }
2153 
2154 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2155   if (!LambdaIntroducer) {
2156     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2157     if (tryToParseLambda())
2158       return;
2159   }
2160   do {
2161     switch (FormatTok->Tok.getKind()) {
2162     case tok::l_paren:
2163       parseParens();
2164       break;
2165     case tok::r_square:
2166       nextToken();
2167       return;
2168     case tok::r_brace:
2169       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2170       return;
2171     case tok::l_square:
2172       parseSquare();
2173       break;
2174     case tok::l_brace: {
2175       if (!tryToParseBracedList())
2176         parseChildBlock();
2177       break;
2178     }
2179     case tok::at:
2180       nextToken();
2181       if (FormatTok->Tok.is(tok::l_brace)) {
2182         nextToken();
2183         parseBracedList();
2184       }
2185       break;
2186     default:
2187       nextToken();
2188       break;
2189     }
2190   } while (!eof());
2191 }
2192 
2193 void UnwrappedLineParser::keepAncestorBraces() {
2194   if (!Style.RemoveBracesLLVM)
2195     return;
2196 
2197   const int MaxNestingLevels = 2;
2198   const int Size = NestedTooDeep.size();
2199   if (Size >= MaxNestingLevels)
2200     NestedTooDeep[Size - MaxNestingLevels] = true;
2201   NestedTooDeep.push_back(false);
2202 }
2203 
2204 static void markOptionalBraces(FormatToken *LeftBrace) {
2205   if (!LeftBrace)
2206     return;
2207 
2208   assert(LeftBrace->is(tok::l_brace));
2209 
2210   FormatToken *RightBrace = LeftBrace->MatchingParen;
2211   if (!RightBrace) {
2212     assert(!LeftBrace->Optional);
2213     return;
2214   }
2215 
2216   assert(RightBrace->is(tok::r_brace));
2217   assert(RightBrace->MatchingParen == LeftBrace);
2218   assert(LeftBrace->Optional == RightBrace->Optional);
2219 
2220   LeftBrace->Optional = true;
2221   RightBrace->Optional = true;
2222 }
2223 
// Parses an `if` statement together with an optional `else` branch,
// recursing for `else if` chains.
//
// When Style.RemoveBracesLLVM is on, the function also decides whether the
// braces of the `if` and `else` blocks may be marked optional.
//
// \param IfKind if non-null and Style.RemoveBracesLLVM is on, receives the
//        kind of statement that was parsed (IfOnly, IfElse or IfElseIf). It
//        is not written when Style.RemoveBracesLLVM is off.
// \param KeepBraces if true, no braces are marked optional by this call; the
//        value is also forwarded (ORed with KeepIfBraces) to the recursive
//        call for an `else if`.
// \returns nullptr when Style.RemoveBracesLLVM is off; otherwise the left
//          brace of the `if` block, or nullptr if the `if` body was not
//          braced.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces) {
  // Skips attributes that may follow the condition or the `else` keyword.
  auto HandleAttributes = [this]() {
    // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
    if (FormatTok->is(TT_AttributeMacro))
      nextToken();
    // Handle [[likely]] / [[unlikely]] attributes.
    if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
      parseSquare();
  };

  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  nextToken();
  // Skip `constexpr` or a single identifier between `if` and the condition.
  if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
    nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  HandleAttributes();

  // Set when the line after the `if` block is still open because a following
  // `else` may be placed on the same line as the closing brace.
  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (FormatTok->Tok.is(tok::l_brace)) {
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    IfBlockKind = parseBlock();
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    // Unbraced body: a single structural element one level deeper.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }

  bool KeepIfBraces = false;
  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if the left brace has no MatchingParen, if the nesting
    // flag for this level is set, or if the block's kind is an `if` without
    // `else` or an `if`/`else if`.
    KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->Tok.is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Start the else branch with a cleared nesting flag.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    HandleAttributes();
    if (FormatTok->Tok.is(tok::l_brace)) {
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      if (parseBlock() == IfStmtKind::IfOnly)
        Kind = IfStmtKind::IfElseIf;
      addUnwrappedLine();
    } else if (FormatTok->Tok.is(tok::kw_if)) {
      // `else if`: a comment between `else` and `if` forces the nested `if`
      // onto its own, further indented line.
      FormatToken *Previous = Tokens->getPreviousToken();
      const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // The recursive call pushes and pops its own entry; take this level's
        // entry off for the duration and restore it afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace =
          parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      // Unbraced else body.
      addUnwrappedLine();
      ++Line->Level;
      parseStructuralElement();
      if (FormatTok->is(tok::eof))
        addUnwrappedLine();
      --Line->Level;
    }
  } else {
    // No else branch.
    if (Style.RemoveBracesLLVM)
      KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is bookkeeping for brace removal only.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  const bool KeepElseBraces =
      (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();

  // Close the nesting level opened by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay: unlink the pair so that the left brace no longer has a
    // MatchingParen.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2349 
2350 void UnwrappedLineParser::parseTryCatch() {
2351   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2352   nextToken();
2353   bool NeedsUnwrappedLine = false;
2354   if (FormatTok->is(tok::colon)) {
2355     // We are in a function try block, what comes is an initializer list.
2356     nextToken();
2357 
2358     // In case identifiers were removed by clang-tidy, what might follow is
2359     // multiple commas in sequence - before the first identifier.
2360     while (FormatTok->is(tok::comma))
2361       nextToken();
2362 
2363     while (FormatTok->is(tok::identifier)) {
2364       nextToken();
2365       if (FormatTok->is(tok::l_paren))
2366         parseParens();
2367       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2368           FormatTok->is(tok::l_brace)) {
2369         do {
2370           nextToken();
2371         } while (!FormatTok->is(tok::r_brace));
2372         nextToken();
2373       }
2374 
2375       // In case identifiers were removed by clang-tidy, what might follow is
2376       // multiple commas in sequence - after the first identifier.
2377       while (FormatTok->is(tok::comma))
2378         nextToken();
2379     }
2380   }
2381   // Parse try with resource.
2382   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2383     parseParens();
2384 
2385   keepAncestorBraces();
2386 
2387   if (FormatTok->is(tok::l_brace)) {
2388     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2389     parseBlock();
2390     if (Style.BraceWrapping.BeforeCatch)
2391       addUnwrappedLine();
2392     else
2393       NeedsUnwrappedLine = true;
2394   } else if (!FormatTok->is(tok::kw_catch)) {
2395     // The C++ standard requires a compound-statement after a try.
2396     // If there's none, we try to assume there's a structuralElement
2397     // and try to continue.
2398     addUnwrappedLine();
2399     ++Line->Level;
2400     parseStructuralElement();
2401     --Line->Level;
2402   }
2403   while (true) {
2404     if (FormatTok->is(tok::at))
2405       nextToken();
2406     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2407                              tok::kw___finally) ||
2408           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2409            FormatTok->is(Keywords.kw_finally)) ||
2410           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2411            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2412       break;
2413     nextToken();
2414     while (FormatTok->isNot(tok::l_brace)) {
2415       if (FormatTok->is(tok::l_paren)) {
2416         parseParens();
2417         continue;
2418       }
2419       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2420         if (Style.RemoveBracesLLVM)
2421           NestedTooDeep.pop_back();
2422         return;
2423       }
2424       nextToken();
2425     }
2426     NeedsUnwrappedLine = false;
2427     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2428     parseBlock();
2429     if (Style.BraceWrapping.BeforeCatch)
2430       addUnwrappedLine();
2431     else
2432       NeedsUnwrappedLine = true;
2433   }
2434 
2435   if (Style.RemoveBracesLLVM)
2436     NestedTooDeep.pop_back();
2437 
2438   if (NeedsUnwrappedLine)
2439     addUnwrappedLine();
2440 }
2441 
2442 void UnwrappedLineParser::parseNamespace() {
2443   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2444          "'namespace' expected");
2445 
2446   const FormatToken &InitialToken = *FormatTok;
2447   nextToken();
2448   if (InitialToken.is(TT_NamespaceMacro)) {
2449     parseParens();
2450   } else {
2451     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2452                               tok::l_square, tok::period) ||
2453            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2454       if (FormatTok->is(tok::l_square))
2455         parseSquare();
2456       else
2457         nextToken();
2458   }
2459   if (FormatTok->Tok.is(tok::l_brace)) {
2460     if (ShouldBreakBeforeBrace(Style, InitialToken))
2461       addUnwrappedLine();
2462 
2463     unsigned AddLevels =
2464         Style.NamespaceIndentation == FormatStyle::NI_All ||
2465                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2466                  DeclarationScopeStack.size() > 1)
2467             ? 1u
2468             : 0u;
2469     bool ManageWhitesmithsBraces =
2470         AddLevels == 0u &&
2471         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2472 
2473     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2474     // the whole block.
2475     if (ManageWhitesmithsBraces)
2476       ++Line->Level;
2477 
2478     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2479                /*MunchSemi=*/true,
2480                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2481 
2482     // Munch the semicolon after a namespace. This is more common than one would
2483     // think. Putting the semicolon into its own line is very ugly.
2484     if (FormatTok->Tok.is(tok::semi))
2485       nextToken();
2486 
2487     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2488 
2489     if (ManageWhitesmithsBraces)
2490       --Line->Level;
2491   }
2492   // FIXME: Add error handling.
2493 }
2494 
2495 void UnwrappedLineParser::parseNew() {
2496   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2497   nextToken();
2498 
2499   if (Style.isCSharp()) {
2500     do {
2501       if (FormatTok->is(tok::l_brace))
2502         parseBracedList();
2503 
2504       if (FormatTok->isOneOf(tok::semi, tok::comma))
2505         return;
2506 
2507       nextToken();
2508     } while (!eof());
2509   }
2510 
2511   if (Style.Language != FormatStyle::LK_Java)
2512     return;
2513 
2514   // In Java, we can parse everything up to the parens, which aren't optional.
2515   do {
2516     // There should not be a ;, { or } before the new's open paren.
2517     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2518       return;
2519 
2520     // Consume the parens.
2521     if (FormatTok->is(tok::l_paren)) {
2522       parseParens();
2523 
2524       // If there is a class body of an anonymous class, consume that as child.
2525       if (FormatTok->is(tok::l_brace))
2526         parseChildBlock();
2527       return;
2528     }
2529     nextToken();
2530   } while (!eof());
2531 }
2532 
2533 void UnwrappedLineParser::parseForOrWhileLoop() {
2534   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2535          "'for', 'while' or foreach macro expected");
2536   nextToken();
2537   // JS' for await ( ...
2538   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2539     nextToken();
2540   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2541     nextToken();
2542   if (FormatTok->Tok.is(tok::l_paren))
2543     parseParens();
2544 
2545   keepAncestorBraces();
2546 
2547   if (FormatTok->Tok.is(tok::l_brace)) {
2548     FormatToken *LeftBrace = FormatTok;
2549     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2550     parseBlock();
2551     if (Style.RemoveBracesLLVM) {
2552       assert(!NestedTooDeep.empty());
2553       if (!NestedTooDeep.back())
2554         markOptionalBraces(LeftBrace);
2555     }
2556     addUnwrappedLine();
2557   } else {
2558     addUnwrappedLine();
2559     ++Line->Level;
2560     parseStructuralElement();
2561     --Line->Level;
2562   }
2563 
2564   if (Style.RemoveBracesLLVM)
2565     NestedTooDeep.pop_back();
2566 }
2567 
2568 void UnwrappedLineParser::parseDoWhile() {
2569   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2570   nextToken();
2571 
2572   keepAncestorBraces();
2573 
2574   if (FormatTok->Tok.is(tok::l_brace)) {
2575     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2576     parseBlock();
2577     if (Style.BraceWrapping.BeforeWhile)
2578       addUnwrappedLine();
2579   } else {
2580     addUnwrappedLine();
2581     ++Line->Level;
2582     parseStructuralElement();
2583     --Line->Level;
2584   }
2585 
2586   if (Style.RemoveBracesLLVM)
2587     NestedTooDeep.pop_back();
2588 
2589   // FIXME: Add error handling.
2590   if (!FormatTok->Tok.is(tok::kw_while)) {
2591     addUnwrappedLine();
2592     return;
2593   }
2594 
2595   // If in Whitesmiths mode, the line with the while() needs to be indented
2596   // to the same level as the block.
2597   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2598     ++Line->Level;
2599 
2600   nextToken();
2601   parseStructuralElement();
2602 }
2603 
2604 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2605   nextToken();
2606   unsigned OldLineLevel = Line->Level;
2607   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2608     --Line->Level;
2609   if (LeftAlignLabel)
2610     Line->Level = 0;
2611 
2612   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2613       FormatTok->Tok.is(tok::l_brace)) {
2614 
2615     CompoundStatementIndenter Indenter(this, Line->Level,
2616                                        Style.BraceWrapping.AfterCaseLabel,
2617                                        Style.BraceWrapping.IndentBraces);
2618     parseBlock();
2619     if (FormatTok->Tok.is(tok::kw_break)) {
2620       if (Style.BraceWrapping.AfterControlStatement ==
2621           FormatStyle::BWACS_Always) {
2622         addUnwrappedLine();
2623         if (!Style.IndentCaseBlocks &&
2624             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2625           ++Line->Level;
2626       }
2627       parseStructuralElement();
2628     }
2629     addUnwrappedLine();
2630   } else {
2631     if (FormatTok->is(tok::semi))
2632       nextToken();
2633     addUnwrappedLine();
2634   }
2635   Line->Level = OldLineLevel;
2636   if (FormatTok->isNot(tok::l_brace)) {
2637     parseStructuralElement();
2638     addUnwrappedLine();
2639   }
2640 }
2641 
2642 void UnwrappedLineParser::parseCaseLabel() {
2643   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2644 
2645   // FIXME: fix handling of complex expressions here.
2646   do {
2647     nextToken();
2648   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2649   parseLabel();
2650 }
2651 
2652 void UnwrappedLineParser::parseSwitch() {
2653   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2654   nextToken();
2655   if (FormatTok->Tok.is(tok::l_paren))
2656     parseParens();
2657 
2658   keepAncestorBraces();
2659 
2660   if (FormatTok->Tok.is(tok::l_brace)) {
2661     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2662     parseBlock();
2663     addUnwrappedLine();
2664   } else {
2665     addUnwrappedLine();
2666     ++Line->Level;
2667     parseStructuralElement();
2668     --Line->Level;
2669   }
2670 
2671   if (Style.RemoveBracesLLVM)
2672     NestedTooDeep.pop_back();
2673 }
2674 
2675 void UnwrappedLineParser::parseAccessSpecifier() {
2676   FormatToken *AccessSpecifierCandidate = FormatTok;
2677   nextToken();
2678   // Understand Qt's slots.
2679   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2680     nextToken();
2681   // Otherwise, we don't know what it is, and we'd better keep the next token.
2682   if (FormatTok->Tok.is(tok::colon)) {
2683     nextToken();
2684     addUnwrappedLine();
2685   } else if (!FormatTok->Tok.is(tok::coloncolon) &&
2686              !std::binary_search(COperatorsFollowingVar.begin(),
2687                                  COperatorsFollowingVar.end(),
2688                                  FormatTok->Tok.getKind())) {
2689     // Not a variable name nor namespace name.
2690     addUnwrappedLine();
2691   } else if (AccessSpecifierCandidate) {
2692     // Consider the access specifier to be a C identifier.
2693     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2694   }
2695 }
2696 
2697 void UnwrappedLineParser::parseConcept() {
2698   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2699   nextToken();
2700   if (!FormatTok->Tok.is(tok::identifier))
2701     return;
2702   nextToken();
2703   if (!FormatTok->Tok.is(tok::equal))
2704     return;
2705   nextToken();
2706   if (FormatTok->Tok.is(tok::kw_requires)) {
2707     nextToken();
2708     parseRequiresExpression(Line->Level);
2709   } else {
2710     parseConstraintExpression(Line->Level);
2711   }
2712 }
2713 
2714 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2715   // requires (R range)
2716   if (FormatTok->Tok.is(tok::l_paren)) {
2717     parseParens();
2718     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2719       addUnwrappedLine();
2720       --Line->Level;
2721     }
2722   }
2723 
2724   if (FormatTok->Tok.is(tok::l_brace)) {
2725     if (Style.BraceWrapping.AfterFunction)
2726       addUnwrappedLine();
2727     FormatTok->setType(TT_FunctionLBrace);
2728     parseBlock();
2729     addUnwrappedLine();
2730   } else {
2731     parseConstraintExpression(OriginalLevel);
2732   }
2733 }
2734 
// Parses a chain of constraints joined by '&&' / '||'. Each iteration of the
// outer loop handles one operand; the joining operator is tagged as
// TT_ConstraintJunctions. \p OriginalLevel is the line level in effect before
// the enclosing requires clause; it is compared against Line->Level to decide
// whether a level has to be removed again when the expression ends.
void UnwrappedLineParser::parseConstraintExpression(
    unsigned int OriginalLevel) {
  // requires Id<T> && Id<T> || Id<T>
  while (
      FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
    nextToken();
    // Consume the (possibly qualified, possibly templated) name of the
    // operand. Angle brackets are parsed as a braced list closed by '>'.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
                              tok::greater, tok::comma, tok::ellipsis)) {
      if (FormatTok->Tok.is(tok::less)) {
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
        continue;
      }
      nextToken();
    }
    // A nested 'requires' hands control to parseRequiresExpression().
    if (FormatTok->Tok.is(tok::kw_requires))
      parseRequiresExpression(OriginalLevel);
    if (FormatTok->Tok.is(tok::less)) {
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
    }

    if (FormatTok->Tok.is(tok::l_paren))
      parseParens();
    // A brace at this point is treated like a function body.
    if (FormatTok->Tok.is(tok::l_brace)) {
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      FormatTok->setType(TT_FunctionLBrace);
      parseBlock();
    }
    if (FormatTok->Tok.is(tok::semi)) {
      // Eat any trailing semi.
      nextToken();
      addUnwrappedLine();
    }
    // A ':' ends the expression without touching the line or its level.
    if (FormatTok->Tok.is(tok::colon))
      return;
    if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
      // No further junction: the constraint expression is complete. Only end
      // the line if the previous token is not one that could still be part
      // of the operand.
      if (FormatTok->Previous &&
          !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
                                        tok::coloncolon))
        addUnwrappedLine();
      if (Style.IndentRequires && OriginalLevel != Line->Level)
        --Line->Level;
      break;
    } else {
      FormatTok->setType(TT_ConstraintJunctions);
    }

    // Step over the '&&' / '||' and continue with the next operand.
    nextToken();
  }
}
2787 
2788 void UnwrappedLineParser::parseRequires() {
2789   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2790 
2791   unsigned OriginalLevel = Line->Level;
2792   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2793     addUnwrappedLine();
2794     if (Style.IndentRequires)
2795       ++Line->Level;
2796   }
2797   nextToken();
2798 
2799   parseRequiresExpression(OriginalLevel);
2800 }
2801 
2802 bool UnwrappedLineParser::parseEnum() {
2803   const FormatToken &InitialToken = *FormatTok;
2804 
2805   // Won't be 'enum' for NS_ENUMs.
2806   if (FormatTok->Tok.is(tok::kw_enum))
2807     nextToken();
2808 
2809   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2810   // declarations. An "enum" keyword followed by a colon would be a syntax
2811   // error and thus assume it is just an identifier.
2812   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
2813     return false;
2814 
2815   // In protobuf, "enum" can be used as a field name.
2816   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2817     return false;
2818 
2819   // Eat up enum class ...
2820   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2821     nextToken();
2822 
2823   while (FormatTok->Tok.getIdentifierInfo() ||
2824          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2825                             tok::greater, tok::comma, tok::question)) {
2826     nextToken();
2827     // We can have macros or attributes in between 'enum' and the enum name.
2828     if (FormatTok->is(tok::l_paren))
2829       parseParens();
2830     if (FormatTok->is(tok::identifier)) {
2831       nextToken();
2832       // If there are two identifiers in a row, this is likely an elaborate
2833       // return type. In Java, this can be "implements", etc.
2834       if (Style.isCpp() && FormatTok->is(tok::identifier))
2835         return false;
2836     }
2837   }
2838 
2839   // Just a declaration or something is wrong.
2840   if (FormatTok->isNot(tok::l_brace))
2841     return true;
2842   FormatTok->setType(TT_RecordLBrace);
2843   FormatTok->setBlockKind(BK_Block);
2844 
2845   if (Style.Language == FormatStyle::LK_Java) {
2846     // Java enums are different.
2847     parseJavaEnumBody();
2848     return true;
2849   }
2850   if (Style.Language == FormatStyle::LK_Proto) {
2851     parseBlock(/*MustBeDeclaration=*/true);
2852     return true;
2853   }
2854 
2855   if (!Style.AllowShortEnumsOnASingleLine &&
2856       ShouldBreakBeforeBrace(Style, InitialToken))
2857     addUnwrappedLine();
2858   // Parse enum body.
2859   nextToken();
2860   if (!Style.AllowShortEnumsOnASingleLine) {
2861     addUnwrappedLine();
2862     Line->Level += 1;
2863   }
2864   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2865                                    /*IsEnum=*/true);
2866   if (!Style.AllowShortEnumsOnASingleLine)
2867     Line->Level -= 1;
2868   if (HasError) {
2869     if (FormatTok->is(tok::semi))
2870       nextToken();
2871     addUnwrappedLine();
2872   }
2873   return true;
2874 
2875   // There is no addUnwrappedLine() here so that we fall through to parsing a
2876   // structural element afterwards. Thus, in "enum A {} n, m;",
2877   // "} n, m;" will end up in one unwrapped line.
2878 }
2879 
2880 bool UnwrappedLineParser::parseStructLike() {
2881   // parseRecord falls through and does not yet add an unwrapped line as a
2882   // record declaration or definition can start a structural element.
2883   parseRecord();
2884   // This does not apply to Java, JavaScript and C#.
2885   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
2886       Style.isCSharp()) {
2887     if (FormatTok->is(tok::semi))
2888       nextToken();
2889     addUnwrappedLine();
2890     return true;
2891   }
2892   return false;
2893 }
2894 
2895 namespace {
2896 // A class used to set and restore the Token position when peeking
2897 // ahead in the token source.
2898 class ScopedTokenPosition {
2899   unsigned StoredPosition;
2900   FormatTokenSource *Tokens;
2901 
2902 public:
2903   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2904     assert(Tokens && "Tokens expected to not be null");
2905     StoredPosition = Tokens->getPosition();
2906   }
2907 
2908   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2909 };
2910 } // namespace
2911 
2912 // Look to see if we have [[ by looking ahead, if
2913 // its not then rewind to the original position.
2914 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2915   ScopedTokenPosition AutoPosition(Tokens);
2916   FormatToken *Tok = Tokens->getNextToken();
2917   // We already read the first [ check for the second.
2918   if (!Tok->is(tok::l_square))
2919     return false;
2920   // Double check that the attribute is just something
2921   // fairly simple.
2922   while (Tok->isNot(tok::eof)) {
2923     if (Tok->is(tok::r_square))
2924       break;
2925     Tok = Tokens->getNextToken();
2926   }
2927   if (Tok->is(tok::eof))
2928     return false;
2929   Tok = Tokens->getNextToken();
2930   if (!Tok->is(tok::r_square))
2931     return false;
2932   Tok = Tokens->getNextToken();
2933   if (Tok->is(tok::semi))
2934     return false;
2935   return true;
2936 }
2937 
2938 void UnwrappedLineParser::parseJavaEnumBody() {
2939   // Determine whether the enum is simple, i.e. does not have a semicolon or
2940   // constants with class bodies. Simple enums can be formatted like braced
2941   // lists, contracted to a single line, etc.
2942   unsigned StoredPosition = Tokens->getPosition();
2943   bool IsSimple = true;
2944   FormatToken *Tok = Tokens->getNextToken();
2945   while (!Tok->is(tok::eof)) {
2946     if (Tok->is(tok::r_brace))
2947       break;
2948     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2949       IsSimple = false;
2950       break;
2951     }
2952     // FIXME: This will also mark enums with braces in the arguments to enum
2953     // constants as "not simple". This is probably fine in practice, though.
2954     Tok = Tokens->getNextToken();
2955   }
2956   FormatTok = Tokens->setPosition(StoredPosition);
2957 
2958   if (IsSimple) {
2959     nextToken();
2960     parseBracedList();
2961     addUnwrappedLine();
2962     return;
2963   }
2964 
2965   // Parse the body of a more complex enum.
2966   // First add a line for everything up to the "{".
2967   nextToken();
2968   addUnwrappedLine();
2969   ++Line->Level;
2970 
2971   // Parse the enum constants.
2972   while (FormatTok) {
2973     if (FormatTok->is(tok::l_brace)) {
2974       // Parse the constant's class body.
2975       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2976                  /*MunchSemi=*/false);
2977     } else if (FormatTok->is(tok::l_paren)) {
2978       parseParens();
2979     } else if (FormatTok->is(tok::comma)) {
2980       nextToken();
2981       addUnwrappedLine();
2982     } else if (FormatTok->is(tok::semi)) {
2983       nextToken();
2984       addUnwrappedLine();
2985       break;
2986     } else if (FormatTok->is(tok::r_brace)) {
2987       addUnwrappedLine();
2988       break;
2989     } else {
2990       nextToken();
2991     }
2992   }
2993 
2994   // Parse the class body after the enum's ";" if any.
2995   parseLevel(/*HasOpeningBrace=*/true);
2996   nextToken();
2997   --Line->Level;
2998   addUnwrappedLine();
2999 }
3000 
// Parses a record (the token that introduces it is the current token on
// entry): its name together with any attributes or macros around it, an
// optional base-clause / template argument part, and finally the body.
//
// \p ParseAsExpr selects how the body is parsed: as a child block
// (parseChildBlock) when true, as a regular declaration block otherwise.
//
// No unwrapped line is added after the body; see the note at the end of the
// function.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier whose text differs from its upper-cased form is taken to
    // be a regular name rather than a macro.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip ahead until the record body, a ';' or the end of the file.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // If the brace does not start a braced list, stop skipping here.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            break;
        }
      }
      // A ';' means there is no body to parse.
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // A C# "where" constraint is started on a line of its own.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    FormatTok->setType(TT_RecordLBrace);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With IndentAccessModifiers the body is indented by two levels
      // instead of one.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3095 
3096 void UnwrappedLineParser::parseObjCMethod() {
3097   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
3098          "'(' or identifier expected.");
3099   do {
3100     if (FormatTok->Tok.is(tok::semi)) {
3101       nextToken();
3102       addUnwrappedLine();
3103       return;
3104     } else if (FormatTok->Tok.is(tok::l_brace)) {
3105       if (Style.BraceWrapping.AfterFunction)
3106         addUnwrappedLine();
3107       parseBlock();
3108       addUnwrappedLine();
3109       return;
3110     } else {
3111       nextToken();
3112     }
3113   } while (!eof());
3114 }
3115 
3116 void UnwrappedLineParser::parseObjCProtocolList() {
3117   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
3118   do {
3119     nextToken();
3120     // Early exit in case someone forgot a close angle.
3121     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3122         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3123       return;
3124   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
3125   nextToken(); // Skip '>'.
3126 }
3127 
3128 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3129   do {
3130     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
3131       nextToken();
3132       addUnwrappedLine();
3133       break;
3134     }
3135     if (FormatTok->is(tok::l_brace)) {
3136       parseBlock();
3137       // In ObjC interfaces, nothing should be following the "}".
3138       addUnwrappedLine();
3139     } else if (FormatTok->is(tok::r_brace)) {
3140       // Ignore stray "}". parseStructuralElement doesn't consume them.
3141       nextToken();
3142       addUnwrappedLine();
3143     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3144       nextToken();
3145       parseObjCMethod();
3146     } else {
3147       parseStructuralElement();
3148     }
3149   } while (!eof());
3150 }
3151 
3152 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3153   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3154          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3155   nextToken();
3156   nextToken(); // interface name
3157 
3158   // @interface can be followed by a lightweight generic
3159   // specialization list, then either a base class or a category.
3160   if (FormatTok->Tok.is(tok::less))
3161     parseObjCLightweightGenerics();
3162   if (FormatTok->Tok.is(tok::colon)) {
3163     nextToken();
3164     nextToken(); // base class name
3165     // The base class can also have lightweight generics applied to it.
3166     if (FormatTok->Tok.is(tok::less))
3167       parseObjCLightweightGenerics();
3168   } else if (FormatTok->Tok.is(tok::l_paren))
3169     // Skip category, if present.
3170     parseParens();
3171 
3172   if (FormatTok->Tok.is(tok::less))
3173     parseObjCProtocolList();
3174 
3175   if (FormatTok->Tok.is(tok::l_brace)) {
3176     if (Style.BraceWrapping.AfterObjCDeclaration)
3177       addUnwrappedLine();
3178     parseBlock(/*MustBeDeclaration=*/true);
3179   }
3180 
3181   // With instance variables, this puts '}' on its own line.  Without instance
3182   // variables, this ends the @interface line.
3183   addUnwrappedLine();
3184 
3185   parseObjCUntilAtEnd();
3186 }
3187 
3188 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3189   assert(FormatTok->Tok.is(tok::less));
3190   // Unlike protocol lists, generic parameterizations support
3191   // nested angles:
3192   //
3193   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3194   //     NSObject <NSCopying, NSSecureCoding>
3195   //
3196   // so we need to count how many open angles we have left.
3197   unsigned NumOpenAngles = 1;
3198   do {
3199     nextToken();
3200     // Early exit in case someone forgot a close angle.
3201     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3202         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3203       break;
3204     if (FormatTok->Tok.is(tok::less))
3205       ++NumOpenAngles;
3206     else if (FormatTok->Tok.is(tok::greater)) {
3207       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3208       --NumOpenAngles;
3209     }
3210   } while (!eof() && NumOpenAngles != 0);
3211   nextToken(); // Skip '>'.
3212 }
3213 
3214 // Returns true for the declaration/definition form of @protocol,
3215 // false for the expression form.
3216 bool UnwrappedLineParser::parseObjCProtocol() {
3217   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3218   nextToken();
3219 
3220   if (FormatTok->is(tok::l_paren))
3221     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3222     return false;
3223 
3224   // The definition/declaration form,
3225   // @protocol Foo
3226   // - (int)someMethod;
3227   // @end
3228 
3229   nextToken(); // protocol name
3230 
3231   if (FormatTok->Tok.is(tok::less))
3232     parseObjCProtocolList();
3233 
3234   // Check for protocol declaration.
3235   if (FormatTok->Tok.is(tok::semi)) {
3236     nextToken();
3237     addUnwrappedLine();
3238     return true;
3239   }
3240 
3241   addUnwrappedLine();
3242   parseObjCUntilAtEnd();
3243   return true;
3244 }
3245 
3246 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3247   bool IsImport = FormatTok->is(Keywords.kw_import);
3248   assert(IsImport || FormatTok->is(tok::kw_export));
3249   nextToken();
3250 
3251   // Consume the "default" in "export default class/function".
3252   if (FormatTok->is(tok::kw_default))
3253     nextToken();
3254 
3255   // Consume "async function", "function" and "default function", so that these
3256   // get parsed as free-standing JS functions, i.e. do not require a trailing
3257   // semicolon.
3258   if (FormatTok->is(Keywords.kw_async))
3259     nextToken();
3260   if (FormatTok->is(Keywords.kw_function)) {
3261     nextToken();
3262     return;
3263   }
3264 
3265   // For imports, `export *`, `export {...}`, consume the rest of the line up
3266   // to the terminating `;`. For everything else, just return and continue
3267   // parsing the structural element, i.e. the declaration or expression for
3268   // `export default`.
3269   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3270       !FormatTok->isStringLiteral())
3271     return;
3272 
3273   while (!eof()) {
3274     if (FormatTok->is(tok::semi))
3275       return;
3276     if (Line->Tokens.empty()) {
3277       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3278       // import statement should terminate.
3279       return;
3280     }
3281     if (FormatTok->is(tok::l_brace)) {
3282       FormatTok->setBlockKind(BK_Block);
3283       nextToken();
3284       parseBracedList();
3285     } else {
3286       nextToken();
3287     }
3288   }
3289 }
3290 
3291 void UnwrappedLineParser::parseStatementMacro() {
3292   nextToken();
3293   if (FormatTok->is(tok::l_paren))
3294     parseParens();
3295   if (FormatTok->is(tok::semi))
3296     nextToken();
3297   addUnwrappedLine();
3298 }
3299 
3300 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3301                                                  StringRef Prefix = "") {
3302   llvm::dbgs() << Prefix << "Line(" << Line.Level
3303                << ", FSC=" << Line.FirstStartColumn << ")"
3304                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3305   for (const auto &Node : Line.Tokens) {
3306     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3307                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3308                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3309   }
3310   for (const auto &Node : Line.Tokens)
3311     for (const auto &ChildNode : Node.Children)
3312       printDebugInfo(ChildNode, "\nChild: ");
3313 
3314   llvm::dbgs() << "\n";
3315 }
3316 
3317 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3318   if (Line->Tokens.empty())
3319     return;
3320   LLVM_DEBUG({
3321     if (CurrentLines == &Lines)
3322       printDebugInfo(*Line);
3323   });
3324 
3325   // If this line closes a block when in Whitesmiths mode, remember that
3326   // information so that the level can be decreased after the line is added.
3327   // This has to happen after the addition of the line since the line itself
3328   // needs to be indented.
3329   bool ClosesWhitesmithsBlock =
3330       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3331       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3332 
3333   CurrentLines->push_back(std::move(*Line));
3334   Line->Tokens.clear();
3335   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3336   Line->FirstStartColumn = 0;
3337 
3338   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3339     --Line->Level;
3340   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3341     CurrentLines->append(
3342         std::make_move_iterator(PreprocessorDirectives.begin()),
3343         std::make_move_iterator(PreprocessorDirectives.end()));
3344     PreprocessorDirectives.clear();
3345   }
3346   // Disconnect the current token from the last token on the previous line.
3347   FormatTok->Previous = nullptr;
3348 }
3349 
3350 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3351 
3352 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3353   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3354          FormatTok.NewlinesBefore > 0;
3355 }
3356 
3357 // Checks if \p FormatTok is a line comment that continues the line comment
3358 // section on \p Line.
3359 static bool
3360 continuesLineCommentSection(const FormatToken &FormatTok,
3361                             const UnwrappedLine &Line,
3362                             const llvm::Regex &CommentPragmasRegex) {
3363   if (Line.Tokens.empty())
3364     return false;
3365 
3366   StringRef IndentContent = FormatTok.TokenText;
3367   if (FormatTok.TokenText.startswith("//") ||
3368       FormatTok.TokenText.startswith("/*"))
3369     IndentContent = FormatTok.TokenText.substr(2);
3370   if (CommentPragmasRegex.match(IndentContent))
3371     return false;
3372 
3373   // If Line starts with a line comment, then FormatTok continues the comment
3374   // section if its original column is greater or equal to the original start
3375   // column of the line.
3376   //
3377   // Define the min column token of a line as follows: if a line ends in '{' or
3378   // contains a '{' followed by a line comment, then the min column token is
3379   // that '{'. Otherwise, the min column token of the line is the first token of
3380   // the line.
3381   //
3382   // If Line starts with a token other than a line comment, then FormatTok
3383   // continues the comment section if its original column is greater than the
3384   // original start column of the min column token of the line.
3385   //
3386   // For example, the second line comment continues the first in these cases:
3387   //
3388   // // first line
3389   // // second line
3390   //
3391   // and:
3392   //
3393   // // first line
3394   //  // second line
3395   //
3396   // and:
3397   //
3398   // int i; // first line
3399   //  // second line
3400   //
3401   // and:
3402   //
3403   // do { // first line
3404   //      // second line
3405   //   int i;
3406   // } while (true);
3407   //
3408   // and:
3409   //
3410   // enum {
3411   //   a, // first line
3412   //    // second line
3413   //   b
3414   // };
3415   //
3416   // The second line comment doesn't continue the first in these cases:
3417   //
3418   //   // first line
3419   //  // second line
3420   //
3421   // and:
3422   //
3423   // int i; // first line
3424   // // second line
3425   //
3426   // and:
3427   //
3428   // do { // first line
3429   //   // second line
3430   //   int i;
3431   // } while (true);
3432   //
3433   // and:
3434   //
3435   // enum {
3436   //   a, // first line
3437   //   // second line
3438   // };
3439   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3440 
3441   // Scan for '{//'. If found, use the column of '{' as a min column for line
3442   // comment section continuation.
3443   const FormatToken *PreviousToken = nullptr;
3444   for (const UnwrappedLineNode &Node : Line.Tokens) {
3445     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3446         isLineComment(*Node.Tok)) {
3447       MinColumnToken = PreviousToken;
3448       break;
3449     }
3450     PreviousToken = Node.Tok;
3451 
3452     // Grab the last newline preceding a token in this unwrapped line.
3453     if (Node.Tok->NewlinesBefore > 0)
3454       MinColumnToken = Node.Tok;
3455   }
3456   if (PreviousToken && PreviousToken->is(tok::l_brace))
3457     MinColumnToken = PreviousToken;
3458 
3459   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3460                               MinColumnToken);
3461 }
3462 
3463 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3464   bool JustComments = Line->Tokens.empty();
3465   for (FormatToken *Tok : CommentsBeforeNextToken) {
3466     // Line comments that belong to the same line comment section are put on the
3467     // same line since later we might want to reflow content between them.
3468     // Additional fine-grained breaking of line comment sections is controlled
3469     // by the class BreakableLineCommentSection in case it is desirable to keep
3470     // several line comment sections in the same unwrapped line.
3471     //
3472     // FIXME: Consider putting separate line comment sections as children to the
3473     // unwrapped line instead.
3474     Tok->ContinuesLineCommentSection =
3475         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3476     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3477       addUnwrappedLine();
3478     pushToken(Tok);
3479   }
3480   if (NewlineBeforeNext && JustComments)
3481     addUnwrappedLine();
3482   CommentsBeforeNextToken.clear();
3483 }
3484 
3485 void UnwrappedLineParser::nextToken(int LevelDifference) {
3486   if (eof())
3487     return;
3488   flushComments(isOnNewLine(*FormatTok));
3489   pushToken(FormatTok);
3490   FormatToken *Previous = FormatTok;
3491   if (!Style.isJavaScript())
3492     readToken(LevelDifference);
3493   else
3494     readTokenWithJavaScriptASI();
3495   FormatTok->Previous = Previous;
3496 }
3497 
3498 void UnwrappedLineParser::distributeComments(
3499     const SmallVectorImpl<FormatToken *> &Comments,
3500     const FormatToken *NextTok) {
3501   // Whether or not a line comment token continues a line is controlled by
3502   // the method continuesLineCommentSection, with the following caveat:
3503   //
3504   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3505   // that each comment line from the trail is aligned with the next token, if
3506   // the next token exists. If a trail exists, the beginning of the maximal
3507   // trail is marked as a start of a new comment section.
3508   //
3509   // For example in this code:
3510   //
3511   // int a; // line about a
3512   //   // line 1 about b
3513   //   // line 2 about b
3514   //   int b;
3515   //
3516   // the two lines about b form a maximal trail, so there are two sections, the
3517   // first one consisting of the single comment "// line about a" and the
3518   // second one consisting of the next two comments.
3519   if (Comments.empty())
3520     return;
3521   bool ShouldPushCommentsInCurrentLine = true;
3522   bool HasTrailAlignedWithNextToken = false;
3523   unsigned StartOfTrailAlignedWithNextToken = 0;
3524   if (NextTok) {
3525     // We are skipping the first element intentionally.
3526     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3527       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3528         HasTrailAlignedWithNextToken = true;
3529         StartOfTrailAlignedWithNextToken = i;
3530       }
3531     }
3532   }
3533   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3534     FormatToken *FormatTok = Comments[i];
3535     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3536       FormatTok->ContinuesLineCommentSection = false;
3537     } else {
3538       FormatTok->ContinuesLineCommentSection =
3539           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3540     }
3541     if (!FormatTok->ContinuesLineCommentSection &&
3542         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
3543       ShouldPushCommentsInCurrentLine = false;
3544     if (ShouldPushCommentsInCurrentLine)
3545       pushToken(FormatTok);
3546     else
3547       CommentsBeforeNextToken.push_back(FormatTok);
3548   }
3549 }
3550 
3551 void UnwrappedLineParser::readToken(int LevelDifference) {
3552   SmallVector<FormatToken *, 1> Comments;
3553   do {
3554     FormatTok = Tokens->getNextToken();
3555     assert(FormatTok);
3556     while (FormatTok->getType() == TT_ConflictStart ||
3557            FormatTok->getType() == TT_ConflictEnd ||
3558            FormatTok->getType() == TT_ConflictAlternative) {
3559       if (FormatTok->getType() == TT_ConflictStart)
3560         conditionalCompilationStart(/*Unreachable=*/false);
3561       else if (FormatTok->getType() == TT_ConflictAlternative)
3562         conditionalCompilationAlternative();
3563       else if (FormatTok->getType() == TT_ConflictEnd)
3564         conditionalCompilationEnd();
3565       FormatTok = Tokens->getNextToken();
3566       FormatTok->MustBreakBefore = true;
3567     }
3568 
3569     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3570            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3571       distributeComments(Comments, FormatTok);
3572       Comments.clear();
3573       // If there is an unfinished unwrapped line, we flush the preprocessor
3574       // directives only after that unwrapped line was finished later.
3575       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3576       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3577       assert((LevelDifference >= 0 ||
3578               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3579              "LevelDifference makes Line->Level negative");
3580       Line->Level += LevelDifference;
3581       // Comments stored before the preprocessor directive need to be output
3582       // before the preprocessor directive, at the same level as the
3583       // preprocessor directive, as we consider them to apply to the directive.
3584       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3585           PPBranchLevel > 0)
3586         Line->Level += PPBranchLevel;
3587       flushComments(isOnNewLine(*FormatTok));
3588       parsePPDirective();
3589     }
3590 
3591     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3592         !Line->InPPDirective)
3593       continue;
3594 
3595     if (!FormatTok->Tok.is(tok::comment)) {
3596       distributeComments(Comments, FormatTok);
3597       Comments.clear();
3598       return;
3599     }
3600 
3601     Comments.push_back(FormatTok);
3602   } while (!eof());
3603 
3604   distributeComments(Comments, nullptr);
3605   Comments.clear();
3606 }
3607 
3608 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3609   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3610   if (MustBreakBeforeNextToken) {
3611     Line->Tokens.back().Tok->MustBreakBefore = true;
3612     MustBreakBeforeNextToken = false;
3613   }
3614 }
3615 
3616 } // end namespace format
3617 } // end namespace clang
3618