1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31 
32   // Returns the next token in the token stream.
33   virtual FormatToken *getNextToken() = 0;
34 
35   // Returns the token preceding the token returned by the last call to
36   // getNextToken() in the token stream, or nullptr if no such token exists.
37   virtual FormatToken *getPreviousToken() = 0;
38 
39   // Returns the token that would be returned by the next call to
40   // getNextToken().
41   virtual FormatToken *peekNextToken() = 0;
42 
43   // Returns whether we are at the end of the file.
44   // This can be different from whether getNextToken() returned an eof token
45   // when the FormatTokenSource is a view on a part of the token stream.
46   virtual bool isEOF() = 0;
47 
48   // Gets the current position in the token stream, to be used by setPosition().
49   virtual unsigned getPosition() = 0;
50 
51   // Resets the token stream to the state it was in when getPosition() returned
52   // Position, and return the token at that position in the stream.
53   virtual FormatToken *setPosition(unsigned Position) = 0;
54 };
55 
56 namespace {
57 
58 class ScopedDeclarationState {
59 public:
60   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
61                          bool MustBeDeclaration)
62       : Line(Line), Stack(Stack) {
63     Line.MustBeDeclaration = MustBeDeclaration;
64     Stack.push_back(MustBeDeclaration);
65   }
66   ~ScopedDeclarationState() {
67     Stack.pop_back();
68     if (!Stack.empty())
69       Line.MustBeDeclaration = Stack.back();
70     else
71       Line.MustBeDeclaration = true;
72   }
73 
74 private:
75   UnwrappedLine &Line;
76   llvm::BitVector &Stack;
77 };
78 
79 static bool isLineComment(const FormatToken &FormatTok) {
80   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
81 }
82 
83 // Checks if \p FormatTok is a line comment that continues the line comment
84 // \p Previous. The original column of \p MinColumnToken is used to determine
85 // whether \p FormatTok is indented enough to the right to continue \p Previous.
86 static bool continuesLineComment(const FormatToken &FormatTok,
87                                  const FormatToken *Previous,
88                                  const FormatToken *MinColumnToken) {
89   if (!Previous || !MinColumnToken)
90     return false;
91   unsigned MinContinueColumn =
92       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
93   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
94          isLineComment(*Previous) &&
95          FormatTok.OriginalColumn >= MinContinueColumn;
96 }
97 
98 class ScopedMacroState : public FormatTokenSource {
99 public:
100   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
101                    FormatToken *&ResetToken)
102       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
103         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
104         Token(nullptr), PreviousToken(nullptr) {
105     FakeEOF.Tok.startToken();
106     FakeEOF.Tok.setKind(tok::eof);
107     TokenSource = this;
108     Line.Level = 0;
109     Line.InPPDirective = true;
110   }
111 
112   ~ScopedMacroState() override {
113     TokenSource = PreviousTokenSource;
114     ResetToken = Token;
115     Line.InPPDirective = false;
116     Line.Level = PreviousLineLevel;
117   }
118 
119   FormatToken *getNextToken() override {
120     // The \c UnwrappedLineParser guards against this by never calling
121     // \c getNextToken() after it has encountered the first eof token.
122     assert(!eof());
123     PreviousToken = Token;
124     Token = PreviousTokenSource->getNextToken();
125     if (eof())
126       return &FakeEOF;
127     return Token;
128   }
129 
130   FormatToken *getPreviousToken() override {
131     return PreviousTokenSource->getPreviousToken();
132   }
133 
134   FormatToken *peekNextToken() override {
135     if (eof())
136       return &FakeEOF;
137     return PreviousTokenSource->peekNextToken();
138   }
139 
140   bool isEOF() override { return PreviousTokenSource->isEOF(); }
141 
142   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
143 
144   FormatToken *setPosition(unsigned Position) override {
145     PreviousToken = nullptr;
146     Token = PreviousTokenSource->setPosition(Position);
147     return Token;
148   }
149 
150 private:
151   bool eof() {
152     return Token && Token->HasUnescapedNewline &&
153            !continuesLineComment(*Token, PreviousToken,
154                                  /*MinColumnToken=*/PreviousToken);
155   }
156 
157   FormatToken FakeEOF;
158   UnwrappedLine &Line;
159   FormatTokenSource *&TokenSource;
160   FormatToken *&ResetToken;
161   unsigned PreviousLineLevel;
162   FormatTokenSource *PreviousTokenSource;
163 
164   FormatToken *Token;
165   FormatToken *PreviousToken;
166 };
167 
168 } // end anonymous namespace
169 
170 class ScopedLineState {
171 public:
172   ScopedLineState(UnwrappedLineParser &Parser,
173                   bool SwitchToPreprocessorLines = false)
174       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
175     if (SwitchToPreprocessorLines)
176       Parser.CurrentLines = &Parser.PreprocessorDirectives;
177     else if (!Parser.Line->Tokens.empty())
178       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
179     PreBlockLine = std::move(Parser.Line);
180     Parser.Line = std::make_unique<UnwrappedLine>();
181     Parser.Line->Level = PreBlockLine->Level;
182     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
183   }
184 
185   ~ScopedLineState() {
186     if (!Parser.Line->Tokens.empty())
187       Parser.addUnwrappedLine();
188     assert(Parser.Line->Tokens.empty());
189     Parser.Line = std::move(PreBlockLine);
190     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
191       Parser.MustBreakBeforeNextToken = true;
192     Parser.CurrentLines = OriginalLines;
193   }
194 
195 private:
196   UnwrappedLineParser &Parser;
197 
198   std::unique_ptr<UnwrappedLine> PreBlockLine;
199   SmallVectorImpl<UnwrappedLine> *OriginalLines;
200 };
201 
202 class CompoundStatementIndenter {
203 public:
204   CompoundStatementIndenter(UnwrappedLineParser *Parser,
205                             const FormatStyle &Style, unsigned &LineLevel)
206       : CompoundStatementIndenter(Parser, LineLevel,
207                                   Style.BraceWrapping.AfterControlStatement,
208                                   Style.BraceWrapping.IndentBraces) {}
209   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
210                             bool WrapBrace, bool IndentBrace)
211       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
212     if (WrapBrace)
213       Parser->addUnwrappedLine();
214     if (IndentBrace)
215       ++LineLevel;
216   }
217   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
218 
219 private:
220   unsigned &LineLevel;
221   unsigned OldLineLevel;
222 };
223 
224 namespace {
225 
226 class IndexedTokenSource : public FormatTokenSource {
227 public:
228   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
229       : Tokens(Tokens), Position(-1) {}
230 
231   FormatToken *getNextToken() override {
232     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
233       LLVM_DEBUG({
234         llvm::dbgs() << "Next ";
235         dbgToken(Position);
236       });
237       return Tokens[Position];
238     }
239     ++Position;
240     LLVM_DEBUG({
241       llvm::dbgs() << "Next ";
242       dbgToken(Position);
243     });
244     return Tokens[Position];
245   }
246 
247   FormatToken *getPreviousToken() override {
248     return Position > 0 ? Tokens[Position - 1] : nullptr;
249   }
250 
251   FormatToken *peekNextToken() override {
252     int Next = Position + 1;
253     LLVM_DEBUG({
254       llvm::dbgs() << "Peeking ";
255       dbgToken(Next);
256     });
257     return Tokens[Next];
258   }
259 
260   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
261 
262   unsigned getPosition() override {
263     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
264     assert(Position >= 0);
265     return Position;
266   }
267 
268   FormatToken *setPosition(unsigned P) override {
269     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
270     Position = P;
271     return Tokens[Position];
272   }
273 
274   void reset() { Position = -1; }
275 
276 private:
277   void dbgToken(int Position, llvm::StringRef Indent = "") {
278     FormatToken *Tok = Tokens[Position];
279     llvm::dbgs() << Indent << "[" << Position
280                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
281                  << ", Macro: " << !!Tok->MacroCtx << "\n";
282   }
283 
284   ArrayRef<FormatToken *> Tokens;
285   int Position;
286 };
287 
288 } // end anonymous namespace
289 
290 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
291                                          const AdditionalKeywords &Keywords,
292                                          unsigned FirstStartColumn,
293                                          ArrayRef<FormatToken *> Tokens,
294                                          UnwrappedLineConsumer &Callback)
295     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
296       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
297       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
298       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
299       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
300                        ? IG_Rejected
301                        : IG_Inited),
302       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
303 
304 void UnwrappedLineParser::reset() {
305   PPBranchLevel = -1;
306   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
307                      ? IG_Rejected
308                      : IG_Inited;
309   IncludeGuardToken = nullptr;
310   Line.reset(new UnwrappedLine);
311   CommentsBeforeNextToken.clear();
312   FormatTok = nullptr;
313   MustBreakBeforeNextToken = false;
314   PreprocessorDirectives.clear();
315   CurrentLines = &Lines;
316   DeclarationScopeStack.clear();
317   NestedTooDeep.clear();
318   PPStack.clear();
319   Line->FirstStartColumn = FirstStartColumn;
320 }
321 
322 void UnwrappedLineParser::parse() {
323   IndexedTokenSource TokenSource(AllTokens);
324   Line->FirstStartColumn = FirstStartColumn;
325   do {
326     LLVM_DEBUG(llvm::dbgs() << "----\n");
327     reset();
328     Tokens = &TokenSource;
329     TokenSource.reset();
330 
331     readToken();
332     parseFile();
333 
334     // If we found an include guard then all preprocessor directives (other than
335     // the guard) are over-indented by one.
336     if (IncludeGuard == IG_Found)
337       for (auto &Line : Lines)
338         if (Line.InPPDirective && Line.Level > 0)
339           --Line.Level;
340 
341     // Create line with eof token.
342     pushToken(FormatTok);
343     addUnwrappedLine();
344 
345     for (const UnwrappedLine &Line : Lines)
346       Callback.consumeUnwrappedLine(Line);
347 
348     Callback.finishRun();
349     Lines.clear();
350     while (!PPLevelBranchIndex.empty() &&
351            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
352       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
353       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
354     }
355     if (!PPLevelBranchIndex.empty()) {
356       ++PPLevelBranchIndex.back();
357       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
358       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
359     }
360   } while (!PPLevelBranchIndex.empty());
361 }
362 
363 void UnwrappedLineParser::parseFile() {
364   // The top-level context in a file always has declarations, except for pre-
365   // processor directives and JavaScript files.
366   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
367   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
368                                           MustBeDeclaration);
369   if (Style.Language == FormatStyle::LK_TextProto)
370     parseBracedList();
371   else
372     parseLevel(/*HasOpeningBrace=*/false);
373   // Make sure to format the remaining tokens.
374   //
375   // LK_TextProto is special since its top-level is parsed as the body of a
376   // braced list, which does not necessarily have natural line separators such
377   // as a semicolon. Comments after the last entry that have been determined to
378   // not belong to that line, as in:
379   //   key: value
380   //   // endfile comment
381   // do not have a chance to be put on a line of their own until this point.
382   // Here we add this newline before end-of-file comments.
383   if (Style.Language == FormatStyle::LK_TextProto &&
384       !CommentsBeforeNextToken.empty())
385     addUnwrappedLine();
386   flushComments(true);
387   addUnwrappedLine();
388 }
389 
390 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
391   do {
392     switch (FormatTok->Tok.getKind()) {
393     case tok::l_brace:
394       return;
395     default:
396       if (FormatTok->is(Keywords.kw_where)) {
397         addUnwrappedLine();
398         nextToken();
399         parseCSharpGenericTypeConstraint();
400         break;
401       }
402       nextToken();
403       break;
404     }
405   } while (!eof());
406 }
407 
408 void UnwrappedLineParser::parseCSharpAttribute() {
409   int UnpairedSquareBrackets = 1;
410   do {
411     switch (FormatTok->Tok.getKind()) {
412     case tok::r_square:
413       nextToken();
414       --UnpairedSquareBrackets;
415       if (UnpairedSquareBrackets == 0) {
416         addUnwrappedLine();
417         return;
418       }
419       break;
420     case tok::l_square:
421       ++UnpairedSquareBrackets;
422       nextToken();
423       break;
424     default:
425       nextToken();
426       break;
427     }
428   } while (!eof());
429 }
430 
431 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
432   if (!Lines.empty() && Lines.back().InPPDirective)
433     return true;
434 
435   const FormatToken *Previous = Tokens->getPreviousToken();
436   return Previous && Previous->is(tok::comment) &&
437          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
438 }
439 
440 // Returns true if a simple block, or false otherwise. (A simple block has a
441 // single statement.)
442 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) {
443   const bool IsPrecededByCommentOrPPDirective =
444       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
445   unsigned StatementCount = 0;
446   bool SwitchLabelEncountered = false;
447   do {
448     tok::TokenKind kind = FormatTok->Tok.getKind();
449     if (FormatTok->getType() == TT_MacroBlockBegin)
450       kind = tok::l_brace;
451     else if (FormatTok->getType() == TT_MacroBlockEnd)
452       kind = tok::r_brace;
453 
454     switch (kind) {
455     case tok::comment:
456       nextToken();
457       addUnwrappedLine();
458       break;
459     case tok::l_brace:
460       // FIXME: Add parameter whether this can happen - if this happens, we must
461       // be in a non-declaration context.
462       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
463         continue;
464       parseBlock();
465       ++StatementCount;
466       assert(StatementCount > 0 && "StatementCount overflow!");
467       addUnwrappedLine();
468       break;
469     case tok::r_brace:
470       if (HasOpeningBrace) {
471         if (!Style.RemoveBracesLLVM)
472           return false;
473         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 ||
474             IsPrecededByCommentOrPPDirective ||
475             precededByCommentOrPPDirective())
476           return false;
477         const FormatToken *Next = Tokens->peekNextToken();
478         return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
479       }
480       nextToken();
481       addUnwrappedLine();
482       break;
483     case tok::kw_default: {
484       unsigned StoredPosition = Tokens->getPosition();
485       FormatToken *Next;
486       do {
487         Next = Tokens->getNextToken();
488         assert(Next);
489       } while (Next->is(tok::comment));
490       FormatTok = Tokens->setPosition(StoredPosition);
491       if (Next->isNot(tok::colon)) {
492         // default not followed by ':' is not a case label; treat it like
493         // an identifier.
494         parseStructuralElement();
495         break;
496       }
497       // Else, if it is 'default:', fall through to the case handling.
498       LLVM_FALLTHROUGH;
499     }
500     case tok::kw_case:
501       if (Style.isJavaScript() && Line->MustBeDeclaration) {
502         // A 'case: string' style field declaration.
503         parseStructuralElement();
504         break;
505       }
506       if (!SwitchLabelEncountered &&
507           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
508         ++Line->Level;
509       SwitchLabelEncountered = true;
510       parseStructuralElement();
511       break;
512     case tok::l_square:
513       if (Style.isCSharp()) {
514         nextToken();
515         parseCSharpAttribute();
516         break;
517       }
518       LLVM_FALLTHROUGH;
519     default:
520       parseStructuralElement(IfKind, !HasOpeningBrace);
521       ++StatementCount;
522       assert(StatementCount > 0 && "StatementCount overflow!");
523       break;
524     }
525   } while (!eof());
526   return false;
527 }
528 
529 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
530   // We'll parse forward through the tokens until we hit
531   // a closing brace or eof - note that getNextToken() will
532   // parse macros, so this will magically work inside macro
533   // definitions, too.
534   unsigned StoredPosition = Tokens->getPosition();
535   FormatToken *Tok = FormatTok;
536   const FormatToken *PrevTok = Tok->Previous;
537   // Keep a stack of positions of lbrace tokens. We will
538   // update information about whether an lbrace starts a
539   // braced init list or a different block during the loop.
540   SmallVector<FormatToken *, 8> LBraceStack;
541   assert(Tok->Tok.is(tok::l_brace));
542   do {
543     // Get next non-comment token.
544     FormatToken *NextTok;
545     unsigned ReadTokens = 0;
546     do {
547       NextTok = Tokens->getNextToken();
548       ++ReadTokens;
549     } while (NextTok->is(tok::comment));
550 
551     switch (Tok->Tok.getKind()) {
552     case tok::l_brace:
553       if (Style.isJavaScript() && PrevTok) {
554         if (PrevTok->isOneOf(tok::colon, tok::less))
555           // A ':' indicates this code is in a type, or a braced list
556           // following a label in an object literal ({a: {b: 1}}).
557           // A '<' could be an object used in a comparison, but that is nonsense
558           // code (can never return true), so more likely it is a generic type
559           // argument (`X<{a: string; b: number}>`).
560           // The code below could be confused by semicolons between the
561           // individual members in a type member list, which would normally
562           // trigger BK_Block. In both cases, this must be parsed as an inline
563           // braced init.
564           Tok->setBlockKind(BK_BracedInit);
565         else if (PrevTok->is(tok::r_paren))
566           // `) { }` can only occur in function or method declarations in JS.
567           Tok->setBlockKind(BK_Block);
568       } else {
569         Tok->setBlockKind(BK_Unknown);
570       }
571       LBraceStack.push_back(Tok);
572       break;
573     case tok::r_brace:
574       if (LBraceStack.empty())
575         break;
576       if (LBraceStack.back()->is(BK_Unknown)) {
577         bool ProbablyBracedList = false;
578         if (Style.Language == FormatStyle::LK_Proto) {
579           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
580         } else {
581           // Skip NextTok over preprocessor lines, otherwise we may not
582           // properly diagnose the block as a braced intializer
583           // if the comma separator appears after the pp directive.
584           while (NextTok->is(tok::hash)) {
585             ScopedMacroState MacroState(*Line, Tokens, NextTok);
586             do {
587               NextTok = Tokens->getNextToken();
588               ++ReadTokens;
589             } while (NextTok->isNot(tok::eof));
590           }
591 
592           // Using OriginalColumn to distinguish between ObjC methods and
593           // binary operators is a bit hacky.
594           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
595                                   NextTok->OriginalColumn == 0;
596 
597           // If there is a comma, semicolon or right paren after the closing
598           // brace, we assume this is a braced initializer list.  Note that
599           // regardless how we mark inner braces here, we will overwrite the
600           // BlockKind later if we parse a braced list (where all blocks
601           // inside are by default braced lists), or when we explicitly detect
602           // blocks (for example while parsing lambdas).
603           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
604           // braced list in JS.
605           ProbablyBracedList =
606               (Style.isJavaScript() &&
607                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
608                                 Keywords.kw_as)) ||
609               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
610               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
611                                tok::r_paren, tok::r_square, tok::l_brace,
612                                tok::ellipsis) ||
613               (NextTok->is(tok::identifier) &&
614                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
615               (NextTok->is(tok::semi) &&
616                (!ExpectClassBody || LBraceStack.size() != 1)) ||
617               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
618           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
619             // We can have an array subscript after a braced init
620             // list, but C++11 attributes are expected after blocks.
621             NextTok = Tokens->getNextToken();
622             ++ReadTokens;
623             ProbablyBracedList = NextTok->isNot(tok::l_square);
624           }
625         }
626         if (ProbablyBracedList) {
627           Tok->setBlockKind(BK_BracedInit);
628           LBraceStack.back()->setBlockKind(BK_BracedInit);
629         } else {
630           Tok->setBlockKind(BK_Block);
631           LBraceStack.back()->setBlockKind(BK_Block);
632         }
633       }
634       LBraceStack.pop_back();
635       break;
636     case tok::identifier:
637       if (!Tok->is(TT_StatementMacro))
638         break;
639       LLVM_FALLTHROUGH;
640     case tok::at:
641     case tok::semi:
642     case tok::kw_if:
643     case tok::kw_while:
644     case tok::kw_for:
645     case tok::kw_switch:
646     case tok::kw_try:
647     case tok::kw___try:
648       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
649         LBraceStack.back()->setBlockKind(BK_Block);
650       break;
651     default:
652       break;
653     }
654     PrevTok = Tok;
655     Tok = NextTok;
656   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
657 
658   // Assume other blocks for all unclosed opening braces.
659   for (FormatToken *LBrace : LBraceStack)
660     if (LBrace->is(BK_Unknown))
661       LBrace->setBlockKind(BK_Block);
662 
663   FormatTok = Tokens->setPosition(StoredPosition);
664 }
665 
// Mixes the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
671 
672 size_t UnwrappedLineParser::computePPHash() const {
673   size_t h = 0;
674   for (const auto &i : PPStack) {
675     hash_combine(h, size_t(i.Kind));
676     hash_combine(h, i.Line);
677   }
678   return h;
679 }
680 
681 UnwrappedLineParser::IfStmtKind
682 UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
683                                 bool MunchSemi,
684                                 bool UnindentWhitesmithsBraces) {
685   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
686          "'{' or macro block token expected");
687   FormatToken *Tok = FormatTok;
688   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
689   FormatTok->setBlockKind(BK_Block);
690 
691   // For Whitesmiths mode, jump to the next level prior to skipping over the
692   // braces.
693   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
694     ++Line->Level;
695 
696   size_t PPStartHash = computePPHash();
697 
698   unsigned InitialLevel = Line->Level;
699   nextToken(/*LevelDifference=*/AddLevels);
700 
701   if (MacroBlock && FormatTok->is(tok::l_paren))
702     parseParens();
703 
704   size_t NbPreprocessorDirectives =
705       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
706   addUnwrappedLine();
707   size_t OpeningLineIndex =
708       CurrentLines->empty()
709           ? (UnwrappedLine::kInvalidIndex)
710           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
711 
712   // Whitesmiths is weird here. The brace needs to be indented for the namespace
713   // block, but the block itself may not be indented depending on the style
714   // settings. This allows the format to back up one level in those cases.
715   if (UnindentWhitesmithsBraces)
716     --Line->Level;
717 
718   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
719                                           MustBeDeclaration);
720   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
721     Line->Level += AddLevels;
722 
723   IfStmtKind IfKind = IfStmtKind::NotIf;
724   const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind);
725 
726   if (eof())
727     return IfKind;
728 
729   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
730                  : !FormatTok->is(tok::r_brace)) {
731     Line->Level = InitialLevel;
732     FormatTok->setBlockKind(BK_Block);
733     return IfKind;
734   }
735 
736   if (SimpleBlock && Tok->is(tok::l_brace)) {
737     assert(FormatTok->is(tok::r_brace));
738     const FormatToken *Previous = Tokens->getPreviousToken();
739     assert(Previous);
740     if (Previous->isNot(tok::r_brace) || Previous->Optional) {
741       Tok->MatchingParen = FormatTok;
742       FormatTok->MatchingParen = Tok;
743     }
744   }
745 
746   size_t PPEndHash = computePPHash();
747 
748   // Munch the closing brace.
749   nextToken(/*LevelDifference=*/-AddLevels);
750 
751   if (MacroBlock && FormatTok->is(tok::l_paren))
752     parseParens();
753 
754   if (FormatTok->is(tok::arrow)) {
755     // Following the } we can find a trailing return type arrow
756     // as part of an implicit conversion constraint.
757     nextToken();
758     parseStructuralElement();
759   }
760 
761   if (MunchSemi && FormatTok->Tok.is(tok::semi))
762     nextToken();
763 
764   Line->Level = InitialLevel;
765 
766   if (PPStartHash == PPEndHash) {
767     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
768     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
769       // Update the opening line to add the forward reference as well
770       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
771           CurrentLines->size() - 1;
772     }
773   }
774 
775   return IfKind;
776 }
777 
778 static bool isGoogScope(const UnwrappedLine &Line) {
779   // FIXME: Closure-library specific stuff should not be hard-coded but be
780   // configurable.
781   if (Line.Tokens.size() < 4)
782     return false;
783   auto I = Line.Tokens.begin();
784   if (I->Tok->TokenText != "goog")
785     return false;
786   ++I;
787   if (I->Tok->isNot(tok::period))
788     return false;
789   ++I;
790   if (I->Tok->TokenText != "scope")
791     return false;
792   ++I;
793   return I->Tok->is(tok::l_paren);
794 }
795 
796 static bool isIIFE(const UnwrappedLine &Line,
797                    const AdditionalKeywords &Keywords) {
798   // Look for the start of an immediately invoked anonymous function.
799   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
800   // This is commonly done in JavaScript to create a new, anonymous scope.
801   // Example: (function() { ... })()
802   if (Line.Tokens.size() < 3)
803     return false;
804   auto I = Line.Tokens.begin();
805   if (I->Tok->isNot(tok::l_paren))
806     return false;
807   ++I;
808   if (I->Tok->isNot(Keywords.kw_function))
809     return false;
810   ++I;
811   return I->Tok->is(tok::l_paren);
812 }
813 
814 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
815                                    const FormatToken &InitialToken) {
816   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
817     return Style.BraceWrapping.AfterNamespace;
818   if (InitialToken.is(tok::kw_class))
819     return Style.BraceWrapping.AfterClass;
820   if (InitialToken.is(tok::kw_union))
821     return Style.BraceWrapping.AfterUnion;
822   if (InitialToken.is(tok::kw_struct))
823     return Style.BraceWrapping.AfterStruct;
824   if (InitialToken.is(tok::kw_enum))
825     return Style.BraceWrapping.AfterEnum;
826   return false;
827 }
828 
829 void UnwrappedLineParser::parseChildBlock() {
830   FormatTok->setBlockKind(BK_Block);
831   nextToken();
832   {
833     bool SkipIndent = (Style.isJavaScript() &&
834                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
835     ScopedLineState LineState(*this);
836     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
837                                             /*MustBeDeclaration=*/false);
838     Line->Level += SkipIndent ? 0 : 1;
839     parseLevel(/*HasOpeningBrace=*/true);
840     flushComments(isOnNewLine(*FormatTok));
841     Line->Level -= SkipIndent ? 0 : 1;
842   }
843   nextToken();
844 }
845 
846 void UnwrappedLineParser::parsePPDirective() {
847   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
848   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
849 
850   nextToken();
851 
852   if (!FormatTok->Tok.getIdentifierInfo()) {
853     parsePPUnknown();
854     return;
855   }
856 
857   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
858   case tok::pp_define:
859     parsePPDefine();
860     return;
861   case tok::pp_if:
862     parsePPIf(/*IfDef=*/false);
863     break;
864   case tok::pp_ifdef:
865   case tok::pp_ifndef:
866     parsePPIf(/*IfDef=*/true);
867     break;
868   case tok::pp_else:
869     parsePPElse();
870     break;
871   case tok::pp_elifdef:
872   case tok::pp_elifndef:
873   case tok::pp_elif:
874     parsePPElIf();
875     break;
876   case tok::pp_endif:
877     parsePPEndIf();
878     break;
879   default:
880     parsePPUnknown();
881     break;
882   }
883 }
884 
885 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
886   size_t Line = CurrentLines->size();
887   if (CurrentLines == &PreprocessorDirectives)
888     Line += Lines.size();
889 
890   if (Unreachable ||
891       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
892     PPStack.push_back({PP_Unreachable, Line});
893   else
894     PPStack.push_back({PP_Conditional, Line});
895 }
896 
897 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
898   ++PPBranchLevel;
899   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
900   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
901     PPLevelBranchIndex.push_back(0);
902     PPLevelBranchCount.push_back(0);
903   }
904   PPChainBranchIndex.push(0);
905   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
906   conditionalCompilationCondition(Unreachable || Skip);
907 }
908 
909 void UnwrappedLineParser::conditionalCompilationAlternative() {
910   if (!PPStack.empty())
911     PPStack.pop_back();
912   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
913   if (!PPChainBranchIndex.empty())
914     ++PPChainBranchIndex.top();
915   conditionalCompilationCondition(
916       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
917       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
918 }
919 
920 void UnwrappedLineParser::conditionalCompilationEnd() {
921   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
922   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
923     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
924       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
925   }
926   // Guard against #endif's without #if.
927   if (PPBranchLevel > -1)
928     --PPBranchLevel;
929   if (!PPChainBranchIndex.empty())
930     PPChainBranchIndex.pop();
931   if (!PPStack.empty())
932     PPStack.pop_back();
933 }
934 
935 void UnwrappedLineParser::parsePPIf(bool IfDef) {
936   bool IfNDef = FormatTok->is(tok::pp_ifndef);
937   nextToken();
938   bool Unreachable = false;
939   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
940     Unreachable = true;
941   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
942     Unreachable = true;
943   conditionalCompilationStart(Unreachable);
944   FormatToken *IfCondition = FormatTok;
945   // If there's a #ifndef on the first line, and the only lines before it are
946   // comments, it could be an include guard.
947   bool MaybeIncludeGuard = IfNDef;
948   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
949     for (auto &Line : Lines) {
950       if (!Line.Tokens.front().Tok->is(tok::comment)) {
951         MaybeIncludeGuard = false;
952         IncludeGuard = IG_Rejected;
953         break;
954       }
955     }
956   --PPBranchLevel;
957   parsePPUnknown();
958   ++PPBranchLevel;
959   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
960     IncludeGuard = IG_IfNdefed;
961     IncludeGuardToken = IfCondition;
962   }
963 }
964 
965 void UnwrappedLineParser::parsePPElse() {
966   // If a potential include guard has an #else, it's not an include guard.
967   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
968     IncludeGuard = IG_Rejected;
969   conditionalCompilationAlternative();
970   if (PPBranchLevel > -1)
971     --PPBranchLevel;
972   parsePPUnknown();
973   ++PPBranchLevel;
974 }
975 
976 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
977 
978 void UnwrappedLineParser::parsePPEndIf() {
979   conditionalCompilationEnd();
980   parsePPUnknown();
981   // If the #endif of a potential include guard is the last thing in the file,
982   // then we found an include guard.
983   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
984       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
985     IncludeGuard = IG_Found;
986 }
987 
988 void UnwrappedLineParser::parsePPDefine() {
989   nextToken();
990 
991   if (!FormatTok->Tok.getIdentifierInfo()) {
992     IncludeGuard = IG_Rejected;
993     IncludeGuardToken = nullptr;
994     parsePPUnknown();
995     return;
996   }
997 
998   if (IncludeGuard == IG_IfNdefed &&
999       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1000     IncludeGuard = IG_Defined;
1001     IncludeGuardToken = nullptr;
1002     for (auto &Line : Lines) {
1003       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1004         IncludeGuard = IG_Rejected;
1005         break;
1006       }
1007     }
1008   }
1009 
1010   nextToken();
1011   if (FormatTok->Tok.getKind() == tok::l_paren &&
1012       !FormatTok->hasWhitespaceBefore())
1013     parseParens();
1014   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1015     Line->Level += PPBranchLevel + 1;
1016   addUnwrappedLine();
1017   ++Line->Level;
1018 
1019   // Errors during a preprocessor directive can only affect the layout of the
1020   // preprocessor directive, and thus we ignore them. An alternative approach
1021   // would be to use the same approach we use on the file level (no
1022   // re-indentation if there was a structural error) within the macro
1023   // definition.
1024   parseFile();
1025 }
1026 
1027 void UnwrappedLineParser::parsePPUnknown() {
1028   do {
1029     nextToken();
1030   } while (!eof());
1031   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1032     Line->Level += PPBranchLevel + 1;
1033   addUnwrappedLine();
1034 }
1035 
1036 // Here we exclude certain tokens that are not usually the first token in an
1037 // unwrapped line. This is used in attempt to distinguish macro calls without
1038 // trailing semicolons from other constructs split to several lines.
1039 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1040   // Semicolon can be a null-statement, l_square can be a start of a macro or
1041   // a C++11 attribute, but this doesn't seem to be common.
1042   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1043          Tok.isNot(TT_AttributeSquare) &&
1044          // Tokens that can only be used as binary operators and a part of
1045          // overloaded operator names.
1046          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1047          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1048          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1049          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1050          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1051          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1052          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1053          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1054          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1055          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1056          Tok.isNot(tok::lesslessequal) &&
1057          // Colon is used in labels, base class lists, initializer lists,
1058          // range-based for loops, ternary operator, but should never be the
1059          // first token in an unwrapped line.
1060          Tok.isNot(tok::colon) &&
1061          // 'noexcept' is a trailing annotation.
1062          Tok.isNot(tok::kw_noexcept);
1063 }
1064 
1065 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1066                           const FormatToken *FormatTok) {
1067   // FIXME: This returns true for C/C++ keywords like 'struct'.
1068   return FormatTok->is(tok::identifier) &&
1069          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1070           !FormatTok->isOneOf(
1071               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1072               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1073               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1074               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1075               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1076               Keywords.kw_instanceof, Keywords.kw_interface,
1077               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1078 }
1079 
1080 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1081                                  const FormatToken *FormatTok) {
1082   return FormatTok->Tok.isLiteral() ||
1083          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1084          mustBeJSIdent(Keywords, FormatTok);
1085 }
1086 
1087 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1088 // when encountered after a value (see mustBeJSIdentOrValue).
1089 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1090                            const FormatToken *FormatTok) {
1091   return FormatTok->isOneOf(
1092       tok::kw_return, Keywords.kw_yield,
1093       // conditionals
1094       tok::kw_if, tok::kw_else,
1095       // loops
1096       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1097       // switch/case
1098       tok::kw_switch, tok::kw_case,
1099       // exceptions
1100       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1101       // declaration
1102       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1103       Keywords.kw_async, Keywords.kw_function,
1104       // import/export
1105       Keywords.kw_import, tok::kw_export);
1106 }
1107 
1108 // Checks whether a token is a type in K&R C (aka C78).
1109 static bool isC78Type(const FormatToken &Tok) {
1110   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1111                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1112                      tok::identifier);
1113 }
1114 
1115 // This function checks whether a token starts the first parameter declaration
1116 // in a K&R C (aka C78) function definition, e.g.:
1117 //   int f(a, b)
1118 //   short a, b;
1119 //   {
1120 //      return a + b;
1121 //   }
1122 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1123                                const FormatToken *FuncName) {
1124   assert(Tok);
1125   assert(Next);
1126   assert(FuncName);
1127 
1128   if (FuncName->isNot(tok::identifier))
1129     return false;
1130 
1131   const FormatToken *Prev = FuncName->Previous;
1132   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1133     return false;
1134 
1135   if (!isC78Type(*Tok) &&
1136       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1137     return false;
1138 
1139   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1140     return false;
1141 
1142   Tok = Tok->Previous;
1143   if (!Tok || Tok->isNot(tok::r_paren))
1144     return false;
1145 
1146   Tok = Tok->Previous;
1147   if (!Tok || Tok->isNot(tok::identifier))
1148     return false;
1149 
1150   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1151 }
1152 
1153 void UnwrappedLineParser::parseModuleImport() {
1154   nextToken();
1155   while (!eof()) {
1156     if (FormatTok->is(tok::colon)) {
1157       FormatTok->setType(TT_ModulePartitionColon);
1158     }
1159     // Handle import <foo/bar.h> as we would an include statement.
1160     else if (FormatTok->is(tok::less)) {
1161       nextToken();
1162       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1163         // Mark tokens up to the trailing line comments as implicit string
1164         // literals.
1165         if (FormatTok->isNot(tok::comment) &&
1166             !FormatTok->TokenText.startswith("//"))
1167           FormatTok->setType(TT_ImplicitStringLiteral);
1168         nextToken();
1169       }
1170     }
1171     if (FormatTok->is(tok::semi)) {
1172       nextToken();
1173       break;
1174     }
1175     nextToken();
1176   }
1177 
1178   addUnwrappedLine();
1179 }
1180 
1181 // readTokenWithJavaScriptASI reads the next token and terminates the current
1182 // line if JavaScript Automatic Semicolon Insertion must
1183 // happen between the current token and the next token.
1184 //
1185 // This method is conservative - it cannot cover all edge cases of JavaScript,
1186 // but only aims to correctly handle certain well known cases. It *must not*
1187 // return true in speculative cases.
1188 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1189   FormatToken *Previous = FormatTok;
1190   readToken();
1191   FormatToken *Next = FormatTok;
1192 
1193   bool IsOnSameLine =
1194       CommentsBeforeNextToken.empty()
1195           ? Next->NewlinesBefore == 0
1196           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1197   if (IsOnSameLine)
1198     return;
1199 
1200   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1201   bool PreviousStartsTemplateExpr =
1202       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1203   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1204     // If the line contains an '@' sign, the previous token might be an
1205     // annotation, which can precede another identifier/value.
1206     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1207       return LineNode.Tok->is(tok::at);
1208     });
1209     if (HasAt)
1210       return;
1211   }
1212   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1213     return addUnwrappedLine();
1214   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1215   bool NextEndsTemplateExpr =
1216       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1217   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1218       (PreviousMustBeValue ||
1219        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1220                          tok::minusminus)))
1221     return addUnwrappedLine();
1222   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1223       isJSDeclOrStmt(Keywords, Next))
1224     return addUnwrappedLine();
1225 }
1226 
// Parses one structural element starting at FormatTok and emits the unwrapped
// line(s) for it. A first switch dispatches on constructs recognized by their
// leading token; whatever falls out of that switch is consumed by the token
// loop below it until one of the branches ends the element or eof() holds.
//
// IfKind is only forwarded to parseIfThenElse(). IsTopLevel gates the K&R C
// parameter-declaration check that follows a parenthesized group.
void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
                                                 bool IsTopLevel) {
  // In TableGen, an include keyword optionally followed by a string literal
  // forms its own unwrapped line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Constructs that are identified by the first token of the element. Cases
  // that 'break' continue in the generic token loop below; cases that
  // 'return' have handled the element completely.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Every token up to the closing brace is marked Finalized; the closing
      // brace ends the unwrapped line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these keywords are simply consumed; in the
    // remaining languages they are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse(IfKind);
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "..." {' is handled here; any other use of extern
    // continues in the token loop below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    // 'import' is handled per language: ES6 import in JavaScript, a simple
    // one-line import in Proto, and a module import in C++.
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // signals/slots keywords followed by ':' end the line right after the
    // colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic token loop: each iteration dispatches on the current token and
  // either consumes it (possibly together with a nested construct) or ends
  // the structural element by returning.
  do {
    // Captured before the branch below consumes any tokens.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      return;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // A typedef followed by one of the NS_/CF_ enum macros is parsed as an
      // enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike())
        return;
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here; only the current line is
      // ended.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      // Consume the operator symbol together with the keyword so that it is
      // not dispatched on its own in the next iteration.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type or identifier, a parenthesized
      // group and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very like to be an actual
          // interface declaration.
          // Peek at the following token, then rewind the token source to the
          // stored position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // 'interface' in the remaining languages is parsed like a struct.
      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // The checks below only apply when the identifier is the first token of
      // the line, or the second one after a leading comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier is all upper case, is at least five
        // characters long or function-like, is followed by a line break, and
        // the next token is one that can start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // In Proto, '= <' starts a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1721 
1722 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1723   assert(FormatTok->is(tok::l_brace));
1724   if (!Style.isCSharp())
1725     return false;
1726   // See if it's a property accessor.
1727   if (FormatTok->Previous->isNot(tok::identifier))
1728     return false;
1729 
1730   // See if we are inside a property accessor.
1731   //
1732   // Record the current tokenPosition so that we can advance and
1733   // reset the current token. `Next` is not set yet so we need
1734   // another way to advance along the token stream.
1735   unsigned int StoredPosition = Tokens->getPosition();
1736   FormatToken *Tok = Tokens->getNextToken();
1737 
1738   // A trivial property accessor is of the form:
1739   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1740   // Track these as they do not require line breaks to be introduced.
1741   bool HasGetOrSet = false;
1742   bool IsTrivialPropertyAccessor = true;
1743   while (!eof()) {
1744     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1745                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1746                      Keywords.kw_set)) {
1747       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1748         HasGetOrSet = true;
1749       Tok = Tokens->getNextToken();
1750       continue;
1751     }
1752     if (Tok->isNot(tok::r_brace))
1753       IsTrivialPropertyAccessor = false;
1754     break;
1755   }
1756 
1757   if (!HasGetOrSet) {
1758     Tokens->setPosition(StoredPosition);
1759     return false;
1760   }
1761 
1762   // Try to parse the property accessor:
1763   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1764   Tokens->setPosition(StoredPosition);
1765   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1766     addUnwrappedLine();
1767   nextToken();
1768   do {
1769     switch (FormatTok->Tok.getKind()) {
1770     case tok::r_brace:
1771       nextToken();
1772       if (FormatTok->is(tok::equal)) {
1773         while (!eof() && FormatTok->isNot(tok::semi))
1774           nextToken();
1775         nextToken();
1776       }
1777       addUnwrappedLine();
1778       return true;
1779     case tok::l_brace:
1780       ++Line->Level;
1781       parseBlock(/*MustBeDeclaration=*/true);
1782       addUnwrappedLine();
1783       --Line->Level;
1784       break;
1785     case tok::equal:
1786       if (FormatTok->is(TT_FatArrow)) {
1787         ++Line->Level;
1788         do {
1789           nextToken();
1790         } while (!eof() && FormatTok->isNot(tok::semi));
1791         nextToken();
1792         addUnwrappedLine();
1793         --Line->Level;
1794         break;
1795       }
1796       nextToken();
1797       break;
1798     default:
1799       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1800           !IsTrivialPropertyAccessor) {
1801         // Non-trivial get/set needs to be on its own line.
1802         addUnwrappedLine();
1803       }
1804       nextToken();
1805     }
1806   } while (!eof());
1807 
1808   // Unreachable for well-formed code (paired '{' and '}').
1809   return true;
1810 }
1811 
1812 bool UnwrappedLineParser::tryToParseLambda() {
1813   if (!Style.isCpp()) {
1814     nextToken();
1815     return false;
1816   }
1817   assert(FormatTok->is(tok::l_square));
1818   FormatToken &LSquare = *FormatTok;
1819   if (!tryToParseLambdaIntroducer())
1820     return false;
1821 
1822   bool SeenArrow = false;
1823   bool InTemplateParameterList = false;
1824 
1825   while (FormatTok->isNot(tok::l_brace)) {
1826     if (FormatTok->isSimpleTypeSpecifier()) {
1827       nextToken();
1828       continue;
1829     }
1830     switch (FormatTok->Tok.getKind()) {
1831     case tok::l_brace:
1832       break;
1833     case tok::l_paren:
1834       parseParens();
1835       break;
1836     case tok::l_square:
1837       parseSquare();
1838       break;
1839     case tok::kw_class:
1840     case tok::kw_template:
1841     case tok::kw_typename:
1842       assert(FormatTok->Previous);
1843       if (FormatTok->Previous->is(tok::less))
1844         InTemplateParameterList = true;
1845       nextToken();
1846       break;
1847     case tok::amp:
1848     case tok::star:
1849     case tok::kw_const:
1850     case tok::comma:
1851     case tok::less:
1852     case tok::greater:
1853     case tok::identifier:
1854     case tok::numeric_constant:
1855     case tok::coloncolon:
1856     case tok::kw_mutable:
1857     case tok::kw_noexcept:
1858       nextToken();
1859       break;
1860     // Specialization of a template with an integer parameter can contain
1861     // arithmetic, logical, comparison and ternary operators.
1862     //
1863     // FIXME: This also accepts sequences of operators that are not in the scope
1864     // of a template argument list.
1865     //
1866     // In a C++ lambda a template type can only occur after an arrow. We use
1867     // this as an heuristic to distinguish between Objective-C expressions
1868     // followed by an `a->b` expression, such as:
1869     // ([obj func:arg] + a->b)
1870     // Otherwise the code below would parse as a lambda.
1871     //
1872     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1873     // explicit template lists: []<bool b = true && false>(U &&u){}
1874     case tok::plus:
1875     case tok::minus:
1876     case tok::exclaim:
1877     case tok::tilde:
1878     case tok::slash:
1879     case tok::percent:
1880     case tok::lessless:
1881     case tok::pipe:
1882     case tok::pipepipe:
1883     case tok::ampamp:
1884     case tok::caret:
1885     case tok::equalequal:
1886     case tok::exclaimequal:
1887     case tok::greaterequal:
1888     case tok::lessequal:
1889     case tok::question:
1890     case tok::colon:
1891     case tok::ellipsis:
1892     case tok::kw_true:
1893     case tok::kw_false:
1894       if (SeenArrow || InTemplateParameterList) {
1895         nextToken();
1896         break;
1897       }
1898       return true;
1899     case tok::arrow:
1900       // This might or might not actually be a lambda arrow (this could be an
1901       // ObjC method invocation followed by a dereferencing arrow). We might
1902       // reset this back to TT_Unknown in TokenAnnotator.
1903       FormatTok->setType(TT_LambdaArrow);
1904       SeenArrow = true;
1905       nextToken();
1906       break;
1907     default:
1908       return true;
1909     }
1910   }
1911   FormatTok->setType(TT_LambdaLBrace);
1912   LSquare.setType(TT_LambdaLSquare);
1913   parseChildBlock();
1914   return true;
1915 }
1916 
1917 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1918   const FormatToken *Previous = FormatTok->Previous;
1919   if (Previous &&
1920       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1921                          tok::kw_delete, tok::l_square) ||
1922        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1923        Previous->isSimpleTypeSpecifier())) {
1924     nextToken();
1925     return false;
1926   }
1927   nextToken();
1928   if (FormatTok->is(tok::l_square))
1929     return false;
1930   parseSquare(/*LambdaIntroducer=*/true);
1931   return true;
1932 }
1933 
1934 void UnwrappedLineParser::tryToParseJSFunction() {
1935   assert(FormatTok->is(Keywords.kw_function) ||
1936          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1937   if (FormatTok->is(Keywords.kw_async))
1938     nextToken();
1939   // Consume "function".
1940   nextToken();
1941 
1942   // Consume * (generator function). Treat it like C++'s overloaded operators.
1943   if (FormatTok->is(tok::star)) {
1944     FormatTok->setType(TT_OverloadedOperator);
1945     nextToken();
1946   }
1947 
1948   // Consume function name.
1949   if (FormatTok->is(tok::identifier))
1950     nextToken();
1951 
1952   if (FormatTok->isNot(tok::l_paren))
1953     return;
1954 
1955   // Parse formal parameter list.
1956   parseParens();
1957 
1958   if (FormatTok->is(tok::colon)) {
1959     // Parse a type definition.
1960     nextToken();
1961 
1962     // Eat the type declaration. For braced inline object types, balance braces,
1963     // otherwise just parse until finding an l_brace for the function body.
1964     if (FormatTok->is(tok::l_brace))
1965       tryToParseBracedList();
1966     else
1967       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1968         nextToken();
1969   }
1970 
1971   if (FormatTok->is(tok::semi))
1972     return;
1973 
1974   parseChildBlock();
1975 }
1976 
1977 bool UnwrappedLineParser::tryToParseBracedList() {
1978   if (FormatTok->is(BK_Unknown))
1979     calculateBraceTypes();
1980   assert(FormatTok->isNot(BK_Unknown));
1981   if (FormatTok->is(BK_Block))
1982     return false;
1983   nextToken();
1984   parseBracedList();
1985   return true;
1986 }
1987 
1988 bool UnwrappedLineParser::tryToParseChildBlock() {
1989   assert(Style.isJavaScript() || Style.isCSharp());
1990   assert(FormatTok->is(TT_FatArrow));
1991   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
1992   // They always start an expression or a child block if followed by a curly
1993   // brace.
1994   nextToken();
1995   if (FormatTok->isNot(tok::l_brace))
1996     return false;
1997   parseChildBlock();
1998   return true;
1999 }
2000 
2001 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2002                                           bool IsEnum,
2003                                           tok::TokenKind ClosingBraceKind) {
2004   bool HasError = false;
2005 
2006   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2007   // replace this by using parseAssignmentExpression() inside.
2008   do {
2009     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2010         tryToParseChildBlock())
2011       continue;
2012     if (Style.isJavaScript()) {
2013       if (FormatTok->is(Keywords.kw_function) ||
2014           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2015         tryToParseJSFunction();
2016         continue;
2017       }
2018       if (FormatTok->is(tok::l_brace)) {
2019         // Could be a method inside of a braced list `{a() { return 1; }}`.
2020         if (tryToParseBracedList())
2021           continue;
2022         parseChildBlock();
2023       }
2024     }
2025     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2026       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2027         addUnwrappedLine();
2028       nextToken();
2029       return !HasError;
2030     }
2031     switch (FormatTok->Tok.getKind()) {
2032     case tok::l_square:
2033       if (Style.isCSharp())
2034         parseSquare();
2035       else
2036         tryToParseLambda();
2037       break;
2038     case tok::l_paren:
2039       parseParens();
2040       // JavaScript can just have free standing methods and getters/setters in
2041       // object literals. Detect them by a "{" following ")".
2042       if (Style.isJavaScript()) {
2043         if (FormatTok->is(tok::l_brace))
2044           parseChildBlock();
2045         break;
2046       }
2047       break;
2048     case tok::l_brace:
2049       // Assume there are no blocks inside a braced init list apart
2050       // from the ones we explicitly parse out (like lambdas).
2051       FormatTok->setBlockKind(BK_BracedInit);
2052       nextToken();
2053       parseBracedList();
2054       break;
2055     case tok::less:
2056       if (Style.Language == FormatStyle::LK_Proto) {
2057         nextToken();
2058         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2059                         /*ClosingBraceKind=*/tok::greater);
2060       } else {
2061         nextToken();
2062       }
2063       break;
2064     case tok::semi:
2065       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2066       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2067       // used for error recovery if we have otherwise determined that this is
2068       // a braced list.
2069       if (Style.isJavaScript()) {
2070         nextToken();
2071         break;
2072       }
2073       HasError = true;
2074       if (!ContinueOnSemicolons)
2075         return !HasError;
2076       nextToken();
2077       break;
2078     case tok::comma:
2079       nextToken();
2080       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2081         addUnwrappedLine();
2082       break;
2083     default:
2084       nextToken();
2085       break;
2086     }
2087   } while (!eof());
2088   return false;
2089 }
2090 
2091 void UnwrappedLineParser::parseParens() {
2092   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2093   nextToken();
2094   do {
2095     switch (FormatTok->Tok.getKind()) {
2096     case tok::l_paren:
2097       parseParens();
2098       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2099         parseChildBlock();
2100       break;
2101     case tok::r_paren:
2102       nextToken();
2103       return;
2104     case tok::r_brace:
2105       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2106       return;
2107     case tok::l_square:
2108       tryToParseLambda();
2109       break;
2110     case tok::l_brace:
2111       if (!tryToParseBracedList())
2112         parseChildBlock();
2113       break;
2114     case tok::at:
2115       nextToken();
2116       if (FormatTok->Tok.is(tok::l_brace)) {
2117         nextToken();
2118         parseBracedList();
2119       }
2120       break;
2121     case tok::equal:
2122       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2123         tryToParseChildBlock();
2124       else
2125         nextToken();
2126       break;
2127     case tok::kw_class:
2128       if (Style.isJavaScript())
2129         parseRecord(/*ParseAsExpr=*/true);
2130       else
2131         nextToken();
2132       break;
2133     case tok::identifier:
2134       if (Style.isJavaScript() &&
2135           (FormatTok->is(Keywords.kw_function) ||
2136            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2137         tryToParseJSFunction();
2138       else
2139         nextToken();
2140       break;
2141     default:
2142       nextToken();
2143       break;
2144     }
2145   } while (!eof());
2146 }
2147 
2148 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2149   if (!LambdaIntroducer) {
2150     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2151     if (tryToParseLambda())
2152       return;
2153   }
2154   do {
2155     switch (FormatTok->Tok.getKind()) {
2156     case tok::l_paren:
2157       parseParens();
2158       break;
2159     case tok::r_square:
2160       nextToken();
2161       return;
2162     case tok::r_brace:
2163       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2164       return;
2165     case tok::l_square:
2166       parseSquare();
2167       break;
2168     case tok::l_brace: {
2169       if (!tryToParseBracedList())
2170         parseChildBlock();
2171       break;
2172     }
2173     case tok::at:
2174       nextToken();
2175       if (FormatTok->Tok.is(tok::l_brace)) {
2176         nextToken();
2177         parseBracedList();
2178       }
2179       break;
2180     default:
2181       nextToken();
2182       break;
2183     }
2184   } while (!eof());
2185 }
2186 
2187 void UnwrappedLineParser::keepAncestorBraces() {
2188   if (!Style.RemoveBracesLLVM)
2189     return;
2190 
2191   const int MaxNestingLevels = 2;
2192   const int Size = NestedTooDeep.size();
2193   if (Size >= MaxNestingLevels)
2194     NestedTooDeep[Size - MaxNestingLevels] = true;
2195   NestedTooDeep.push_back(false);
2196 }
2197 
2198 static void markOptionalBraces(FormatToken *LeftBrace) {
2199   if (!LeftBrace)
2200     return;
2201 
2202   assert(LeftBrace->is(tok::l_brace));
2203 
2204   FormatToken *RightBrace = LeftBrace->MatchingParen;
2205   if (!RightBrace) {
2206     assert(!LeftBrace->Optional);
2207     return;
2208   }
2209 
2210   assert(RightBrace->is(tok::r_brace));
2211   assert(RightBrace->MatchingParen == LeftBrace);
2212   assert(LeftBrace->Optional == RightBrace->Optional);
2213 
2214   LeftBrace->Optional = true;
2215   RightBrace->Optional = true;
2216 }
2217 
2218 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2219                                                   bool KeepBraces) {
2220   auto HandleAttributes = [this]() {
2221     // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2222     if (FormatTok->is(TT_AttributeMacro))
2223       nextToken();
2224     // Handle [[likely]] / [[unlikely]] attributes.
2225     if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2226       parseSquare();
2227   };
2228 
2229   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2230   nextToken();
2231   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2232     nextToken();
2233   if (FormatTok->Tok.is(tok::l_paren))
2234     parseParens();
2235   HandleAttributes();
2236 
2237   bool NeedsUnwrappedLine = false;
2238   keepAncestorBraces();
2239 
2240   FormatToken *IfLeftBrace = nullptr;
2241   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2242 
2243   if (FormatTok->Tok.is(tok::l_brace)) {
2244     IfLeftBrace = FormatTok;
2245     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2246     IfBlockKind = parseBlock();
2247     if (Style.BraceWrapping.BeforeElse)
2248       addUnwrappedLine();
2249     else
2250       NeedsUnwrappedLine = true;
2251   } else {
2252     addUnwrappedLine();
2253     ++Line->Level;
2254     parseStructuralElement();
2255     --Line->Level;
2256   }
2257 
2258   bool KeepIfBraces = false;
2259   if (Style.RemoveBracesLLVM) {
2260     assert(!NestedTooDeep.empty());
2261     KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2262                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2263                    IfBlockKind == IfStmtKind::IfElseIf;
2264   }
2265 
2266   FormatToken *ElseLeftBrace = nullptr;
2267   IfStmtKind Kind = IfStmtKind::IfOnly;
2268 
2269   if (FormatTok->Tok.is(tok::kw_else)) {
2270     if (Style.RemoveBracesLLVM) {
2271       NestedTooDeep.back() = false;
2272       Kind = IfStmtKind::IfElse;
2273     }
2274     nextToken();
2275     HandleAttributes();
2276     if (FormatTok->Tok.is(tok::l_brace)) {
2277       ElseLeftBrace = FormatTok;
2278       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2279       if (parseBlock() == IfStmtKind::IfOnly)
2280         Kind = IfStmtKind::IfElseIf;
2281       addUnwrappedLine();
2282     } else if (FormatTok->Tok.is(tok::kw_if)) {
2283       FormatToken *Previous = Tokens->getPreviousToken();
2284       const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
2285       if (IsPrecededByComment) {
2286         addUnwrappedLine();
2287         ++Line->Level;
2288       }
2289       bool TooDeep = true;
2290       if (Style.RemoveBracesLLVM) {
2291         Kind = IfStmtKind::IfElseIf;
2292         TooDeep = NestedTooDeep.pop_back_val();
2293       }
2294       ElseLeftBrace =
2295           parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
2296       if (Style.RemoveBracesLLVM)
2297         NestedTooDeep.push_back(TooDeep);
2298       if (IsPrecededByComment)
2299         --Line->Level;
2300     } else {
2301       addUnwrappedLine();
2302       ++Line->Level;
2303       parseStructuralElement();
2304       if (FormatTok->is(tok::eof))
2305         addUnwrappedLine();
2306       --Line->Level;
2307     }
2308   } else {
2309     if (Style.RemoveBracesLLVM)
2310       KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2311     if (NeedsUnwrappedLine)
2312       addUnwrappedLine();
2313   }
2314 
2315   if (!Style.RemoveBracesLLVM)
2316     return nullptr;
2317 
2318   assert(!NestedTooDeep.empty());
2319   const bool KeepElseBraces =
2320       (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();
2321 
2322   NestedTooDeep.pop_back();
2323 
2324   if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
2325     markOptionalBraces(IfLeftBrace);
2326     markOptionalBraces(ElseLeftBrace);
2327   } else if (IfLeftBrace) {
2328     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2329     if (IfRightBrace) {
2330       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2331       assert(!IfLeftBrace->Optional);
2332       assert(!IfRightBrace->Optional);
2333       IfLeftBrace->MatchingParen = nullptr;
2334       IfRightBrace->MatchingParen = nullptr;
2335     }
2336   }
2337 
2338   if (IfKind)
2339     *IfKind = Kind;
2340 
2341   return IfLeftBrace;
2342 }
2343 
2344 void UnwrappedLineParser::parseTryCatch() {
2345   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2346   nextToken();
2347   bool NeedsUnwrappedLine = false;
2348   if (FormatTok->is(tok::colon)) {
2349     // We are in a function try block, what comes is an initializer list.
2350     nextToken();
2351 
2352     // In case identifiers were removed by clang-tidy, what might follow is
2353     // multiple commas in sequence - before the first identifier.
2354     while (FormatTok->is(tok::comma))
2355       nextToken();
2356 
2357     while (FormatTok->is(tok::identifier)) {
2358       nextToken();
2359       if (FormatTok->is(tok::l_paren))
2360         parseParens();
2361       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2362           FormatTok->is(tok::l_brace)) {
2363         do {
2364           nextToken();
2365         } while (!FormatTok->is(tok::r_brace));
2366         nextToken();
2367       }
2368 
2369       // In case identifiers were removed by clang-tidy, what might follow is
2370       // multiple commas in sequence - after the first identifier.
2371       while (FormatTok->is(tok::comma))
2372         nextToken();
2373     }
2374   }
2375   // Parse try with resource.
2376   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2377     parseParens();
2378 
2379   keepAncestorBraces();
2380 
2381   if (FormatTok->is(tok::l_brace)) {
2382     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2383     parseBlock();
2384     if (Style.BraceWrapping.BeforeCatch)
2385       addUnwrappedLine();
2386     else
2387       NeedsUnwrappedLine = true;
2388   } else if (!FormatTok->is(tok::kw_catch)) {
2389     // The C++ standard requires a compound-statement after a try.
2390     // If there's none, we try to assume there's a structuralElement
2391     // and try to continue.
2392     addUnwrappedLine();
2393     ++Line->Level;
2394     parseStructuralElement();
2395     --Line->Level;
2396   }
2397   while (true) {
2398     if (FormatTok->is(tok::at))
2399       nextToken();
2400     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2401                              tok::kw___finally) ||
2402           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2403            FormatTok->is(Keywords.kw_finally)) ||
2404           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2405            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2406       break;
2407     nextToken();
2408     while (FormatTok->isNot(tok::l_brace)) {
2409       if (FormatTok->is(tok::l_paren)) {
2410         parseParens();
2411         continue;
2412       }
2413       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2414         if (Style.RemoveBracesLLVM)
2415           NestedTooDeep.pop_back();
2416         return;
2417       }
2418       nextToken();
2419     }
2420     NeedsUnwrappedLine = false;
2421     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2422     parseBlock();
2423     if (Style.BraceWrapping.BeforeCatch)
2424       addUnwrappedLine();
2425     else
2426       NeedsUnwrappedLine = true;
2427   }
2428 
2429   if (Style.RemoveBracesLLVM)
2430     NestedTooDeep.pop_back();
2431 
2432   if (NeedsUnwrappedLine)
2433     addUnwrappedLine();
2434 }
2435 
2436 void UnwrappedLineParser::parseNamespace() {
2437   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2438          "'namespace' expected");
2439 
2440   const FormatToken &InitialToken = *FormatTok;
2441   nextToken();
2442   if (InitialToken.is(TT_NamespaceMacro)) {
2443     parseParens();
2444   } else {
2445     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2446                               tok::l_square, tok::period) ||
2447            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2448       if (FormatTok->is(tok::l_square))
2449         parseSquare();
2450       else
2451         nextToken();
2452   }
2453   if (FormatTok->Tok.is(tok::l_brace)) {
2454     if (ShouldBreakBeforeBrace(Style, InitialToken))
2455       addUnwrappedLine();
2456 
2457     unsigned AddLevels =
2458         Style.NamespaceIndentation == FormatStyle::NI_All ||
2459                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2460                  DeclarationScopeStack.size() > 1)
2461             ? 1u
2462             : 0u;
2463     bool ManageWhitesmithsBraces =
2464         AddLevels == 0u &&
2465         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2466 
2467     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2468     // the whole block.
2469     if (ManageWhitesmithsBraces)
2470       ++Line->Level;
2471 
2472     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2473                /*MunchSemi=*/true,
2474                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2475 
2476     // Munch the semicolon after a namespace. This is more common than one would
2477     // think. Putting the semicolon into its own line is very ugly.
2478     if (FormatTok->Tok.is(tok::semi))
2479       nextToken();
2480 
2481     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2482 
2483     if (ManageWhitesmithsBraces)
2484       --Line->Level;
2485   }
2486   // FIXME: Add error handling.
2487 }
2488 
2489 void UnwrappedLineParser::parseNew() {
2490   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2491   nextToken();
2492 
2493   if (Style.isCSharp()) {
2494     do {
2495       if (FormatTok->is(tok::l_brace))
2496         parseBracedList();
2497 
2498       if (FormatTok->isOneOf(tok::semi, tok::comma))
2499         return;
2500 
2501       nextToken();
2502     } while (!eof());
2503   }
2504 
2505   if (Style.Language != FormatStyle::LK_Java)
2506     return;
2507 
2508   // In Java, we can parse everything up to the parens, which aren't optional.
2509   do {
2510     // There should not be a ;, { or } before the new's open paren.
2511     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2512       return;
2513 
2514     // Consume the parens.
2515     if (FormatTok->is(tok::l_paren)) {
2516       parseParens();
2517 
2518       // If there is a class body of an anonymous class, consume that as child.
2519       if (FormatTok->is(tok::l_brace))
2520         parseChildBlock();
2521       return;
2522     }
2523     nextToken();
2524   } while (!eof());
2525 }
2526 
2527 void UnwrappedLineParser::parseForOrWhileLoop() {
2528   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2529          "'for', 'while' or foreach macro expected");
2530   nextToken();
2531   // JS' for await ( ...
2532   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2533     nextToken();
2534   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2535     nextToken();
2536   if (FormatTok->Tok.is(tok::l_paren))
2537     parseParens();
2538 
2539   keepAncestorBraces();
2540 
2541   if (FormatTok->Tok.is(tok::l_brace)) {
2542     FormatToken *LeftBrace = FormatTok;
2543     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2544     parseBlock();
2545     if (Style.RemoveBracesLLVM) {
2546       assert(!NestedTooDeep.empty());
2547       if (!NestedTooDeep.back())
2548         markOptionalBraces(LeftBrace);
2549     }
2550     addUnwrappedLine();
2551   } else {
2552     addUnwrappedLine();
2553     ++Line->Level;
2554     parseStructuralElement();
2555     --Line->Level;
2556   }
2557 
2558   if (Style.RemoveBracesLLVM)
2559     NestedTooDeep.pop_back();
2560 }
2561 
2562 void UnwrappedLineParser::parseDoWhile() {
2563   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2564   nextToken();
2565 
2566   keepAncestorBraces();
2567 
2568   if (FormatTok->Tok.is(tok::l_brace)) {
2569     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2570     parseBlock();
2571     if (Style.BraceWrapping.BeforeWhile)
2572       addUnwrappedLine();
2573   } else {
2574     addUnwrappedLine();
2575     ++Line->Level;
2576     parseStructuralElement();
2577     --Line->Level;
2578   }
2579 
2580   if (Style.RemoveBracesLLVM)
2581     NestedTooDeep.pop_back();
2582 
2583   // FIXME: Add error handling.
2584   if (!FormatTok->Tok.is(tok::kw_while)) {
2585     addUnwrappedLine();
2586     return;
2587   }
2588 
2589   // If in Whitesmiths mode, the line with the while() needs to be indented
2590   // to the same level as the block.
2591   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2592     ++Line->Level;
2593 
2594   nextToken();
2595   parseStructuralElement();
2596 }
2597 
2598 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2599   nextToken();
2600   unsigned OldLineLevel = Line->Level;
2601   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2602     --Line->Level;
2603   if (LeftAlignLabel)
2604     Line->Level = 0;
2605 
2606   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2607       FormatTok->Tok.is(tok::l_brace)) {
2608 
2609     CompoundStatementIndenter Indenter(this, Line->Level,
2610                                        Style.BraceWrapping.AfterCaseLabel,
2611                                        Style.BraceWrapping.IndentBraces);
2612     parseBlock();
2613     if (FormatTok->Tok.is(tok::kw_break)) {
2614       if (Style.BraceWrapping.AfterControlStatement ==
2615           FormatStyle::BWACS_Always) {
2616         addUnwrappedLine();
2617         if (!Style.IndentCaseBlocks &&
2618             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2619           ++Line->Level;
2620       }
2621       parseStructuralElement();
2622     }
2623     addUnwrappedLine();
2624   } else {
2625     if (FormatTok->is(tok::semi))
2626       nextToken();
2627     addUnwrappedLine();
2628   }
2629   Line->Level = OldLineLevel;
2630   if (FormatTok->isNot(tok::l_brace)) {
2631     parseStructuralElement();
2632     addUnwrappedLine();
2633   }
2634 }
2635 
2636 void UnwrappedLineParser::parseCaseLabel() {
2637   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2638 
2639   // FIXME: fix handling of complex expressions here.
2640   do {
2641     nextToken();
2642   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2643   parseLabel();
2644 }
2645 
2646 void UnwrappedLineParser::parseSwitch() {
2647   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2648   nextToken();
2649   if (FormatTok->Tok.is(tok::l_paren))
2650     parseParens();
2651 
2652   keepAncestorBraces();
2653 
2654   if (FormatTok->Tok.is(tok::l_brace)) {
2655     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2656     parseBlock();
2657     addUnwrappedLine();
2658   } else {
2659     addUnwrappedLine();
2660     ++Line->Level;
2661     parseStructuralElement();
2662     --Line->Level;
2663   }
2664 
2665   if (Style.RemoveBracesLLVM)
2666     NestedTooDeep.pop_back();
2667 }
2668 
2669 void UnwrappedLineParser::parseAccessSpecifier() {
2670   FormatToken *AccessSpecifierCandidate = FormatTok;
2671   nextToken();
2672   // Understand Qt's slots.
2673   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2674     nextToken();
2675   // Otherwise, we don't know what it is, and we'd better keep the next token.
2676   if (FormatTok->Tok.is(tok::colon)) {
2677     nextToken();
2678     addUnwrappedLine();
2679   } else if (!FormatTok->Tok.is(tok::coloncolon) &&
2680              !std::binary_search(COperatorsFollowingVar.begin(),
2681                                  COperatorsFollowingVar.end(),
2682                                  FormatTok->Tok.getKind())) {
2683     // Not a variable name nor namespace name.
2684     addUnwrappedLine();
2685   } else if (AccessSpecifierCandidate) {
2686     // Consider the access specifier to be a C identifier.
2687     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2688   }
2689 }
2690 
2691 void UnwrappedLineParser::parseConcept() {
2692   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2693   nextToken();
2694   if (!FormatTok->Tok.is(tok::identifier))
2695     return;
2696   nextToken();
2697   if (!FormatTok->Tok.is(tok::equal))
2698     return;
2699   nextToken();
2700   if (FormatTok->Tok.is(tok::kw_requires)) {
2701     nextToken();
2702     parseRequiresExpression(Line->Level);
2703   } else {
2704     parseConstraintExpression(Line->Level);
2705   }
2706 }
2707 
2708 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2709   // requires (R range)
2710   if (FormatTok->Tok.is(tok::l_paren)) {
2711     parseParens();
2712     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2713       addUnwrappedLine();
2714       --Line->Level;
2715     }
2716   }
2717 
2718   if (FormatTok->Tok.is(tok::l_brace)) {
2719     if (Style.BraceWrapping.AfterFunction)
2720       addUnwrappedLine();
2721     FormatTok->setType(TT_FunctionLBrace);
2722     parseBlock();
2723     addUnwrappedLine();
2724   } else {
2725     parseConstraintExpression(OriginalLevel);
2726   }
2727 }
2728 
/// Parses a constraint expression, i.e. a sequence of terms joined by '&&'
/// or '||', where each term starts with an identifier, 'requires' or '::'.
///
/// \param OriginalLevel the line level in effect before any extra indentation
/// was applied for the requires clause; used to decide whether the level has
/// to be decreased again once the expression ends.
void UnwrappedLineParser::parseConstraintExpression(
    unsigned int OriginalLevel) {
  // requires Id<T> && Id<T> || Id<T>
  while (
      FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
    nextToken();
    // Consume a (possibly qualified) name; template argument lists are
    // parsed as '<'...'>' delimited lists.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
                              tok::greater, tok::comma, tok::ellipsis)) {
      if (FormatTok->Tok.is(tok::less)) {
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
        continue;
      }
      nextToken();
    }
    // A nested requires-expression inside the constraint.
    if (FormatTok->Tok.is(tok::kw_requires))
      parseRequiresExpression(OriginalLevel);
    if (FormatTok->Tok.is(tok::less)) {
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
    }

    if (FormatTok->Tok.is(tok::l_paren))
      parseParens();
    if (FormatTok->Tok.is(tok::l_brace)) {
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      FormatTok->setType(TT_FunctionLBrace);
      parseBlock();
    }
    if (FormatTok->Tok.is(tok::semi)) {
      // Eat any trailing semi.
      nextToken();
      addUnwrappedLine();
    }
    // A ':' ends the expression; the token is left for the caller.
    if (FormatTok->Tok.is(tok::colon))
      return;
    if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
      // No further '&&' / '||': the constraint expression is complete. End
      // the line unless the previous token is one that can start a term.
      if (FormatTok->Previous &&
          !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
                                        tok::coloncolon))
        addUnwrappedLine();
      if (Style.IndentRequires && OriginalLevel != Line->Level)
        --Line->Level;
      break;
    } else {
      FormatTok->setType(TT_ConstraintJunctions);
    }

    // Consume the '&&' / '||' and continue with the next term.
    nextToken();
  }
}
2781 
2782 void UnwrappedLineParser::parseRequires() {
2783   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2784 
2785   unsigned OriginalLevel = Line->Level;
2786   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2787     addUnwrappedLine();
2788     if (Style.IndentRequires)
2789       ++Line->Level;
2790   }
2791   nextToken();
2792 
2793   parseRequiresExpression(OriginalLevel);
2794 }
2795 
2796 bool UnwrappedLineParser::parseEnum() {
2797   const FormatToken &InitialToken = *FormatTok;
2798 
2799   // Won't be 'enum' for NS_ENUMs.
2800   if (FormatTok->Tok.is(tok::kw_enum))
2801     nextToken();
2802 
2803   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2804   // declarations. An "enum" keyword followed by a colon would be a syntax
2805   // error and thus assume it is just an identifier.
2806   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
2807     return false;
2808 
2809   // In protobuf, "enum" can be used as a field name.
2810   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2811     return false;
2812 
2813   // Eat up enum class ...
2814   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2815     nextToken();
2816 
2817   while (FormatTok->Tok.getIdentifierInfo() ||
2818          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2819                             tok::greater, tok::comma, tok::question)) {
2820     nextToken();
2821     // We can have macros or attributes in between 'enum' and the enum name.
2822     if (FormatTok->is(tok::l_paren))
2823       parseParens();
2824     if (FormatTok->is(tok::identifier)) {
2825       nextToken();
2826       // If there are two identifiers in a row, this is likely an elaborate
2827       // return type. In Java, this can be "implements", etc.
2828       if (Style.isCpp() && FormatTok->is(tok::identifier))
2829         return false;
2830     }
2831   }
2832 
2833   // Just a declaration or something is wrong.
2834   if (FormatTok->isNot(tok::l_brace))
2835     return true;
2836   FormatTok->setType(TT_RecordLBrace);
2837   FormatTok->setBlockKind(BK_Block);
2838 
2839   if (Style.Language == FormatStyle::LK_Java) {
2840     // Java enums are different.
2841     parseJavaEnumBody();
2842     return true;
2843   }
2844   if (Style.Language == FormatStyle::LK_Proto) {
2845     parseBlock(/*MustBeDeclaration=*/true);
2846     return true;
2847   }
2848 
2849   if (!Style.AllowShortEnumsOnASingleLine &&
2850       ShouldBreakBeforeBrace(Style, InitialToken))
2851     addUnwrappedLine();
2852   // Parse enum body.
2853   nextToken();
2854   if (!Style.AllowShortEnumsOnASingleLine) {
2855     addUnwrappedLine();
2856     Line->Level += 1;
2857   }
2858   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2859                                    /*IsEnum=*/true);
2860   if (!Style.AllowShortEnumsOnASingleLine)
2861     Line->Level -= 1;
2862   if (HasError) {
2863     if (FormatTok->is(tok::semi))
2864       nextToken();
2865     addUnwrappedLine();
2866   }
2867   return true;
2868 
2869   // There is no addUnwrappedLine() here so that we fall through to parsing a
2870   // structural element afterwards. Thus, in "enum A {} n, m;",
2871   // "} n, m;" will end up in one unwrapped line.
2872 }
2873 
2874 bool UnwrappedLineParser::parseStructLike() {
2875   // parseRecord falls through and does not yet add an unwrapped line as a
2876   // record declaration or definition can start a structural element.
2877   parseRecord();
2878   // This does not apply to Java, JavaScript and C#.
2879   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
2880       Style.isCSharp()) {
2881     if (FormatTok->is(tok::semi))
2882       nextToken();
2883     addUnwrappedLine();
2884     return true;
2885   }
2886   return false;
2887 }
2888 
2889 namespace {
2890 // A class used to set and restore the Token position when peeking
2891 // ahead in the token source.
2892 class ScopedTokenPosition {
2893   unsigned StoredPosition;
2894   FormatTokenSource *Tokens;
2895 
2896 public:
2897   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2898     assert(Tokens && "Tokens expected to not be null");
2899     StoredPosition = Tokens->getPosition();
2900   }
2901 
2902   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2903 };
2904 } // namespace
2905 
2906 // Look to see if we have [[ by looking ahead, if
2907 // its not then rewind to the original position.
2908 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2909   ScopedTokenPosition AutoPosition(Tokens);
2910   FormatToken *Tok = Tokens->getNextToken();
2911   // We already read the first [ check for the second.
2912   if (!Tok->is(tok::l_square))
2913     return false;
2914   // Double check that the attribute is just something
2915   // fairly simple.
2916   while (Tok->isNot(tok::eof)) {
2917     if (Tok->is(tok::r_square))
2918       break;
2919     Tok = Tokens->getNextToken();
2920   }
2921   if (Tok->is(tok::eof))
2922     return false;
2923   Tok = Tokens->getNextToken();
2924   if (!Tok->is(tok::r_square))
2925     return false;
2926   Tok = Tokens->getNextToken();
2927   if (Tok->is(tok::semi))
2928     return false;
2929   return true;
2930 }
2931 
2932 void UnwrappedLineParser::parseJavaEnumBody() {
2933   // Determine whether the enum is simple, i.e. does not have a semicolon or
2934   // constants with class bodies. Simple enums can be formatted like braced
2935   // lists, contracted to a single line, etc.
2936   unsigned StoredPosition = Tokens->getPosition();
2937   bool IsSimple = true;
2938   FormatToken *Tok = Tokens->getNextToken();
2939   while (!Tok->is(tok::eof)) {
2940     if (Tok->is(tok::r_brace))
2941       break;
2942     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2943       IsSimple = false;
2944       break;
2945     }
2946     // FIXME: This will also mark enums with braces in the arguments to enum
2947     // constants as "not simple". This is probably fine in practice, though.
2948     Tok = Tokens->getNextToken();
2949   }
2950   FormatTok = Tokens->setPosition(StoredPosition);
2951 
2952   if (IsSimple) {
2953     nextToken();
2954     parseBracedList();
2955     addUnwrappedLine();
2956     return;
2957   }
2958 
2959   // Parse the body of a more complex enum.
2960   // First add a line for everything up to the "{".
2961   nextToken();
2962   addUnwrappedLine();
2963   ++Line->Level;
2964 
2965   // Parse the enum constants.
2966   while (FormatTok) {
2967     if (FormatTok->is(tok::l_brace)) {
2968       // Parse the constant's class body.
2969       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2970                  /*MunchSemi=*/false);
2971     } else if (FormatTok->is(tok::l_paren)) {
2972       parseParens();
2973     } else if (FormatTok->is(tok::comma)) {
2974       nextToken();
2975       addUnwrappedLine();
2976     } else if (FormatTok->is(tok::semi)) {
2977       nextToken();
2978       addUnwrappedLine();
2979       break;
2980     } else if (FormatTok->is(tok::r_brace)) {
2981       addUnwrappedLine();
2982       break;
2983     } else {
2984       nextToken();
2985     }
2986   }
2987 
2988   // Parse the class body after the enum's ";" if any.
2989   parseLevel(/*HasOpeningBrace=*/true);
2990   nextToken();
2991   --Line->Level;
2992   addUnwrappedLine();
2993 }
2994 
/// Parses a record (class/struct/union-like) declaration or definition,
/// starting at the record keyword.
///
/// \param ParseAsExpr if true, a record body is parsed as a child block
/// instead of as a declaration block.
///
/// No unwrapped line is added at the end; see the comment at the end of the
/// function.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Remember the keyword token; it is used below to decide whether to break
  // before the opening brace.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier that is not spelled entirely in upper case is taken to
    // be a regular name rather than a macro.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip over the base clause / template arguments until the record body,
    // a terminating ';' or the end of the file is reached.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A '{' that does not start a braced list is the record body.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous || Previous->isNot(tok::r_paren)) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            break;
        }
      }
      // A ';' means this was only a declaration; leave it to the caller.
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    FormatTok->setType(TT_RecordLBrace);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With IndentAccessModifiers the body is indented by two levels
      // instead of one.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3088 
3089 void UnwrappedLineParser::parseObjCMethod() {
3090   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
3091          "'(' or identifier expected.");
3092   do {
3093     if (FormatTok->Tok.is(tok::semi)) {
3094       nextToken();
3095       addUnwrappedLine();
3096       return;
3097     } else if (FormatTok->Tok.is(tok::l_brace)) {
3098       if (Style.BraceWrapping.AfterFunction)
3099         addUnwrappedLine();
3100       parseBlock();
3101       addUnwrappedLine();
3102       return;
3103     } else {
3104       nextToken();
3105     }
3106   } while (!eof());
3107 }
3108 
3109 void UnwrappedLineParser::parseObjCProtocolList() {
3110   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
3111   do {
3112     nextToken();
3113     // Early exit in case someone forgot a close angle.
3114     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3115         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3116       return;
3117   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
3118   nextToken(); // Skip '>'.
3119 }
3120 
3121 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3122   do {
3123     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
3124       nextToken();
3125       addUnwrappedLine();
3126       break;
3127     }
3128     if (FormatTok->is(tok::l_brace)) {
3129       parseBlock();
3130       // In ObjC interfaces, nothing should be following the "}".
3131       addUnwrappedLine();
3132     } else if (FormatTok->is(tok::r_brace)) {
3133       // Ignore stray "}". parseStructuralElement doesn't consume them.
3134       nextToken();
3135       addUnwrappedLine();
3136     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3137       nextToken();
3138       parseObjCMethod();
3139     } else {
3140       parseStructuralElement();
3141     }
3142   } while (!eof());
3143 }
3144 
3145 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3146   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3147          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3148   nextToken();
3149   nextToken(); // interface name
3150 
3151   // @interface can be followed by a lightweight generic
3152   // specialization list, then either a base class or a category.
3153   if (FormatTok->Tok.is(tok::less))
3154     parseObjCLightweightGenerics();
3155   if (FormatTok->Tok.is(tok::colon)) {
3156     nextToken();
3157     nextToken(); // base class name
3158     // The base class can also have lightweight generics applied to it.
3159     if (FormatTok->Tok.is(tok::less))
3160       parseObjCLightweightGenerics();
3161   } else if (FormatTok->Tok.is(tok::l_paren))
3162     // Skip category, if present.
3163     parseParens();
3164 
3165   if (FormatTok->Tok.is(tok::less))
3166     parseObjCProtocolList();
3167 
3168   if (FormatTok->Tok.is(tok::l_brace)) {
3169     if (Style.BraceWrapping.AfterObjCDeclaration)
3170       addUnwrappedLine();
3171     parseBlock(/*MustBeDeclaration=*/true);
3172   }
3173 
3174   // With instance variables, this puts '}' on its own line.  Without instance
3175   // variables, this ends the @interface line.
3176   addUnwrappedLine();
3177 
3178   parseObjCUntilAtEnd();
3179 }
3180 
3181 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3182   assert(FormatTok->Tok.is(tok::less));
3183   // Unlike protocol lists, generic parameterizations support
3184   // nested angles:
3185   //
3186   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3187   //     NSObject <NSCopying, NSSecureCoding>
3188   //
3189   // so we need to count how many open angles we have left.
3190   unsigned NumOpenAngles = 1;
3191   do {
3192     nextToken();
3193     // Early exit in case someone forgot a close angle.
3194     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3195         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3196       break;
3197     if (FormatTok->Tok.is(tok::less))
3198       ++NumOpenAngles;
3199     else if (FormatTok->Tok.is(tok::greater)) {
3200       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3201       --NumOpenAngles;
3202     }
3203   } while (!eof() && NumOpenAngles != 0);
3204   nextToken(); // Skip '>'.
3205 }
3206 
3207 // Returns true for the declaration/definition form of @protocol,
3208 // false for the expression form.
3209 bool UnwrappedLineParser::parseObjCProtocol() {
3210   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3211   nextToken();
3212 
3213   if (FormatTok->is(tok::l_paren))
3214     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3215     return false;
3216 
3217   // The definition/declaration form,
3218   // @protocol Foo
3219   // - (int)someMethod;
3220   // @end
3221 
3222   nextToken(); // protocol name
3223 
3224   if (FormatTok->Tok.is(tok::less))
3225     parseObjCProtocolList();
3226 
3227   // Check for protocol declaration.
3228   if (FormatTok->Tok.is(tok::semi)) {
3229     nextToken();
3230     addUnwrappedLine();
3231     return true;
3232   }
3233 
3234   addUnwrappedLine();
3235   parseObjCUntilAtEnd();
3236   return true;
3237 }
3238 
3239 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3240   bool IsImport = FormatTok->is(Keywords.kw_import);
3241   assert(IsImport || FormatTok->is(tok::kw_export));
3242   nextToken();
3243 
3244   // Consume the "default" in "export default class/function".
3245   if (FormatTok->is(tok::kw_default))
3246     nextToken();
3247 
3248   // Consume "async function", "function" and "default function", so that these
3249   // get parsed as free-standing JS functions, i.e. do not require a trailing
3250   // semicolon.
3251   if (FormatTok->is(Keywords.kw_async))
3252     nextToken();
3253   if (FormatTok->is(Keywords.kw_function)) {
3254     nextToken();
3255     return;
3256   }
3257 
3258   // For imports, `export *`, `export {...}`, consume the rest of the line up
3259   // to the terminating `;`. For everything else, just return and continue
3260   // parsing the structural element, i.e. the declaration or expression for
3261   // `export default`.
3262   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3263       !FormatTok->isStringLiteral())
3264     return;
3265 
3266   while (!eof()) {
3267     if (FormatTok->is(tok::semi))
3268       return;
3269     if (Line->Tokens.empty()) {
3270       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3271       // import statement should terminate.
3272       return;
3273     }
3274     if (FormatTok->is(tok::l_brace)) {
3275       FormatTok->setBlockKind(BK_Block);
3276       nextToken();
3277       parseBracedList();
3278     } else {
3279       nextToken();
3280     }
3281   }
3282 }
3283 
3284 void UnwrappedLineParser::parseStatementMacro() {
3285   nextToken();
3286   if (FormatTok->is(tok::l_paren))
3287     parseParens();
3288   if (FormatTok->is(tok::semi))
3289     nextToken();
3290   addUnwrappedLine();
3291 }
3292 
3293 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3294                                                  StringRef Prefix = "") {
3295   llvm::dbgs() << Prefix << "Line(" << Line.Level
3296                << ", FSC=" << Line.FirstStartColumn << ")"
3297                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3298   for (const auto &Node : Line.Tokens) {
3299     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3300                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3301                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3302   }
3303   for (const auto &Node : Line.Tokens)
3304     for (const auto &ChildNode : Node.Children)
3305       printDebugInfo(ChildNode, "\nChild: ");
3306 
3307   llvm::dbgs() << "\n";
3308 }
3309 
3310 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3311   if (Line->Tokens.empty())
3312     return;
3313   LLVM_DEBUG({
3314     if (CurrentLines == &Lines)
3315       printDebugInfo(*Line);
3316   });
3317 
3318   // If this line closes a block when in Whitesmiths mode, remember that
3319   // information so that the level can be decreased after the line is added.
3320   // This has to happen after the addition of the line since the line itself
3321   // needs to be indented.
3322   bool ClosesWhitesmithsBlock =
3323       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3324       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3325 
3326   CurrentLines->push_back(std::move(*Line));
3327   Line->Tokens.clear();
3328   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3329   Line->FirstStartColumn = 0;
3330 
3331   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3332     --Line->Level;
3333   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3334     CurrentLines->append(
3335         std::make_move_iterator(PreprocessorDirectives.begin()),
3336         std::make_move_iterator(PreprocessorDirectives.end()));
3337     PreprocessorDirectives.clear();
3338   }
3339   // Disconnect the current token from the last token on the previous line.
3340   FormatTok->Previous = nullptr;
3341 }
3342 
3343 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3344 
3345 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3346   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3347          FormatTok.NewlinesBefore > 0;
3348 }
3349 
// Checks if \p FormatTok is a line comment that continues the line comment
// section on \p Line.
//
// Returns false if \p Line has no tokens yet, or if the comment text (without
// its leading "//" or "/*") matches \p CommentPragmasRegex. Otherwise the
// decision is delegated to continuesLineComment() with the min column token
// computed below.
static bool
continuesLineCommentSection(const FormatToken &FormatTok,
                            const UnwrappedLine &Line,
                            const llvm::Regex &CommentPragmasRegex) {
  if (Line.Tokens.empty())
    return false;

  // Strip the comment introducer before matching against the pragma regex.
  StringRef IndentContent = FormatTok.TokenText;
  if (FormatTok.TokenText.startswith("//") ||
      FormatTok.TokenText.startswith("/*"))
    IndentContent = FormatTok.TokenText.substr(2);
  if (CommentPragmasRegex.match(IndentContent))
    return false;

  // If Line starts with a line comment, then FormatTok continues the comment
  // section if its original column is greater or equal to the original start
  // column of the line.
  //
  // Define the min column token of a line as follows: if a line ends in '{' or
  // contains a '{' followed by a line comment, then the min column token is
  // that '{'. Otherwise, the min column token of the line is the first token of
  // the line.
  //
  // If Line starts with a token other than a line comment, then FormatTok
  // continues the comment section if its original column is greater than the
  // original start column of the min column token of the line.
  //
  // For example, the second line comment continues the first in these cases:
  //
  // // first line
  // // second line
  //
  // and:
  //
  // // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  //  // second line
  //
  // and:
  //
  // do { // first line
  //      // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //    // second line
  //   b
  // };
  //
  // The second line comment doesn't continue the first in these cases:
  //
  //   // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  // // second line
  //
  // and:
  //
  // do { // first line
  //   // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //   // second line
  // };
  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;

  // Scan for '{//'. If found, use the column of '{' as a min column for line
  // comment section continuation.
  const FormatToken *PreviousToken = nullptr;
  for (const UnwrappedLineNode &Node : Line.Tokens) {
    if (PreviousToken && PreviousToken->is(tok::l_brace) &&
        isLineComment(*Node.Tok)) {
      MinColumnToken = PreviousToken;
      break;
    }
    PreviousToken = Node.Tok;

    // Grab the last newline preceding a token in this unwrapped line.
    if (Node.Tok->NewlinesBefore > 0)
      MinColumnToken = Node.Tok;
  }
  // If the scan above stopped at '{//', PreviousToken is still that '{';
  // otherwise it is the last token of the line, which covers the "line ends
  // in '{'" case.
  if (PreviousToken && PreviousToken->is(tok::l_brace))
    MinColumnToken = PreviousToken;

  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
                              MinColumnToken);
}
3455 
3456 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3457   bool JustComments = Line->Tokens.empty();
3458   for (FormatToken *Tok : CommentsBeforeNextToken) {
3459     // Line comments that belong to the same line comment section are put on the
3460     // same line since later we might want to reflow content between them.
3461     // Additional fine-grained breaking of line comment sections is controlled
3462     // by the class BreakableLineCommentSection in case it is desirable to keep
3463     // several line comment sections in the same unwrapped line.
3464     //
3465     // FIXME: Consider putting separate line comment sections as children to the
3466     // unwrapped line instead.
3467     Tok->ContinuesLineCommentSection =
3468         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3469     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3470       addUnwrappedLine();
3471     pushToken(Tok);
3472   }
3473   if (NewlineBeforeNext && JustComments)
3474     addUnwrappedLine();
3475   CommentsBeforeNextToken.clear();
3476 }
3477 
3478 void UnwrappedLineParser::nextToken(int LevelDifference) {
3479   if (eof())
3480     return;
3481   flushComments(isOnNewLine(*FormatTok));
3482   pushToken(FormatTok);
3483   FormatToken *Previous = FormatTok;
3484   if (!Style.isJavaScript())
3485     readToken(LevelDifference);
3486   else
3487     readTokenWithJavaScriptASI();
3488   FormatTok->Previous = Previous;
3489 }
3490 
3491 void UnwrappedLineParser::distributeComments(
3492     const SmallVectorImpl<FormatToken *> &Comments,
3493     const FormatToken *NextTok) {
3494   // Whether or not a line comment token continues a line is controlled by
3495   // the method continuesLineCommentSection, with the following caveat:
3496   //
3497   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3498   // that each comment line from the trail is aligned with the next token, if
3499   // the next token exists. If a trail exists, the beginning of the maximal
3500   // trail is marked as a start of a new comment section.
3501   //
3502   // For example in this code:
3503   //
3504   // int a; // line about a
3505   //   // line 1 about b
3506   //   // line 2 about b
3507   //   int b;
3508   //
3509   // the two lines about b form a maximal trail, so there are two sections, the
3510   // first one consisting of the single comment "// line about a" and the
3511   // second one consisting of the next two comments.
3512   if (Comments.empty())
3513     return;
3514   bool ShouldPushCommentsInCurrentLine = true;
3515   bool HasTrailAlignedWithNextToken = false;
3516   unsigned StartOfTrailAlignedWithNextToken = 0;
3517   if (NextTok) {
3518     // We are skipping the first element intentionally.
3519     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3520       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3521         HasTrailAlignedWithNextToken = true;
3522         StartOfTrailAlignedWithNextToken = i;
3523       }
3524     }
3525   }
3526   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3527     FormatToken *FormatTok = Comments[i];
3528     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3529       FormatTok->ContinuesLineCommentSection = false;
3530     } else {
3531       FormatTok->ContinuesLineCommentSection =
3532           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3533     }
3534     if (!FormatTok->ContinuesLineCommentSection &&
3535         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
3536       ShouldPushCommentsInCurrentLine = false;
3537     if (ShouldPushCommentsInCurrentLine)
3538       pushToken(FormatTok);
3539     else
3540       CommentsBeforeNextToken.push_back(FormatTok);
3541   }
3542 }
3543 
3544 void UnwrappedLineParser::readToken(int LevelDifference) {
3545   SmallVector<FormatToken *, 1> Comments;
3546   do {
3547     FormatTok = Tokens->getNextToken();
3548     assert(FormatTok);
3549     while (FormatTok->getType() == TT_ConflictStart ||
3550            FormatTok->getType() == TT_ConflictEnd ||
3551            FormatTok->getType() == TT_ConflictAlternative) {
3552       if (FormatTok->getType() == TT_ConflictStart)
3553         conditionalCompilationStart(/*Unreachable=*/false);
3554       else if (FormatTok->getType() == TT_ConflictAlternative)
3555         conditionalCompilationAlternative();
3556       else if (FormatTok->getType() == TT_ConflictEnd)
3557         conditionalCompilationEnd();
3558       FormatTok = Tokens->getNextToken();
3559       FormatTok->MustBreakBefore = true;
3560     }
3561 
3562     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3563            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3564       distributeComments(Comments, FormatTok);
3565       Comments.clear();
3566       // If there is an unfinished unwrapped line, we flush the preprocessor
3567       // directives only after that unwrapped line was finished later.
3568       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3569       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3570       assert((LevelDifference >= 0 ||
3571               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3572              "LevelDifference makes Line->Level negative");
3573       Line->Level += LevelDifference;
3574       // Comments stored before the preprocessor directive need to be output
3575       // before the preprocessor directive, at the same level as the
3576       // preprocessor directive, as we consider them to apply to the directive.
3577       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3578           PPBranchLevel > 0)
3579         Line->Level += PPBranchLevel;
3580       flushComments(isOnNewLine(*FormatTok));
3581       parsePPDirective();
3582     }
3583 
3584     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3585         !Line->InPPDirective)
3586       continue;
3587 
3588     if (!FormatTok->Tok.is(tok::comment)) {
3589       distributeComments(Comments, FormatTok);
3590       Comments.clear();
3591       return;
3592     }
3593 
3594     Comments.push_back(FormatTok);
3595   } while (!eof());
3596 
3597   distributeComments(Comments, nullptr);
3598   Comments.clear();
3599 }
3600 
3601 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3602   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3603   if (MustBreakBeforeNextToken) {
3604     Line->Tokens.back().Tok->MustBreakBefore = true;
3605     MustBreakBeforeNextToken = false;
3606   }
3607 }
3608 
3609 } // end namespace format
3610 } // end namespace clang
3611