1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 #include <utility>
23 
24 #define DEBUG_TYPE "format-parser"
25 
26 namespace clang {
27 namespace format {
28 
29 class FormatTokenSource {
30 public:
31   virtual ~FormatTokenSource() {}
32 
33   // Returns the next token in the token stream.
34   virtual FormatToken *getNextToken() = 0;
35 
36   // Returns the token preceding the token returned by the last call to
37   // getNextToken() in the token stream, or nullptr if no such token exists.
38   virtual FormatToken *getPreviousToken() = 0;
39 
40   // Returns the token that would be returned by the next call to
41   // getNextToken().
42   virtual FormatToken *peekNextToken() = 0;
43 
44   // Returns the token that would be returned after the next N calls to
45   // getNextToken(). N needs to be greater than zero, and small enough that
46   // there are still tokens. Check for tok::eof with N-1 before calling it with
47   // N.
48   virtual FormatToken *peekNextToken(int N) = 0;
49 
50   // Returns whether we are at the end of the file.
51   // This can be different from whether getNextToken() returned an eof token
52   // when the FormatTokenSource is a view on a part of the token stream.
53   virtual bool isEOF() = 0;
54 
55   // Gets the current position in the token stream, to be used by setPosition().
56   virtual unsigned getPosition() = 0;
57 
58   // Resets the token stream to the state it was in when getPosition() returned
59   // Position, and return the token at that position in the stream.
60   virtual FormatToken *setPosition(unsigned Position) = 0;
61 };
62 
63 namespace {
64 
65 class ScopedDeclarationState {
66 public:
67   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
68                          bool MustBeDeclaration)
69       : Line(Line), Stack(Stack) {
70     Line.MustBeDeclaration = MustBeDeclaration;
71     Stack.push_back(MustBeDeclaration);
72   }
73   ~ScopedDeclarationState() {
74     Stack.pop_back();
75     if (!Stack.empty())
76       Line.MustBeDeclaration = Stack.back();
77     else
78       Line.MustBeDeclaration = true;
79   }
80 
81 private:
82   UnwrappedLine &Line;
83   llvm::BitVector &Stack;
84 };
85 
86 static bool isLineComment(const FormatToken &FormatTok) {
87   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
88 }
89 
90 // Checks if \p FormatTok is a line comment that continues the line comment
91 // \p Previous. The original column of \p MinColumnToken is used to determine
92 // whether \p FormatTok is indented enough to the right to continue \p Previous.
93 static bool continuesLineComment(const FormatToken &FormatTok,
94                                  const FormatToken *Previous,
95                                  const FormatToken *MinColumnToken) {
96   if (!Previous || !MinColumnToken)
97     return false;
98   unsigned MinContinueColumn =
99       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
100   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
101          isLineComment(*Previous) &&
102          FormatTok.OriginalColumn >= MinContinueColumn;
103 }
104 
105 class ScopedMacroState : public FormatTokenSource {
106 public:
107   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
108                    FormatToken *&ResetToken)
109       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
110         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
111         Token(nullptr), PreviousToken(nullptr) {
112     FakeEOF.Tok.startToken();
113     FakeEOF.Tok.setKind(tok::eof);
114     TokenSource = this;
115     Line.Level = 0;
116     Line.InPPDirective = true;
117   }
118 
119   ~ScopedMacroState() override {
120     TokenSource = PreviousTokenSource;
121     ResetToken = Token;
122     Line.InPPDirective = false;
123     Line.Level = PreviousLineLevel;
124   }
125 
126   FormatToken *getNextToken() override {
127     // The \c UnwrappedLineParser guards against this by never calling
128     // \c getNextToken() after it has encountered the first eof token.
129     assert(!eof());
130     PreviousToken = Token;
131     Token = PreviousTokenSource->getNextToken();
132     if (eof())
133       return &FakeEOF;
134     return Token;
135   }
136 
137   FormatToken *getPreviousToken() override {
138     return PreviousTokenSource->getPreviousToken();
139   }
140 
141   FormatToken *peekNextToken() override {
142     if (eof())
143       return &FakeEOF;
144     return PreviousTokenSource->peekNextToken();
145   }
146 
147   FormatToken *peekNextToken(int N) override {
148     assert(N > 0);
149     if (eof())
150       return &FakeEOF;
151     return PreviousTokenSource->peekNextToken(N);
152   }
153 
154   bool isEOF() override { return PreviousTokenSource->isEOF(); }
155 
156   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
157 
158   FormatToken *setPosition(unsigned Position) override {
159     PreviousToken = nullptr;
160     Token = PreviousTokenSource->setPosition(Position);
161     return Token;
162   }
163 
164 private:
165   bool eof() {
166     return Token && Token->HasUnescapedNewline &&
167            !continuesLineComment(*Token, PreviousToken,
168                                  /*MinColumnToken=*/PreviousToken);
169   }
170 
171   FormatToken FakeEOF;
172   UnwrappedLine &Line;
173   FormatTokenSource *&TokenSource;
174   FormatToken *&ResetToken;
175   unsigned PreviousLineLevel;
176   FormatTokenSource *PreviousTokenSource;
177 
178   FormatToken *Token;
179   FormatToken *PreviousToken;
180 };
181 
182 } // end anonymous namespace
183 
184 class ScopedLineState {
185 public:
186   ScopedLineState(UnwrappedLineParser &Parser,
187                   bool SwitchToPreprocessorLines = false)
188       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
189     if (SwitchToPreprocessorLines)
190       Parser.CurrentLines = &Parser.PreprocessorDirectives;
191     else if (!Parser.Line->Tokens.empty())
192       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
193     PreBlockLine = std::move(Parser.Line);
194     Parser.Line = std::make_unique<UnwrappedLine>();
195     Parser.Line->Level = PreBlockLine->Level;
196     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
197   }
198 
199   ~ScopedLineState() {
200     if (!Parser.Line->Tokens.empty())
201       Parser.addUnwrappedLine();
202     assert(Parser.Line->Tokens.empty());
203     Parser.Line = std::move(PreBlockLine);
204     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
205       Parser.MustBreakBeforeNextToken = true;
206     Parser.CurrentLines = OriginalLines;
207   }
208 
209 private:
210   UnwrappedLineParser &Parser;
211 
212   std::unique_ptr<UnwrappedLine> PreBlockLine;
213   SmallVectorImpl<UnwrappedLine> *OriginalLines;
214 };
215 
216 class CompoundStatementIndenter {
217 public:
218   CompoundStatementIndenter(UnwrappedLineParser *Parser,
219                             const FormatStyle &Style, unsigned &LineLevel)
220       : CompoundStatementIndenter(Parser, LineLevel,
221                                   Style.BraceWrapping.AfterControlStatement,
222                                   Style.BraceWrapping.IndentBraces) {}
223   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
224                             bool WrapBrace, bool IndentBrace)
225       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
226     if (WrapBrace)
227       Parser->addUnwrappedLine();
228     if (IndentBrace)
229       ++LineLevel;
230   }
231   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
232 
233 private:
234   unsigned &LineLevel;
235   unsigned OldLineLevel;
236 };
237 
238 namespace {
239 
240 class IndexedTokenSource : public FormatTokenSource {
241 public:
242   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
243       : Tokens(Tokens), Position(-1) {}
244 
245   FormatToken *getNextToken() override {
246     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
247       LLVM_DEBUG({
248         llvm::dbgs() << "Next ";
249         dbgToken(Position);
250       });
251       return Tokens[Position];
252     }
253     ++Position;
254     LLVM_DEBUG({
255       llvm::dbgs() << "Next ";
256       dbgToken(Position);
257     });
258     return Tokens[Position];
259   }
260 
261   FormatToken *getPreviousToken() override {
262     return Position > 0 ? Tokens[Position - 1] : nullptr;
263   }
264 
265   FormatToken *peekNextToken() override {
266     int Next = Position + 1;
267     LLVM_DEBUG({
268       llvm::dbgs() << "Peeking ";
269       dbgToken(Next);
270     });
271     return Tokens[Next];
272   }
273 
274   FormatToken *peekNextToken(int N) override {
275     assert(N > 0);
276     int Next = Position + N;
277     LLVM_DEBUG({
278       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
279       dbgToken(Next);
280     });
281     return Tokens[Next];
282   }
283 
284   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
285 
286   unsigned getPosition() override {
287     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
288     assert(Position >= 0);
289     return Position;
290   }
291 
292   FormatToken *setPosition(unsigned P) override {
293     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
294     Position = P;
295     return Tokens[Position];
296   }
297 
298   void reset() { Position = -1; }
299 
300 private:
301   void dbgToken(int Position, llvm::StringRef Indent = "") {
302     FormatToken *Tok = Tokens[Position];
303     llvm::dbgs() << Indent << "[" << Position
304                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
305                  << ", Macro: " << !!Tok->MacroCtx << "\n";
306   }
307 
308   ArrayRef<FormatToken *> Tokens;
309   int Position;
310 };
311 
312 } // end anonymous namespace
313 
314 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
315                                          const AdditionalKeywords &Keywords,
316                                          unsigned FirstStartColumn,
317                                          ArrayRef<FormatToken *> Tokens,
318                                          UnwrappedLineConsumer &Callback)
319     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
320       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
321       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
322       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
323       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
324                        ? IG_Rejected
325                        : IG_Inited),
326       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
327 
328 void UnwrappedLineParser::reset() {
329   PPBranchLevel = -1;
330   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
331                      ? IG_Rejected
332                      : IG_Inited;
333   IncludeGuardToken = nullptr;
334   Line.reset(new UnwrappedLine);
335   CommentsBeforeNextToken.clear();
336   FormatTok = nullptr;
337   MustBreakBeforeNextToken = false;
338   PreprocessorDirectives.clear();
339   CurrentLines = &Lines;
340   DeclarationScopeStack.clear();
341   NestedTooDeep.clear();
342   PPStack.clear();
343   Line->FirstStartColumn = FirstStartColumn;
344 }
345 
346 void UnwrappedLineParser::parse() {
347   IndexedTokenSource TokenSource(AllTokens);
348   Line->FirstStartColumn = FirstStartColumn;
349   do {
350     LLVM_DEBUG(llvm::dbgs() << "----\n");
351     reset();
352     Tokens = &TokenSource;
353     TokenSource.reset();
354 
355     readToken();
356     parseFile();
357 
358     // If we found an include guard then all preprocessor directives (other than
359     // the guard) are over-indented by one.
360     if (IncludeGuard == IG_Found)
361       for (auto &Line : Lines)
362         if (Line.InPPDirective && Line.Level > 0)
363           --Line.Level;
364 
365     // Create line with eof token.
366     pushToken(FormatTok);
367     addUnwrappedLine();
368 
369     for (const UnwrappedLine &Line : Lines)
370       Callback.consumeUnwrappedLine(Line);
371 
372     Callback.finishRun();
373     Lines.clear();
374     while (!PPLevelBranchIndex.empty() &&
375            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
376       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
377       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
378     }
379     if (!PPLevelBranchIndex.empty()) {
380       ++PPLevelBranchIndex.back();
381       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
382       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
383     }
384   } while (!PPLevelBranchIndex.empty());
385 }
386 
387 void UnwrappedLineParser::parseFile() {
388   // The top-level context in a file always has declarations, except for pre-
389   // processor directives and JavaScript files.
390   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
391   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
392                                           MustBeDeclaration);
393   if (Style.Language == FormatStyle::LK_TextProto)
394     parseBracedList();
395   else
396     parseLevel(/*HasOpeningBrace=*/false, /*CanContainBracedList=*/true);
397   // Make sure to format the remaining tokens.
398   //
399   // LK_TextProto is special since its top-level is parsed as the body of a
400   // braced list, which does not necessarily have natural line separators such
401   // as a semicolon. Comments after the last entry that have been determined to
402   // not belong to that line, as in:
403   //   key: value
404   //   // endfile comment
405   // do not have a chance to be put on a line of their own until this point.
406   // Here we add this newline before end-of-file comments.
407   if (Style.Language == FormatStyle::LK_TextProto &&
408       !CommentsBeforeNextToken.empty())
409     addUnwrappedLine();
410   flushComments(true);
411   addUnwrappedLine();
412 }
413 
414 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
415   do {
416     switch (FormatTok->Tok.getKind()) {
417     case tok::l_brace:
418       return;
419     default:
420       if (FormatTok->is(Keywords.kw_where)) {
421         addUnwrappedLine();
422         nextToken();
423         parseCSharpGenericTypeConstraint();
424         break;
425       }
426       nextToken();
427       break;
428     }
429   } while (!eof());
430 }
431 
432 void UnwrappedLineParser::parseCSharpAttribute() {
433   int UnpairedSquareBrackets = 1;
434   do {
435     switch (FormatTok->Tok.getKind()) {
436     case tok::r_square:
437       nextToken();
438       --UnpairedSquareBrackets;
439       if (UnpairedSquareBrackets == 0) {
440         addUnwrappedLine();
441         return;
442       }
443       break;
444     case tok::l_square:
445       ++UnpairedSquareBrackets;
446       nextToken();
447       break;
448     default:
449       nextToken();
450       break;
451     }
452   } while (!eof());
453 }
454 
455 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
456   if (!Lines.empty() && Lines.back().InPPDirective)
457     return true;
458 
459   const FormatToken *Previous = Tokens->getPreviousToken();
460   return Previous && Previous->is(tok::comment) &&
461          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
462 }
/// \brief Parses a level: a run of structural elements that ends at the
/// closing brace (or macro block end) of a level opened with a brace, or at
/// the end of the token stream.
/// \param HasOpeningBrace If that level is started by an opening brace.
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param IfKind Passed through unchanged to parseStructuralElement().
/// \param NextLBracesType The type for left brace found in this level.
/// \returns true if a simple block, or false otherwise. (A simple block has a
/// single statement.)
bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace,
                                     bool CanContainBracedList,
                                     IfStmtKind *IfKind,
                                     TokenType NextLBracesType) {
  // Braces nested inside a compound requirement are typed as braced lists;
  // otherwise nested levels get no forced brace type.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Evaluated once at the start of the level; always true when brace removal
  // is disabled.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched like '{' and '}'.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Shared by the 'default' case and the requires-clause '{' case: parse
    // one structural element and count it as a statement. HasLabel is only
    // offered as an out-parameter while it is still false.
    auto ParseDefault = [this, HasOpeningBrace, IfKind, NextLevelLBracesType,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(IfKind, !HasOpeningBrace, NextLevelLBracesType,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      else if (FormatTok->Previous &&
               FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*UnindentWhitesmithBraces=*/false,
                 CanContainBracedList,
                 /*NextLBracesType=*/NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace) {
        // This closes the level; the closing token itself is left for the
        // caller to consume.
        if (!Style.RemoveBracesLLVM)
          return false;
        // Not a simple block unless this is a real '}' after exactly one
        // statement, without a label, and with no comment or PP directive
        // preceding either the start of the level or this brace.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective())
          return false;
        // A comment directly following the '}' with no newline before it
        // also disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // A closing brace on a level that was not opened by a brace is consumed
      // and put on its own line.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look past any comments to the next token, then rewind the stream.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Only the first label seen in this level may raise the indent level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());
  return false;
}
586 
/// Scans forward from the current '{' and classifies the braces it meets as
/// BK_Block or BK_BracedInit, mostly based on the tokens surrounding each
/// closing brace. The scan stops at eof or once every opened brace has been
/// closed. Braces still unclassified at that point become BK_Block. The token
/// stream position is restored before returning.
/// \param ExpectClassBody When set, a ';' after the outermost closing brace
/// is not by itself taken as a sign of a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis);

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          // A ';' counts, except after the outermost brace of an expected
          // class body.
          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the verdict on both the closing and the opening brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only statement macros are handled like the statement tokens below.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these inside a still-unclassified brace marks it as a block.
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (FormatToken *LBrace : LBraceStack)
    if (LBrace->is(BK_Unknown))
      LBrace->setBlockKind(BK_Block);

  // Rewind so that parsing continues from where the scan started.
  FormatTok = Tokens->setPosition(StoredPosition);
}
738 
/// Mixes the std::hash of \p v into \p seed, updating \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  const std::size_t Mixed = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed = seed ^ Mixed;
}
744 
745 size_t UnwrappedLineParser::computePPHash() const {
746   size_t h = 0;
747   for (const auto &i : PPStack) {
748     hash_combine(h, size_t(i.Kind));
749     hash_combine(h, i.Line);
750   }
751   return h;
752 }
753 
/// Parses a block that starts at the current token, which must be a '{' or a
/// macro block begin token, up to and including its closing token.
/// \param MustBeDeclaration Declaration state pushed for the block's content.
/// \param AddLevels Number of levels the block's content is indented by.
/// \param MunchSemi Whether a ';' right after the block is consumed as well.
/// \param UnindentWhitesmithsBraces Whether to lower the line level by one
/// after the line with the opening brace has been emitted.
/// \param CanContainBracedList Passed through to parseLevel().
/// \param NextLBracesType Passed through to parseLevel().
/// \returns the IfStmtKind that parseLevel() stored through its out-parameter,
/// or IfStmtKind::NotIf if it stored nothing.
UnwrappedLineParser::IfStmtKind
UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                bool MunchSemi, bool UnindentWhitesmithsBraces,
                                bool CanContainBracedList,
                                TokenType NextLBracesType) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Remember the opening token so it can be paired with the closing one.
  FormatToken *Tok = FormatTok;
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Snapshot of the preprocessor state at the opening brace; compared with
  // the state at the closing brace further down.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock = parseLevel(
      /*HasOpeningBrace=*/true, CanContainBracedList, &IfKind, NextLBracesType);

  if (eof())
    return IfKind;

  // The level ended on something other than the expected closing token:
  // restore the level and give up on this block.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // For a simple block, link the opening and closing braces to each other,
  // unless the token before the closing brace is a non-optional '}'.
  if (SimpleBlock && Tok->is(tok::l_brace)) {
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      Tok->MatchingParen = FormatTok;
      FormatTok->MatchingParen = Tok;
    }
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Only record the opening/closing line indices when both braces were seen
  // under the same preprocessor state.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}
857 
858 static bool isGoogScope(const UnwrappedLine &Line) {
859   // FIXME: Closure-library specific stuff should not be hard-coded but be
860   // configurable.
861   if (Line.Tokens.size() < 4)
862     return false;
863   auto I = Line.Tokens.begin();
864   if (I->Tok->TokenText != "goog")
865     return false;
866   ++I;
867   if (I->Tok->isNot(tok::period))
868     return false;
869   ++I;
870   if (I->Tok->TokenText != "scope")
871     return false;
872   ++I;
873   return I->Tok->is(tok::l_paren);
874 }
875 
876 static bool isIIFE(const UnwrappedLine &Line,
877                    const AdditionalKeywords &Keywords) {
878   // Look for the start of an immediately invoked anonymous function.
879   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
880   // This is commonly done in JavaScript to create a new, anonymous scope.
881   // Example: (function() { ... })()
882   if (Line.Tokens.size() < 3)
883     return false;
884   auto I = Line.Tokens.begin();
885   if (I->Tok->isNot(tok::l_paren))
886     return false;
887   ++I;
888   if (I->Tok->isNot(Keywords.kw_function))
889     return false;
890   ++I;
891   return I->Tok->is(tok::l_paren);
892 }
893 
894 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
895                                    const FormatToken &InitialToken) {
896   tok::TokenKind Kind = InitialToken.Tok.getKind();
897   if (InitialToken.is(TT_NamespaceMacro))
898     Kind = tok::kw_namespace;
899 
900   switch (Kind) {
901   case tok::kw_namespace:
902     return Style.BraceWrapping.AfterNamespace;
903   case tok::kw_class:
904     return Style.BraceWrapping.AfterClass;
905   case tok::kw_union:
906     return Style.BraceWrapping.AfterUnion;
907   case tok::kw_struct:
908     return Style.BraceWrapping.AfterStruct;
909   case tok::kw_enum:
910     return Style.BraceWrapping.AfterEnum;
911   default:
912     return false;
913   }
914 }
915 
916 void UnwrappedLineParser::parseChildBlock(
917     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
918   FormatTok->setBlockKind(BK_Block);
919   nextToken();
920   {
921     bool SkipIndent = (Style.isJavaScript() &&
922                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
923     ScopedLineState LineState(*this);
924     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
925                                             /*MustBeDeclaration=*/false);
926     Line->Level += SkipIndent ? 0 : 1;
927     parseLevel(/*HasOpeningBrace=*/true, CanContainBracedList,
928                /*IfKind=*/nullptr, NextLBracesType);
929     flushComments(isOnNewLine(*FormatTok));
930     Line->Level -= SkipIndent ? 0 : 1;
931   }
932   nextToken();
933 }
934 
935 void UnwrappedLineParser::parsePPDirective() {
936   assert(FormatTok->is(tok::hash) && "'#' expected");
937   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
938 
939   nextToken();
940 
941   if (!FormatTok->Tok.getIdentifierInfo()) {
942     parsePPUnknown();
943     return;
944   }
945 
946   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
947   case tok::pp_define:
948     parsePPDefine();
949     return;
950   case tok::pp_if:
951     parsePPIf(/*IfDef=*/false);
952     break;
953   case tok::pp_ifdef:
954   case tok::pp_ifndef:
955     parsePPIf(/*IfDef=*/true);
956     break;
957   case tok::pp_else:
958     parsePPElse();
959     break;
960   case tok::pp_elifdef:
961   case tok::pp_elifndef:
962   case tok::pp_elif:
963     parsePPElIf();
964     break;
965   case tok::pp_endif:
966     parsePPEndIf();
967     break;
968   default:
969     parsePPUnknown();
970     break;
971   }
972 }
973 
974 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
975   size_t Line = CurrentLines->size();
976   if (CurrentLines == &PreprocessorDirectives)
977     Line += Lines.size();
978 
979   if (Unreachable ||
980       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
981     PPStack.push_back({PP_Unreachable, Line});
982   else
983     PPStack.push_back({PP_Conditional, Line});
984 }
985 
986 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
987   ++PPBranchLevel;
988   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
989   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
990     PPLevelBranchIndex.push_back(0);
991     PPLevelBranchCount.push_back(0);
992   }
993   PPChainBranchIndex.push(0);
994   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
995   conditionalCompilationCondition(Unreachable || Skip);
996 }
997 
998 void UnwrappedLineParser::conditionalCompilationAlternative() {
999   if (!PPStack.empty())
1000     PPStack.pop_back();
1001   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1002   if (!PPChainBranchIndex.empty())
1003     ++PPChainBranchIndex.top();
1004   conditionalCompilationCondition(
1005       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1006       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1007 }
1008 
1009 void UnwrappedLineParser::conditionalCompilationEnd() {
1010   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1011   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1012     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1013       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1014   }
1015   // Guard against #endif's without #if.
1016   if (PPBranchLevel > -1)
1017     --PPBranchLevel;
1018   if (!PPChainBranchIndex.empty())
1019     PPChainBranchIndex.pop();
1020   if (!PPStack.empty())
1021     PPStack.pop_back();
1022 }
1023 
1024 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1025   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1026   nextToken();
1027   bool Unreachable = false;
1028   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1029     Unreachable = true;
1030   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1031     Unreachable = true;
1032   conditionalCompilationStart(Unreachable);
1033   FormatToken *IfCondition = FormatTok;
1034   // If there's a #ifndef on the first line, and the only lines before it are
1035   // comments, it could be an include guard.
1036   bool MaybeIncludeGuard = IfNDef;
1037   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
1038     for (auto &Line : Lines) {
1039       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1040         MaybeIncludeGuard = false;
1041         IncludeGuard = IG_Rejected;
1042         break;
1043       }
1044     }
1045   --PPBranchLevel;
1046   parsePPUnknown();
1047   ++PPBranchLevel;
1048   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1049     IncludeGuard = IG_IfNdefed;
1050     IncludeGuardToken = IfCondition;
1051   }
1052 }
1053 
1054 void UnwrappedLineParser::parsePPElse() {
1055   // If a potential include guard has an #else, it's not an include guard.
1056   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1057     IncludeGuard = IG_Rejected;
1058   conditionalCompilationAlternative();
1059   if (PPBranchLevel > -1)
1060     --PPBranchLevel;
1061   parsePPUnknown();
1062   ++PPBranchLevel;
1063 }
1064 
1065 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1066 
1067 void UnwrappedLineParser::parsePPEndIf() {
1068   conditionalCompilationEnd();
1069   parsePPUnknown();
1070   // If the #endif of a potential include guard is the last thing in the file,
1071   // then we found an include guard.
1072   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1073       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1074     IncludeGuard = IG_Found;
1075 }
1076 
1077 void UnwrappedLineParser::parsePPDefine() {
1078   nextToken();
1079 
1080   if (!FormatTok->Tok.getIdentifierInfo()) {
1081     IncludeGuard = IG_Rejected;
1082     IncludeGuardToken = nullptr;
1083     parsePPUnknown();
1084     return;
1085   }
1086 
1087   if (IncludeGuard == IG_IfNdefed &&
1088       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1089     IncludeGuard = IG_Defined;
1090     IncludeGuardToken = nullptr;
1091     for (auto &Line : Lines) {
1092       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1093         IncludeGuard = IG_Rejected;
1094         break;
1095       }
1096     }
1097   }
1098 
1099   // In the context of a define, even keywords should be treated as normal
1100   // identifiers. Setting the kind to identifier is not enough, because we need
1101   // to treat additional keywords like __except as well, which are already
1102   // identifiers. Setting the identifier info to null interferes with include
1103   // guard processing above, and changes preprocessing nesting.
1104   FormatTok->Tok.setKind(tok::identifier);
1105   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1106   nextToken();
1107   if (FormatTok->Tok.getKind() == tok::l_paren &&
1108       !FormatTok->hasWhitespaceBefore())
1109     parseParens();
1110   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1111     Line->Level += PPBranchLevel + 1;
1112   addUnwrappedLine();
1113   ++Line->Level;
1114 
1115   // Errors during a preprocessor directive can only affect the layout of the
1116   // preprocessor directive, and thus we ignore them. An alternative approach
1117   // would be to use the same approach we use on the file level (no
1118   // re-indentation if there was a structural error) within the macro
1119   // definition.
1120   parseFile();
1121 }
1122 
1123 void UnwrappedLineParser::parsePPUnknown() {
1124   do {
1125     nextToken();
1126   } while (!eof());
1127   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1128     Line->Level += PPBranchLevel + 1;
1129   addUnwrappedLine();
1130 }
1131 
1132 // Here we exclude certain tokens that are not usually the first token in an
1133 // unwrapped line. This is used in attempt to distinguish macro calls without
1134 // trailing semicolons from other constructs split to several lines.
1135 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1136   // Semicolon can be a null-statement, l_square can be a start of a macro or
1137   // a C++11 attribute, but this doesn't seem to be common.
1138   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1139          Tok.isNot(TT_AttributeSquare) &&
1140          // Tokens that can only be used as binary operators and a part of
1141          // overloaded operator names.
1142          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1143          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1144          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1145          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1146          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1147          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1148          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1149          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1150          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1151          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1152          Tok.isNot(tok::lesslessequal) &&
1153          // Colon is used in labels, base class lists, initializer lists,
1154          // range-based for loops, ternary operator, but should never be the
1155          // first token in an unwrapped line.
1156          Tok.isNot(tok::colon) &&
1157          // 'noexcept' is a trailing annotation.
1158          Tok.isNot(tok::kw_noexcept);
1159 }
1160 
1161 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1162                           const FormatToken *FormatTok) {
1163   // FIXME: This returns true for C/C++ keywords like 'struct'.
1164   return FormatTok->is(tok::identifier) &&
1165          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1166           !FormatTok->isOneOf(
1167               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1168               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1169               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1170               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1171               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1172               Keywords.kw_instanceof, Keywords.kw_interface,
1173               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1174 }
1175 
1176 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1177                                  const FormatToken *FormatTok) {
1178   return FormatTok->Tok.isLiteral() ||
1179          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1180          mustBeJSIdent(Keywords, FormatTok);
1181 }
1182 
1183 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1184 // when encountered after a value (see mustBeJSIdentOrValue).
1185 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1186                            const FormatToken *FormatTok) {
1187   return FormatTok->isOneOf(
1188       tok::kw_return, Keywords.kw_yield,
1189       // conditionals
1190       tok::kw_if, tok::kw_else,
1191       // loops
1192       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1193       // switch/case
1194       tok::kw_switch, tok::kw_case,
1195       // exceptions
1196       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1197       // declaration
1198       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1199       Keywords.kw_async, Keywords.kw_function,
1200       // import/export
1201       Keywords.kw_import, tok::kw_export);
1202 }
1203 
1204 // Checks whether a token is a type in K&R C (aka C78).
1205 static bool isC78Type(const FormatToken &Tok) {
1206   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1207                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1208                      tok::identifier);
1209 }
1210 
1211 // This function checks whether a token starts the first parameter declaration
1212 // in a K&R C (aka C78) function definition, e.g.:
1213 //   int f(a, b)
1214 //   short a, b;
1215 //   {
1216 //      return a + b;
1217 //   }
1218 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1219                                const FormatToken *FuncName) {
1220   assert(Tok);
1221   assert(Next);
1222   assert(FuncName);
1223 
1224   if (FuncName->isNot(tok::identifier))
1225     return false;
1226 
1227   const FormatToken *Prev = FuncName->Previous;
1228   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1229     return false;
1230 
1231   if (!isC78Type(*Tok) &&
1232       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1233     return false;
1234 
1235   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1236     return false;
1237 
1238   Tok = Tok->Previous;
1239   if (!Tok || Tok->isNot(tok::r_paren))
1240     return false;
1241 
1242   Tok = Tok->Previous;
1243   if (!Tok || Tok->isNot(tok::identifier))
1244     return false;
1245 
1246   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1247 }
1248 
1249 void UnwrappedLineParser::parseModuleImport() {
1250   nextToken();
1251   while (!eof()) {
1252     if (FormatTok->is(tok::colon)) {
1253       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1254     }
1255     // Handle import <foo/bar.h> as we would an include statement.
1256     else if (FormatTok->is(tok::less)) {
1257       nextToken();
1258       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1259         // Mark tokens up to the trailing line comments as implicit string
1260         // literals.
1261         if (FormatTok->isNot(tok::comment) &&
1262             !FormatTok->TokenText.startswith("//"))
1263           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1264         nextToken();
1265       }
1266     }
1267     if (FormatTok->is(tok::semi)) {
1268       nextToken();
1269       break;
1270     }
1271     nextToken();
1272   }
1273 
1274   addUnwrappedLine();
1275 }
1276 
1277 // readTokenWithJavaScriptASI reads the next token and terminates the current
1278 // line if JavaScript Automatic Semicolon Insertion must
1279 // happen between the current token and the next token.
1280 //
1281 // This method is conservative - it cannot cover all edge cases of JavaScript,
1282 // but only aims to correctly handle certain well known cases. It *must not*
1283 // return true in speculative cases.
1284 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1285   FormatToken *Previous = FormatTok;
1286   readToken();
1287   FormatToken *Next = FormatTok;
1288 
1289   bool IsOnSameLine =
1290       CommentsBeforeNextToken.empty()
1291           ? Next->NewlinesBefore == 0
1292           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1293   if (IsOnSameLine)
1294     return;
1295 
1296   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1297   bool PreviousStartsTemplateExpr =
1298       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1299   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1300     // If the line contains an '@' sign, the previous token might be an
1301     // annotation, which can precede another identifier/value.
1302     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1303       return LineNode.Tok->is(tok::at);
1304     });
1305     if (HasAt)
1306       return;
1307   }
1308   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1309     return addUnwrappedLine();
1310   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1311   bool NextEndsTemplateExpr =
1312       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1313   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1314       (PreviousMustBeValue ||
1315        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1316                          tok::minusminus)))
1317     return addUnwrappedLine();
1318   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1319       isJSDeclOrStmt(Keywords, Next))
1320     return addUnwrappedLine();
1321 }
1322 
1323 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
1324                                                  bool IsTopLevel,
1325                                                  TokenType NextLBracesType,
1326                                                  bool *HasLabel) {
1327   if (Style.Language == FormatStyle::LK_TableGen &&
1328       FormatTok->is(tok::pp_include)) {
1329     nextToken();
1330     if (FormatTok->is(tok::string_literal))
1331       nextToken();
1332     addUnwrappedLine();
1333     return;
1334   }
1335   switch (FormatTok->Tok.getKind()) {
1336   case tok::kw_asm:
1337     nextToken();
1338     if (FormatTok->is(tok::l_brace)) {
1339       FormatTok->setFinalizedType(TT_InlineASMBrace);
1340       nextToken();
1341       while (FormatTok && FormatTok->isNot(tok::eof)) {
1342         if (FormatTok->is(tok::r_brace)) {
1343           FormatTok->setFinalizedType(TT_InlineASMBrace);
1344           nextToken();
1345           addUnwrappedLine();
1346           break;
1347         }
1348         FormatTok->Finalized = true;
1349         nextToken();
1350       }
1351     }
1352     break;
1353   case tok::kw_namespace:
1354     parseNamespace();
1355     return;
1356   case tok::kw_public:
1357   case tok::kw_protected:
1358   case tok::kw_private:
1359     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1360         Style.isCSharp())
1361       nextToken();
1362     else
1363       parseAccessSpecifier();
1364     return;
1365   case tok::kw_if:
1366     if (Style.isJavaScript() && Line->MustBeDeclaration)
1367       // field/method declaration.
1368       break;
1369     parseIfThenElse(IfKind);
1370     return;
1371   case tok::kw_for:
1372   case tok::kw_while:
1373     if (Style.isJavaScript() && Line->MustBeDeclaration)
1374       // field/method declaration.
1375       break;
1376     parseForOrWhileLoop();
1377     return;
1378   case tok::kw_do:
1379     if (Style.isJavaScript() && Line->MustBeDeclaration)
1380       // field/method declaration.
1381       break;
1382     parseDoWhile();
1383     return;
1384   case tok::kw_switch:
1385     if (Style.isJavaScript() && Line->MustBeDeclaration)
1386       // 'switch: string' field declaration.
1387       break;
1388     parseSwitch();
1389     return;
1390   case tok::kw_default:
1391     if (Style.isJavaScript() && Line->MustBeDeclaration)
1392       // 'default: string' field declaration.
1393       break;
1394     nextToken();
1395     if (FormatTok->is(tok::colon)) {
1396       parseLabel();
1397       return;
1398     }
1399     // e.g. "default void f() {}" in a Java interface.
1400     break;
1401   case tok::kw_case:
1402     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1403       // 'case: string' field declaration.
1404       nextToken();
1405       break;
1406     }
1407     parseCaseLabel();
1408     return;
1409   case tok::kw_try:
1410   case tok::kw___try:
1411     if (Style.isJavaScript() && Line->MustBeDeclaration)
1412       // field/method declaration.
1413       break;
1414     parseTryCatch();
1415     return;
1416   case tok::kw_extern:
1417     nextToken();
1418     if (FormatTok->is(tok::string_literal)) {
1419       nextToken();
1420       if (FormatTok->is(tok::l_brace)) {
1421         if (Style.BraceWrapping.AfterExternBlock)
1422           addUnwrappedLine();
1423         // Either we indent or for backwards compatibility we follow the
1424         // AfterExternBlock style.
1425         unsigned AddLevels =
1426             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1427                     (Style.BraceWrapping.AfterExternBlock &&
1428                      Style.IndentExternBlock ==
1429                          FormatStyle::IEBS_AfterExternBlock)
1430                 ? 1u
1431                 : 0u;
1432         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1433         addUnwrappedLine();
1434         return;
1435       }
1436     }
1437     break;
1438   case tok::kw_export:
1439     if (Style.isJavaScript()) {
1440       parseJavaScriptEs6ImportExport();
1441       return;
1442     }
1443     if (!Style.isCpp())
1444       break;
1445     // Handle C++ "(inline|export) namespace".
1446     LLVM_FALLTHROUGH;
1447   case tok::kw_inline:
1448     nextToken();
1449     if (FormatTok->is(tok::kw_namespace)) {
1450       parseNamespace();
1451       return;
1452     }
1453     break;
1454   case tok::identifier:
1455     if (FormatTok->is(TT_ForEachMacro)) {
1456       parseForOrWhileLoop();
1457       return;
1458     }
1459     if (FormatTok->is(TT_MacroBlockBegin)) {
1460       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1461                  /*MunchSemi=*/false);
1462       return;
1463     }
1464     if (FormatTok->is(Keywords.kw_import)) {
1465       if (Style.isJavaScript()) {
1466         parseJavaScriptEs6ImportExport();
1467         return;
1468       }
1469       if (Style.Language == FormatStyle::LK_Proto) {
1470         nextToken();
1471         if (FormatTok->is(tok::kw_public))
1472           nextToken();
1473         if (!FormatTok->is(tok::string_literal))
1474           return;
1475         nextToken();
1476         if (FormatTok->is(tok::semi))
1477           nextToken();
1478         addUnwrappedLine();
1479         return;
1480       }
1481       if (Style.isCpp()) {
1482         parseModuleImport();
1483         return;
1484       }
1485     }
1486     if (Style.isCpp() &&
1487         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1488                            Keywords.kw_slots, Keywords.kw_qslots)) {
1489       nextToken();
1490       if (FormatTok->is(tok::colon)) {
1491         nextToken();
1492         addUnwrappedLine();
1493         return;
1494       }
1495     }
1496     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1497       parseStatementMacro();
1498       return;
1499     }
1500     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1501       parseNamespace();
1502       return;
1503     }
1504     // In all other cases, parse the declaration.
1505     break;
1506   default:
1507     break;
1508   }
1509   do {
1510     const FormatToken *Previous = FormatTok->Previous;
1511     switch (FormatTok->Tok.getKind()) {
1512     case tok::at:
1513       nextToken();
1514       if (FormatTok->is(tok::l_brace)) {
1515         nextToken();
1516         parseBracedList();
1517         break;
1518       } else if (Style.Language == FormatStyle::LK_Java &&
1519                  FormatTok->is(Keywords.kw_interface)) {
1520         nextToken();
1521         break;
1522       }
1523       switch (FormatTok->Tok.getObjCKeywordID()) {
1524       case tok::objc_public:
1525       case tok::objc_protected:
1526       case tok::objc_package:
1527       case tok::objc_private:
1528         return parseAccessSpecifier();
1529       case tok::objc_interface:
1530       case tok::objc_implementation:
1531         return parseObjCInterfaceOrImplementation();
1532       case tok::objc_protocol:
1533         if (parseObjCProtocol())
1534           return;
1535         break;
1536       case tok::objc_end:
1537         return; // Handled by the caller.
1538       case tok::objc_optional:
1539       case tok::objc_required:
1540         nextToken();
1541         addUnwrappedLine();
1542         return;
1543       case tok::objc_autoreleasepool:
1544         nextToken();
1545         if (FormatTok->is(tok::l_brace)) {
1546           if (Style.BraceWrapping.AfterControlStatement ==
1547               FormatStyle::BWACS_Always)
1548             addUnwrappedLine();
1549           parseBlock();
1550         }
1551         addUnwrappedLine();
1552         return;
1553       case tok::objc_synchronized:
1554         nextToken();
1555         if (FormatTok->is(tok::l_paren))
1556           // Skip synchronization object
1557           parseParens();
1558         if (FormatTok->is(tok::l_brace)) {
1559           if (Style.BraceWrapping.AfterControlStatement ==
1560               FormatStyle::BWACS_Always)
1561             addUnwrappedLine();
1562           parseBlock();
1563         }
1564         addUnwrappedLine();
1565         return;
1566       case tok::objc_try:
1567         // This branch isn't strictly necessary (the kw_try case below would
1568         // do this too after the tok::at is parsed above).  But be explicit.
1569         parseTryCatch();
1570         return;
1571       default:
1572         break;
1573       }
1574       break;
1575     case tok::kw_concept:
1576       parseConcept();
1577       return;
1578     case tok::kw_requires: {
1579       if (Style.isCpp()) {
1580         bool ParsedClause = parseRequires();
1581         if (ParsedClause)
1582           return;
1583       } else {
1584         nextToken();
1585       }
1586       break;
1587     }
1588     case tok::kw_enum:
1589       // Ignore if this is part of "template <enum ...".
1590       if (Previous && Previous->is(tok::less)) {
1591         nextToken();
1592         break;
1593       }
1594 
1595       // parseEnum falls through and does not yet add an unwrapped line as an
1596       // enum definition can start a structural element.
1597       if (!parseEnum())
1598         break;
1599       // This only applies for C++.
1600       if (!Style.isCpp()) {
1601         addUnwrappedLine();
1602         return;
1603       }
1604       break;
1605     case tok::kw_typedef:
1606       nextToken();
1607       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1608                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1609                              Keywords.kw_CF_CLOSED_ENUM,
1610                              Keywords.kw_NS_CLOSED_ENUM))
1611         parseEnum();
1612       break;
1613     case tok::kw_struct:
1614     case tok::kw_union:
1615     case tok::kw_class:
1616       if (parseStructLike())
1617         return;
1618       break;
1619     case tok::period:
1620       nextToken();
1621       // In Java, classes have an implicit static member "class".
1622       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1623           FormatTok->is(tok::kw_class))
1624         nextToken();
1625       if (Style.isJavaScript() && FormatTok &&
1626           FormatTok->Tok.getIdentifierInfo())
1627         // JavaScript only has pseudo keywords, all keywords are allowed to
1628         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1629         nextToken();
1630       break;
1631     case tok::semi:
1632       nextToken();
1633       addUnwrappedLine();
1634       return;
1635     case tok::r_brace:
1636       addUnwrappedLine();
1637       return;
1638     case tok::l_paren: {
1639       parseParens();
1640       // Break the unwrapped line if a K&R C function definition has a parameter
1641       // declaration.
1642       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1643         break;
1644       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1645         addUnwrappedLine();
1646         return;
1647       }
1648       break;
1649     }
1650     case tok::kw_operator:
1651       nextToken();
1652       if (FormatTok->isBinaryOperator())
1653         nextToken();
1654       break;
1655     case tok::caret:
1656       nextToken();
1657       if (FormatTok->Tok.isAnyIdentifier() ||
1658           FormatTok->isSimpleTypeSpecifier())
1659         nextToken();
1660       if (FormatTok->is(tok::l_paren))
1661         parseParens();
1662       if (FormatTok->is(tok::l_brace))
1663         parseChildBlock();
1664       break;
1665     case tok::l_brace:
1666       if (NextLBracesType != TT_Unknown)
1667         FormatTok->setFinalizedType(NextLBracesType);
1668       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1669         // A block outside of parentheses must be the last part of a
1670         // structural element.
1671         // FIXME: Figure out cases where this is not true, and add projections
1672         // for them (the one we know is missing are lambdas).
1673         if (Style.Language == FormatStyle::LK_Java &&
1674             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1675           // If necessary, we could set the type to something different than
1676           // TT_FunctionLBrace.
1677           if (Style.BraceWrapping.AfterControlStatement ==
1678               FormatStyle::BWACS_Always)
1679             addUnwrappedLine();
1680         } else if (Style.BraceWrapping.AfterFunction) {
1681           addUnwrappedLine();
1682         }
1683         if (!Line->InPPDirective)
1684           FormatTok->setFinalizedType(TT_FunctionLBrace);
1685         parseBlock();
1686         addUnwrappedLine();
1687         return;
1688       }
1689       // Otherwise this was a braced init list, and the structural
1690       // element continues.
1691       break;
1692     case tok::kw_try:
1693       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1694         // field/method declaration.
1695         nextToken();
1696         break;
1697       }
1698       // We arrive here when parsing function-try blocks.
1699       if (Style.BraceWrapping.AfterFunction)
1700         addUnwrappedLine();
1701       parseTryCatch();
1702       return;
1703     case tok::identifier: {
1704       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1705           Line->MustBeDeclaration) {
1706         addUnwrappedLine();
1707         parseCSharpGenericTypeConstraint();
1708         break;
1709       }
1710       if (FormatTok->is(TT_MacroBlockEnd)) {
1711         addUnwrappedLine();
1712         return;
1713       }
1714 
1715       // Function declarations (as opposed to function expressions) are parsed
1716       // on their own unwrapped line by continuing this loop. Function
1717       // expressions (functions that are not on their own line) must not create
1718       // a new unwrapped line, so they are special cased below.
1719       size_t TokenCount = Line->Tokens.size();
1720       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1721           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1722                                                      Keywords.kw_async)))) {
1723         tryToParseJSFunction();
1724         break;
1725       }
1726       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1727           FormatTok->is(Keywords.kw_interface)) {
1728         if (Style.isJavaScript()) {
1729           // In JavaScript/TypeScript, "interface" can be used as a standalone
1730           // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
1732           // interface declaration.
1733           unsigned StoredPosition = Tokens->getPosition();
1734           FormatToken *Next = Tokens->getNextToken();
1735           FormatTok = Tokens->setPosition(StoredPosition);
1736           if (!mustBeJSIdent(Keywords, Next)) {
1737             nextToken();
1738             break;
1739           }
1740         }
1741         parseRecord();
1742         addUnwrappedLine();
1743         return;
1744       }
1745 
1746       if (FormatTok->is(Keywords.kw_interface)) {
1747         if (parseStructLike())
1748           return;
1749         break;
1750       }
1751 
1752       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1753         parseStatementMacro();
1754         return;
1755       }
1756 
1757       // See if the following token should start a new unwrapped line.
1758       StringRef Text = FormatTok->TokenText;
1759 
1760       FormatToken *PreviousToken = FormatTok;
1761       nextToken();
1762 
1763       // JS doesn't have macros, and within classes colons indicate fields, not
1764       // labels.
1765       if (Style.isJavaScript())
1766         break;
1767 
1768       TokenCount = Line->Tokens.size();
1769       if (TokenCount == 1 ||
1770           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1771         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1772           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1773           parseLabel(!Style.IndentGotoLabels);
1774           if (HasLabel)
1775             *HasLabel = true;
1776           return;
1777         }
1778         // Recognize function-like macro usages without trailing semicolon as
1779         // well as free-standing macros like Q_OBJECT.
1780         bool FunctionLike = FormatTok->is(tok::l_paren);
1781         if (FunctionLike)
1782           parseParens();
1783 
1784         bool FollowedByNewline =
1785             CommentsBeforeNextToken.empty()
1786                 ? FormatTok->NewlinesBefore > 0
1787                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1788 
1789         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1790             tokenCanStartNewLine(*FormatTok) && Text == Text.upper() &&
1791             !PreviousToken->isTypeFinalized()) {
1792           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1793           addUnwrappedLine();
1794           return;
1795         }
1796       }
1797       break;
1798     }
1799     case tok::equal:
1800       if ((Style.isJavaScript() || Style.isCSharp()) &&
1801           FormatTok->is(TT_FatArrow)) {
1802         tryToParseChildBlock();
1803         break;
1804       }
1805 
1806       nextToken();
1807       if (FormatTok->is(tok::l_brace)) {
1808         // Block kind should probably be set to BK_BracedInit for any language.
1809         // C# needs this change to ensure that array initialisers and object
1810         // initialisers are indented the same way.
1811         if (Style.isCSharp())
1812           FormatTok->setBlockKind(BK_BracedInit);
1813         nextToken();
1814         parseBracedList();
1815       } else if (Style.Language == FormatStyle::LK_Proto &&
1816                  FormatTok->is(tok::less)) {
1817         nextToken();
1818         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1819                         /*ClosingBraceKind=*/tok::greater);
1820       }
1821       break;
1822     case tok::l_square:
1823       parseSquare();
1824       break;
1825     case tok::kw_new:
1826       parseNew();
1827       break;
1828     case tok::kw_case:
1829       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1830         // 'case: string' field declaration.
1831         nextToken();
1832         break;
1833       }
1834       parseCaseLabel();
1835       break;
1836     default:
1837       nextToken();
1838       break;
1839     }
1840   } while (!eof());
1841 }
1842 
1843 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1844   assert(FormatTok->is(tok::l_brace));
1845   if (!Style.isCSharp())
1846     return false;
1847   // See if it's a property accessor.
1848   if (FormatTok->Previous->isNot(tok::identifier))
1849     return false;
1850 
1851   // See if we are inside a property accessor.
1852   //
1853   // Record the current tokenPosition so that we can advance and
1854   // reset the current token. `Next` is not set yet so we need
1855   // another way to advance along the token stream.
1856   unsigned int StoredPosition = Tokens->getPosition();
1857   FormatToken *Tok = Tokens->getNextToken();
1858 
1859   // A trivial property accessor is of the form:
1860   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
1861   // Track these as they do not require line breaks to be introduced.
1862   bool HasSpecialAccessor = false;
1863   bool IsTrivialPropertyAccessor = true;
1864   while (!eof()) {
1865     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1866                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1867                      Keywords.kw_init, Keywords.kw_set)) {
1868       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
1869         HasSpecialAccessor = true;
1870       Tok = Tokens->getNextToken();
1871       continue;
1872     }
1873     if (Tok->isNot(tok::r_brace))
1874       IsTrivialPropertyAccessor = false;
1875     break;
1876   }
1877 
1878   if (!HasSpecialAccessor) {
1879     Tokens->setPosition(StoredPosition);
1880     return false;
1881   }
1882 
1883   // Try to parse the property accessor:
1884   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1885   Tokens->setPosition(StoredPosition);
1886   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1887     addUnwrappedLine();
1888   nextToken();
1889   do {
1890     switch (FormatTok->Tok.getKind()) {
1891     case tok::r_brace:
1892       nextToken();
1893       if (FormatTok->is(tok::equal)) {
1894         while (!eof() && FormatTok->isNot(tok::semi))
1895           nextToken();
1896         nextToken();
1897       }
1898       addUnwrappedLine();
1899       return true;
1900     case tok::l_brace:
1901       ++Line->Level;
1902       parseBlock(/*MustBeDeclaration=*/true);
1903       addUnwrappedLine();
1904       --Line->Level;
1905       break;
1906     case tok::equal:
1907       if (FormatTok->is(TT_FatArrow)) {
1908         ++Line->Level;
1909         do {
1910           nextToken();
1911         } while (!eof() && FormatTok->isNot(tok::semi));
1912         nextToken();
1913         addUnwrappedLine();
1914         --Line->Level;
1915         break;
1916       }
1917       nextToken();
1918       break;
1919     default:
1920       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
1921                              Keywords.kw_set) &&
1922           !IsTrivialPropertyAccessor) {
1923         // Non-trivial get/set needs to be on its own line.
1924         addUnwrappedLine();
1925       }
1926       nextToken();
1927     }
1928   } while (!eof());
1929 
1930   // Unreachable for well-formed code (paired '{' and '}').
1931   return true;
1932 }
1933 
/// Tries to parse a C++ lambda expression starting at the current '['.
///
/// For non-C++ styles the '[' is consumed and false is returned. False is also
/// returned when tryToParseLambdaIntroducer() rejects the introducer.
/// Otherwise the tokens following the introducer are scanned up to the '{' of
/// the body. If a token is met that is not accepted by the switch below, the
/// scan stops and true is returned with the scanned tokens consumed but
/// without annotating the '[' or a '{'. If the '{' is reached, it and the
/// opening '[' are annotated as lambda tokens, the body is parsed as a child
/// block, and true is returned.
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  // Remember the '[' so that it can be annotated once the body is found.
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // Operators and literals are only accepted after a trailing-return arrow or
  // inside a template parameter list (see the case list below).
  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
      assert(FormatTok->Previous);
      // One of these keywords directly after '<' starts a template parameter
      // list.
      if (FormatTok->Previous->is(tok::less))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      // Not in a context where these tokens may appear: give up.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      // Any other token means this is not treated as a lambda.
      return true;
    }
  }
  // The body's '{' was reached: annotate both ends and parse the body.
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}
2038 
2039 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2040   const FormatToken *Previous = FormatTok->Previous;
2041   const FormatToken *LeftSquare = FormatTok;
2042   nextToken();
2043   if (Previous &&
2044       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2045                          tok::kw_delete, tok::l_square) ||
2046        LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2047        Previous->isSimpleTypeSpecifier())) {
2048     return false;
2049   }
2050   if (FormatTok->is(tok::l_square))
2051     return false;
2052   if (FormatTok->is(tok::r_square)) {
2053     const FormatToken *Next = Tokens->peekNextToken();
2054     if (Next->is(tok::greater))
2055       return false;
2056   }
2057   parseSquare(/*LambdaIntroducer=*/true);
2058   return true;
2059 }
2060 
2061 void UnwrappedLineParser::tryToParseJSFunction() {
2062   assert(FormatTok->is(Keywords.kw_function) ||
2063          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2064   if (FormatTok->is(Keywords.kw_async))
2065     nextToken();
2066   // Consume "function".
2067   nextToken();
2068 
2069   // Consume * (generator function). Treat it like C++'s overloaded operators.
2070   if (FormatTok->is(tok::star)) {
2071     FormatTok->setFinalizedType(TT_OverloadedOperator);
2072     nextToken();
2073   }
2074 
2075   // Consume function name.
2076   if (FormatTok->is(tok::identifier))
2077     nextToken();
2078 
2079   if (FormatTok->isNot(tok::l_paren))
2080     return;
2081 
2082   // Parse formal parameter list.
2083   parseParens();
2084 
2085   if (FormatTok->is(tok::colon)) {
2086     // Parse a type definition.
2087     nextToken();
2088 
2089     // Eat the type declaration. For braced inline object types, balance braces,
2090     // otherwise just parse until finding an l_brace for the function body.
2091     if (FormatTok->is(tok::l_brace))
2092       tryToParseBracedList();
2093     else
2094       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2095         nextToken();
2096   }
2097 
2098   if (FormatTok->is(tok::semi))
2099     return;
2100 
2101   parseChildBlock();
2102 }
2103 
2104 bool UnwrappedLineParser::tryToParseBracedList() {
2105   if (FormatTok->is(BK_Unknown))
2106     calculateBraceTypes();
2107   assert(FormatTok->isNot(BK_Unknown));
2108   if (FormatTok->is(BK_Block))
2109     return false;
2110   nextToken();
2111   parseBracedList();
2112   return true;
2113 }
2114 
2115 bool UnwrappedLineParser::tryToParseChildBlock() {
2116   assert(Style.isJavaScript() || Style.isCSharp());
2117   assert(FormatTok->is(TT_FatArrow));
2118   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2119   // They always start an expression or a child block if followed by a curly
2120   // brace.
2121   nextToken();
2122   if (FormatTok->isNot(tok::l_brace))
2123     return false;
2124   parseChildBlock();
2125   return true;
2126 }
2127 
/// Parses the contents of a braced list up to and including the closing token.
/// Callers in this file consume the opening token before calling this.
///
/// \param ContinueOnSemicolons If false, parsing stops at the first ';'
/// (outside of JavaScript) and false is returned; if true, the ';' is skipped
/// but still recorded as an error.
/// \param IsEnum If true and Style.AllowShortEnumsOnASingleLine is off, an
/// unwrapped line is added after each ',' and before the closing token.
/// \param ClosingBraceKind The token kind that terminates the list.
/// \returns true if the closing token was found and no ';' error was recorded;
/// false otherwise, including when the end of the file is reached first.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          bool IsEnum,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    // C# lambda with a block body inside the list.
    if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
        tryToParseChildBlock())
      continue;
    if (Style.isJavaScript()) {
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_square:
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.isJavaScript()) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->setBlockKind(BK_BracedInit);
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // '<' opens a nested list only for Proto or when this list itself is
      // delimited by '<' ... '>'.
      if (Style.Language == FormatStyle::LK_Proto ||
          ClosingBraceKind == tok::greater) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.isJavaScript()) {
        nextToken();
        break;
      }
      HasError = true;
      // Returns false here, leaving the ';' unconsumed.
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // Reached the end of the file without seeing the closing token.
  return false;
}
2218 
/// \brief Parses a pair of parentheses (and everything between them).
///
/// Returns after consuming the matching ')', or without consuming anything
/// further when a '}' or the end of the file is reached first.
///
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
/// double ampersands. This only counts for the current parens scope.
void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
  assert(FormatTok->is(tok::l_paren) && "'(' expected.");
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      // Nested parentheses are parsed without AmpAmpTokenType.
      parseParens();
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      // "@{" is parsed as a braced list.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::equal:
      // Fat arrows have token kind tok::equal; in C# they may introduce a
      // child block.
      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
        tryToParseChildBlock();
      else
        nextToken();
      break;
    case tok::kw_class:
      if (Style.isJavaScript())
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      if (Style.isJavaScript() &&
          (FormatTok->is(Keywords.kw_function) ||
           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    case tok::kw_requires: {
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }
    case tok::ampamp:
      if (AmpAmpTokenType != TT_Unknown)
        FormatTok->setFinalizedType(AmpAmpTokenType);
      LLVM_FALLTHROUGH;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
2288 
2289 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2290   if (!LambdaIntroducer) {
2291     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2292     if (tryToParseLambda())
2293       return;
2294   }
2295   do {
2296     switch (FormatTok->Tok.getKind()) {
2297     case tok::l_paren:
2298       parseParens();
2299       break;
2300     case tok::r_square:
2301       nextToken();
2302       return;
2303     case tok::r_brace:
2304       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2305       return;
2306     case tok::l_square:
2307       parseSquare();
2308       break;
2309     case tok::l_brace: {
2310       if (!tryToParseBracedList())
2311         parseChildBlock();
2312       break;
2313     }
2314     case tok::at:
2315       nextToken();
2316       if (FormatTok->is(tok::l_brace)) {
2317         nextToken();
2318         parseBracedList();
2319       }
2320       break;
2321     default:
2322       nextToken();
2323       break;
2324     }
2325   } while (!eof());
2326 }
2327 
2328 void UnwrappedLineParser::keepAncestorBraces() {
2329   if (!Style.RemoveBracesLLVM)
2330     return;
2331 
2332   const int MaxNestingLevels = 2;
2333   const int Size = NestedTooDeep.size();
2334   if (Size >= MaxNestingLevels)
2335     NestedTooDeep[Size - MaxNestingLevels] = true;
2336   NestedTooDeep.push_back(false);
2337 }
2338 
2339 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2340   for (const auto &Token : llvm::reverse(Line.Tokens))
2341     if (Token.Tok->isNot(tok::comment))
2342       return Token.Tok;
2343 
2344   return nullptr;
2345 }
2346 
/// Parses the body of a control statement that is not enclosed in braces.
///
/// The current line is finished, and the body is parsed as a single structural
/// element one indentation level deeper. When Style.InsertBraces is enabled
/// (and no preprocessor directive is involved), BraceCount is updated on the
/// token before the body and on the last token of the body.
/// NOTE(review): BraceCount presumably tells a later pass how many braces to
/// insert at those tokens -- confirm against its consumer.
///
/// \param CheckEOF If true and the end of the file follows the body, the
/// current line is finished before the level is restored.
void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
  FormatToken *Tok = nullptr;

  if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
      PreprocessorDirectives.empty()) {
    // Mark the last non-comment token in front of the body with -1, unless it
    // already carries that mark; in that case nothing is recorded for this
    // body.
    Tok = getLastNonComment(*Line);
    assert(Tok);
    if (Tok->BraceCount < 0) {
      assert(Tok->BraceCount == -1);
      Tok = nullptr;
    } else {
      Tok->BraceCount = -1;
    }
  }

  addUnwrappedLine();
  ++Line->Level;
  parseStructuralElement();

  if (Tok) {
    assert(!Line->InPPDirective);
    Tok = nullptr;
    // Find the most recent line outside a preprocessor directive that has a
    // non-comment token, and bump the counter on that line's last token.
    for (const auto &L : llvm::reverse(*CurrentLines)) {
      if (!L.InPPDirective && getLastNonComment(L)) {
        Tok = L.Tokens.back().Tok;
        break;
      }
    }
    assert(Tok);
    ++Tok->BraceCount;
  }

  if (CheckEOF && FormatTok->is(tok::eof))
    addUnwrappedLine();

  --Line->Level;
}
2384 
2385 static void markOptionalBraces(FormatToken *LeftBrace) {
2386   if (!LeftBrace)
2387     return;
2388 
2389   assert(LeftBrace->is(tok::l_brace));
2390 
2391   FormatToken *RightBrace = LeftBrace->MatchingParen;
2392   if (!RightBrace) {
2393     assert(!LeftBrace->Optional);
2394     return;
2395   }
2396 
2397   assert(RightBrace->is(tok::r_brace));
2398   assert(RightBrace->MatchingParen == LeftBrace);
2399   assert(LeftBrace->Optional == RightBrace->Optional);
2400 
2401   LeftBrace->Optional = true;
2402   RightBrace->Optional = true;
2403 }
2404 
2405 void UnwrappedLineParser::handleAttributes() {
2406   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2407   if (FormatTok->is(TT_AttributeMacro))
2408     nextToken();
2409   handleCppAttributes();
2410 }
2411 
2412 bool UnwrappedLineParser::handleCppAttributes() {
2413   // Handle [[likely]] / [[unlikely]] attributes.
2414   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2415     parseSquare();
2416     return true;
2417   }
2418   return false;
2419 }
2420 
/// Parses an `if` statement together with its `else` / `else if` branches.
///
/// When Style.RemoveBracesLLVM is enabled, this also decides whether the
/// braces of the if and else blocks may be removed and marks them optional if
/// so.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is enabled, receives
/// the kind of statement that was parsed (IfOnly, IfElse or IfElseIf).
/// \param KeepBraces If true, the braces of this statement are never marked
/// optional; the value is also passed on to a chained `else if`.
/// \returns nullptr if Style.RemoveBracesLLVM is disabled; otherwise the '{'
/// of the if block, or nullptr if the if body has no braces.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces) {
  assert(FormatTok->is(tok::kw_if) && "'if' expected");
  nextToken();
  // Accepts `if !consteval`, `if consteval`, `if constexpr (...)` as well as
  // an identifier between `if` and the condition.
  if (FormatTok->is(tok::exclaim))
    nextToken();
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren))
      parseParens();
  }
  handleAttributes();

  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (FormatTok->is(tok::l_brace)) {
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    IfBlockKind = parseBlock();
    // Without BeforeElse wrapping, the line is only finished later if no
    // `else` follows.
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    parseUnbracedBody();
  }

  bool KeepIfBraces = false;
  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if the '{' has no matching '}', if the nesting below is
    // too deep, or if the block parsed as an if without else or an
    // if-else-if chain.
    KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Reset the flag so that it reflects only the else branch from here on.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (FormatTok->is(tok::l_brace)) {
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      if (parseBlock() == IfStmtKind::IfOnly)
        Kind = IfStmtKind::IfElseIf;
      addUnwrappedLine();
    } else if (FormatTok->is(tok::kw_if)) {
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      // A comment between `else` and `if` forces the `if` onto its own,
      // indented line.
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take this statement's entry off the stack while the chained `if` is
        // parsed and put it back afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace =
          parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    if (Style.RemoveBracesLLVM)
      KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below only concerns brace removal.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  const bool KeepElseBraces =
      (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();

  NestedTooDeep.pop_back();

  if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: unlink the pair.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2536 
2537 void UnwrappedLineParser::parseTryCatch() {
2538   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2539   nextToken();
2540   bool NeedsUnwrappedLine = false;
2541   if (FormatTok->is(tok::colon)) {
2542     // We are in a function try block, what comes is an initializer list.
2543     nextToken();
2544 
2545     // In case identifiers were removed by clang-tidy, what might follow is
2546     // multiple commas in sequence - before the first identifier.
2547     while (FormatTok->is(tok::comma))
2548       nextToken();
2549 
2550     while (FormatTok->is(tok::identifier)) {
2551       nextToken();
2552       if (FormatTok->is(tok::l_paren))
2553         parseParens();
2554       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2555           FormatTok->is(tok::l_brace)) {
2556         do {
2557           nextToken();
2558         } while (!FormatTok->is(tok::r_brace));
2559         nextToken();
2560       }
2561 
2562       // In case identifiers were removed by clang-tidy, what might follow is
2563       // multiple commas in sequence - after the first identifier.
2564       while (FormatTok->is(tok::comma))
2565         nextToken();
2566     }
2567   }
2568   // Parse try with resource.
2569   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2570     parseParens();
2571 
2572   keepAncestorBraces();
2573 
2574   if (FormatTok->is(tok::l_brace)) {
2575     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2576     parseBlock();
2577     if (Style.BraceWrapping.BeforeCatch)
2578       addUnwrappedLine();
2579     else
2580       NeedsUnwrappedLine = true;
2581   } else if (!FormatTok->is(tok::kw_catch)) {
2582     // The C++ standard requires a compound-statement after a try.
2583     // If there's none, we try to assume there's a structuralElement
2584     // and try to continue.
2585     addUnwrappedLine();
2586     ++Line->Level;
2587     parseStructuralElement();
2588     --Line->Level;
2589   }
2590   while (true) {
2591     if (FormatTok->is(tok::at))
2592       nextToken();
2593     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2594                              tok::kw___finally) ||
2595           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2596            FormatTok->is(Keywords.kw_finally)) ||
2597           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2598            FormatTok->isObjCAtKeyword(tok::objc_finally))))
2599       break;
2600     nextToken();
2601     while (FormatTok->isNot(tok::l_brace)) {
2602       if (FormatTok->is(tok::l_paren)) {
2603         parseParens();
2604         continue;
2605       }
2606       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2607         if (Style.RemoveBracesLLVM)
2608           NestedTooDeep.pop_back();
2609         return;
2610       }
2611       nextToken();
2612     }
2613     NeedsUnwrappedLine = false;
2614     Line->MustBeDeclaration = false;
2615     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2616     parseBlock();
2617     if (Style.BraceWrapping.BeforeCatch)
2618       addUnwrappedLine();
2619     else
2620       NeedsUnwrappedLine = true;
2621   }
2622 
2623   if (Style.RemoveBracesLLVM)
2624     NestedTooDeep.pop_back();
2625 
2626   if (NeedsUnwrappedLine)
2627     addUnwrappedLine();
2628 }
2629 
2630 void UnwrappedLineParser::parseNamespace() {
2631   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2632          "'namespace' expected");
2633 
2634   const FormatToken &InitialToken = *FormatTok;
2635   nextToken();
2636   if (InitialToken.is(TT_NamespaceMacro)) {
2637     parseParens();
2638   } else {
2639     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2640                               tok::l_square, tok::period, tok::l_paren) ||
2641            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2642       if (FormatTok->is(tok::l_square))
2643         parseSquare();
2644       else if (FormatTok->is(tok::l_paren))
2645         parseParens();
2646       else
2647         nextToken();
2648   }
2649   if (FormatTok->is(tok::l_brace)) {
2650     if (ShouldBreakBeforeBrace(Style, InitialToken))
2651       addUnwrappedLine();
2652 
2653     unsigned AddLevels =
2654         Style.NamespaceIndentation == FormatStyle::NI_All ||
2655                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2656                  DeclarationScopeStack.size() > 1)
2657             ? 1u
2658             : 0u;
2659     bool ManageWhitesmithsBraces =
2660         AddLevels == 0u &&
2661         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2662 
2663     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2664     // the whole block.
2665     if (ManageWhitesmithsBraces)
2666       ++Line->Level;
2667 
2668     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2669                /*MunchSemi=*/true,
2670                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2671 
2672     // Munch the semicolon after a namespace. This is more common than one would
2673     // think. Putting the semicolon into its own line is very ugly.
2674     if (FormatTok->is(tok::semi))
2675       nextToken();
2676 
2677     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2678 
2679     if (ManageWhitesmithsBraces)
2680       --Line->Level;
2681   }
2682   // FIXME: Add error handling.
2683 }
2684 
2685 void UnwrappedLineParser::parseNew() {
2686   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2687   nextToken();
2688 
2689   if (Style.isCSharp()) {
2690     do {
2691       if (FormatTok->is(tok::l_brace))
2692         parseBracedList();
2693 
2694       if (FormatTok->isOneOf(tok::semi, tok::comma))
2695         return;
2696 
2697       nextToken();
2698     } while (!eof());
2699   }
2700 
2701   if (Style.Language != FormatStyle::LK_Java)
2702     return;
2703 
2704   // In Java, we can parse everything up to the parens, which aren't optional.
2705   do {
2706     // There should not be a ;, { or } before the new's open paren.
2707     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2708       return;
2709 
2710     // Consume the parens.
2711     if (FormatTok->is(tok::l_paren)) {
2712       parseParens();
2713 
2714       // If there is a class body of an anonymous class, consume that as child.
2715       if (FormatTok->is(tok::l_brace))
2716         parseChildBlock();
2717       return;
2718     }
2719     nextToken();
2720   } while (!eof());
2721 }
2722 
2723 void UnwrappedLineParser::parseLoopBody(bool TryRemoveBraces,
2724                                         bool WrapRightBrace) {
2725   keepAncestorBraces();
2726 
2727   if (FormatTok->is(tok::l_brace)) {
2728     FormatToken *LeftBrace = FormatTok;
2729     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2730     parseBlock();
2731     if (TryRemoveBraces) {
2732       assert(!NestedTooDeep.empty());
2733       if (!NestedTooDeep.back())
2734         markOptionalBraces(LeftBrace);
2735     }
2736     if (WrapRightBrace)
2737       addUnwrappedLine();
2738   } else {
2739     parseUnbracedBody();
2740   }
2741 
2742   if (TryRemoveBraces)
2743     NestedTooDeep.pop_back();
2744 }
2745 
2746 void UnwrappedLineParser::parseForOrWhileLoop() {
2747   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2748          "'for', 'while' or foreach macro expected");
2749   nextToken();
2750   // JS' for await ( ...
2751   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2752     nextToken();
2753   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2754     nextToken();
2755   if (FormatTok->is(tok::l_paren))
2756     parseParens();
2757 
2758   parseLoopBody(Style.RemoveBracesLLVM, true);
2759 }
2760 
2761 void UnwrappedLineParser::parseDoWhile() {
2762   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2763   nextToken();
2764 
2765   parseLoopBody(false, Style.BraceWrapping.BeforeWhile);
2766 
2767   // FIXME: Add error handling.
2768   if (!FormatTok->is(tok::kw_while)) {
2769     addUnwrappedLine();
2770     return;
2771   }
2772 
2773   // If in Whitesmiths mode, the line with the while() needs to be indented
2774   // to the same level as the block.
2775   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2776     ++Line->Level;
2777 
2778   nextToken();
2779   parseStructuralElement();
2780 }
2781 
2782 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2783   nextToken();
2784   unsigned OldLineLevel = Line->Level;
2785   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2786     --Line->Level;
2787   if (LeftAlignLabel)
2788     Line->Level = 0;
2789 
2790   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2791       FormatTok->is(tok::l_brace)) {
2792 
2793     CompoundStatementIndenter Indenter(this, Line->Level,
2794                                        Style.BraceWrapping.AfterCaseLabel,
2795                                        Style.BraceWrapping.IndentBraces);
2796     parseBlock();
2797     if (FormatTok->is(tok::kw_break)) {
2798       if (Style.BraceWrapping.AfterControlStatement ==
2799           FormatStyle::BWACS_Always) {
2800         addUnwrappedLine();
2801         if (!Style.IndentCaseBlocks &&
2802             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2803           ++Line->Level;
2804       }
2805       parseStructuralElement();
2806     }
2807     addUnwrappedLine();
2808   } else {
2809     if (FormatTok->is(tok::semi))
2810       nextToken();
2811     addUnwrappedLine();
2812   }
2813   Line->Level = OldLineLevel;
2814   if (FormatTok->isNot(tok::l_brace)) {
2815     parseStructuralElement();
2816     addUnwrappedLine();
2817   }
2818 }
2819 
2820 void UnwrappedLineParser::parseCaseLabel() {
2821   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2822 
2823   // FIXME: fix handling of complex expressions here.
2824   do {
2825     nextToken();
2826   } while (!eof() && !FormatTok->is(tok::colon));
2827   parseLabel();
2828 }
2829 
2830 void UnwrappedLineParser::parseSwitch() {
2831   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
2832   nextToken();
2833   if (FormatTok->is(tok::l_paren))
2834     parseParens();
2835 
2836   keepAncestorBraces();
2837 
2838   if (FormatTok->is(tok::l_brace)) {
2839     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2840     parseBlock();
2841     addUnwrappedLine();
2842   } else {
2843     addUnwrappedLine();
2844     ++Line->Level;
2845     parseStructuralElement();
2846     --Line->Level;
2847   }
2848 
2849   if (Style.RemoveBracesLLVM)
2850     NestedTooDeep.pop_back();
2851 }
2852 
2853 // Operators that can follow a C variable.
2854 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
2855   switch (kind) {
2856   case tok::ampamp:
2857   case tok::ampequal:
2858   case tok::arrow:
2859   case tok::caret:
2860   case tok::caretequal:
2861   case tok::comma:
2862   case tok::ellipsis:
2863   case tok::equal:
2864   case tok::equalequal:
2865   case tok::exclaim:
2866   case tok::exclaimequal:
2867   case tok::greater:
2868   case tok::greaterequal:
2869   case tok::greatergreater:
2870   case tok::greatergreaterequal:
2871   case tok::l_paren:
2872   case tok::l_square:
2873   case tok::less:
2874   case tok::lessequal:
2875   case tok::lessless:
2876   case tok::lesslessequal:
2877   case tok::minus:
2878   case tok::minusequal:
2879   case tok::minusminus:
2880   case tok::percent:
2881   case tok::percentequal:
2882   case tok::period:
2883   case tok::pipe:
2884   case tok::pipeequal:
2885   case tok::pipepipe:
2886   case tok::plus:
2887   case tok::plusequal:
2888   case tok::plusplus:
2889   case tok::question:
2890   case tok::r_brace:
2891   case tok::r_paren:
2892   case tok::r_square:
2893   case tok::semi:
2894   case tok::slash:
2895   case tok::slashequal:
2896   case tok::star:
2897   case tok::starequal:
2898     return true;
2899   default:
2900     return false;
2901   }
2902 }
2903 
2904 void UnwrappedLineParser::parseAccessSpecifier() {
2905   FormatToken *AccessSpecifierCandidate = FormatTok;
2906   nextToken();
2907   // Understand Qt's slots.
2908   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2909     nextToken();
2910   // Otherwise, we don't know what it is, and we'd better keep the next token.
2911   if (FormatTok->is(tok::colon)) {
2912     nextToken();
2913     addUnwrappedLine();
2914   } else if (!FormatTok->is(tok::coloncolon) &&
2915              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
2916     // Not a variable name nor namespace name.
2917     addUnwrappedLine();
2918   } else if (AccessSpecifierCandidate) {
2919     // Consider the access specifier to be a C identifier.
2920     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2921   }
2922 }
2923 
2924 /// \brief Parses a concept definition.
2925 /// \pre The current token has to be the concept keyword.
2926 ///
2927 /// Returns if either the concept has been completely parsed, or if it detects
2928 /// that the concept definition is incorrect.
2929 void UnwrappedLineParser::parseConcept() {
2930   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
2931   nextToken();
2932   if (!FormatTok->is(tok::identifier))
2933     return;
2934   nextToken();
2935   if (!FormatTok->is(tok::equal))
2936     return;
2937   nextToken();
2938   parseConstraintExpression();
2939   if (FormatTok->is(tok::semi))
2940     nextToken();
2941   addUnwrappedLine();
2942 }
2943 
2944 /// \brief Parses a requires, decides if it is a clause or an expression.
2945 /// \pre The current token has to be the requires keyword.
2946 /// \returns true if it parsed a clause.
2947 bool clang::format::UnwrappedLineParser::parseRequires() {
2948   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
2949   auto RequiresToken = FormatTok;
2950 
2951   // We try to guess if it is a requires clause, or a requires expression. For
2952   // that we first consume the keyword and check the next token.
2953   nextToken();
2954 
2955   switch (FormatTok->Tok.getKind()) {
2956   case tok::l_brace:
2957     // This can only be an expression, never a clause.
2958     parseRequiresExpression(RequiresToken);
2959     return false;
2960   case tok::l_paren:
2961     // Clauses and expression can start with a paren, it's unclear what we have.
2962     break;
2963   default:
2964     // All other tokens can only be a clause.
2965     parseRequiresClause(RequiresToken);
2966     return true;
2967   }
2968 
2969   // Looking forward we would have to decide if there are function declaration
2970   // like arguments to the requires expression:
2971   // requires (T t) {
2972   // Or there is a constraint expression for the requires clause:
2973   // requires (C<T> && ...
2974 
2975   // But first let's look behind.
2976   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
2977 
2978   if (!PreviousNonComment ||
2979       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
2980     // If there is no token, or an expression left brace, we are a requires
2981     // clause within a requires expression.
2982     parseRequiresClause(RequiresToken);
2983     return true;
2984   }
2985 
2986   switch (PreviousNonComment->Tok.getKind()) {
2987   case tok::greater:
2988   case tok::r_paren:
2989   case tok::kw_noexcept:
2990   case tok::kw_const:
2991     // This is a requires clause.
2992     parseRequiresClause(RequiresToken);
2993     return true;
2994   case tok::amp:
2995   case tok::ampamp: {
2996     // This can be either:
2997     // if (... && requires (T t) ...)
2998     // Or
2999     // void member(...) && requires (C<T> ...
3000     // We check the one token before that for a const:
3001     // void member(...) const && requires (C<T> ...
3002     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3003     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3004       parseRequiresClause(RequiresToken);
3005       return true;
3006     }
3007     break;
3008   }
3009   default:
3010     // It's an expression.
3011     parseRequiresExpression(RequiresToken);
3012     return false;
3013   }
3014 
3015   // Now we look forward and try to check if the paren content is a parameter
3016   // list. The parameters can be cv-qualified and contain references or
3017   // pointers.
3018   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3019   // of stuff: typename, const, *, &, &&, ::, identifiers.
3020 
3021   int NextTokenOffset = 1;
3022   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3023   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3024     ++NextTokenOffset;
3025     NextToken = Tokens->peekNextToken(NextTokenOffset);
3026   };
3027 
3028   bool FoundType = false;
3029   bool LastWasColonColon = false;
3030   int OpenAngles = 0;
3031 
3032   for (; NextTokenOffset < 50; PeekNext()) {
3033     switch (NextToken->Tok.getKind()) {
3034     case tok::kw_volatile:
3035     case tok::kw_const:
3036     case tok::comma:
3037       parseRequiresExpression(RequiresToken);
3038       return false;
3039     case tok::r_paren:
3040     case tok::pipepipe:
3041       parseRequiresClause(RequiresToken);
3042       return true;
3043     case tok::eof:
3044       // Break out of the loop.
3045       NextTokenOffset = 50;
3046       break;
3047     case tok::coloncolon:
3048       LastWasColonColon = true;
3049       break;
3050     case tok::identifier:
3051       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3052         parseRequiresExpression(RequiresToken);
3053         return false;
3054       }
3055       FoundType = true;
3056       LastWasColonColon = false;
3057       break;
3058     case tok::less:
3059       ++OpenAngles;
3060       break;
3061     case tok::greater:
3062       --OpenAngles;
3063       break;
3064     default:
3065       if (NextToken->isSimpleTypeSpecifier()) {
3066         parseRequiresExpression(RequiresToken);
3067         return false;
3068       }
3069       break;
3070     }
3071   }
3072 
3073   // This seems to be a complicated expression, just assume it's a clause.
3074   parseRequiresClause(RequiresToken);
3075   return true;
3076 }
3077 
3078 /// \brief Parses a requires clause.
3079 /// \param RequiresToken The requires keyword token, which starts this clause.
3080 /// \pre We need to be on the next token after the requires keyword.
3081 /// \sa parseRequiresExpression
3082 ///
3083 /// Returns if it either has finished parsing the clause, or it detects, that
3084 /// the clause is incorrect.
3085 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3086   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3087   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3088 
3089   // If there is no previous token, we are within a requires expression,
3090   // otherwise we will always have the template or function declaration in front
3091   // of it.
3092   bool InRequiresExpression =
3093       !RequiresToken->Previous ||
3094       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3095 
3096   RequiresToken->setFinalizedType(InRequiresExpression
3097                                       ? TT_RequiresClauseInARequiresExpression
3098                                       : TT_RequiresClause);
3099 
3100   parseConstraintExpression();
3101 
3102   if (!InRequiresExpression)
3103     FormatTok->Previous->ClosesRequiresClause = true;
3104 }
3105 
3106 /// \brief Parses a requires expression.
3107 /// \param RequiresToken The requires keyword token, which starts this clause.
3108 /// \pre We need to be on the next token after the requires keyword.
3109 /// \sa parseRequiresClause
3110 ///
3111 /// Returns if it either has finished parsing the expression, or it detects,
3112 /// that the expression is incorrect.
3113 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3114   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3115   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3116 
3117   RequiresToken->setFinalizedType(TT_RequiresExpression);
3118 
3119   if (FormatTok->is(tok::l_paren)) {
3120     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3121     parseParens();
3122   }
3123 
3124   if (FormatTok->is(tok::l_brace)) {
3125     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3126     parseChildBlock(/*CanContainBracedList=*/false,
3127                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3128   }
3129 }
3130 
/// \brief Parses a constraint expression.
///
/// This is either the definition of a concept, or the body of a requires
/// clause. It returns, when the parsing is complete, or the expression is
/// incorrect.
///
/// The function consumes one construct per loop iteration, dispatching on the
/// kind of the current token, and stops at the first token that cannot continue
/// a constraint expression (or at the end of the input).
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;
  do {
    // Fetch the permission for this iteration and reset it for the next one;
    // the cases below re-enable it where a lambda may follow.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A 'requires' inside of a constraint expression is always parsed as a
      // requires expression.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      break;

    case tok::l_square:
      // Either a lambda, or (if no lambda is allowed here or parsing one fails)
      // the end of the constraint expression.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    // These tokens end the constraint expression.
    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      // Conjunction or disjunction of constraints; an operand follows, which
      // may be a lambda.
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Commas and comments do not change whether a lambda may follow: carry
      // the current permission over to the next iteration.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
    case tok::kw_decltype:
      LambdaNextTimeAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // A named cast has to be followed by its '<' ... '>' part, which is
      // parsed like a braced list that is closed by '>'.
      nextToken();
      if (!FormatTok->is(tok::less))
        return;

      nextToken();
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    case tok::kw_bool:
      // bool is only allowed if it is directly followed by a paren for a cast:
      // concept C = bool(...);
      // and bool is the only type, all other types as cast must be inside a
      // cast to bool and thus are handled by the other cases.
      nextToken();
      if (FormatTok->isNot(tok::l_paren))
        return;
      parseParens();
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        // After any other token the identifier no longer belongs to the
        // constraint expression.
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    }
  } while (!eof());
}
3274 
3275 bool UnwrappedLineParser::parseEnum() {
3276   const FormatToken &InitialToken = *FormatTok;
3277 
3278   // Won't be 'enum' for NS_ENUMs.
3279   if (FormatTok->is(tok::kw_enum))
3280     nextToken();
3281 
3282   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3283   // declarations. An "enum" keyword followed by a colon would be a syntax
3284   // error and thus assume it is just an identifier.
3285   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3286     return false;
3287 
3288   // In protobuf, "enum" can be used as a field name.
3289   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3290     return false;
3291 
3292   // Eat up enum class ...
3293   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3294     nextToken();
3295 
3296   while (FormatTok->Tok.getIdentifierInfo() ||
3297          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3298                             tok::greater, tok::comma, tok::question)) {
3299     nextToken();
3300     // We can have macros or attributes in between 'enum' and the enum name.
3301     if (FormatTok->is(tok::l_paren))
3302       parseParens();
3303     if (FormatTok->is(tok::identifier)) {
3304       nextToken();
3305       // If there are two identifiers in a row, this is likely an elaborate
3306       // return type. In Java, this can be "implements", etc.
3307       if (Style.isCpp() && FormatTok->is(tok::identifier))
3308         return false;
3309     }
3310   }
3311 
3312   // Just a declaration or something is wrong.
3313   if (FormatTok->isNot(tok::l_brace))
3314     return true;
3315   FormatTok->setFinalizedType(TT_EnumLBrace);
3316   FormatTok->setBlockKind(BK_Block);
3317 
3318   if (Style.Language == FormatStyle::LK_Java) {
3319     // Java enums are different.
3320     parseJavaEnumBody();
3321     return true;
3322   }
3323   if (Style.Language == FormatStyle::LK_Proto) {
3324     parseBlock(/*MustBeDeclaration=*/true);
3325     return true;
3326   }
3327 
3328   if (!Style.AllowShortEnumsOnASingleLine &&
3329       ShouldBreakBeforeBrace(Style, InitialToken))
3330     addUnwrappedLine();
3331   // Parse enum body.
3332   nextToken();
3333   if (!Style.AllowShortEnumsOnASingleLine) {
3334     addUnwrappedLine();
3335     Line->Level += 1;
3336   }
3337   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3338                                    /*IsEnum=*/true);
3339   if (!Style.AllowShortEnumsOnASingleLine)
3340     Line->Level -= 1;
3341   if (HasError) {
3342     if (FormatTok->is(tok::semi))
3343       nextToken();
3344     addUnwrappedLine();
3345   }
3346   return true;
3347 
3348   // There is no addUnwrappedLine() here so that we fall through to parsing a
3349   // structural element afterwards. Thus, in "enum A {} n, m;",
3350   // "} n, m;" will end up in one unwrapped line.
3351 }
3352 
3353 bool UnwrappedLineParser::parseStructLike() {
3354   // parseRecord falls through and does not yet add an unwrapped line as a
3355   // record declaration or definition can start a structural element.
3356   parseRecord();
3357   // This does not apply to Java, JavaScript and C#.
3358   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3359       Style.isCSharp()) {
3360     if (FormatTok->is(tok::semi))
3361       nextToken();
3362     addUnwrappedLine();
3363     return true;
3364   }
3365   return false;
3366 }
3367 
3368 namespace {
3369 // A class used to set and restore the Token position when peeking
3370 // ahead in the token source.
3371 class ScopedTokenPosition {
3372   unsigned StoredPosition;
3373   FormatTokenSource *Tokens;
3374 
3375 public:
3376   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3377     assert(Tokens && "Tokens expected to not be null");
3378     StoredPosition = Tokens->getPosition();
3379   }
3380 
3381   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3382 };
3383 } // namespace
3384 
3385 // Look to see if we have [[ by looking ahead, if
3386 // its not then rewind to the original position.
3387 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3388   ScopedTokenPosition AutoPosition(Tokens);
3389   FormatToken *Tok = Tokens->getNextToken();
3390   // We already read the first [ check for the second.
3391   if (!Tok->is(tok::l_square))
3392     return false;
3393   // Double check that the attribute is just something
3394   // fairly simple.
3395   while (Tok->isNot(tok::eof)) {
3396     if (Tok->is(tok::r_square))
3397       break;
3398     Tok = Tokens->getNextToken();
3399   }
3400   if (Tok->is(tok::eof))
3401     return false;
3402   Tok = Tokens->getNextToken();
3403   if (!Tok->is(tok::r_square))
3404     return false;
3405   Tok = Tokens->getNextToken();
3406   if (Tok->is(tok::semi))
3407     return false;
3408   return true;
3409 }
3410 
3411 void UnwrappedLineParser::parseJavaEnumBody() {
3412   // Determine whether the enum is simple, i.e. does not have a semicolon or
3413   // constants with class bodies. Simple enums can be formatted like braced
3414   // lists, contracted to a single line, etc.
3415   unsigned StoredPosition = Tokens->getPosition();
3416   bool IsSimple = true;
3417   FormatToken *Tok = Tokens->getNextToken();
3418   while (!Tok->is(tok::eof)) {
3419     if (Tok->is(tok::r_brace))
3420       break;
3421     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3422       IsSimple = false;
3423       break;
3424     }
3425     // FIXME: This will also mark enums with braces in the arguments to enum
3426     // constants as "not simple". This is probably fine in practice, though.
3427     Tok = Tokens->getNextToken();
3428   }
3429   FormatTok = Tokens->setPosition(StoredPosition);
3430 
3431   if (IsSimple) {
3432     nextToken();
3433     parseBracedList();
3434     addUnwrappedLine();
3435     return;
3436   }
3437 
3438   // Parse the body of a more complex enum.
3439   // First add a line for everything up to the "{".
3440   nextToken();
3441   addUnwrappedLine();
3442   ++Line->Level;
3443 
3444   // Parse the enum constants.
3445   while (FormatTok) {
3446     if (FormatTok->is(tok::l_brace)) {
3447       // Parse the constant's class body.
3448       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3449                  /*MunchSemi=*/false);
3450     } else if (FormatTok->is(tok::l_paren)) {
3451       parseParens();
3452     } else if (FormatTok->is(tok::comma)) {
3453       nextToken();
3454       addUnwrappedLine();
3455     } else if (FormatTok->is(tok::semi)) {
3456       nextToken();
3457       addUnwrappedLine();
3458       break;
3459     } else if (FormatTok->is(tok::r_brace)) {
3460       addUnwrappedLine();
3461       break;
3462     } else {
3463       nextToken();
3464     }
3465   }
3466 
3467   // Parse the class body after the enum's ";" if any.
3468   parseLevel(/*HasOpeningBrace=*/true, /*CanContainBracedList=*/true);
3469   nextToken();
3470   --Line->Level;
3471   addUnwrappedLine();
3472 }
3473 
3474 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3475   const FormatToken &InitialToken = *FormatTok;
3476   nextToken();
3477 
3478   // The actual identifier can be a nested name specifier, and in macros
3479   // it is often token-pasted.
3480   // An [[attribute]] can be before the identifier.
3481   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3482                             tok::kw___attribute, tok::kw___declspec,
3483                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3484          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3485           FormatTok->isOneOf(tok::period, tok::comma))) {
3486     if (Style.isJavaScript() &&
3487         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3488       // JavaScript/TypeScript supports inline object types in
3489       // extends/implements positions:
3490       //     class Foo implements {bar: number} { }
3491       nextToken();
3492       if (FormatTok->is(tok::l_brace)) {
3493         tryToParseBracedList();
3494         continue;
3495       }
3496     }
3497     bool IsNonMacroIdentifier =
3498         FormatTok->is(tok::identifier) &&
3499         FormatTok->TokenText != FormatTok->TokenText.upper();
3500     nextToken();
3501     // We can have macros or attributes in between 'class' and the class name.
3502     if (!IsNonMacroIdentifier) {
3503       if (FormatTok->is(tok::l_paren)) {
3504         parseParens();
3505       } else if (FormatTok->is(TT_AttributeSquare)) {
3506         parseSquare();
3507         // Consume the closing TT_AttributeSquare.
3508         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3509           nextToken();
3510       }
3511     }
3512   }
3513 
3514   // Note that parsing away template declarations here leads to incorrectly
3515   // accepting function declarations as record declarations.
3516   // In general, we cannot solve this problem. Consider:
3517   // class A<int> B() {}
3518   // which can be a function definition or a class definition when B() is a
3519   // macro. If we find enough real-world cases where this is a problem, we
3520   // can parse for the 'template' keyword in the beginning of the statement,
3521   // and thus rule out the record production in case there is no template
3522   // (this would still leave us with an ambiguity between template function
3523   // and class declarations).
3524   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3525     do {
3526       if (FormatTok->is(tok::l_brace)) {
3527         calculateBraceTypes(/*ExpectClassBody=*/true);
3528         if (!tryToParseBracedList())
3529           break;
3530       }
3531       if (FormatTok->is(tok::l_square)) {
3532         FormatToken *Previous = FormatTok->Previous;
3533         if (!Previous ||
3534             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3535           // Don't try parsing a lambda if we had a closing parenthesis before,
3536           // it was probably a pointer to an array: int (*)[].
3537           if (!tryToParseLambda())
3538             break;
3539         } else {
3540           parseSquare();
3541           continue;
3542         }
3543       }
3544       if (FormatTok->is(tok::semi))
3545         return;
3546       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3547         addUnwrappedLine();
3548         nextToken();
3549         parseCSharpGenericTypeConstraint();
3550         break;
3551       }
3552       nextToken();
3553     } while (!eof());
3554   }
3555 
3556   auto GetBraceType = [](const FormatToken &RecordTok) {
3557     switch (RecordTok.Tok.getKind()) {
3558     case tok::kw_class:
3559       return TT_ClassLBrace;
3560     case tok::kw_struct:
3561       return TT_StructLBrace;
3562     case tok::kw_union:
3563       return TT_UnionLBrace;
3564     default:
3565       // Useful for e.g. interface.
3566       return TT_RecordLBrace;
3567     }
3568   };
3569   if (FormatTok->is(tok::l_brace)) {
3570     FormatTok->setFinalizedType(GetBraceType(InitialToken));
3571     if (ParseAsExpr) {
3572       parseChildBlock();
3573     } else {
3574       if (ShouldBreakBeforeBrace(Style, InitialToken))
3575         addUnwrappedLine();
3576 
3577       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3578       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3579     }
3580   }
3581   // There is no addUnwrappedLine() here so that we fall through to parsing a
3582   // structural element afterwards. Thus, in "class A {} n, m;",
3583   // "} n, m;" will end up in one unwrapped line.
3584 }
3585 
3586 void UnwrappedLineParser::parseObjCMethod() {
3587   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3588          "'(' or identifier expected.");
3589   do {
3590     if (FormatTok->is(tok::semi)) {
3591       nextToken();
3592       addUnwrappedLine();
3593       return;
3594     } else if (FormatTok->is(tok::l_brace)) {
3595       if (Style.BraceWrapping.AfterFunction)
3596         addUnwrappedLine();
3597       parseBlock();
3598       addUnwrappedLine();
3599       return;
3600     } else {
3601       nextToken();
3602     }
3603   } while (!eof());
3604 }
3605 
3606 void UnwrappedLineParser::parseObjCProtocolList() {
3607   assert(FormatTok->is(tok::less) && "'<' expected.");
3608   do {
3609     nextToken();
3610     // Early exit in case someone forgot a close angle.
3611     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3612         FormatTok->isObjCAtKeyword(tok::objc_end))
3613       return;
3614   } while (!eof() && FormatTok->isNot(tok::greater));
3615   nextToken(); // Skip '>'.
3616 }
3617 
3618 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3619   do {
3620     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3621       nextToken();
3622       addUnwrappedLine();
3623       break;
3624     }
3625     if (FormatTok->is(tok::l_brace)) {
3626       parseBlock();
3627       // In ObjC interfaces, nothing should be following the "}".
3628       addUnwrappedLine();
3629     } else if (FormatTok->is(tok::r_brace)) {
3630       // Ignore stray "}". parseStructuralElement doesn't consume them.
3631       nextToken();
3632       addUnwrappedLine();
3633     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3634       nextToken();
3635       parseObjCMethod();
3636     } else {
3637       parseStructuralElement();
3638     }
3639   } while (!eof());
3640 }
3641 
3642 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3643   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3644          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3645   nextToken();
3646   nextToken(); // interface name
3647 
3648   // @interface can be followed by a lightweight generic
3649   // specialization list, then either a base class or a category.
3650   if (FormatTok->is(tok::less))
3651     parseObjCLightweightGenerics();
3652   if (FormatTok->is(tok::colon)) {
3653     nextToken();
3654     nextToken(); // base class name
3655     // The base class can also have lightweight generics applied to it.
3656     if (FormatTok->is(tok::less))
3657       parseObjCLightweightGenerics();
3658   } else if (FormatTok->is(tok::l_paren))
3659     // Skip category, if present.
3660     parseParens();
3661 
3662   if (FormatTok->is(tok::less))
3663     parseObjCProtocolList();
3664 
3665   if (FormatTok->is(tok::l_brace)) {
3666     if (Style.BraceWrapping.AfterObjCDeclaration)
3667       addUnwrappedLine();
3668     parseBlock(/*MustBeDeclaration=*/true);
3669   }
3670 
3671   // With instance variables, this puts '}' on its own line.  Without instance
3672   // variables, this ends the @interface line.
3673   addUnwrappedLine();
3674 
3675   parseObjCUntilAtEnd();
3676 }
3677 
3678 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3679   assert(FormatTok->is(tok::less));
3680   // Unlike protocol lists, generic parameterizations support
3681   // nested angles:
3682   //
3683   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3684   //     NSObject <NSCopying, NSSecureCoding>
3685   //
3686   // so we need to count how many open angles we have left.
3687   unsigned NumOpenAngles = 1;
3688   do {
3689     nextToken();
3690     // Early exit in case someone forgot a close angle.
3691     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3692         FormatTok->isObjCAtKeyword(tok::objc_end))
3693       break;
3694     if (FormatTok->is(tok::less))
3695       ++NumOpenAngles;
3696     else if (FormatTok->is(tok::greater)) {
3697       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3698       --NumOpenAngles;
3699     }
3700   } while (!eof() && NumOpenAngles != 0);
3701   nextToken(); // Skip '>'.
3702 }
3703 
3704 // Returns true for the declaration/definition form of @protocol,
3705 // false for the expression form.
3706 bool UnwrappedLineParser::parseObjCProtocol() {
3707   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3708   nextToken();
3709 
3710   if (FormatTok->is(tok::l_paren))
3711     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3712     return false;
3713 
3714   // The definition/declaration form,
3715   // @protocol Foo
3716   // - (int)someMethod;
3717   // @end
3718 
3719   nextToken(); // protocol name
3720 
3721   if (FormatTok->is(tok::less))
3722     parseObjCProtocolList();
3723 
3724   // Check for protocol declaration.
3725   if (FormatTok->is(tok::semi)) {
3726     nextToken();
3727     addUnwrappedLine();
3728     return true;
3729   }
3730 
3731   addUnwrappedLine();
3732   parseObjCUntilAtEnd();
3733   return true;
3734 }
3735 
3736 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3737   bool IsImport = FormatTok->is(Keywords.kw_import);
3738   assert(IsImport || FormatTok->is(tok::kw_export));
3739   nextToken();
3740 
3741   // Consume the "default" in "export default class/function".
3742   if (FormatTok->is(tok::kw_default))
3743     nextToken();
3744 
3745   // Consume "async function", "function" and "default function", so that these
3746   // get parsed as free-standing JS functions, i.e. do not require a trailing
3747   // semicolon.
3748   if (FormatTok->is(Keywords.kw_async))
3749     nextToken();
3750   if (FormatTok->is(Keywords.kw_function)) {
3751     nextToken();
3752     return;
3753   }
3754 
3755   // For imports, `export *`, `export {...}`, consume the rest of the line up
3756   // to the terminating `;`. For everything else, just return and continue
3757   // parsing the structural element, i.e. the declaration or expression for
3758   // `export default`.
3759   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3760       !FormatTok->isStringLiteral())
3761     return;
3762 
3763   while (!eof()) {
3764     if (FormatTok->is(tok::semi))
3765       return;
3766     if (Line->Tokens.empty()) {
3767       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3768       // import statement should terminate.
3769       return;
3770     }
3771     if (FormatTok->is(tok::l_brace)) {
3772       FormatTok->setBlockKind(BK_Block);
3773       nextToken();
3774       parseBracedList();
3775     } else {
3776       nextToken();
3777     }
3778   }
3779 }
3780 
3781 void UnwrappedLineParser::parseStatementMacro() {
3782   nextToken();
3783   if (FormatTok->is(tok::l_paren))
3784     parseParens();
3785   if (FormatTok->is(tok::semi))
3786     nextToken();
3787   addUnwrappedLine();
3788 }
3789 
3790 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3791                                                  StringRef Prefix = "") {
3792   llvm::dbgs() << Prefix << "Line(" << Line.Level
3793                << ", FSC=" << Line.FirstStartColumn << ")"
3794                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3795   for (const auto &Node : Line.Tokens) {
3796     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3797                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3798                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3799   }
3800   for (const auto &Node : Line.Tokens)
3801     for (const auto &ChildNode : Node.Children)
3802       printDebugInfo(ChildNode, "\nChild: ");
3803 
3804   llvm::dbgs() << "\n";
3805 }
3806 
3807 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3808   if (Line->Tokens.empty())
3809     return;
3810   LLVM_DEBUG({
3811     if (CurrentLines == &Lines)
3812       printDebugInfo(*Line);
3813   });
3814 
3815   // If this line closes a block when in Whitesmiths mode, remember that
3816   // information so that the level can be decreased after the line is added.
3817   // This has to happen after the addition of the line since the line itself
3818   // needs to be indented.
3819   bool ClosesWhitesmithsBlock =
3820       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3821       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3822 
3823   CurrentLines->push_back(std::move(*Line));
3824   Line->Tokens.clear();
3825   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3826   Line->FirstStartColumn = 0;
3827 
3828   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3829     --Line->Level;
3830   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3831     CurrentLines->append(
3832         std::make_move_iterator(PreprocessorDirectives.begin()),
3833         std::make_move_iterator(PreprocessorDirectives.end()));
3834     PreprocessorDirectives.clear();
3835   }
3836   // Disconnect the current token from the last token on the previous line.
3837   FormatTok->Previous = nullptr;
3838 }
3839 
// Returns true if the current token (FormatTok) is the end-of-file token.
bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
3841 
3842 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3843   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3844          FormatTok.NewlinesBefore > 0;
3845 }
3846 
3847 // Checks if \p FormatTok is a line comment that continues the line comment
3848 // section on \p Line.
3849 static bool
3850 continuesLineCommentSection(const FormatToken &FormatTok,
3851                             const UnwrappedLine &Line,
3852                             const llvm::Regex &CommentPragmasRegex) {
3853   if (Line.Tokens.empty())
3854     return false;
3855 
3856   StringRef IndentContent = FormatTok.TokenText;
3857   if (FormatTok.TokenText.startswith("//") ||
3858       FormatTok.TokenText.startswith("/*"))
3859     IndentContent = FormatTok.TokenText.substr(2);
3860   if (CommentPragmasRegex.match(IndentContent))
3861     return false;
3862 
3863   // If Line starts with a line comment, then FormatTok continues the comment
3864   // section if its original column is greater or equal to the original start
3865   // column of the line.
3866   //
3867   // Define the min column token of a line as follows: if a line ends in '{' or
3868   // contains a '{' followed by a line comment, then the min column token is
3869   // that '{'. Otherwise, the min column token of the line is the first token of
3870   // the line.
3871   //
3872   // If Line starts with a token other than a line comment, then FormatTok
3873   // continues the comment section if its original column is greater than the
3874   // original start column of the min column token of the line.
3875   //
3876   // For example, the second line comment continues the first in these cases:
3877   //
3878   // // first line
3879   // // second line
3880   //
3881   // and:
3882   //
3883   // // first line
3884   //  // second line
3885   //
3886   // and:
3887   //
3888   // int i; // first line
3889   //  // second line
3890   //
3891   // and:
3892   //
3893   // do { // first line
3894   //      // second line
3895   //   int i;
3896   // } while (true);
3897   //
3898   // and:
3899   //
3900   // enum {
3901   //   a, // first line
3902   //    // second line
3903   //   b
3904   // };
3905   //
3906   // The second line comment doesn't continue the first in these cases:
3907   //
3908   //   // first line
3909   //  // second line
3910   //
3911   // and:
3912   //
3913   // int i; // first line
3914   // // second line
3915   //
3916   // and:
3917   //
3918   // do { // first line
3919   //   // second line
3920   //   int i;
3921   // } while (true);
3922   //
3923   // and:
3924   //
3925   // enum {
3926   //   a, // first line
3927   //   // second line
3928   // };
3929   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3930 
3931   // Scan for '{//'. If found, use the column of '{' as a min column for line
3932   // comment section continuation.
3933   const FormatToken *PreviousToken = nullptr;
3934   for (const UnwrappedLineNode &Node : Line.Tokens) {
3935     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3936         isLineComment(*Node.Tok)) {
3937       MinColumnToken = PreviousToken;
3938       break;
3939     }
3940     PreviousToken = Node.Tok;
3941 
3942     // Grab the last newline preceding a token in this unwrapped line.
3943     if (Node.Tok->NewlinesBefore > 0)
3944       MinColumnToken = Node.Tok;
3945   }
3946   if (PreviousToken && PreviousToken->is(tok::l_brace))
3947     MinColumnToken = PreviousToken;
3948 
3949   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3950                               MinColumnToken);
3951 }
3952 
3953 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3954   bool JustComments = Line->Tokens.empty();
3955   for (FormatToken *Tok : CommentsBeforeNextToken) {
3956     // Line comments that belong to the same line comment section are put on the
3957     // same line since later we might want to reflow content between them.
3958     // Additional fine-grained breaking of line comment sections is controlled
3959     // by the class BreakableLineCommentSection in case it is desirable to keep
3960     // several line comment sections in the same unwrapped line.
3961     //
3962     // FIXME: Consider putting separate line comment sections as children to the
3963     // unwrapped line instead.
3964     Tok->ContinuesLineCommentSection =
3965         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3966     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3967       addUnwrappedLine();
3968     pushToken(Tok);
3969   }
3970   if (NewlineBeforeNext && JustComments)
3971     addUnwrappedLine();
3972   CommentsBeforeNextToken.clear();
3973 }
3974 
3975 void UnwrappedLineParser::nextToken(int LevelDifference) {
3976   if (eof())
3977     return;
3978   flushComments(isOnNewLine(*FormatTok));
3979   pushToken(FormatTok);
3980   FormatToken *Previous = FormatTok;
3981   if (!Style.isJavaScript())
3982     readToken(LevelDifference);
3983   else
3984     readTokenWithJavaScriptASI();
3985   FormatTok->Previous = Previous;
3986 }
3987 
3988 void UnwrappedLineParser::distributeComments(
3989     const SmallVectorImpl<FormatToken *> &Comments,
3990     const FormatToken *NextTok) {
3991   // Whether or not a line comment token continues a line is controlled by
3992   // the method continuesLineCommentSection, with the following caveat:
3993   //
3994   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3995   // that each comment line from the trail is aligned with the next token, if
3996   // the next token exists. If a trail exists, the beginning of the maximal
3997   // trail is marked as a start of a new comment section.
3998   //
3999   // For example in this code:
4000   //
4001   // int a; // line about a
4002   //   // line 1 about b
4003   //   // line 2 about b
4004   //   int b;
4005   //
4006   // the two lines about b form a maximal trail, so there are two sections, the
4007   // first one consisting of the single comment "// line about a" and the
4008   // second one consisting of the next two comments.
4009   if (Comments.empty())
4010     return;
4011   bool ShouldPushCommentsInCurrentLine = true;
4012   bool HasTrailAlignedWithNextToken = false;
4013   unsigned StartOfTrailAlignedWithNextToken = 0;
4014   if (NextTok) {
4015     // We are skipping the first element intentionally.
4016     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4017       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4018         HasTrailAlignedWithNextToken = true;
4019         StartOfTrailAlignedWithNextToken = i;
4020       }
4021     }
4022   }
4023   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4024     FormatToken *FormatTok = Comments[i];
4025     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4026       FormatTok->ContinuesLineCommentSection = false;
4027     } else {
4028       FormatTok->ContinuesLineCommentSection =
4029           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4030     }
4031     if (!FormatTok->ContinuesLineCommentSection &&
4032         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
4033       ShouldPushCommentsInCurrentLine = false;
4034     if (ShouldPushCommentsInCurrentLine)
4035       pushToken(FormatTok);
4036     else
4037       CommentsBeforeNextToken.push_back(FormatTok);
4038   }
4039 }
4040 
// Reads tokens from the token source into FormatTok until a non-comment token
// is found, handling everything that can sit in between: merge-conflict
// markers, preprocessor directives, tokens in unreachable preprocessor
// branches and comments. Comments are collected and handed to
// distributeComments() together with the token that follows them.
//
// \p LevelDifference is added to the level of the line on which a
// preprocessor directive encountered here is parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  SmallVector<FormatToken *, 1> Comments;
  bool PreviousWasComment = false;
  bool FirstNonCommentOnLine = false;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // Conflict markers are not returned to the caller: each one updates the
    // conditional-compilation state, and the token following it is forced to
    // start on a new line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart)
        conditionalCompilationStart(/*Unreachable=*/false);
      else if (FormatTok->getType() == TT_ConflictAlternative)
        conditionalCompilationAlternative();
      else if (FormatTok->getType() == TT_ConflictEnd)
        conditionalCompilationEnd();
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Decides whether Tok is the first non-comment token on its line, given
    // the previous answer and whether the preceding token was a comment.
    auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
                                      const FormatToken &Tok,
                                      bool PreviousWasComment) {
      auto IsFirstOnLine = [](const FormatToken &Tok) {
        return Tok.HasUnescapedNewline || Tok.IsFirst;
      };

      // Consider preprocessor directives preceded by block comments as first
      // on line.
      if (PreviousWasComment)
        return FirstNonCommentOnLine || IsFirstOnLine(Tok);
      return IsFirstOnLine(Tok);
    };

    FirstNonCommentOnLine = IsFirstNonCommentOnLine(
        FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    PreviousWasComment = FormatTok->is(tok::comment);

    // A '#' that is first on its line (ignoring comments) starts a
    // preprocessor directive, unless we are already inside one. This is a
    // loop because parsePPDirective() may leave FormatTok at the '#' of a
    // directly following directive.
    while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
           FirstNonCommentOnLine) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      // NOTE(review): ScopedLineState is defined elsewhere; presumably it
      // saves the current line state and restores it when BlockState goes out
      // of scope at the end of this iteration - confirm.
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
      // Re-evaluate the state for the token that follows the directive.
      PreviousWasComment = FormatTok->is(tok::comment);
      FirstNonCommentOnLine = IsFirstNonCommentOnLine(
          FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    }

    // Skip tokens inside an unreachable preprocessor branch, except while a
    // directive itself is being parsed. Note that `continue` still evaluates
    // the loop condition, so skipping stops at the end of the file.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective)
      continue;

    // A non-comment token ends the search: place the collected comments
    // relative to it and hand it to the caller via FormatTok.
    if (!FormatTok->is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // The loop ended at the end of the file without returning a token above;
  // distribute whatever comments are still pending, with no next token.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
4120 
4121 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4122   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4123   if (MustBreakBeforeNextToken) {
4124     Line->Tokens.back().Tok->MustBreakBefore = true;
4125     MustBreakBeforeNextToken = false;
4126   }
4127 }
4128 
4129 } // end namespace format
4130 } // end namespace clang
4131