1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 #include <utility>
23 
24 #define DEBUG_TYPE "format-parser"
25 
26 namespace clang {
27 namespace format {
28 
29 class FormatTokenSource {
30 public:
31   virtual ~FormatTokenSource() {}
32 
33   // Returns the next token in the token stream.
34   virtual FormatToken *getNextToken() = 0;
35 
36   // Returns the token preceding the token returned by the last call to
37   // getNextToken() in the token stream, or nullptr if no such token exists.
38   virtual FormatToken *getPreviousToken() = 0;
39 
40   // Returns the token that would be returned by the next call to
41   // getNextToken().
42   virtual FormatToken *peekNextToken() = 0;
43 
44   // Returns the token that would be returned after the next N calls to
45   // getNextToken(). N needs to be greater than zero, and small enough that
46   // there are still tokens. Check for tok::eof with N-1 before calling it with
47   // N.
48   virtual FormatToken *peekNextToken(int N) = 0;
49 
50   // Returns whether we are at the end of the file.
51   // This can be different from whether getNextToken() returned an eof token
52   // when the FormatTokenSource is a view on a part of the token stream.
53   virtual bool isEOF() = 0;
54 
55   // Gets the current position in the token stream, to be used by setPosition().
56   virtual unsigned getPosition() = 0;
57 
58   // Resets the token stream to the state it was in when getPosition() returned
59   // Position, and return the token at that position in the stream.
60   virtual FormatToken *setPosition(unsigned Position) = 0;
61 };
62 
63 namespace {
64 
65 class ScopedDeclarationState {
66 public:
67   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
68                          bool MustBeDeclaration)
69       : Line(Line), Stack(Stack) {
70     Line.MustBeDeclaration = MustBeDeclaration;
71     Stack.push_back(MustBeDeclaration);
72   }
73   ~ScopedDeclarationState() {
74     Stack.pop_back();
75     if (!Stack.empty())
76       Line.MustBeDeclaration = Stack.back();
77     else
78       Line.MustBeDeclaration = true;
79   }
80 
81 private:
82   UnwrappedLine &Line;
83   llvm::BitVector &Stack;
84 };
85 
86 static bool isLineComment(const FormatToken &FormatTok) {
87   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
88 }
89 
90 // Checks if \p FormatTok is a line comment that continues the line comment
91 // \p Previous. The original column of \p MinColumnToken is used to determine
92 // whether \p FormatTok is indented enough to the right to continue \p Previous.
93 static bool continuesLineComment(const FormatToken &FormatTok,
94                                  const FormatToken *Previous,
95                                  const FormatToken *MinColumnToken) {
96   if (!Previous || !MinColumnToken)
97     return false;
98   unsigned MinContinueColumn =
99       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
100   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
101          isLineComment(*Previous) &&
102          FormatTok.OriginalColumn >= MinContinueColumn;
103 }
104 
105 class ScopedMacroState : public FormatTokenSource {
106 public:
107   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
108                    FormatToken *&ResetToken)
109       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
110         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
111         Token(nullptr), PreviousToken(nullptr) {
112     FakeEOF.Tok.startToken();
113     FakeEOF.Tok.setKind(tok::eof);
114     TokenSource = this;
115     Line.Level = 0;
116     Line.InPPDirective = true;
117   }
118 
119   ~ScopedMacroState() override {
120     TokenSource = PreviousTokenSource;
121     ResetToken = Token;
122     Line.InPPDirective = false;
123     Line.Level = PreviousLineLevel;
124   }
125 
126   FormatToken *getNextToken() override {
127     // The \c UnwrappedLineParser guards against this by never calling
128     // \c getNextToken() after it has encountered the first eof token.
129     assert(!eof());
130     PreviousToken = Token;
131     Token = PreviousTokenSource->getNextToken();
132     if (eof())
133       return &FakeEOF;
134     return Token;
135   }
136 
137   FormatToken *getPreviousToken() override {
138     return PreviousTokenSource->getPreviousToken();
139   }
140 
141   FormatToken *peekNextToken() override {
142     if (eof())
143       return &FakeEOF;
144     return PreviousTokenSource->peekNextToken();
145   }
146 
147   FormatToken *peekNextToken(int N) override {
148     assert(N > 0);
149     if (eof())
150       return &FakeEOF;
151     return PreviousTokenSource->peekNextToken(N);
152   }
153 
154   bool isEOF() override { return PreviousTokenSource->isEOF(); }
155 
156   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
157 
158   FormatToken *setPosition(unsigned Position) override {
159     PreviousToken = nullptr;
160     Token = PreviousTokenSource->setPosition(Position);
161     return Token;
162   }
163 
164 private:
165   bool eof() {
166     return Token && Token->HasUnescapedNewline &&
167            !continuesLineComment(*Token, PreviousToken,
168                                  /*MinColumnToken=*/PreviousToken);
169   }
170 
171   FormatToken FakeEOF;
172   UnwrappedLine &Line;
173   FormatTokenSource *&TokenSource;
174   FormatToken *&ResetToken;
175   unsigned PreviousLineLevel;
176   FormatTokenSource *PreviousTokenSource;
177 
178   FormatToken *Token;
179   FormatToken *PreviousToken;
180 };
181 
182 } // end anonymous namespace
183 
184 class ScopedLineState {
185 public:
186   ScopedLineState(UnwrappedLineParser &Parser,
187                   bool SwitchToPreprocessorLines = false)
188       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
189     if (SwitchToPreprocessorLines)
190       Parser.CurrentLines = &Parser.PreprocessorDirectives;
191     else if (!Parser.Line->Tokens.empty())
192       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
193     PreBlockLine = std::move(Parser.Line);
194     Parser.Line = std::make_unique<UnwrappedLine>();
195     Parser.Line->Level = PreBlockLine->Level;
196     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
197   }
198 
199   ~ScopedLineState() {
200     if (!Parser.Line->Tokens.empty())
201       Parser.addUnwrappedLine();
202     assert(Parser.Line->Tokens.empty());
203     Parser.Line = std::move(PreBlockLine);
204     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
205       Parser.MustBreakBeforeNextToken = true;
206     Parser.CurrentLines = OriginalLines;
207   }
208 
209 private:
210   UnwrappedLineParser &Parser;
211 
212   std::unique_ptr<UnwrappedLine> PreBlockLine;
213   SmallVectorImpl<UnwrappedLine> *OriginalLines;
214 };
215 
216 class CompoundStatementIndenter {
217 public:
218   CompoundStatementIndenter(UnwrappedLineParser *Parser,
219                             const FormatStyle &Style, unsigned &LineLevel)
220       : CompoundStatementIndenter(Parser, LineLevel,
221                                   Style.BraceWrapping.AfterControlStatement,
222                                   Style.BraceWrapping.IndentBraces) {}
223   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
224                             bool WrapBrace, bool IndentBrace)
225       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
226     if (WrapBrace)
227       Parser->addUnwrappedLine();
228     if (IndentBrace)
229       ++LineLevel;
230   }
231   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
232 
233 private:
234   unsigned &LineLevel;
235   unsigned OldLineLevel;
236 };
237 
238 namespace {
239 
240 class IndexedTokenSource : public FormatTokenSource {
241 public:
242   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
243       : Tokens(Tokens), Position(-1) {}
244 
245   FormatToken *getNextToken() override {
246     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
247       LLVM_DEBUG({
248         llvm::dbgs() << "Next ";
249         dbgToken(Position);
250       });
251       return Tokens[Position];
252     }
253     ++Position;
254     LLVM_DEBUG({
255       llvm::dbgs() << "Next ";
256       dbgToken(Position);
257     });
258     return Tokens[Position];
259   }
260 
261   FormatToken *getPreviousToken() override {
262     return Position > 0 ? Tokens[Position - 1] : nullptr;
263   }
264 
265   FormatToken *peekNextToken() override {
266     int Next = Position + 1;
267     LLVM_DEBUG({
268       llvm::dbgs() << "Peeking ";
269       dbgToken(Next);
270     });
271     return Tokens[Next];
272   }
273 
274   FormatToken *peekNextToken(int N) override {
275     assert(N > 0);
276     int Next = Position + N;
277     LLVM_DEBUG({
278       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
279       dbgToken(Next);
280     });
281     return Tokens[Next];
282   }
283 
284   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
285 
286   unsigned getPosition() override {
287     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
288     assert(Position >= 0);
289     return Position;
290   }
291 
292   FormatToken *setPosition(unsigned P) override {
293     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
294     Position = P;
295     return Tokens[Position];
296   }
297 
298   void reset() { Position = -1; }
299 
300 private:
301   void dbgToken(int Position, llvm::StringRef Indent = "") {
302     FormatToken *Tok = Tokens[Position];
303     llvm::dbgs() << Indent << "[" << Position
304                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
305                  << ", Macro: " << !!Tok->MacroCtx << "\n";
306   }
307 
308   ArrayRef<FormatToken *> Tokens;
309   int Position;
310 };
311 
312 } // end anonymous namespace
313 
// Constructs a parser over \p Tokens that reports finished lines to
// \p Callback. The parser starts with a fresh empty line, collects into
// Lines, and has no token source yet (Tokens is nullptr until parse() installs
// one). Include-guard detection starts out rejected when
// Style.IndentPPDirectives is PPDIS_None and in the initial state otherwise.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
327 
328 void UnwrappedLineParser::reset() {
329   PPBranchLevel = -1;
330   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
331                      ? IG_Rejected
332                      : IG_Inited;
333   IncludeGuardToken = nullptr;
334   Line.reset(new UnwrappedLine);
335   CommentsBeforeNextToken.clear();
336   FormatTok = nullptr;
337   MustBreakBeforeNextToken = false;
338   PreprocessorDirectives.clear();
339   CurrentLines = &Lines;
340   DeclarationScopeStack.clear();
341   NestedTooDeep.clear();
342   PPStack.clear();
343   Line->FirstStartColumn = FirstStartColumn;
344 }
345 
346 void UnwrappedLineParser::parse() {
347   IndexedTokenSource TokenSource(AllTokens);
348   Line->FirstStartColumn = FirstStartColumn;
349   do {
350     LLVM_DEBUG(llvm::dbgs() << "----\n");
351     reset();
352     Tokens = &TokenSource;
353     TokenSource.reset();
354 
355     readToken();
356     parseFile();
357 
358     // If we found an include guard then all preprocessor directives (other than
359     // the guard) are over-indented by one.
360     if (IncludeGuard == IG_Found)
361       for (auto &Line : Lines)
362         if (Line.InPPDirective && Line.Level > 0)
363           --Line.Level;
364 
365     // Create line with eof token.
366     pushToken(FormatTok);
367     addUnwrappedLine();
368 
369     for (const UnwrappedLine &Line : Lines)
370       Callback.consumeUnwrappedLine(Line);
371 
372     Callback.finishRun();
373     Lines.clear();
374     while (!PPLevelBranchIndex.empty() &&
375            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
376       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
377       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
378     }
379     if (!PPLevelBranchIndex.empty()) {
380       ++PPLevelBranchIndex.back();
381       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
382       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
383     }
384   } while (!PPLevelBranchIndex.empty());
385 }
386 
387 void UnwrappedLineParser::parseFile() {
388   // The top-level context in a file always has declarations, except for pre-
389   // processor directives and JavaScript files.
390   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
391   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
392                                           MustBeDeclaration);
393   if (Style.Language == FormatStyle::LK_TextProto)
394     parseBracedList();
395   else
396     parseLevel(/*HasOpeningBrace=*/false, /*CanContainBracedList=*/true);
397   // Make sure to format the remaining tokens.
398   //
399   // LK_TextProto is special since its top-level is parsed as the body of a
400   // braced list, which does not necessarily have natural line separators such
401   // as a semicolon. Comments after the last entry that have been determined to
402   // not belong to that line, as in:
403   //   key: value
404   //   // endfile comment
405   // do not have a chance to be put on a line of their own until this point.
406   // Here we add this newline before end-of-file comments.
407   if (Style.Language == FormatStyle::LK_TextProto &&
408       !CommentsBeforeNextToken.empty())
409     addUnwrappedLine();
410   flushComments(true);
411   addUnwrappedLine();
412 }
413 
414 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
415   do {
416     switch (FormatTok->Tok.getKind()) {
417     case tok::l_brace:
418       return;
419     default:
420       if (FormatTok->is(Keywords.kw_where)) {
421         addUnwrappedLine();
422         nextToken();
423         parseCSharpGenericTypeConstraint();
424         break;
425       }
426       nextToken();
427       break;
428     }
429   } while (!eof());
430 }
431 
432 void UnwrappedLineParser::parseCSharpAttribute() {
433   int UnpairedSquareBrackets = 1;
434   do {
435     switch (FormatTok->Tok.getKind()) {
436     case tok::r_square:
437       nextToken();
438       --UnpairedSquareBrackets;
439       if (UnpairedSquareBrackets == 0) {
440         addUnwrappedLine();
441         return;
442       }
443       break;
444     case tok::l_square:
445       ++UnpairedSquareBrackets;
446       nextToken();
447       break;
448     default:
449       nextToken();
450       break;
451     }
452   } while (!eof());
453 }
454 
455 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
456   if (!Lines.empty() && Lines.back().InPPDirective)
457     return true;
458 
459   const FormatToken *Previous = Tokens->getPreviousToken();
460   return Previous && Previous->is(tok::comment) &&
461          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
462 }
/// \brief Parses a level, that is, the sequence of comments, blocks, labels
/// and structural elements found up to the closing brace of the level (when
/// \p HasOpeningBrace is set) or up to the end of the token stream.
/// \param HasOpeningBrace If that level is started by an opening brace.
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param IfKind Forwarded to parseStructuralElement() for elements parsed
/// through the default path.
/// \param NextLBracesType The type for left brace found in this level.
/// \returns true if a simple block, or false otherwise. (A simple block has a
/// single statement.) true is only ever returned when Style.RemoveBracesLLVM
/// is set and the level ends at its closing brace with exactly one statement,
/// no label, no preceding comment or preprocessor directive, and no comment
/// following the closing brace on the same line.
bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace,
                                     bool CanContainBracedList,
                                     IfStmtKind *IfKind,
                                     TokenType NextLBracesType) {
  // Braces nested inside a compound requirement are typed as braced lists;
  // otherwise nested levels get no forced brace type.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Evaluated once on entry; when brace removal is disabled this is simply
  // true, which makes the simple-block check below fail.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    // Attribute macros are skipped without being counted as statements.
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched like '{' and '}'.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. HasLabel is
    // only handed out while it is still false, so once set it stays set.
    auto ParseDefault = [this, HasOpeningBrace, IfKind, NextLevelLBracesType,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(IfKind, !HasOpeningBrace, NextLevelLBracesType,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      else if (FormatTok->Previous &&
               FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*UnindentWhitesmithBraces=*/false,
                 CanContainBracedList,
                 /*NextLBracesType=*/NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace) {
        // This brace closes the level; the brace itself is left for the
        // caller. Decide whether the level was a simple block.
        if (!Style.RemoveBracesLLVM)
          return false;
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective())
          return false;
        // A comment directly after the brace on the same line disqualifies
        // the block.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // Closing brace without a matching opening brace in this level: consume
      // it and keep going.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look past comments for the token after 'default', then rewind.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // The level is raised at most once per parseLevel() call, on the first
      // switch label encountered.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());
  return false;
}
586 
587 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
588   // We'll parse forward through the tokens until we hit
589   // a closing brace or eof - note that getNextToken() will
590   // parse macros, so this will magically work inside macro
591   // definitions, too.
592   unsigned StoredPosition = Tokens->getPosition();
593   FormatToken *Tok = FormatTok;
594   const FormatToken *PrevTok = Tok->Previous;
595   // Keep a stack of positions of lbrace tokens. We will
596   // update information about whether an lbrace starts a
597   // braced init list or a different block during the loop.
598   SmallVector<FormatToken *, 8> LBraceStack;
599   assert(Tok->is(tok::l_brace));
600   do {
601     // Get next non-comment token.
602     FormatToken *NextTok;
603     unsigned ReadTokens = 0;
604     do {
605       NextTok = Tokens->getNextToken();
606       ++ReadTokens;
607     } while (NextTok->is(tok::comment));
608 
609     switch (Tok->Tok.getKind()) {
610     case tok::l_brace:
611       if (Style.isJavaScript() && PrevTok) {
612         if (PrevTok->isOneOf(tok::colon, tok::less))
613           // A ':' indicates this code is in a type, or a braced list
614           // following a label in an object literal ({a: {b: 1}}).
615           // A '<' could be an object used in a comparison, but that is nonsense
616           // code (can never return true), so more likely it is a generic type
617           // argument (`X<{a: string; b: number}>`).
618           // The code below could be confused by semicolons between the
619           // individual members in a type member list, which would normally
620           // trigger BK_Block. In both cases, this must be parsed as an inline
621           // braced init.
622           Tok->setBlockKind(BK_BracedInit);
623         else if (PrevTok->is(tok::r_paren))
624           // `) { }` can only occur in function or method declarations in JS.
625           Tok->setBlockKind(BK_Block);
626       } else {
627         Tok->setBlockKind(BK_Unknown);
628       }
629       LBraceStack.push_back(Tok);
630       break;
631     case tok::r_brace:
632       if (LBraceStack.empty())
633         break;
634       if (LBraceStack.back()->is(BK_Unknown)) {
635         bool ProbablyBracedList = false;
636         if (Style.Language == FormatStyle::LK_Proto) {
637           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
638         } else {
639           // Skip NextTok over preprocessor lines, otherwise we may not
640           // properly diagnose the block as a braced intializer
641           // if the comma separator appears after the pp directive.
642           while (NextTok->is(tok::hash)) {
643             ScopedMacroState MacroState(*Line, Tokens, NextTok);
644             do {
645               NextTok = Tokens->getNextToken();
646               ++ReadTokens;
647             } while (NextTok->isNot(tok::eof));
648           }
649 
650           // Using OriginalColumn to distinguish between ObjC methods and
651           // binary operators is a bit hacky.
652           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
653                                   NextTok->OriginalColumn == 0;
654 
655           // Try to detect a braced list. Note that regardless how we mark inner
656           // braces here, we will overwrite the BlockKind later if we parse a
657           // braced list (where all blocks inside are by default braced lists),
658           // or when we explicitly detect blocks (for example while parsing
659           // lambdas).
660 
661           // If we already marked the opening brace as braced list, the closing
662           // must also be part of it.
663           ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);
664 
665           ProbablyBracedList = ProbablyBracedList ||
666                                (Style.isJavaScript() &&
667                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
668                                                  Keywords.kw_as));
669           ProbablyBracedList = ProbablyBracedList ||
670                                (Style.isCpp() && NextTok->is(tok::l_paren));
671 
672           // If there is a comma, semicolon or right paren after the closing
673           // brace, we assume this is a braced initializer list.
674           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
675           // braced list in JS.
676           ProbablyBracedList =
677               ProbablyBracedList ||
678               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
679                                tok::r_paren, tok::r_square, tok::l_brace,
680                                tok::ellipsis);
681 
682           ProbablyBracedList =
683               ProbablyBracedList ||
684               (NextTok->is(tok::identifier) &&
685                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
686 
687           ProbablyBracedList = ProbablyBracedList ||
688                                (NextTok->is(tok::semi) &&
689                                 (!ExpectClassBody || LBraceStack.size() != 1));
690 
691           ProbablyBracedList =
692               ProbablyBracedList ||
693               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
694 
695           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
696             // We can have an array subscript after a braced init
697             // list, but C++11 attributes are expected after blocks.
698             NextTok = Tokens->getNextToken();
699             ++ReadTokens;
700             ProbablyBracedList = NextTok->isNot(tok::l_square);
701           }
702         }
703         if (ProbablyBracedList) {
704           Tok->setBlockKind(BK_BracedInit);
705           LBraceStack.back()->setBlockKind(BK_BracedInit);
706         } else {
707           Tok->setBlockKind(BK_Block);
708           LBraceStack.back()->setBlockKind(BK_Block);
709         }
710       }
711       LBraceStack.pop_back();
712       break;
713     case tok::identifier:
714       if (!Tok->is(TT_StatementMacro))
715         break;
716       LLVM_FALLTHROUGH;
717     case tok::at:
718     case tok::semi:
719     case tok::kw_if:
720     case tok::kw_while:
721     case tok::kw_for:
722     case tok::kw_switch:
723     case tok::kw_try:
724     case tok::kw___try:
725       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
726         LBraceStack.back()->setBlockKind(BK_Block);
727       break;
728     default:
729       break;
730     }
731     PrevTok = Tok;
732     Tok = NextTok;
733   } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
734 
735   // Assume other blocks for all unclosed opening braces.
736   for (FormatToken *LBrace : LBraceStack)
737     if (LBrace->is(BK_Unknown))
738       LBrace->setBlockKind(BK_Block);
739 
740   FormatTok = Tokens->setPosition(StoredPosition);
741 }
742 
// Folds the std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  // The mix is computed from the incoming seed and then xor-ed into it.
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
748 
749 size_t UnwrappedLineParser::computePPHash() const {
750   size_t h = 0;
751   for (const auto &i : PPStack) {
752     hash_combine(h, size_t(i.Kind));
753     hash_combine(h, i.Line);
754   }
755   return h;
756 }
757 
/// Parses a block that starts at the current token, which must be a '{' or a
/// macro-block-begin token, through its matching closing token.
/// \param MustBeDeclaration Declaration state installed for the block body.
/// \param AddLevels Number of levels added to the line level for the body.
/// \param MunchSemi If set, a ';' directly after the block is consumed too.
/// \param UnindentWhitesmithsBraces If set, the line level is lowered by one
/// after the opening line has been added.
/// \param CanContainBracedList Forwarded to parseLevel().
/// \param NextLBracesType Forwarded to parseLevel().
/// \returns the IfStmtKind reported by parseLevel() for the body (initialized
/// to IfStmtKind::NotIf).
UnwrappedLineParser::IfStmtKind
UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                bool MunchSemi, bool UnindentWhitesmithsBraces,
                                bool CanContainBracedList,
                                TokenType NextLBracesType) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Remember the opening token; FormatTok moves on below.
  FormatToken *Tok = FormatTok;
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Hash of the preprocessor stack at the opening brace; compared with the
  // hash at the closing brace further down.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  // A macro block opener may carry an argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock = parseLevel(
      /*HasOpeningBrace=*/true, CanContainBracedList, &IfKind, NextLBracesType);

  // The input ended before the block was closed.
  if (eof())
    return IfKind;

  // The body did not stop at the expected closing token: restore the level
  // and give up on this block.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // For a simple block, link the two braces to each other unless the token
  // before the closing brace is a non-optional '}'.
  if (SimpleBlock && Tok->is(tok::l_brace)) {
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      Tok->MatchingParen = FormatTok;
      FormatTok->MatchingParen = Tok;
    }
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Only cross-link the opening and closing lines when the preprocessor
  // stack hashes at both braces agree.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}
861 
862 static bool isGoogScope(const UnwrappedLine &Line) {
863   // FIXME: Closure-library specific stuff should not be hard-coded but be
864   // configurable.
865   if (Line.Tokens.size() < 4)
866     return false;
867   auto I = Line.Tokens.begin();
868   if (I->Tok->TokenText != "goog")
869     return false;
870   ++I;
871   if (I->Tok->isNot(tok::period))
872     return false;
873   ++I;
874   if (I->Tok->TokenText != "scope")
875     return false;
876   ++I;
877   return I->Tok->is(tok::l_paren);
878 }
879 
880 static bool isIIFE(const UnwrappedLine &Line,
881                    const AdditionalKeywords &Keywords) {
882   // Look for the start of an immediately invoked anonymous function.
883   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
884   // This is commonly done in JavaScript to create a new, anonymous scope.
885   // Example: (function() { ... })()
886   if (Line.Tokens.size() < 3)
887     return false;
888   auto I = Line.Tokens.begin();
889   if (I->Tok->isNot(tok::l_paren))
890     return false;
891   ++I;
892   if (I->Tok->isNot(Keywords.kw_function))
893     return false;
894   ++I;
895   return I->Tok->is(tok::l_paren);
896 }
897 
898 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
899                                    const FormatToken &InitialToken) {
900   tok::TokenKind Kind = InitialToken.Tok.getKind();
901   if (InitialToken.is(TT_NamespaceMacro))
902     Kind = tok::kw_namespace;
903 
904   switch (Kind) {
905   case tok::kw_namespace:
906     return Style.BraceWrapping.AfterNamespace;
907   case tok::kw_class:
908     return Style.BraceWrapping.AfterClass;
909   case tok::kw_union:
910     return Style.BraceWrapping.AfterUnion;
911   case tok::kw_struct:
912     return Style.BraceWrapping.AfterStruct;
913   case tok::kw_enum:
914     return Style.BraceWrapping.AfterEnum;
915   default:
916     return false;
917   }
918 }
919 
920 void UnwrappedLineParser::parseChildBlock(
921     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
922   FormatTok->setBlockKind(BK_Block);
923   nextToken();
924   {
925     bool SkipIndent = (Style.isJavaScript() &&
926                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
927     ScopedLineState LineState(*this);
928     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
929                                             /*MustBeDeclaration=*/false);
930     Line->Level += SkipIndent ? 0 : 1;
931     parseLevel(/*HasOpeningBrace=*/true, CanContainBracedList,
932                /*IfKind=*/nullptr, NextLBracesType);
933     flushComments(isOnNewLine(*FormatTok));
934     Line->Level -= SkipIndent ? 0 : 1;
935   }
936   nextToken();
937 }
938 
939 void UnwrappedLineParser::parsePPDirective() {
940   assert(FormatTok->is(tok::hash) && "'#' expected");
941   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
942 
943   nextToken();
944 
945   if (!FormatTok->Tok.getIdentifierInfo()) {
946     parsePPUnknown();
947     return;
948   }
949 
950   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
951   case tok::pp_define:
952     parsePPDefine();
953     return;
954   case tok::pp_if:
955     parsePPIf(/*IfDef=*/false);
956     break;
957   case tok::pp_ifdef:
958   case tok::pp_ifndef:
959     parsePPIf(/*IfDef=*/true);
960     break;
961   case tok::pp_else:
962     parsePPElse();
963     break;
964   case tok::pp_elifdef:
965   case tok::pp_elifndef:
966   case tok::pp_elif:
967     parsePPElIf();
968     break;
969   case tok::pp_endif:
970     parsePPEndIf();
971     break;
972   default:
973     parsePPUnknown();
974     break;
975   }
976 }
977 
978 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
979   size_t Line = CurrentLines->size();
980   if (CurrentLines == &PreprocessorDirectives)
981     Line += Lines.size();
982 
983   if (Unreachable ||
984       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
985     PPStack.push_back({PP_Unreachable, Line});
986   else
987     PPStack.push_back({PP_Conditional, Line});
988 }
989 
990 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
991   ++PPBranchLevel;
992   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
993   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
994     PPLevelBranchIndex.push_back(0);
995     PPLevelBranchCount.push_back(0);
996   }
997   PPChainBranchIndex.push(0);
998   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
999   conditionalCompilationCondition(Unreachable || Skip);
1000 }
1001 
1002 void UnwrappedLineParser::conditionalCompilationAlternative() {
1003   if (!PPStack.empty())
1004     PPStack.pop_back();
1005   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1006   if (!PPChainBranchIndex.empty())
1007     ++PPChainBranchIndex.top();
1008   conditionalCompilationCondition(
1009       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1010       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1011 }
1012 
1013 void UnwrappedLineParser::conditionalCompilationEnd() {
1014   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1015   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1016     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1017       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1018   }
1019   // Guard against #endif's without #if.
1020   if (PPBranchLevel > -1)
1021     --PPBranchLevel;
1022   if (!PPChainBranchIndex.empty())
1023     PPChainBranchIndex.pop();
1024   if (!PPStack.empty())
1025     PPStack.pop_back();
1026 }
1027 
1028 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1029   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1030   nextToken();
1031   bool Unreachable = false;
1032   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1033     Unreachable = true;
1034   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1035     Unreachable = true;
1036   conditionalCompilationStart(Unreachable);
1037   FormatToken *IfCondition = FormatTok;
1038   // If there's a #ifndef on the first line, and the only lines before it are
1039   // comments, it could be an include guard.
1040   bool MaybeIncludeGuard = IfNDef;
1041   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
1042     for (auto &Line : Lines) {
1043       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1044         MaybeIncludeGuard = false;
1045         IncludeGuard = IG_Rejected;
1046         break;
1047       }
1048     }
1049   --PPBranchLevel;
1050   parsePPUnknown();
1051   ++PPBranchLevel;
1052   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1053     IncludeGuard = IG_IfNdefed;
1054     IncludeGuardToken = IfCondition;
1055   }
1056 }
1057 
1058 void UnwrappedLineParser::parsePPElse() {
1059   // If a potential include guard has an #else, it's not an include guard.
1060   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1061     IncludeGuard = IG_Rejected;
1062   conditionalCompilationAlternative();
1063   if (PPBranchLevel > -1)
1064     --PPBranchLevel;
1065   parsePPUnknown();
1066   ++PPBranchLevel;
1067 }
1068 
1069 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1070 
1071 void UnwrappedLineParser::parsePPEndIf() {
1072   conditionalCompilationEnd();
1073   parsePPUnknown();
1074   // If the #endif of a potential include guard is the last thing in the file,
1075   // then we found an include guard.
1076   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1077       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1078     IncludeGuard = IG_Found;
1079 }
1080 
1081 void UnwrappedLineParser::parsePPDefine() {
1082   nextToken();
1083 
1084   if (!FormatTok->Tok.getIdentifierInfo()) {
1085     IncludeGuard = IG_Rejected;
1086     IncludeGuardToken = nullptr;
1087     parsePPUnknown();
1088     return;
1089   }
1090 
1091   if (IncludeGuard == IG_IfNdefed &&
1092       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1093     IncludeGuard = IG_Defined;
1094     IncludeGuardToken = nullptr;
1095     for (auto &Line : Lines) {
1096       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1097         IncludeGuard = IG_Rejected;
1098         break;
1099       }
1100     }
1101   }
1102 
1103   // In the context of a define, even keywords should be treated as normal
1104   // identifiers. Setting the kind to identifier is not enough, because we need
1105   // to treat additional keywords like __except as well, which are already
1106   // identifiers. Setting the identifier info to null interferes with include
1107   // guard processing above, and changes preprocessing nesting.
1108   FormatTok->Tok.setKind(tok::identifier);
1109   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1110   nextToken();
1111   if (FormatTok->Tok.getKind() == tok::l_paren &&
1112       !FormatTok->hasWhitespaceBefore())
1113     parseParens();
1114   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1115     Line->Level += PPBranchLevel + 1;
1116   addUnwrappedLine();
1117   ++Line->Level;
1118 
1119   // Errors during a preprocessor directive can only affect the layout of the
1120   // preprocessor directive, and thus we ignore them. An alternative approach
1121   // would be to use the same approach we use on the file level (no
1122   // re-indentation if there was a structural error) within the macro
1123   // definition.
1124   parseFile();
1125 }
1126 
1127 void UnwrappedLineParser::parsePPUnknown() {
1128   do {
1129     nextToken();
1130   } while (!eof());
1131   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1132     Line->Level += PPBranchLevel + 1;
1133   addUnwrappedLine();
1134 }
1135 
1136 // Here we exclude certain tokens that are not usually the first token in an
1137 // unwrapped line. This is used in attempt to distinguish macro calls without
1138 // trailing semicolons from other constructs split to several lines.
1139 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1140   // Semicolon can be a null-statement, l_square can be a start of a macro or
1141   // a C++11 attribute, but this doesn't seem to be common.
1142   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1143          Tok.isNot(TT_AttributeSquare) &&
1144          // Tokens that can only be used as binary operators and a part of
1145          // overloaded operator names.
1146          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1147          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1148          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1149          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1150          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1151          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1152          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1153          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1154          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1155          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1156          Tok.isNot(tok::lesslessequal) &&
1157          // Colon is used in labels, base class lists, initializer lists,
1158          // range-based for loops, ternary operator, but should never be the
1159          // first token in an unwrapped line.
1160          Tok.isNot(tok::colon) &&
1161          // 'noexcept' is a trailing annotation.
1162          Tok.isNot(tok::kw_noexcept);
1163 }
1164 
1165 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1166                           const FormatToken *FormatTok) {
1167   // FIXME: This returns true for C/C++ keywords like 'struct'.
1168   return FormatTok->is(tok::identifier) &&
1169          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1170           !FormatTok->isOneOf(
1171               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1172               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1173               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1174               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1175               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1176               Keywords.kw_instanceof, Keywords.kw_interface,
1177               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1178 }
1179 
1180 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1181                                  const FormatToken *FormatTok) {
1182   return FormatTok->Tok.isLiteral() ||
1183          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1184          mustBeJSIdent(Keywords, FormatTok);
1185 }
1186 
1187 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1188 // when encountered after a value (see mustBeJSIdentOrValue).
1189 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1190                            const FormatToken *FormatTok) {
1191   return FormatTok->isOneOf(
1192       tok::kw_return, Keywords.kw_yield,
1193       // conditionals
1194       tok::kw_if, tok::kw_else,
1195       // loops
1196       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1197       // switch/case
1198       tok::kw_switch, tok::kw_case,
1199       // exceptions
1200       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1201       // declaration
1202       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1203       Keywords.kw_async, Keywords.kw_function,
1204       // import/export
1205       Keywords.kw_import, tok::kw_export);
1206 }
1207 
1208 // Checks whether a token is a type in K&R C (aka C78).
1209 static bool isC78Type(const FormatToken &Tok) {
1210   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1211                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1212                      tok::identifier);
1213 }
1214 
1215 // This function checks whether a token starts the first parameter declaration
1216 // in a K&R C (aka C78) function definition, e.g.:
1217 //   int f(a, b)
1218 //   short a, b;
1219 //   {
1220 //      return a + b;
1221 //   }
1222 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1223                                const FormatToken *FuncName) {
1224   assert(Tok);
1225   assert(Next);
1226   assert(FuncName);
1227 
1228   if (FuncName->isNot(tok::identifier))
1229     return false;
1230 
1231   const FormatToken *Prev = FuncName->Previous;
1232   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1233     return false;
1234 
1235   if (!isC78Type(*Tok) &&
1236       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1237     return false;
1238 
1239   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1240     return false;
1241 
1242   Tok = Tok->Previous;
1243   if (!Tok || Tok->isNot(tok::r_paren))
1244     return false;
1245 
1246   Tok = Tok->Previous;
1247   if (!Tok || Tok->isNot(tok::identifier))
1248     return false;
1249 
1250   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1251 }
1252 
1253 void UnwrappedLineParser::parseModuleImport() {
1254   nextToken();
1255   while (!eof()) {
1256     if (FormatTok->is(tok::colon)) {
1257       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1258     }
1259     // Handle import <foo/bar.h> as we would an include statement.
1260     else if (FormatTok->is(tok::less)) {
1261       nextToken();
1262       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1263         // Mark tokens up to the trailing line comments as implicit string
1264         // literals.
1265         if (FormatTok->isNot(tok::comment) &&
1266             !FormatTok->TokenText.startswith("//"))
1267           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1268         nextToken();
1269       }
1270     }
1271     if (FormatTok->is(tok::semi)) {
1272       nextToken();
1273       break;
1274     }
1275     nextToken();
1276   }
1277 
1278   addUnwrappedLine();
1279 }
1280 
1281 // readTokenWithJavaScriptASI reads the next token and terminates the current
1282 // line if JavaScript Automatic Semicolon Insertion must
1283 // happen between the current token and the next token.
1284 //
1285 // This method is conservative - it cannot cover all edge cases of JavaScript,
1286 // but only aims to correctly handle certain well known cases. It *must not*
1287 // return true in speculative cases.
1288 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1289   FormatToken *Previous = FormatTok;
1290   readToken();
1291   FormatToken *Next = FormatTok;
1292 
1293   bool IsOnSameLine =
1294       CommentsBeforeNextToken.empty()
1295           ? Next->NewlinesBefore == 0
1296           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1297   if (IsOnSameLine)
1298     return;
1299 
1300   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1301   bool PreviousStartsTemplateExpr =
1302       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1303   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1304     // If the line contains an '@' sign, the previous token might be an
1305     // annotation, which can precede another identifier/value.
1306     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1307       return LineNode.Tok->is(tok::at);
1308     });
1309     if (HasAt)
1310       return;
1311   }
1312   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1313     return addUnwrappedLine();
1314   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1315   bool NextEndsTemplateExpr =
1316       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1317   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1318       (PreviousMustBeValue ||
1319        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1320                          tok::minusminus)))
1321     return addUnwrappedLine();
1322   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1323       isJSDeclOrStmt(Keywords, Next))
1324     return addUnwrappedLine();
1325 }
1326 
1327 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
1328                                                  bool IsTopLevel,
1329                                                  TokenType NextLBracesType,
1330                                                  bool *HasLabel) {
1331   if (Style.Language == FormatStyle::LK_TableGen &&
1332       FormatTok->is(tok::pp_include)) {
1333     nextToken();
1334     if (FormatTok->is(tok::string_literal))
1335       nextToken();
1336     addUnwrappedLine();
1337     return;
1338   }
1339   switch (FormatTok->Tok.getKind()) {
1340   case tok::kw_asm:
1341     nextToken();
1342     if (FormatTok->is(tok::l_brace)) {
1343       FormatTok->setFinalizedType(TT_InlineASMBrace);
1344       nextToken();
1345       while (FormatTok && FormatTok->isNot(tok::eof)) {
1346         if (FormatTok->is(tok::r_brace)) {
1347           FormatTok->setFinalizedType(TT_InlineASMBrace);
1348           nextToken();
1349           addUnwrappedLine();
1350           break;
1351         }
1352         FormatTok->Finalized = true;
1353         nextToken();
1354       }
1355     }
1356     break;
1357   case tok::kw_namespace:
1358     parseNamespace();
1359     return;
1360   case tok::kw_public:
1361   case tok::kw_protected:
1362   case tok::kw_private:
1363     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1364         Style.isCSharp())
1365       nextToken();
1366     else
1367       parseAccessSpecifier();
1368     return;
1369   case tok::kw_if:
1370     if (Style.isJavaScript() && Line->MustBeDeclaration)
1371       // field/method declaration.
1372       break;
1373     parseIfThenElse(IfKind);
1374     return;
1375   case tok::kw_for:
1376   case tok::kw_while:
1377     if (Style.isJavaScript() && Line->MustBeDeclaration)
1378       // field/method declaration.
1379       break;
1380     parseForOrWhileLoop();
1381     return;
1382   case tok::kw_do:
1383     if (Style.isJavaScript() && Line->MustBeDeclaration)
1384       // field/method declaration.
1385       break;
1386     parseDoWhile();
1387     return;
1388   case tok::kw_switch:
1389     if (Style.isJavaScript() && Line->MustBeDeclaration)
1390       // 'switch: string' field declaration.
1391       break;
1392     parseSwitch();
1393     return;
1394   case tok::kw_default:
1395     if (Style.isJavaScript() && Line->MustBeDeclaration)
1396       // 'default: string' field declaration.
1397       break;
1398     nextToken();
1399     if (FormatTok->is(tok::colon)) {
1400       parseLabel();
1401       return;
1402     }
1403     // e.g. "default void f() {}" in a Java interface.
1404     break;
1405   case tok::kw_case:
1406     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1407       // 'case: string' field declaration.
1408       nextToken();
1409       break;
1410     }
1411     parseCaseLabel();
1412     return;
1413   case tok::kw_try:
1414   case tok::kw___try:
1415     if (Style.isJavaScript() && Line->MustBeDeclaration)
1416       // field/method declaration.
1417       break;
1418     parseTryCatch();
1419     return;
1420   case tok::kw_extern:
1421     nextToken();
1422     if (FormatTok->is(tok::string_literal)) {
1423       nextToken();
1424       if (FormatTok->is(tok::l_brace)) {
1425         if (Style.BraceWrapping.AfterExternBlock)
1426           addUnwrappedLine();
1427         // Either we indent or for backwards compatibility we follow the
1428         // AfterExternBlock style.
1429         unsigned AddLevels =
1430             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1431                     (Style.BraceWrapping.AfterExternBlock &&
1432                      Style.IndentExternBlock ==
1433                          FormatStyle::IEBS_AfterExternBlock)
1434                 ? 1u
1435                 : 0u;
1436         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1437         addUnwrappedLine();
1438         return;
1439       }
1440     }
1441     break;
1442   case tok::kw_export:
1443     if (Style.isJavaScript()) {
1444       parseJavaScriptEs6ImportExport();
1445       return;
1446     }
1447     if (!Style.isCpp())
1448       break;
1449     // Handle C++ "(inline|export) namespace".
1450     LLVM_FALLTHROUGH;
1451   case tok::kw_inline:
1452     nextToken();
1453     if (FormatTok->is(tok::kw_namespace)) {
1454       parseNamespace();
1455       return;
1456     }
1457     break;
1458   case tok::identifier:
1459     if (FormatTok->is(TT_ForEachMacro)) {
1460       parseForOrWhileLoop();
1461       return;
1462     }
1463     if (FormatTok->is(TT_MacroBlockBegin)) {
1464       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1465                  /*MunchSemi=*/false);
1466       return;
1467     }
1468     if (FormatTok->is(Keywords.kw_import)) {
1469       if (Style.isJavaScript()) {
1470         parseJavaScriptEs6ImportExport();
1471         return;
1472       }
1473       if (Style.Language == FormatStyle::LK_Proto) {
1474         nextToken();
1475         if (FormatTok->is(tok::kw_public))
1476           nextToken();
1477         if (!FormatTok->is(tok::string_literal))
1478           return;
1479         nextToken();
1480         if (FormatTok->is(tok::semi))
1481           nextToken();
1482         addUnwrappedLine();
1483         return;
1484       }
1485       if (Style.isCpp()) {
1486         parseModuleImport();
1487         return;
1488       }
1489     }
1490     if (Style.isCpp() &&
1491         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1492                            Keywords.kw_slots, Keywords.kw_qslots)) {
1493       nextToken();
1494       if (FormatTok->is(tok::colon)) {
1495         nextToken();
1496         addUnwrappedLine();
1497         return;
1498       }
1499     }
1500     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1501       parseStatementMacro();
1502       return;
1503     }
1504     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1505       parseNamespace();
1506       return;
1507     }
1508     // In all other cases, parse the declaration.
1509     break;
1510   default:
1511     break;
1512   }
1513   do {
1514     const FormatToken *Previous = FormatTok->Previous;
1515     switch (FormatTok->Tok.getKind()) {
1516     case tok::at:
1517       nextToken();
1518       if (FormatTok->is(tok::l_brace)) {
1519         nextToken();
1520         parseBracedList();
1521         break;
1522       } else if (Style.Language == FormatStyle::LK_Java &&
1523                  FormatTok->is(Keywords.kw_interface)) {
1524         nextToken();
1525         break;
1526       }
1527       switch (FormatTok->Tok.getObjCKeywordID()) {
1528       case tok::objc_public:
1529       case tok::objc_protected:
1530       case tok::objc_package:
1531       case tok::objc_private:
1532         return parseAccessSpecifier();
1533       case tok::objc_interface:
1534       case tok::objc_implementation:
1535         return parseObjCInterfaceOrImplementation();
1536       case tok::objc_protocol:
1537         if (parseObjCProtocol())
1538           return;
1539         break;
1540       case tok::objc_end:
1541         return; // Handled by the caller.
1542       case tok::objc_optional:
1543       case tok::objc_required:
1544         nextToken();
1545         addUnwrappedLine();
1546         return;
1547       case tok::objc_autoreleasepool:
1548         nextToken();
1549         if (FormatTok->is(tok::l_brace)) {
1550           if (Style.BraceWrapping.AfterControlStatement ==
1551               FormatStyle::BWACS_Always)
1552             addUnwrappedLine();
1553           parseBlock();
1554         }
1555         addUnwrappedLine();
1556         return;
1557       case tok::objc_synchronized:
1558         nextToken();
1559         if (FormatTok->is(tok::l_paren))
1560           // Skip synchronization object
1561           parseParens();
1562         if (FormatTok->is(tok::l_brace)) {
1563           if (Style.BraceWrapping.AfterControlStatement ==
1564               FormatStyle::BWACS_Always)
1565             addUnwrappedLine();
1566           parseBlock();
1567         }
1568         addUnwrappedLine();
1569         return;
1570       case tok::objc_try:
1571         // This branch isn't strictly necessary (the kw_try case below would
1572         // do this too after the tok::at is parsed above).  But be explicit.
1573         parseTryCatch();
1574         return;
1575       default:
1576         break;
1577       }
1578       break;
1579     case tok::kw_concept:
1580       parseConcept();
1581       return;
1582     case tok::kw_requires: {
1583       if (Style.isCpp()) {
1584         bool ParsedClause = parseRequires();
1585         if (ParsedClause)
1586           return;
1587       } else {
1588         nextToken();
1589       }
1590       break;
1591     }
1592     case tok::kw_enum:
1593       // Ignore if this is part of "template <enum ...".
1594       if (Previous && Previous->is(tok::less)) {
1595         nextToken();
1596         break;
1597       }
1598 
1599       // parseEnum falls through and does not yet add an unwrapped line as an
1600       // enum definition can start a structural element.
1601       if (!parseEnum())
1602         break;
1603       // This only applies for C++.
1604       if (!Style.isCpp()) {
1605         addUnwrappedLine();
1606         return;
1607       }
1608       break;
1609     case tok::kw_typedef:
1610       nextToken();
1611       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1612                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1613                              Keywords.kw_CF_CLOSED_ENUM,
1614                              Keywords.kw_NS_CLOSED_ENUM))
1615         parseEnum();
1616       break;
1617     case tok::kw_struct:
1618     case tok::kw_union:
1619     case tok::kw_class:
1620       if (parseStructLike())
1621         return;
1622       break;
1623     case tok::period:
1624       nextToken();
1625       // In Java, classes have an implicit static member "class".
1626       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1627           FormatTok->is(tok::kw_class))
1628         nextToken();
1629       if (Style.isJavaScript() && FormatTok &&
1630           FormatTok->Tok.getIdentifierInfo())
1631         // JavaScript only has pseudo keywords, all keywords are allowed to
1632         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1633         nextToken();
1634       break;
1635     case tok::semi:
1636       nextToken();
1637       addUnwrappedLine();
1638       return;
1639     case tok::r_brace:
1640       addUnwrappedLine();
1641       return;
1642     case tok::l_paren: {
1643       parseParens();
1644       // Break the unwrapped line if a K&R C function definition has a parameter
1645       // declaration.
1646       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1647         break;
1648       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1649         addUnwrappedLine();
1650         return;
1651       }
1652       break;
1653     }
1654     case tok::kw_operator:
1655       nextToken();
1656       if (FormatTok->isBinaryOperator())
1657         nextToken();
1658       break;
1659     case tok::caret:
1660       nextToken();
1661       if (FormatTok->Tok.isAnyIdentifier() ||
1662           FormatTok->isSimpleTypeSpecifier())
1663         nextToken();
1664       if (FormatTok->is(tok::l_paren))
1665         parseParens();
1666       if (FormatTok->is(tok::l_brace))
1667         parseChildBlock();
1668       break;
1669     case tok::l_brace:
1670       if (NextLBracesType != TT_Unknown)
1671         FormatTok->setFinalizedType(NextLBracesType);
1672       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1673         // A block outside of parentheses must be the last part of a
1674         // structural element.
1675         // FIXME: Figure out cases where this is not true, and add projections
1676         // for them (the one we know is missing are lambdas).
1677         if (Style.Language == FormatStyle::LK_Java &&
1678             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1679           // If necessary, we could set the type to something different than
1680           // TT_FunctionLBrace.
1681           if (Style.BraceWrapping.AfterControlStatement ==
1682               FormatStyle::BWACS_Always)
1683             addUnwrappedLine();
1684         } else if (Style.BraceWrapping.AfterFunction) {
1685           addUnwrappedLine();
1686         }
1687         if (!Line->InPPDirective)
1688           FormatTok->setFinalizedType(TT_FunctionLBrace);
1689         parseBlock();
1690         addUnwrappedLine();
1691         return;
1692       }
1693       // Otherwise this was a braced init list, and the structural
1694       // element continues.
1695       break;
1696     case tok::kw_try:
1697       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1698         // field/method declaration.
1699         nextToken();
1700         break;
1701       }
1702       // We arrive here when parsing function-try blocks.
1703       if (Style.BraceWrapping.AfterFunction)
1704         addUnwrappedLine();
1705       parseTryCatch();
1706       return;
1707     case tok::identifier: {
1708       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1709           Line->MustBeDeclaration) {
1710         addUnwrappedLine();
1711         parseCSharpGenericTypeConstraint();
1712         break;
1713       }
1714       if (FormatTok->is(TT_MacroBlockEnd)) {
1715         addUnwrappedLine();
1716         return;
1717       }
1718 
1719       // Function declarations (as opposed to function expressions) are parsed
1720       // on their own unwrapped line by continuing this loop. Function
1721       // expressions (functions that are not on their own line) must not create
1722       // a new unwrapped line, so they are special cased below.
1723       size_t TokenCount = Line->Tokens.size();
1724       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1725           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1726                                                      Keywords.kw_async)))) {
1727         tryToParseJSFunction();
1728         break;
1729       }
1730       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1731           FormatTok->is(Keywords.kw_interface)) {
1732         if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
1737           unsigned StoredPosition = Tokens->getPosition();
1738           FormatToken *Next = Tokens->getNextToken();
1739           FormatTok = Tokens->setPosition(StoredPosition);
1740           if (!mustBeJSIdent(Keywords, Next)) {
1741             nextToken();
1742             break;
1743           }
1744         }
1745         parseRecord();
1746         addUnwrappedLine();
1747         return;
1748       }
1749 
1750       if (FormatTok->is(Keywords.kw_interface)) {
1751         if (parseStructLike())
1752           return;
1753         break;
1754       }
1755 
1756       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1757         parseStatementMacro();
1758         return;
1759       }
1760 
1761       // See if the following token should start a new unwrapped line.
1762       StringRef Text = FormatTok->TokenText;
1763 
1764       FormatToken *PreviousToken = FormatTok;
1765       nextToken();
1766 
1767       // JS doesn't have macros, and within classes colons indicate fields, not
1768       // labels.
1769       if (Style.isJavaScript())
1770         break;
1771 
1772       TokenCount = Line->Tokens.size();
1773       if (TokenCount == 1 ||
1774           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1775         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1776           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1777           parseLabel(!Style.IndentGotoLabels);
1778           if (HasLabel)
1779             *HasLabel = true;
1780           return;
1781         }
1782         // Recognize function-like macro usages without trailing semicolon as
1783         // well as free-standing macros like Q_OBJECT.
1784         bool FunctionLike = FormatTok->is(tok::l_paren);
1785         if (FunctionLike)
1786           parseParens();
1787 
1788         bool FollowedByNewline =
1789             CommentsBeforeNextToken.empty()
1790                 ? FormatTok->NewlinesBefore > 0
1791                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1792 
1793         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1794             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1795           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1796           addUnwrappedLine();
1797           return;
1798         }
1799       }
1800       break;
1801     }
1802     case tok::equal:
1803       if ((Style.isJavaScript() || Style.isCSharp()) &&
1804           FormatTok->is(TT_FatArrow)) {
1805         tryToParseChildBlock();
1806         break;
1807       }
1808 
1809       nextToken();
1810       if (FormatTok->is(tok::l_brace)) {
1811         // Block kind should probably be set to BK_BracedInit for any language.
1812         // C# needs this change to ensure that array initialisers and object
1813         // initialisers are indented the same way.
1814         if (Style.isCSharp())
1815           FormatTok->setBlockKind(BK_BracedInit);
1816         nextToken();
1817         parseBracedList();
1818       } else if (Style.Language == FormatStyle::LK_Proto &&
1819                  FormatTok->is(tok::less)) {
1820         nextToken();
1821         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1822                         /*ClosingBraceKind=*/tok::greater);
1823       }
1824       break;
1825     case tok::l_square:
1826       parseSquare();
1827       break;
1828     case tok::kw_new:
1829       parseNew();
1830       break;
1831     case tok::kw_case:
1832       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1833         // 'case: string' field declaration.
1834         nextToken();
1835         break;
1836       }
1837       parseCaseLabel();
1838       break;
1839     default:
1840       nextToken();
1841       break;
1842     }
1843   } while (!eof());
1844 }
1845 
1846 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1847   assert(FormatTok->is(tok::l_brace));
1848   if (!Style.isCSharp())
1849     return false;
1850   // See if it's a property accessor.
1851   if (FormatTok->Previous->isNot(tok::identifier))
1852     return false;
1853 
1854   // See if we are inside a property accessor.
1855   //
1856   // Record the current tokenPosition so that we can advance and
1857   // reset the current token. `Next` is not set yet so we need
1858   // another way to advance along the token stream.
1859   unsigned int StoredPosition = Tokens->getPosition();
1860   FormatToken *Tok = Tokens->getNextToken();
1861 
1862   // A trivial property accessor is of the form:
1863   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
1864   // Track these as they do not require line breaks to be introduced.
1865   bool HasSpecialAccessor = false;
1866   bool IsTrivialPropertyAccessor = true;
1867   while (!eof()) {
1868     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1869                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1870                      Keywords.kw_init, Keywords.kw_set)) {
1871       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
1872         HasSpecialAccessor = true;
1873       Tok = Tokens->getNextToken();
1874       continue;
1875     }
1876     if (Tok->isNot(tok::r_brace))
1877       IsTrivialPropertyAccessor = false;
1878     break;
1879   }
1880 
1881   if (!HasSpecialAccessor) {
1882     Tokens->setPosition(StoredPosition);
1883     return false;
1884   }
1885 
1886   // Try to parse the property accessor:
1887   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1888   Tokens->setPosition(StoredPosition);
1889   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1890     addUnwrappedLine();
1891   nextToken();
1892   do {
1893     switch (FormatTok->Tok.getKind()) {
1894     case tok::r_brace:
1895       nextToken();
1896       if (FormatTok->is(tok::equal)) {
1897         while (!eof() && FormatTok->isNot(tok::semi))
1898           nextToken();
1899         nextToken();
1900       }
1901       addUnwrappedLine();
1902       return true;
1903     case tok::l_brace:
1904       ++Line->Level;
1905       parseBlock(/*MustBeDeclaration=*/true);
1906       addUnwrappedLine();
1907       --Line->Level;
1908       break;
1909     case tok::equal:
1910       if (FormatTok->is(TT_FatArrow)) {
1911         ++Line->Level;
1912         do {
1913           nextToken();
1914         } while (!eof() && FormatTok->isNot(tok::semi));
1915         nextToken();
1916         addUnwrappedLine();
1917         --Line->Level;
1918         break;
1919       }
1920       nextToken();
1921       break;
1922     default:
1923       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
1924                              Keywords.kw_set) &&
1925           !IsTrivialPropertyAccessor) {
1926         // Non-trivial get/set needs to be on its own line.
1927         addUnwrappedLine();
1928       }
1929       nextToken();
1930     }
1931   } while (!eof());
1932 
1933   // Unreachable for well-formed code (paired '{' and '}').
1934   return true;
1935 }
1936 
/// Tries to parse a C++ lambda expression starting at the current '['.
///
/// \returns false in three cases: the style is not C++ (the current token is
/// consumed first), tryToParseLambdaIntroducer() rejected the introducer, or
/// the introducer is directly followed by '>'. In every other case true is
/// returned. Note that true does not necessarily mean a lambda was found: the
/// scan between the introducer and the body gives up with `return true` on
/// any token it does not expect, in which case no token is marked as part of
/// a lambda. Only when a '{' is reached are the '[' and '{' finalized as
/// TT_LambdaLSquare / TT_LambdaLBrace and the body parsed as a child block.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  // Remember the '[' so it can be typed once we know this really is a lambda.
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // `[something] >` is not a lambda, but an array type in a template parameter
  // list.
  if (FormatTok->is(tok::greater))
    return false;

  // Set once a `->` has been seen after the introducer.
  bool SeenArrow = false;
  // Set once `class`/`template`/`typename` has been seen directly after '<'.
  bool InTemplateParameterList = false;

  // Scan everything between the introducer and the lambda body.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      // Not reached: the loop condition already stops at '{'.
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
      assert(FormatTok->Previous);
      if (FormatTok->Previous->is(tok::less))
        InTemplateParameterList = true;
      nextToken();
      break;
    // Tokens that are always accepted between the introducer and the body.
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      // Operator outside of a trailing return type or template parameter
      // list: stop scanning without marking anything as a lambda.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      // Unexpected token: stop scanning without marking anything as a lambda.
      return true;
    }
  }
  // We reached the body: type the braces/brackets and parse the body as a
  // child block.
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}
2046 
2047 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2048   const FormatToken *Previous = FormatTok->Previous;
2049   if (Previous &&
2050       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2051                          tok::kw_delete, tok::l_square) ||
2052        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
2053        Previous->isSimpleTypeSpecifier())) {
2054     nextToken();
2055     return false;
2056   }
2057   nextToken();
2058   if (FormatTok->is(tok::l_square))
2059     return false;
2060   parseSquare(/*LambdaIntroducer=*/true);
2061   return true;
2062 }
2063 
2064 void UnwrappedLineParser::tryToParseJSFunction() {
2065   assert(FormatTok->is(Keywords.kw_function) ||
2066          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2067   if (FormatTok->is(Keywords.kw_async))
2068     nextToken();
2069   // Consume "function".
2070   nextToken();
2071 
2072   // Consume * (generator function). Treat it like C++'s overloaded operators.
2073   if (FormatTok->is(tok::star)) {
2074     FormatTok->setFinalizedType(TT_OverloadedOperator);
2075     nextToken();
2076   }
2077 
2078   // Consume function name.
2079   if (FormatTok->is(tok::identifier))
2080     nextToken();
2081 
2082   if (FormatTok->isNot(tok::l_paren))
2083     return;
2084 
2085   // Parse formal parameter list.
2086   parseParens();
2087 
2088   if (FormatTok->is(tok::colon)) {
2089     // Parse a type definition.
2090     nextToken();
2091 
2092     // Eat the type declaration. For braced inline object types, balance braces,
2093     // otherwise just parse until finding an l_brace for the function body.
2094     if (FormatTok->is(tok::l_brace))
2095       tryToParseBracedList();
2096     else
2097       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2098         nextToken();
2099   }
2100 
2101   if (FormatTok->is(tok::semi))
2102     return;
2103 
2104   parseChildBlock();
2105 }
2106 
2107 bool UnwrappedLineParser::tryToParseBracedList() {
2108   if (FormatTok->is(BK_Unknown))
2109     calculateBraceTypes();
2110   assert(FormatTok->isNot(BK_Unknown));
2111   if (FormatTok->is(BK_Block))
2112     return false;
2113   nextToken();
2114   parseBracedList();
2115   return true;
2116 }
2117 
2118 bool UnwrappedLineParser::tryToParseChildBlock() {
2119   assert(Style.isJavaScript() || Style.isCSharp());
2120   assert(FormatTok->is(TT_FatArrow));
2121   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2122   // They always start an expression or a child block if followed by a curly
2123   // brace.
2124   nextToken();
2125   if (FormatTok->isNot(tok::l_brace))
2126     return false;
2127   parseChildBlock();
2128   return true;
2129 }
2130 
2131 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2132                                           bool IsEnum,
2133                                           tok::TokenKind ClosingBraceKind) {
2134   bool HasError = false;
2135 
2136   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2137   // replace this by using parseAssignmentExpression() inside.
2138   do {
2139     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2140         tryToParseChildBlock())
2141       continue;
2142     if (Style.isJavaScript()) {
2143       if (FormatTok->is(Keywords.kw_function) ||
2144           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2145         tryToParseJSFunction();
2146         continue;
2147       }
2148       if (FormatTok->is(tok::l_brace)) {
2149         // Could be a method inside of a braced list `{a() { return 1; }}`.
2150         if (tryToParseBracedList())
2151           continue;
2152         parseChildBlock();
2153       }
2154     }
2155     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2156       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2157         addUnwrappedLine();
2158       nextToken();
2159       return !HasError;
2160     }
2161     switch (FormatTok->Tok.getKind()) {
2162     case tok::l_square:
2163       if (Style.isCSharp())
2164         parseSquare();
2165       else
2166         tryToParseLambda();
2167       break;
2168     case tok::l_paren:
2169       parseParens();
2170       // JavaScript can just have free standing methods and getters/setters in
2171       // object literals. Detect them by a "{" following ")".
2172       if (Style.isJavaScript()) {
2173         if (FormatTok->is(tok::l_brace))
2174           parseChildBlock();
2175         break;
2176       }
2177       break;
2178     case tok::l_brace:
2179       // Assume there are no blocks inside a braced init list apart
2180       // from the ones we explicitly parse out (like lambdas).
2181       FormatTok->setBlockKind(BK_BracedInit);
2182       nextToken();
2183       parseBracedList();
2184       break;
2185     case tok::less:
2186       if (Style.Language == FormatStyle::LK_Proto) {
2187         nextToken();
2188         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2189                         /*ClosingBraceKind=*/tok::greater);
2190       } else {
2191         nextToken();
2192       }
2193       break;
2194     case tok::semi:
2195       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2196       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2197       // used for error recovery if we have otherwise determined that this is
2198       // a braced list.
2199       if (Style.isJavaScript()) {
2200         nextToken();
2201         break;
2202       }
2203       HasError = true;
2204       if (!ContinueOnSemicolons)
2205         return !HasError;
2206       nextToken();
2207       break;
2208     case tok::comma:
2209       nextToken();
2210       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2211         addUnwrappedLine();
2212       break;
2213     default:
2214       nextToken();
2215       break;
2216     }
2217   } while (!eof());
2218   return false;
2219 }
2220 
2221 /// \brief Parses a pair of parentheses (and everything between them).
2222 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2223 /// double ampersands. This only counts for the current parens scope.
2224 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2225   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2226   nextToken();
2227   do {
2228     switch (FormatTok->Tok.getKind()) {
2229     case tok::l_paren:
2230       parseParens();
2231       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2232         parseChildBlock();
2233       break;
2234     case tok::r_paren:
2235       nextToken();
2236       return;
2237     case tok::r_brace:
2238       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2239       return;
2240     case tok::l_square:
2241       tryToParseLambda();
2242       break;
2243     case tok::l_brace:
2244       if (!tryToParseBracedList())
2245         parseChildBlock();
2246       break;
2247     case tok::at:
2248       nextToken();
2249       if (FormatTok->is(tok::l_brace)) {
2250         nextToken();
2251         parseBracedList();
2252       }
2253       break;
2254     case tok::equal:
2255       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2256         tryToParseChildBlock();
2257       else
2258         nextToken();
2259       break;
2260     case tok::kw_class:
2261       if (Style.isJavaScript())
2262         parseRecord(/*ParseAsExpr=*/true);
2263       else
2264         nextToken();
2265       break;
2266     case tok::identifier:
2267       if (Style.isJavaScript() &&
2268           (FormatTok->is(Keywords.kw_function) ||
2269            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2270         tryToParseJSFunction();
2271       else
2272         nextToken();
2273       break;
2274     case tok::kw_requires: {
2275       auto RequiresToken = FormatTok;
2276       nextToken();
2277       parseRequiresExpression(RequiresToken);
2278       break;
2279     }
2280     case tok::ampamp:
2281       if (AmpAmpTokenType != TT_Unknown)
2282         FormatTok->setFinalizedType(AmpAmpTokenType);
2283       LLVM_FALLTHROUGH;
2284     default:
2285       nextToken();
2286       break;
2287     }
2288   } while (!eof());
2289 }
2290 
2291 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2292   if (!LambdaIntroducer) {
2293     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2294     if (tryToParseLambda())
2295       return;
2296   }
2297   do {
2298     switch (FormatTok->Tok.getKind()) {
2299     case tok::l_paren:
2300       parseParens();
2301       break;
2302     case tok::r_square:
2303       nextToken();
2304       return;
2305     case tok::r_brace:
2306       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2307       return;
2308     case tok::l_square:
2309       parseSquare();
2310       break;
2311     case tok::l_brace: {
2312       if (!tryToParseBracedList())
2313         parseChildBlock();
2314       break;
2315     }
2316     case tok::at:
2317       nextToken();
2318       if (FormatTok->is(tok::l_brace)) {
2319         nextToken();
2320         parseBracedList();
2321       }
2322       break;
2323     default:
2324       nextToken();
2325       break;
2326     }
2327   } while (!eof());
2328 }
2329 
2330 void UnwrappedLineParser::keepAncestorBraces() {
2331   if (!Style.RemoveBracesLLVM)
2332     return;
2333 
2334   const int MaxNestingLevels = 2;
2335   const int Size = NestedTooDeep.size();
2336   if (Size >= MaxNestingLevels)
2337     NestedTooDeep[Size - MaxNestingLevels] = true;
2338   NestedTooDeep.push_back(false);
2339 }
2340 
2341 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2342   for (const auto &Token : llvm::reverse(Line.Tokens))
2343     if (Token.Tok->isNot(tok::comment))
2344       return Token.Tok;
2345 
2346   return nullptr;
2347 }
2348 
2349 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2350   FormatToken *Tok = nullptr;
2351 
2352   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2353       PreprocessorDirectives.empty()) {
2354     Tok = getLastNonComment(*Line);
2355     assert(Tok);
2356     if (Tok->BraceCount < 0) {
2357       assert(Tok->BraceCount == -1);
2358       Tok = nullptr;
2359     } else {
2360       Tok->BraceCount = -1;
2361     }
2362   }
2363 
2364   addUnwrappedLine();
2365   ++Line->Level;
2366   parseStructuralElement();
2367 
2368   if (Tok) {
2369     assert(!Line->InPPDirective);
2370     Tok = nullptr;
2371     for (const auto &L : llvm::reverse(*CurrentLines)) {
2372       if (!L.InPPDirective && getLastNonComment(L)) {
2373         Tok = L.Tokens.back().Tok;
2374         break;
2375       }
2376     }
2377     assert(Tok);
2378     ++Tok->BraceCount;
2379   }
2380 
2381   if (CheckEOF && FormatTok->is(tok::eof))
2382     addUnwrappedLine();
2383 
2384   --Line->Level;
2385 }
2386 
2387 static void markOptionalBraces(FormatToken *LeftBrace) {
2388   if (!LeftBrace)
2389     return;
2390 
2391   assert(LeftBrace->is(tok::l_brace));
2392 
2393   FormatToken *RightBrace = LeftBrace->MatchingParen;
2394   if (!RightBrace) {
2395     assert(!LeftBrace->Optional);
2396     return;
2397   }
2398 
2399   assert(RightBrace->is(tok::r_brace));
2400   assert(RightBrace->MatchingParen == LeftBrace);
2401   assert(LeftBrace->Optional == RightBrace->Optional);
2402 
2403   LeftBrace->Optional = true;
2404   RightBrace->Optional = true;
2405 }
2406 
2407 void UnwrappedLineParser::handleAttributes() {
2408   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2409   if (FormatTok->is(TT_AttributeMacro))
2410     nextToken();
2411   handleCppAttributes();
2412 }
2413 
2414 bool UnwrappedLineParser::handleCppAttributes() {
2415   // Handle [[likely]] / [[unlikely]] attributes.
2416   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2417     parseSquare();
2418     return true;
2419   }
2420   return false;
2421 }
2422 
/// Parses an `if` statement including any `else` / `else if` chain.
///
/// \param IfKind Optional out-parameter. It is only written when
/// Style.RemoveBracesLLVM is enabled (the function returns before the write
/// otherwise) and then receives IfOnly, IfElse or IfElseIf depending on what
/// followed the `if` body.
/// \param KeepBraces If true, the braces of this statement are never marked
/// optional. It is passed on (combined with KeepIfBraces) to a chained
/// `else if`.
/// \returns nullptr when Style.RemoveBracesLLVM is disabled; otherwise the
/// '{' of the `if` body, or nullptr if the body was not braced.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces) {
  assert(FormatTok->is(tok::kw_if) && "'if' expected");
  nextToken();
  // Accept `if !consteval`, `if consteval`, `if constexpr (...)` and an
  // identifier (e.g. a macro) in front of the condition.
  if (FormatTok->is(tok::exclaim))
    nextToken();
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren))
      parseParens();
  }
  handleAttributes();

  // Set when the line break after the `if` block is deferred until we know
  // whether an `else` follows.
  bool NeedsUnwrappedLine = false;
  // Pushes this statement's entry onto NestedTooDeep (RemoveBracesLLVM only).
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (FormatTok->is(tok::l_brace)) {
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    IfBlockKind = parseBlock();
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    parseUnbracedBody();
  }

  bool KeepIfBraces = false;
  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if the '{' has no matching '}', if this statement was
    // flagged as nested too deeply, or if the block kind reported by
    // parseBlock() is IfOnly or IfElseIf.
    KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Reset the flag so that it reflects only the `else` branch from here
      // on.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (FormatTok->is(tok::l_brace)) {
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      if (parseBlock() == IfStmtKind::IfOnly)
        Kind = IfStmtKind::IfElseIf;
      addUnwrappedLine();
    } else if (FormatTok->is(tok::kw_if)) {
      // `else if`: a comment between `else` and `if` forces the `if` onto its
      // own, more deeply indented line.
      FormatToken *Previous = Tokens->getPreviousToken();
      const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Temporarily remove our own entry while the chained `if` is parsed;
        // it is restored right after the recursive call.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace =
          parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    if (Style.RemoveBracesLLVM)
      KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    // No `else`: emit the line break that was deferred above.
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is bookkeeping for brace removal only. Note that
  // *IfKind is therefore left untouched when RemoveBracesLLVM is disabled.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  const bool KeepElseBraces =
      (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();

  // Drop the entry pushed by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces must be kept: unlink the pair. An enclosing call that
    // receives this '{' as its ElseLeftBrace then sees a null MatchingParen
    // and keeps its braces as well.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2537 
2538 void UnwrappedLineParser::parseTryCatch() {
2539   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2540   nextToken();
2541   bool NeedsUnwrappedLine = false;
2542   if (FormatTok->is(tok::colon)) {
2543     // We are in a function try block, what comes is an initializer list.
2544     nextToken();
2545 
2546     // In case identifiers were removed by clang-tidy, what might follow is
2547     // multiple commas in sequence - before the first identifier.
2548     while (FormatTok->is(tok::comma))
2549       nextToken();
2550 
2551     while (FormatTok->is(tok::identifier)) {
2552       nextToken();
2553       if (FormatTok->is(tok::l_paren))
2554         parseParens();
2555       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2556           FormatTok->is(tok::l_brace)) {
2557         do {
2558           nextToken();
2559         } while (!FormatTok->is(tok::r_brace));
2560         nextToken();
2561       }
2562 
2563       // In case identifiers were removed by clang-tidy, what might follow is
2564       // multiple commas in sequence - after the first identifier.
2565       while (FormatTok->is(tok::comma))
2566         nextToken();
2567     }
2568   }
2569   // Parse try with resource.
2570   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2571     parseParens();
2572 
2573   keepAncestorBraces();
2574 
2575   if (FormatTok->is(tok::l_brace)) {
2576     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2577     parseBlock();
2578     if (Style.BraceWrapping.BeforeCatch)
2579       addUnwrappedLine();
2580     else
2581       NeedsUnwrappedLine = true;
2582   } else if (!FormatTok->is(tok::kw_catch)) {
2583     // The C++ standard requires a compound-statement after a try.
2584     // If there's none, we try to assume there's a structuralElement
2585     // and try to continue.
2586     addUnwrappedLine();
2587     ++Line->Level;
2588     parseStructuralElement();
2589     --Line->Level;
2590   }
2591   while (true) {
2592     if (FormatTok->is(tok::at))
2593       nextToken();
2594     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2595                              tok::kw___finally) ||
2596           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2597            FormatTok->is(Keywords.kw_finally)) ||
2598           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2599            FormatTok->isObjCAtKeyword(tok::objc_finally))))
2600       break;
2601     nextToken();
2602     while (FormatTok->isNot(tok::l_brace)) {
2603       if (FormatTok->is(tok::l_paren)) {
2604         parseParens();
2605         continue;
2606       }
2607       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2608         if (Style.RemoveBracesLLVM)
2609           NestedTooDeep.pop_back();
2610         return;
2611       }
2612       nextToken();
2613     }
2614     NeedsUnwrappedLine = false;
2615     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2616     parseBlock();
2617     if (Style.BraceWrapping.BeforeCatch)
2618       addUnwrappedLine();
2619     else
2620       NeedsUnwrappedLine = true;
2621   }
2622 
2623   if (Style.RemoveBracesLLVM)
2624     NestedTooDeep.pop_back();
2625 
2626   if (NeedsUnwrappedLine)
2627     addUnwrappedLine();
2628 }
2629 
2630 void UnwrappedLineParser::parseNamespace() {
2631   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2632          "'namespace' expected");
2633 
2634   const FormatToken &InitialToken = *FormatTok;
2635   nextToken();
2636   if (InitialToken.is(TT_NamespaceMacro)) {
2637     parseParens();
2638   } else {
2639     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2640                               tok::l_square, tok::period, tok::l_paren) ||
2641            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2642       if (FormatTok->is(tok::l_square))
2643         parseSquare();
2644       else if (FormatTok->is(tok::l_paren))
2645         parseParens();
2646       else
2647         nextToken();
2648   }
2649   if (FormatTok->is(tok::l_brace)) {
2650     if (ShouldBreakBeforeBrace(Style, InitialToken))
2651       addUnwrappedLine();
2652 
2653     unsigned AddLevels =
2654         Style.NamespaceIndentation == FormatStyle::NI_All ||
2655                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2656                  DeclarationScopeStack.size() > 1)
2657             ? 1u
2658             : 0u;
2659     bool ManageWhitesmithsBraces =
2660         AddLevels == 0u &&
2661         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2662 
2663     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2664     // the whole block.
2665     if (ManageWhitesmithsBraces)
2666       ++Line->Level;
2667 
2668     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2669                /*MunchSemi=*/true,
2670                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2671 
2672     // Munch the semicolon after a namespace. This is more common than one would
2673     // think. Putting the semicolon into its own line is very ugly.
2674     if (FormatTok->is(tok::semi))
2675       nextToken();
2676 
2677     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2678 
2679     if (ManageWhitesmithsBraces)
2680       --Line->Level;
2681   }
2682   // FIXME: Add error handling.
2683 }
2684 
2685 void UnwrappedLineParser::parseNew() {
2686   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2687   nextToken();
2688 
2689   if (Style.isCSharp()) {
2690     do {
2691       if (FormatTok->is(tok::l_brace))
2692         parseBracedList();
2693 
2694       if (FormatTok->isOneOf(tok::semi, tok::comma))
2695         return;
2696 
2697       nextToken();
2698     } while (!eof());
2699   }
2700 
2701   if (Style.Language != FormatStyle::LK_Java)
2702     return;
2703 
2704   // In Java, we can parse everything up to the parens, which aren't optional.
2705   do {
2706     // There should not be a ;, { or } before the new's open paren.
2707     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2708       return;
2709 
2710     // Consume the parens.
2711     if (FormatTok->is(tok::l_paren)) {
2712       parseParens();
2713 
2714       // If there is a class body of an anonymous class, consume that as child.
2715       if (FormatTok->is(tok::l_brace))
2716         parseChildBlock();
2717       return;
2718     }
2719     nextToken();
2720   } while (!eof());
2721 }
2722 
2723 void UnwrappedLineParser::parseForOrWhileLoop() {
2724   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2725          "'for', 'while' or foreach macro expected");
2726   nextToken();
2727   // JS' for await ( ...
2728   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2729     nextToken();
2730   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2731     nextToken();
2732   if (FormatTok->is(tok::l_paren))
2733     parseParens();
2734 
2735   keepAncestorBraces();
2736 
2737   if (FormatTok->is(tok::l_brace)) {
2738     FormatToken *LeftBrace = FormatTok;
2739     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2740     parseBlock();
2741     if (Style.RemoveBracesLLVM) {
2742       assert(!NestedTooDeep.empty());
2743       if (!NestedTooDeep.back())
2744         markOptionalBraces(LeftBrace);
2745     }
2746     addUnwrappedLine();
2747   } else {
2748     parseUnbracedBody();
2749   }
2750 
2751   if (Style.RemoveBracesLLVM)
2752     NestedTooDeep.pop_back();
2753 }
2754 
2755 void UnwrappedLineParser::parseDoWhile() {
2756   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2757   nextToken();
2758 
2759   keepAncestorBraces();
2760 
2761   if (FormatTok->is(tok::l_brace)) {
2762     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2763     parseBlock();
2764     if (Style.BraceWrapping.BeforeWhile)
2765       addUnwrappedLine();
2766   } else {
2767     parseUnbracedBody();
2768   }
2769 
2770   if (Style.RemoveBracesLLVM)
2771     NestedTooDeep.pop_back();
2772 
2773   // FIXME: Add error handling.
2774   if (!FormatTok->is(tok::kw_while)) {
2775     addUnwrappedLine();
2776     return;
2777   }
2778 
2779   // If in Whitesmiths mode, the line with the while() needs to be indented
2780   // to the same level as the block.
2781   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2782     ++Line->Level;
2783 
2784   nextToken();
2785   parseStructuralElement();
2786 }
2787 
2788 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2789   nextToken();
2790   unsigned OldLineLevel = Line->Level;
2791   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2792     --Line->Level;
2793   if (LeftAlignLabel)
2794     Line->Level = 0;
2795 
2796   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2797       FormatTok->is(tok::l_brace)) {
2798 
2799     CompoundStatementIndenter Indenter(this, Line->Level,
2800                                        Style.BraceWrapping.AfterCaseLabel,
2801                                        Style.BraceWrapping.IndentBraces);
2802     parseBlock();
2803     if (FormatTok->is(tok::kw_break)) {
2804       if (Style.BraceWrapping.AfterControlStatement ==
2805           FormatStyle::BWACS_Always) {
2806         addUnwrappedLine();
2807         if (!Style.IndentCaseBlocks &&
2808             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2809           ++Line->Level;
2810       }
2811       parseStructuralElement();
2812     }
2813     addUnwrappedLine();
2814   } else {
2815     if (FormatTok->is(tok::semi))
2816       nextToken();
2817     addUnwrappedLine();
2818   }
2819   Line->Level = OldLineLevel;
2820   if (FormatTok->isNot(tok::l_brace)) {
2821     parseStructuralElement();
2822     addUnwrappedLine();
2823   }
2824 }
2825 
2826 void UnwrappedLineParser::parseCaseLabel() {
2827   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2828 
2829   // FIXME: fix handling of complex expressions here.
2830   do {
2831     nextToken();
2832   } while (!eof() && !FormatTok->is(tok::colon));
2833   parseLabel();
2834 }
2835 
2836 void UnwrappedLineParser::parseSwitch() {
2837   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
2838   nextToken();
2839   if (FormatTok->is(tok::l_paren))
2840     parseParens();
2841 
2842   keepAncestorBraces();
2843 
2844   if (FormatTok->is(tok::l_brace)) {
2845     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2846     parseBlock();
2847     addUnwrappedLine();
2848   } else {
2849     addUnwrappedLine();
2850     ++Line->Level;
2851     parseStructuralElement();
2852     --Line->Level;
2853   }
2854 
2855   if (Style.RemoveBracesLLVM)
2856     NestedTooDeep.pop_back();
2857 }
2858 
2859 // Operators that can follow a C variable.
2860 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
2861   switch (kind) {
2862   case tok::ampamp:
2863   case tok::ampequal:
2864   case tok::arrow:
2865   case tok::caret:
2866   case tok::caretequal:
2867   case tok::comma:
2868   case tok::ellipsis:
2869   case tok::equal:
2870   case tok::equalequal:
2871   case tok::exclaim:
2872   case tok::exclaimequal:
2873   case tok::greater:
2874   case tok::greaterequal:
2875   case tok::greatergreater:
2876   case tok::greatergreaterequal:
2877   case tok::l_paren:
2878   case tok::l_square:
2879   case tok::less:
2880   case tok::lessequal:
2881   case tok::lessless:
2882   case tok::lesslessequal:
2883   case tok::minus:
2884   case tok::minusequal:
2885   case tok::minusminus:
2886   case tok::percent:
2887   case tok::percentequal:
2888   case tok::period:
2889   case tok::pipe:
2890   case tok::pipeequal:
2891   case tok::pipepipe:
2892   case tok::plus:
2893   case tok::plusequal:
2894   case tok::plusplus:
2895   case tok::question:
2896   case tok::r_brace:
2897   case tok::r_paren:
2898   case tok::r_square:
2899   case tok::semi:
2900   case tok::slash:
2901   case tok::slashequal:
2902   case tok::star:
2903   case tok::starequal:
2904     return true;
2905   default:
2906     return false;
2907   }
2908 }
2909 
2910 void UnwrappedLineParser::parseAccessSpecifier() {
2911   FormatToken *AccessSpecifierCandidate = FormatTok;
2912   nextToken();
2913   // Understand Qt's slots.
2914   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2915     nextToken();
2916   // Otherwise, we don't know what it is, and we'd better keep the next token.
2917   if (FormatTok->is(tok::colon)) {
2918     nextToken();
2919     addUnwrappedLine();
2920   } else if (!FormatTok->is(tok::coloncolon) &&
2921              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
2922     // Not a variable name nor namespace name.
2923     addUnwrappedLine();
2924   } else if (AccessSpecifierCandidate) {
2925     // Consider the access specifier to be a C identifier.
2926     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2927   }
2928 }
2929 
2930 /// \brief Parses a concept definition.
2931 /// \pre The current token has to be the concept keyword.
2932 ///
2933 /// Returns if either the concept has been completely parsed, or if it detects
2934 /// that the concept definition is incorrect.
2935 void UnwrappedLineParser::parseConcept() {
2936   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
2937   nextToken();
2938   if (!FormatTok->is(tok::identifier))
2939     return;
2940   nextToken();
2941   if (!FormatTok->is(tok::equal))
2942     return;
2943   nextToken();
2944   parseConstraintExpression();
2945   if (FormatTok->is(tok::semi))
2946     nextToken();
2947   addUnwrappedLine();
2948 }
2949 
2950 /// \brief Parses a requires, decides if it is a clause or an expression.
2951 /// \pre The current token has to be the requires keyword.
2952 /// \returns true if it parsed a clause.
2953 bool clang::format::UnwrappedLineParser::parseRequires() {
2954   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
2955   auto RequiresToken = FormatTok;
2956 
2957   // We try to guess if it is a requires clause, or a requires expression. For
2958   // that we first consume the keyword and check the next token.
2959   nextToken();
2960 
2961   switch (FormatTok->Tok.getKind()) {
2962   case tok::l_brace:
2963     // This can only be an expression, never a clause.
2964     parseRequiresExpression(RequiresToken);
2965     return false;
2966   case tok::l_paren:
2967     // Clauses and expression can start with a paren, it's unclear what we have.
2968     break;
2969   default:
2970     // All other tokens can only be a clause.
2971     parseRequiresClause(RequiresToken);
2972     return true;
2973   }
2974 
2975   // Looking forward we would have to decide if there are function declaration
2976   // like arguments to the requires expression:
2977   // requires (T t) {
2978   // Or there is a constraint expression for the requires clause:
2979   // requires (C<T> && ...
2980 
2981   // But first let's look behind.
2982   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
2983 
2984   if (!PreviousNonComment ||
2985       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
2986     // If there is no token, or an expression left brace, we are a requires
2987     // clause within a requires expression.
2988     parseRequiresClause(RequiresToken);
2989     return true;
2990   }
2991 
2992   switch (PreviousNonComment->Tok.getKind()) {
2993   case tok::greater:
2994   case tok::r_paren:
2995   case tok::kw_noexcept:
2996   case tok::kw_const:
2997     // This is a requires clause.
2998     parseRequiresClause(RequiresToken);
2999     return true;
3000   case tok::amp:
3001   case tok::ampamp: {
3002     // This can be either:
3003     // if (... && requires (T t) ...)
3004     // Or
3005     // void member(...) && requires (C<T> ...
3006     // We check the one token before that for a const:
3007     // void member(...) const && requires (C<T> ...
3008     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3009     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3010       parseRequiresClause(RequiresToken);
3011       return true;
3012     }
3013     break;
3014   }
3015   default:
3016     // It's an expression.
3017     parseRequiresExpression(RequiresToken);
3018     return false;
3019   }
3020 
3021   // Now we look forward and try to check if the paren content is a parameter
3022   // list. The parameters can be cv-qualified and contain references or
3023   // pointers.
3024   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3025   // of stuff: typename, const, *, &, &&, ::, identifiers.
3026 
3027   int NextTokenOffset = 1;
3028   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3029   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3030     ++NextTokenOffset;
3031     NextToken = Tokens->peekNextToken(NextTokenOffset);
3032   };
3033 
3034   bool FoundType = false;
3035   bool LastWasColonColon = false;
3036   int OpenAngles = 0;
3037 
3038   for (; NextTokenOffset < 50; PeekNext()) {
3039     switch (NextToken->Tok.getKind()) {
3040     case tok::kw_volatile:
3041     case tok::kw_const:
3042     case tok::comma:
3043       parseRequiresExpression(RequiresToken);
3044       return false;
3045     case tok::r_paren:
3046     case tok::pipepipe:
3047       parseRequiresClause(RequiresToken);
3048       return true;
3049     case tok::eof:
3050       // Break out of the loop.
3051       NextTokenOffset = 50;
3052       break;
3053     case tok::coloncolon:
3054       LastWasColonColon = true;
3055       break;
3056     case tok::identifier:
3057       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3058         parseRequiresExpression(RequiresToken);
3059         return false;
3060       }
3061       FoundType = true;
3062       LastWasColonColon = false;
3063       break;
3064     case tok::less:
3065       ++OpenAngles;
3066       break;
3067     case tok::greater:
3068       --OpenAngles;
3069       break;
3070     default:
3071       if (NextToken->isSimpleTypeSpecifier()) {
3072         parseRequiresExpression(RequiresToken);
3073         return false;
3074       }
3075       break;
3076     }
3077   }
3078 
3079   // This seems to be a complicated expression, just assume it's a clause.
3080   parseRequiresClause(RequiresToken);
3081   return true;
3082 }
3083 
3084 /// \brief Parses a requires clause.
3085 /// \param RequiresToken The requires keyword token, which starts this clause.
3086 /// \pre We need to be on the next token after the requires keyword.
3087 /// \sa parseRequiresExpression
3088 ///
3089 /// Returns if it either has finished parsing the clause, or it detects, that
3090 /// the clause is incorrect.
3091 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3092   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3093   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3094 
3095   // If there is no previous token, we are within a requires expression,
3096   // otherwise we will always have the template or function declaration in front
3097   // of it.
3098   bool InRequiresExpression =
3099       !RequiresToken->Previous ||
3100       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3101 
3102   RequiresToken->setFinalizedType(InRequiresExpression
3103                                       ? TT_RequiresClauseInARequiresExpression
3104                                       : TT_RequiresClause);
3105 
3106   parseConstraintExpression();
3107 
3108   if (!InRequiresExpression)
3109     FormatTok->Previous->ClosesRequiresClause = true;
3110 }
3111 
3112 /// \brief Parses a requires expression.
3113 /// \param RequiresToken The requires keyword token, which starts this clause.
3114 /// \pre We need to be on the next token after the requires keyword.
3115 /// \sa parseRequiresClause
3116 ///
3117 /// Returns if it either has finished parsing the expression, or it detects,
3118 /// that the expression is incorrect.
3119 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3120   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3121   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3122 
3123   RequiresToken->setFinalizedType(TT_RequiresExpression);
3124 
3125   if (FormatTok->is(tok::l_paren)) {
3126     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3127     parseParens();
3128   }
3129 
3130   if (FormatTok->is(tok::l_brace)) {
3131     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3132     parseChildBlock(/*CanContainBracedList=*/false,
3133                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3134   }
3135 }
3136 
3137 /// \brief Parses a constraint expression.
3138 ///
3139 /// This is either the definition of a concept, or the body of a requires
3140 /// clause. It returns, when the parsing is complete, or the expression is
3141 /// incorrect.
3142 void UnwrappedLineParser::parseConstraintExpression() {
3143   // The special handling for lambdas is needed since tryToParseLambda() eats a
3144   // token and if a requires expression is the last part of a requires clause
3145   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3146   // not set on the correct token. Thus we need to be aware if we even expect a
3147   // lambda to be possible.
3148   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3149   bool LambdaNextTimeAllowed = true;
3150   do {
3151     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3152 
3153     switch (FormatTok->Tok.getKind()) {
3154     case tok::kw_requires: {
3155       auto RequiresToken = FormatTok;
3156       nextToken();
3157       parseRequiresExpression(RequiresToken);
3158       break;
3159     }
3160 
3161     case tok::l_paren:
3162       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3163       break;
3164 
3165     case tok::l_square:
3166       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3167         return;
3168       break;
3169 
3170     case tok::kw_const:
3171     case tok::semi:
3172     case tok::kw_class:
3173     case tok::kw_struct:
3174     case tok::kw_union:
3175       return;
3176 
3177     case tok::l_brace:
3178       // Potential function body.
3179       return;
3180 
3181     case tok::ampamp:
3182     case tok::pipepipe:
3183       FormatTok->setFinalizedType(TT_BinaryOperator);
3184       nextToken();
3185       LambdaNextTimeAllowed = true;
3186       break;
3187 
3188     case tok::comma:
3189     case tok::comment:
3190       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3191       nextToken();
3192       break;
3193 
3194     case tok::kw_sizeof:
3195     case tok::greater:
3196     case tok::greaterequal:
3197     case tok::greatergreater:
3198     case tok::less:
3199     case tok::lessequal:
3200     case tok::lessless:
3201     case tok::equalequal:
3202     case tok::exclaim:
3203     case tok::exclaimequal:
3204     case tok::plus:
3205     case tok::minus:
3206     case tok::star:
3207     case tok::slash:
3208     case tok::kw_decltype:
3209       LambdaNextTimeAllowed = true;
3210       // Just eat them.
3211       nextToken();
3212       break;
3213 
3214     case tok::numeric_constant:
3215     case tok::coloncolon:
3216     case tok::kw_true:
3217     case tok::kw_false:
3218       // Just eat them.
3219       nextToken();
3220       break;
3221 
3222     case tok::kw_static_cast:
3223     case tok::kw_const_cast:
3224     case tok::kw_reinterpret_cast:
3225     case tok::kw_dynamic_cast:
3226       nextToken();
3227       if (!FormatTok->is(tok::less))
3228         return;
3229 
3230       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3231                       /*ClosingBraceKind=*/tok::greater);
3232       break;
3233 
3234     case tok::kw_bool:
3235       // bool is only allowed if it is directly followed by a paren for a cast:
3236       // concept C = bool(...);
3237       // and bool is the only type, all other types as cast must be inside a
3238       // cast to bool an thus are handled by the other cases.
3239       nextToken();
3240       if (FormatTok->isNot(tok::l_paren))
3241         return;
3242       parseParens();
3243       break;
3244 
3245     default:
3246       if (!FormatTok->Tok.getIdentifierInfo()) {
3247         // Identifiers are part of the default case, we check for more then
3248         // tok::identifier to handle builtin type traits.
3249         return;
3250       }
3251 
3252       // We need to differentiate identifiers for a template deduction guide,
3253       // variables, or function return types (the constraint expression has
3254       // ended before that), and basically all other cases. But it's easier to
3255       // check the other way around.
3256       assert(FormatTok->Previous);
3257       switch (FormatTok->Previous->Tok.getKind()) {
3258       case tok::coloncolon:  // Nested identifier.
3259       case tok::ampamp:      // Start of a function or variable for the
3260       case tok::pipepipe:    // constraint expression.
3261       case tok::kw_requires: // Initial identifier of a requires clause.
3262       case tok::equal:       // Initial identifier of a concept declaration.
3263         break;
3264       default:
3265         return;
3266       }
3267 
3268       // Read identifier with optional template declaration.
3269       nextToken();
3270       if (FormatTok->is(tok::less))
3271         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3272                         /*ClosingBraceKind=*/tok::greater);
3273       break;
3274     }
3275   } while (!eof());
3276 }
3277 
3278 bool UnwrappedLineParser::parseEnum() {
3279   const FormatToken &InitialToken = *FormatTok;
3280 
3281   // Won't be 'enum' for NS_ENUMs.
3282   if (FormatTok->is(tok::kw_enum))
3283     nextToken();
3284 
3285   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3286   // declarations. An "enum" keyword followed by a colon would be a syntax
3287   // error and thus assume it is just an identifier.
3288   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3289     return false;
3290 
3291   // In protobuf, "enum" can be used as a field name.
3292   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3293     return false;
3294 
3295   // Eat up enum class ...
3296   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3297     nextToken();
3298 
3299   while (FormatTok->Tok.getIdentifierInfo() ||
3300          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3301                             tok::greater, tok::comma, tok::question)) {
3302     nextToken();
3303     // We can have macros or attributes in between 'enum' and the enum name.
3304     if (FormatTok->is(tok::l_paren))
3305       parseParens();
3306     if (FormatTok->is(tok::identifier)) {
3307       nextToken();
3308       // If there are two identifiers in a row, this is likely an elaborate
3309       // return type. In Java, this can be "implements", etc.
3310       if (Style.isCpp() && FormatTok->is(tok::identifier))
3311         return false;
3312     }
3313   }
3314 
3315   // Just a declaration or something is wrong.
3316   if (FormatTok->isNot(tok::l_brace))
3317     return true;
3318   FormatTok->setFinalizedType(TT_EnumLBrace);
3319   FormatTok->setBlockKind(BK_Block);
3320 
3321   if (Style.Language == FormatStyle::LK_Java) {
3322     // Java enums are different.
3323     parseJavaEnumBody();
3324     return true;
3325   }
3326   if (Style.Language == FormatStyle::LK_Proto) {
3327     parseBlock(/*MustBeDeclaration=*/true);
3328     return true;
3329   }
3330 
3331   if (!Style.AllowShortEnumsOnASingleLine &&
3332       ShouldBreakBeforeBrace(Style, InitialToken))
3333     addUnwrappedLine();
3334   // Parse enum body.
3335   nextToken();
3336   if (!Style.AllowShortEnumsOnASingleLine) {
3337     addUnwrappedLine();
3338     Line->Level += 1;
3339   }
3340   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3341                                    /*IsEnum=*/true);
3342   if (!Style.AllowShortEnumsOnASingleLine)
3343     Line->Level -= 1;
3344   if (HasError) {
3345     if (FormatTok->is(tok::semi))
3346       nextToken();
3347     addUnwrappedLine();
3348   }
3349   return true;
3350 
3351   // There is no addUnwrappedLine() here so that we fall through to parsing a
3352   // structural element afterwards. Thus, in "enum A {} n, m;",
3353   // "} n, m;" will end up in one unwrapped line.
3354 }
3355 
3356 bool UnwrappedLineParser::parseStructLike() {
3357   // parseRecord falls through and does not yet add an unwrapped line as a
3358   // record declaration or definition can start a structural element.
3359   parseRecord();
3360   // This does not apply to Java, JavaScript and C#.
3361   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3362       Style.isCSharp()) {
3363     if (FormatTok->is(tok::semi))
3364       nextToken();
3365     addUnwrappedLine();
3366     return true;
3367   }
3368   return false;
3369 }
3370 
3371 namespace {
3372 // A class used to set and restore the Token position when peeking
3373 // ahead in the token source.
3374 class ScopedTokenPosition {
3375   unsigned StoredPosition;
3376   FormatTokenSource *Tokens;
3377 
3378 public:
3379   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3380     assert(Tokens && "Tokens expected to not be null");
3381     StoredPosition = Tokens->getPosition();
3382   }
3383 
3384   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3385 };
3386 } // namespace
3387 
3388 // Look to see if we have [[ by looking ahead, if
3389 // its not then rewind to the original position.
3390 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3391   ScopedTokenPosition AutoPosition(Tokens);
3392   FormatToken *Tok = Tokens->getNextToken();
3393   // We already read the first [ check for the second.
3394   if (!Tok->is(tok::l_square))
3395     return false;
3396   // Double check that the attribute is just something
3397   // fairly simple.
3398   while (Tok->isNot(tok::eof)) {
3399     if (Tok->is(tok::r_square))
3400       break;
3401     Tok = Tokens->getNextToken();
3402   }
3403   if (Tok->is(tok::eof))
3404     return false;
3405   Tok = Tokens->getNextToken();
3406   if (!Tok->is(tok::r_square))
3407     return false;
3408   Tok = Tokens->getNextToken();
3409   if (Tok->is(tok::semi))
3410     return false;
3411   return true;
3412 }
3413 
3414 void UnwrappedLineParser::parseJavaEnumBody() {
3415   // Determine whether the enum is simple, i.e. does not have a semicolon or
3416   // constants with class bodies. Simple enums can be formatted like braced
3417   // lists, contracted to a single line, etc.
3418   unsigned StoredPosition = Tokens->getPosition();
3419   bool IsSimple = true;
3420   FormatToken *Tok = Tokens->getNextToken();
3421   while (!Tok->is(tok::eof)) {
3422     if (Tok->is(tok::r_brace))
3423       break;
3424     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3425       IsSimple = false;
3426       break;
3427     }
3428     // FIXME: This will also mark enums with braces in the arguments to enum
3429     // constants as "not simple". This is probably fine in practice, though.
3430     Tok = Tokens->getNextToken();
3431   }
3432   FormatTok = Tokens->setPosition(StoredPosition);
3433 
3434   if (IsSimple) {
3435     nextToken();
3436     parseBracedList();
3437     addUnwrappedLine();
3438     return;
3439   }
3440 
3441   // Parse the body of a more complex enum.
3442   // First add a line for everything up to the "{".
3443   nextToken();
3444   addUnwrappedLine();
3445   ++Line->Level;
3446 
3447   // Parse the enum constants.
3448   while (FormatTok) {
3449     if (FormatTok->is(tok::l_brace)) {
3450       // Parse the constant's class body.
3451       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3452                  /*MunchSemi=*/false);
3453     } else if (FormatTok->is(tok::l_paren)) {
3454       parseParens();
3455     } else if (FormatTok->is(tok::comma)) {
3456       nextToken();
3457       addUnwrappedLine();
3458     } else if (FormatTok->is(tok::semi)) {
3459       nextToken();
3460       addUnwrappedLine();
3461       break;
3462     } else if (FormatTok->is(tok::r_brace)) {
3463       addUnwrappedLine();
3464       break;
3465     } else {
3466       nextToken();
3467     }
3468   }
3469 
3470   // Parse the class body after the enum's ";" if any.
3471   parseLevel(/*HasOpeningBrace=*/true, /*CanContainBracedList=*/true);
3472   nextToken();
3473   --Line->Level;
3474   addUnwrappedLine();
3475 }
3476 
3477 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3478   const FormatToken &InitialToken = *FormatTok;
3479   nextToken();
3480 
3481   // The actual identifier can be a nested name specifier, and in macros
3482   // it is often token-pasted.
3483   // An [[attribute]] can be before the identifier.
3484   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3485                             tok::kw___attribute, tok::kw___declspec,
3486                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3487          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3488           FormatTok->isOneOf(tok::period, tok::comma))) {
3489     if (Style.isJavaScript() &&
3490         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3491       // JavaScript/TypeScript supports inline object types in
3492       // extends/implements positions:
3493       //     class Foo implements {bar: number} { }
3494       nextToken();
3495       if (FormatTok->is(tok::l_brace)) {
3496         tryToParseBracedList();
3497         continue;
3498       }
3499     }
3500     bool IsNonMacroIdentifier =
3501         FormatTok->is(tok::identifier) &&
3502         FormatTok->TokenText != FormatTok->TokenText.upper();
3503     nextToken();
3504     // We can have macros or attributes in between 'class' and the class name.
3505     if (!IsNonMacroIdentifier) {
3506       if (FormatTok->is(tok::l_paren)) {
3507         parseParens();
3508       } else if (FormatTok->is(TT_AttributeSquare)) {
3509         parseSquare();
3510         // Consume the closing TT_AttributeSquare.
3511         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3512           nextToken();
3513       }
3514     }
3515   }
3516 
3517   // Note that parsing away template declarations here leads to incorrectly
3518   // accepting function declarations as record declarations.
3519   // In general, we cannot solve this problem. Consider:
3520   // class A<int> B() {}
3521   // which can be a function definition or a class definition when B() is a
3522   // macro. If we find enough real-world cases where this is a problem, we
3523   // can parse for the 'template' keyword in the beginning of the statement,
3524   // and thus rule out the record production in case there is no template
3525   // (this would still leave us with an ambiguity between template function
3526   // and class declarations).
3527   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3528     do {
3529       if (FormatTok->is(tok::l_brace)) {
3530         calculateBraceTypes(/*ExpectClassBody=*/true);
3531         if (!tryToParseBracedList())
3532           break;
3533       }
3534       if (FormatTok->is(tok::l_square)) {
3535         FormatToken *Previous = FormatTok->Previous;
3536         if (!Previous ||
3537             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3538           // Don't try parsing a lambda if we had a closing parenthesis before,
3539           // it was probably a pointer to an array: int (*)[].
3540           if (!tryToParseLambda())
3541             continue;
3542         } else {
3543           parseSquare();
3544           continue;
3545         }
3546       }
3547       if (FormatTok->is(tok::semi))
3548         return;
3549       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3550         addUnwrappedLine();
3551         nextToken();
3552         parseCSharpGenericTypeConstraint();
3553         break;
3554       }
3555       nextToken();
3556     } while (!eof());
3557   }
3558 
3559   auto GetBraceType = [](const FormatToken &RecordTok) {
3560     switch (RecordTok.Tok.getKind()) {
3561     case tok::kw_class:
3562       return TT_ClassLBrace;
3563     case tok::kw_struct:
3564       return TT_StructLBrace;
3565     case tok::kw_union:
3566       return TT_UnionLBrace;
3567     default:
3568       // Useful for e.g. interface.
3569       return TT_RecordLBrace;
3570     }
3571   };
3572   if (FormatTok->is(tok::l_brace)) {
3573     FormatTok->setFinalizedType(GetBraceType(InitialToken));
3574     if (ParseAsExpr) {
3575       parseChildBlock();
3576     } else {
3577       if (ShouldBreakBeforeBrace(Style, InitialToken))
3578         addUnwrappedLine();
3579 
3580       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3581       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3582     }
3583   }
3584   // There is no addUnwrappedLine() here so that we fall through to parsing a
3585   // structural element afterwards. Thus, in "class A {} n, m;",
3586   // "} n, m;" will end up in one unwrapped line.
3587 }
3588 
3589 void UnwrappedLineParser::parseObjCMethod() {
3590   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3591          "'(' or identifier expected.");
3592   do {
3593     if (FormatTok->is(tok::semi)) {
3594       nextToken();
3595       addUnwrappedLine();
3596       return;
3597     } else if (FormatTok->is(tok::l_brace)) {
3598       if (Style.BraceWrapping.AfterFunction)
3599         addUnwrappedLine();
3600       parseBlock();
3601       addUnwrappedLine();
3602       return;
3603     } else {
3604       nextToken();
3605     }
3606   } while (!eof());
3607 }
3608 
3609 void UnwrappedLineParser::parseObjCProtocolList() {
3610   assert(FormatTok->is(tok::less) && "'<' expected.");
3611   do {
3612     nextToken();
3613     // Early exit in case someone forgot a close angle.
3614     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3615         FormatTok->isObjCAtKeyword(tok::objc_end))
3616       return;
3617   } while (!eof() && FormatTok->isNot(tok::greater));
3618   nextToken(); // Skip '>'.
3619 }
3620 
3621 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3622   do {
3623     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3624       nextToken();
3625       addUnwrappedLine();
3626       break;
3627     }
3628     if (FormatTok->is(tok::l_brace)) {
3629       parseBlock();
3630       // In ObjC interfaces, nothing should be following the "}".
3631       addUnwrappedLine();
3632     } else if (FormatTok->is(tok::r_brace)) {
3633       // Ignore stray "}". parseStructuralElement doesn't consume them.
3634       nextToken();
3635       addUnwrappedLine();
3636     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3637       nextToken();
3638       parseObjCMethod();
3639     } else {
3640       parseStructuralElement();
3641     }
3642   } while (!eof());
3643 }
3644 
3645 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3646   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3647          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3648   nextToken();
3649   nextToken(); // interface name
3650 
3651   // @interface can be followed by a lightweight generic
3652   // specialization list, then either a base class or a category.
3653   if (FormatTok->is(tok::less))
3654     parseObjCLightweightGenerics();
3655   if (FormatTok->is(tok::colon)) {
3656     nextToken();
3657     nextToken(); // base class name
3658     // The base class can also have lightweight generics applied to it.
3659     if (FormatTok->is(tok::less))
3660       parseObjCLightweightGenerics();
3661   } else if (FormatTok->is(tok::l_paren))
3662     // Skip category, if present.
3663     parseParens();
3664 
3665   if (FormatTok->is(tok::less))
3666     parseObjCProtocolList();
3667 
3668   if (FormatTok->is(tok::l_brace)) {
3669     if (Style.BraceWrapping.AfterObjCDeclaration)
3670       addUnwrappedLine();
3671     parseBlock(/*MustBeDeclaration=*/true);
3672   }
3673 
3674   // With instance variables, this puts '}' on its own line.  Without instance
3675   // variables, this ends the @interface line.
3676   addUnwrappedLine();
3677 
3678   parseObjCUntilAtEnd();
3679 }
3680 
3681 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3682   assert(FormatTok->is(tok::less));
3683   // Unlike protocol lists, generic parameterizations support
3684   // nested angles:
3685   //
3686   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3687   //     NSObject <NSCopying, NSSecureCoding>
3688   //
3689   // so we need to count how many open angles we have left.
3690   unsigned NumOpenAngles = 1;
3691   do {
3692     nextToken();
3693     // Early exit in case someone forgot a close angle.
3694     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3695         FormatTok->isObjCAtKeyword(tok::objc_end))
3696       break;
3697     if (FormatTok->is(tok::less))
3698       ++NumOpenAngles;
3699     else if (FormatTok->is(tok::greater)) {
3700       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3701       --NumOpenAngles;
3702     }
3703   } while (!eof() && NumOpenAngles != 0);
3704   nextToken(); // Skip '>'.
3705 }
3706 
3707 // Returns true for the declaration/definition form of @protocol,
3708 // false for the expression form.
3709 bool UnwrappedLineParser::parseObjCProtocol() {
3710   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3711   nextToken();
3712 
3713   if (FormatTok->is(tok::l_paren))
3714     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3715     return false;
3716 
3717   // The definition/declaration form,
3718   // @protocol Foo
3719   // - (int)someMethod;
3720   // @end
3721 
3722   nextToken(); // protocol name
3723 
3724   if (FormatTok->is(tok::less))
3725     parseObjCProtocolList();
3726 
3727   // Check for protocol declaration.
3728   if (FormatTok->is(tok::semi)) {
3729     nextToken();
3730     addUnwrappedLine();
3731     return true;
3732   }
3733 
3734   addUnwrappedLine();
3735   parseObjCUntilAtEnd();
3736   return true;
3737 }
3738 
3739 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3740   bool IsImport = FormatTok->is(Keywords.kw_import);
3741   assert(IsImport || FormatTok->is(tok::kw_export));
3742   nextToken();
3743 
3744   // Consume the "default" in "export default class/function".
3745   if (FormatTok->is(tok::kw_default))
3746     nextToken();
3747 
3748   // Consume "async function", "function" and "default function", so that these
3749   // get parsed as free-standing JS functions, i.e. do not require a trailing
3750   // semicolon.
3751   if (FormatTok->is(Keywords.kw_async))
3752     nextToken();
3753   if (FormatTok->is(Keywords.kw_function)) {
3754     nextToken();
3755     return;
3756   }
3757 
3758   // For imports, `export *`, `export {...}`, consume the rest of the line up
3759   // to the terminating `;`. For everything else, just return and continue
3760   // parsing the structural element, i.e. the declaration or expression for
3761   // `export default`.
3762   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3763       !FormatTok->isStringLiteral())
3764     return;
3765 
3766   while (!eof()) {
3767     if (FormatTok->is(tok::semi))
3768       return;
3769     if (Line->Tokens.empty()) {
3770       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3771       // import statement should terminate.
3772       return;
3773     }
3774     if (FormatTok->is(tok::l_brace)) {
3775       FormatTok->setBlockKind(BK_Block);
3776       nextToken();
3777       parseBracedList();
3778     } else {
3779       nextToken();
3780     }
3781   }
3782 }
3783 
3784 void UnwrappedLineParser::parseStatementMacro() {
3785   nextToken();
3786   if (FormatTok->is(tok::l_paren))
3787     parseParens();
3788   if (FormatTok->is(tok::semi))
3789     nextToken();
3790   addUnwrappedLine();
3791 }
3792 
3793 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3794                                                  StringRef Prefix = "") {
3795   llvm::dbgs() << Prefix << "Line(" << Line.Level
3796                << ", FSC=" << Line.FirstStartColumn << ")"
3797                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3798   for (const auto &Node : Line.Tokens) {
3799     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3800                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3801                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3802   }
3803   for (const auto &Node : Line.Tokens)
3804     for (const auto &ChildNode : Node.Children)
3805       printDebugInfo(ChildNode, "\nChild: ");
3806 
3807   llvm::dbgs() << "\n";
3808 }
3809 
3810 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3811   if (Line->Tokens.empty())
3812     return;
3813   LLVM_DEBUG({
3814     if (CurrentLines == &Lines)
3815       printDebugInfo(*Line);
3816   });
3817 
3818   // If this line closes a block when in Whitesmiths mode, remember that
3819   // information so that the level can be decreased after the line is added.
3820   // This has to happen after the addition of the line since the line itself
3821   // needs to be indented.
3822   bool ClosesWhitesmithsBlock =
3823       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3824       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3825 
3826   CurrentLines->push_back(std::move(*Line));
3827   Line->Tokens.clear();
3828   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3829   Line->FirstStartColumn = 0;
3830 
3831   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3832     --Line->Level;
3833   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3834     CurrentLines->append(
3835         std::make_move_iterator(PreprocessorDirectives.begin()),
3836         std::make_move_iterator(PreprocessorDirectives.end()));
3837     PreprocessorDirectives.clear();
3838   }
3839   // Disconnect the current token from the last token on the previous line.
3840   FormatTok->Previous = nullptr;
3841 }
3842 
3843 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
3844 
3845 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3846   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3847          FormatTok.NewlinesBefore > 0;
3848 }
3849 
3850 // Checks if \p FormatTok is a line comment that continues the line comment
3851 // section on \p Line.
3852 static bool
3853 continuesLineCommentSection(const FormatToken &FormatTok,
3854                             const UnwrappedLine &Line,
3855                             const llvm::Regex &CommentPragmasRegex) {
3856   if (Line.Tokens.empty())
3857     return false;
3858 
3859   StringRef IndentContent = FormatTok.TokenText;
3860   if (FormatTok.TokenText.startswith("//") ||
3861       FormatTok.TokenText.startswith("/*"))
3862     IndentContent = FormatTok.TokenText.substr(2);
3863   if (CommentPragmasRegex.match(IndentContent))
3864     return false;
3865 
3866   // If Line starts with a line comment, then FormatTok continues the comment
3867   // section if its original column is greater or equal to the original start
3868   // column of the line.
3869   //
3870   // Define the min column token of a line as follows: if a line ends in '{' or
3871   // contains a '{' followed by a line comment, then the min column token is
3872   // that '{'. Otherwise, the min column token of the line is the first token of
3873   // the line.
3874   //
3875   // If Line starts with a token other than a line comment, then FormatTok
3876   // continues the comment section if its original column is greater than the
3877   // original start column of the min column token of the line.
3878   //
3879   // For example, the second line comment continues the first in these cases:
3880   //
3881   // // first line
3882   // // second line
3883   //
3884   // and:
3885   //
3886   // // first line
3887   //  // second line
3888   //
3889   // and:
3890   //
3891   // int i; // first line
3892   //  // second line
3893   //
3894   // and:
3895   //
3896   // do { // first line
3897   //      // second line
3898   //   int i;
3899   // } while (true);
3900   //
3901   // and:
3902   //
3903   // enum {
3904   //   a, // first line
3905   //    // second line
3906   //   b
3907   // };
3908   //
3909   // The second line comment doesn't continue the first in these cases:
3910   //
3911   //   // first line
3912   //  // second line
3913   //
3914   // and:
3915   //
3916   // int i; // first line
3917   // // second line
3918   //
3919   // and:
3920   //
3921   // do { // first line
3922   //   // second line
3923   //   int i;
3924   // } while (true);
3925   //
3926   // and:
3927   //
3928   // enum {
3929   //   a, // first line
3930   //   // second line
3931   // };
3932   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3933 
3934   // Scan for '{//'. If found, use the column of '{' as a min column for line
3935   // comment section continuation.
3936   const FormatToken *PreviousToken = nullptr;
3937   for (const UnwrappedLineNode &Node : Line.Tokens) {
3938     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3939         isLineComment(*Node.Tok)) {
3940       MinColumnToken = PreviousToken;
3941       break;
3942     }
3943     PreviousToken = Node.Tok;
3944 
3945     // Grab the last newline preceding a token in this unwrapped line.
3946     if (Node.Tok->NewlinesBefore > 0)
3947       MinColumnToken = Node.Tok;
3948   }
3949   if (PreviousToken && PreviousToken->is(tok::l_brace))
3950     MinColumnToken = PreviousToken;
3951 
3952   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3953                               MinColumnToken);
3954 }
3955 
3956 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3957   bool JustComments = Line->Tokens.empty();
3958   for (FormatToken *Tok : CommentsBeforeNextToken) {
3959     // Line comments that belong to the same line comment section are put on the
3960     // same line since later we might want to reflow content between them.
3961     // Additional fine-grained breaking of line comment sections is controlled
3962     // by the class BreakableLineCommentSection in case it is desirable to keep
3963     // several line comment sections in the same unwrapped line.
3964     //
3965     // FIXME: Consider putting separate line comment sections as children to the
3966     // unwrapped line instead.
3967     Tok->ContinuesLineCommentSection =
3968         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3969     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3970       addUnwrappedLine();
3971     pushToken(Tok);
3972   }
3973   if (NewlineBeforeNext && JustComments)
3974     addUnwrappedLine();
3975   CommentsBeforeNextToken.clear();
3976 }
3977 
3978 void UnwrappedLineParser::nextToken(int LevelDifference) {
3979   if (eof())
3980     return;
3981   flushComments(isOnNewLine(*FormatTok));
3982   pushToken(FormatTok);
3983   FormatToken *Previous = FormatTok;
3984   if (!Style.isJavaScript())
3985     readToken(LevelDifference);
3986   else
3987     readTokenWithJavaScriptASI();
3988   FormatTok->Previous = Previous;
3989 }
3990 
3991 void UnwrappedLineParser::distributeComments(
3992     const SmallVectorImpl<FormatToken *> &Comments,
3993     const FormatToken *NextTok) {
3994   // Whether or not a line comment token continues a line is controlled by
3995   // the method continuesLineCommentSection, with the following caveat:
3996   //
3997   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3998   // that each comment line from the trail is aligned with the next token, if
3999   // the next token exists. If a trail exists, the beginning of the maximal
4000   // trail is marked as a start of a new comment section.
4001   //
4002   // For example in this code:
4003   //
4004   // int a; // line about a
4005   //   // line 1 about b
4006   //   // line 2 about b
4007   //   int b;
4008   //
4009   // the two lines about b form a maximal trail, so there are two sections, the
4010   // first one consisting of the single comment "// line about a" and the
4011   // second one consisting of the next two comments.
4012   if (Comments.empty())
4013     return;
4014   bool ShouldPushCommentsInCurrentLine = true;
4015   bool HasTrailAlignedWithNextToken = false;
4016   unsigned StartOfTrailAlignedWithNextToken = 0;
4017   if (NextTok) {
4018     // We are skipping the first element intentionally.
4019     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4020       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4021         HasTrailAlignedWithNextToken = true;
4022         StartOfTrailAlignedWithNextToken = i;
4023       }
4024     }
4025   }
4026   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4027     FormatToken *FormatTok = Comments[i];
4028     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4029       FormatTok->ContinuesLineCommentSection = false;
4030     } else {
4031       FormatTok->ContinuesLineCommentSection =
4032           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4033     }
4034     if (!FormatTok->ContinuesLineCommentSection &&
4035         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
4036       ShouldPushCommentsInCurrentLine = false;
4037     if (ShouldPushCommentsInCurrentLine)
4038       pushToken(FormatTok);
4039     else
4040       CommentsBeforeNextToken.push_back(FormatTok);
4041   }
4042 }
4043 
4044 void UnwrappedLineParser::readToken(int LevelDifference) {
4045   SmallVector<FormatToken *, 1> Comments;
4046   bool PreviousWasComment = false;
4047   bool FirstNonCommentOnLine = false;
4048   do {
4049     FormatTok = Tokens->getNextToken();
4050     assert(FormatTok);
4051     while (FormatTok->getType() == TT_ConflictStart ||
4052            FormatTok->getType() == TT_ConflictEnd ||
4053            FormatTok->getType() == TT_ConflictAlternative) {
4054       if (FormatTok->getType() == TT_ConflictStart)
4055         conditionalCompilationStart(/*Unreachable=*/false);
4056       else if (FormatTok->getType() == TT_ConflictAlternative)
4057         conditionalCompilationAlternative();
4058       else if (FormatTok->getType() == TT_ConflictEnd)
4059         conditionalCompilationEnd();
4060       FormatTok = Tokens->getNextToken();
4061       FormatTok->MustBreakBefore = true;
4062     }
4063 
4064     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4065                                       const FormatToken &Tok,
4066                                       bool PreviousWasComment) {
4067       auto IsFirstOnLine = [](const FormatToken &Tok) {
4068         return Tok.HasUnescapedNewline || Tok.IsFirst;
4069       };
4070 
4071       // Consider preprocessor directives preceded by block comments as first
4072       // on line.
4073       if (PreviousWasComment)
4074         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4075       return IsFirstOnLine(Tok);
4076     };
4077 
4078     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4079         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4080     PreviousWasComment = FormatTok->is(tok::comment);
4081 
4082     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4083            FirstNonCommentOnLine) {
4084       distributeComments(Comments, FormatTok);
4085       Comments.clear();
4086       // If there is an unfinished unwrapped line, we flush the preprocessor
4087       // directives only after that unwrapped line was finished later.
4088       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4089       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4090       assert((LevelDifference >= 0 ||
4091               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4092              "LevelDifference makes Line->Level negative");
4093       Line->Level += LevelDifference;
4094       // Comments stored before the preprocessor directive need to be output
4095       // before the preprocessor directive, at the same level as the
4096       // preprocessor directive, as we consider them to apply to the directive.
4097       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4098           PPBranchLevel > 0)
4099         Line->Level += PPBranchLevel;
4100       flushComments(isOnNewLine(*FormatTok));
4101       parsePPDirective();
4102       PreviousWasComment = FormatTok->is(tok::comment);
4103       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4104           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4105     }
4106 
4107     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4108         !Line->InPPDirective)
4109       continue;
4110 
4111     if (!FormatTok->is(tok::comment)) {
4112       distributeComments(Comments, FormatTok);
4113       Comments.clear();
4114       return;
4115     }
4116 
4117     Comments.push_back(FormatTok);
4118   } while (!eof());
4119 
4120   distributeComments(Comments, nullptr);
4121   Comments.clear();
4122 }
4123 
4124 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4125   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4126   if (MustBreakBeforeNextToken) {
4127     Line->Tokens.back().Tok->MustBreakBefore = true;
4128     MustBreakBeforeNextToken = false;
4129   }
4130 }
4131 
4132 } // end namespace format
4133 } // end namespace clang
4134