1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 #include <utility>
23 
24 #define DEBUG_TYPE "format-parser"
25 
26 namespace clang {
27 namespace format {
28 
29 class FormatTokenSource {
30 public:
31   virtual ~FormatTokenSource() {}
32 
33   // Returns the next token in the token stream.
34   virtual FormatToken *getNextToken() = 0;
35 
36   // Returns the token preceding the token returned by the last call to
37   // getNextToken() in the token stream, or nullptr if no such token exists.
38   virtual FormatToken *getPreviousToken() = 0;
39 
40   // Returns the token that would be returned by the next call to
41   // getNextToken().
42   virtual FormatToken *peekNextToken() = 0;
43 
44   // Returns the token that would be returned after the next N calls to
45   // getNextToken(). N needs to be greater than zero, and small enough that
46   // there are still tokens. Check for tok::eof with N-1 before calling it with
47   // N.
48   virtual FormatToken *peekNextToken(int N) = 0;
49 
50   // Returns whether we are at the end of the file.
51   // This can be different from whether getNextToken() returned an eof token
52   // when the FormatTokenSource is a view on a part of the token stream.
53   virtual bool isEOF() = 0;
54 
55   // Gets the current position in the token stream, to be used by setPosition().
56   virtual unsigned getPosition() = 0;
57 
58   // Resets the token stream to the state it was in when getPosition() returned
59   // Position, and return the token at that position in the stream.
60   virtual FormatToken *setPosition(unsigned Position) = 0;
61 };
62 
63 namespace {
64 
65 class ScopedDeclarationState {
66 public:
67   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
68                          bool MustBeDeclaration)
69       : Line(Line), Stack(Stack) {
70     Line.MustBeDeclaration = MustBeDeclaration;
71     Stack.push_back(MustBeDeclaration);
72   }
73   ~ScopedDeclarationState() {
74     Stack.pop_back();
75     if (!Stack.empty())
76       Line.MustBeDeclaration = Stack.back();
77     else
78       Line.MustBeDeclaration = true;
79   }
80 
81 private:
82   UnwrappedLine &Line;
83   llvm::BitVector &Stack;
84 };
85 
86 static bool isLineComment(const FormatToken &FormatTok) {
87   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
88 }
89 
90 // Checks if \p FormatTok is a line comment that continues the line comment
91 // \p Previous. The original column of \p MinColumnToken is used to determine
92 // whether \p FormatTok is indented enough to the right to continue \p Previous.
93 static bool continuesLineComment(const FormatToken &FormatTok,
94                                  const FormatToken *Previous,
95                                  const FormatToken *MinColumnToken) {
96   if (!Previous || !MinColumnToken)
97     return false;
98   unsigned MinContinueColumn =
99       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
100   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
101          isLineComment(*Previous) &&
102          FormatTok.OriginalColumn >= MinContinueColumn;
103 }
104 
105 class ScopedMacroState : public FormatTokenSource {
106 public:
107   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
108                    FormatToken *&ResetToken)
109       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
110         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
111         Token(nullptr), PreviousToken(nullptr) {
112     FakeEOF.Tok.startToken();
113     FakeEOF.Tok.setKind(tok::eof);
114     TokenSource = this;
115     Line.Level = 0;
116     Line.InPPDirective = true;
117   }
118 
119   ~ScopedMacroState() override {
120     TokenSource = PreviousTokenSource;
121     ResetToken = Token;
122     Line.InPPDirective = false;
123     Line.Level = PreviousLineLevel;
124   }
125 
126   FormatToken *getNextToken() override {
127     // The \c UnwrappedLineParser guards against this by never calling
128     // \c getNextToken() after it has encountered the first eof token.
129     assert(!eof());
130     PreviousToken = Token;
131     Token = PreviousTokenSource->getNextToken();
132     if (eof())
133       return &FakeEOF;
134     return Token;
135   }
136 
137   FormatToken *getPreviousToken() override {
138     return PreviousTokenSource->getPreviousToken();
139   }
140 
141   FormatToken *peekNextToken() override {
142     if (eof())
143       return &FakeEOF;
144     return PreviousTokenSource->peekNextToken();
145   }
146 
147   FormatToken *peekNextToken(int N) override {
148     assert(N > 0);
149     if (eof())
150       return &FakeEOF;
151     return PreviousTokenSource->peekNextToken(N);
152   }
153 
154   bool isEOF() override { return PreviousTokenSource->isEOF(); }
155 
156   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
157 
158   FormatToken *setPosition(unsigned Position) override {
159     PreviousToken = nullptr;
160     Token = PreviousTokenSource->setPosition(Position);
161     return Token;
162   }
163 
164 private:
165   bool eof() {
166     return Token && Token->HasUnescapedNewline &&
167            !continuesLineComment(*Token, PreviousToken,
168                                  /*MinColumnToken=*/PreviousToken);
169   }
170 
171   FormatToken FakeEOF;
172   UnwrappedLine &Line;
173   FormatTokenSource *&TokenSource;
174   FormatToken *&ResetToken;
175   unsigned PreviousLineLevel;
176   FormatTokenSource *PreviousTokenSource;
177 
178   FormatToken *Token;
179   FormatToken *PreviousToken;
180 };
181 
182 } // end anonymous namespace
183 
184 class ScopedLineState {
185 public:
186   ScopedLineState(UnwrappedLineParser &Parser,
187                   bool SwitchToPreprocessorLines = false)
188       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
189     if (SwitchToPreprocessorLines)
190       Parser.CurrentLines = &Parser.PreprocessorDirectives;
191     else if (!Parser.Line->Tokens.empty())
192       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
193     PreBlockLine = std::move(Parser.Line);
194     Parser.Line = std::make_unique<UnwrappedLine>();
195     Parser.Line->Level = PreBlockLine->Level;
196     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
197   }
198 
199   ~ScopedLineState() {
200     if (!Parser.Line->Tokens.empty())
201       Parser.addUnwrappedLine();
202     assert(Parser.Line->Tokens.empty());
203     Parser.Line = std::move(PreBlockLine);
204     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
205       Parser.MustBreakBeforeNextToken = true;
206     Parser.CurrentLines = OriginalLines;
207   }
208 
209 private:
210   UnwrappedLineParser &Parser;
211 
212   std::unique_ptr<UnwrappedLine> PreBlockLine;
213   SmallVectorImpl<UnwrappedLine> *OriginalLines;
214 };
215 
216 class CompoundStatementIndenter {
217 public:
218   CompoundStatementIndenter(UnwrappedLineParser *Parser,
219                             const FormatStyle &Style, unsigned &LineLevel)
220       : CompoundStatementIndenter(Parser, LineLevel,
221                                   Style.BraceWrapping.AfterControlStatement,
222                                   Style.BraceWrapping.IndentBraces) {}
223   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
224                             bool WrapBrace, bool IndentBrace)
225       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
226     if (WrapBrace)
227       Parser->addUnwrappedLine();
228     if (IndentBrace)
229       ++LineLevel;
230   }
231   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
232 
233 private:
234   unsigned &LineLevel;
235   unsigned OldLineLevel;
236 };
237 
238 namespace {
239 
240 class IndexedTokenSource : public FormatTokenSource {
241 public:
242   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
243       : Tokens(Tokens), Position(-1) {}
244 
245   FormatToken *getNextToken() override {
246     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
247       LLVM_DEBUG({
248         llvm::dbgs() << "Next ";
249         dbgToken(Position);
250       });
251       return Tokens[Position];
252     }
253     ++Position;
254     LLVM_DEBUG({
255       llvm::dbgs() << "Next ";
256       dbgToken(Position);
257     });
258     return Tokens[Position];
259   }
260 
261   FormatToken *getPreviousToken() override {
262     return Position > 0 ? Tokens[Position - 1] : nullptr;
263   }
264 
265   FormatToken *peekNextToken() override {
266     int Next = Position + 1;
267     LLVM_DEBUG({
268       llvm::dbgs() << "Peeking ";
269       dbgToken(Next);
270     });
271     return Tokens[Next];
272   }
273 
274   FormatToken *peekNextToken(int N) override {
275     assert(N > 0);
276     int Next = Position + N;
277     LLVM_DEBUG({
278       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
279       dbgToken(Next);
280     });
281     return Tokens[Next];
282   }
283 
284   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
285 
286   unsigned getPosition() override {
287     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
288     assert(Position >= 0);
289     return Position;
290   }
291 
292   FormatToken *setPosition(unsigned P) override {
293     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
294     Position = P;
295     return Tokens[Position];
296   }
297 
298   void reset() { Position = -1; }
299 
300 private:
301   void dbgToken(int Position, llvm::StringRef Indent = "") {
302     FormatToken *Tok = Tokens[Position];
303     llvm::dbgs() << Indent << "[" << Position
304                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
305                  << ", Macro: " << !!Tok->MacroCtx << "\n";
306   }
307 
308   ArrayRef<FormatToken *> Tokens;
309   int Position;
310 };
311 
312 } // end anonymous namespace
313 
314 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
315                                          const AdditionalKeywords &Keywords,
316                                          unsigned FirstStartColumn,
317                                          ArrayRef<FormatToken *> Tokens,
318                                          UnwrappedLineConsumer &Callback)
319     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
320       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
321       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
322       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
323       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
324                        ? IG_Rejected
325                        : IG_Inited),
326       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
327 
328 void UnwrappedLineParser::reset() {
329   PPBranchLevel = -1;
330   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
331                      ? IG_Rejected
332                      : IG_Inited;
333   IncludeGuardToken = nullptr;
334   Line.reset(new UnwrappedLine);
335   CommentsBeforeNextToken.clear();
336   FormatTok = nullptr;
337   MustBreakBeforeNextToken = false;
338   PreprocessorDirectives.clear();
339   CurrentLines = &Lines;
340   DeclarationScopeStack.clear();
341   NestedTooDeep.clear();
342   PPStack.clear();
343   Line->FirstStartColumn = FirstStartColumn;
344 }
345 
346 void UnwrappedLineParser::parse() {
347   IndexedTokenSource TokenSource(AllTokens);
348   Line->FirstStartColumn = FirstStartColumn;
349   do {
350     LLVM_DEBUG(llvm::dbgs() << "----\n");
351     reset();
352     Tokens = &TokenSource;
353     TokenSource.reset();
354 
355     readToken();
356     parseFile();
357 
358     // If we found an include guard then all preprocessor directives (other than
359     // the guard) are over-indented by one.
360     if (IncludeGuard == IG_Found)
361       for (auto &Line : Lines)
362         if (Line.InPPDirective && Line.Level > 0)
363           --Line.Level;
364 
365     // Create line with eof token.
366     pushToken(FormatTok);
367     addUnwrappedLine();
368 
369     for (const UnwrappedLine &Line : Lines)
370       Callback.consumeUnwrappedLine(Line);
371 
372     Callback.finishRun();
373     Lines.clear();
374     while (!PPLevelBranchIndex.empty() &&
375            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
376       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
377       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
378     }
379     if (!PPLevelBranchIndex.empty()) {
380       ++PPLevelBranchIndex.back();
381       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
382       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
383     }
384   } while (!PPLevelBranchIndex.empty());
385 }
386 
387 void UnwrappedLineParser::parseFile() {
388   // The top-level context in a file always has declarations, except for pre-
389   // processor directives and JavaScript files.
390   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
391   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
392                                           MustBeDeclaration);
393   if (Style.Language == FormatStyle::LK_TextProto)
394     parseBracedList();
395   else
396     parseLevel(/*HasOpeningBrace=*/false, /*CanContainBracedList=*/true);
397   // Make sure to format the remaining tokens.
398   //
399   // LK_TextProto is special since its top-level is parsed as the body of a
400   // braced list, which does not necessarily have natural line separators such
401   // as a semicolon. Comments after the last entry that have been determined to
402   // not belong to that line, as in:
403   //   key: value
404   //   // endfile comment
405   // do not have a chance to be put on a line of their own until this point.
406   // Here we add this newline before end-of-file comments.
407   if (Style.Language == FormatStyle::LK_TextProto &&
408       !CommentsBeforeNextToken.empty())
409     addUnwrappedLine();
410   flushComments(true);
411   addUnwrappedLine();
412 }
413 
414 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
415   do {
416     switch (FormatTok->Tok.getKind()) {
417     case tok::l_brace:
418       return;
419     default:
420       if (FormatTok->is(Keywords.kw_where)) {
421         addUnwrappedLine();
422         nextToken();
423         parseCSharpGenericTypeConstraint();
424         break;
425       }
426       nextToken();
427       break;
428     }
429   } while (!eof());
430 }
431 
432 void UnwrappedLineParser::parseCSharpAttribute() {
433   int UnpairedSquareBrackets = 1;
434   do {
435     switch (FormatTok->Tok.getKind()) {
436     case tok::r_square:
437       nextToken();
438       --UnpairedSquareBrackets;
439       if (UnpairedSquareBrackets == 0) {
440         addUnwrappedLine();
441         return;
442       }
443       break;
444     case tok::l_square:
445       ++UnpairedSquareBrackets;
446       nextToken();
447       break;
448     default:
449       nextToken();
450       break;
451     }
452   } while (!eof());
453 }
454 
455 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
456   if (!Lines.empty() && Lines.back().InPPDirective)
457     return true;
458 
459   const FormatToken *Previous = Tokens->getPreviousToken();
460   return Previous && Previous->is(tok::comment) &&
461          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
462 }
/// Parses one nesting level of the token stream: tokens are dispatched on
/// their kind until end of input is reached or, for a level started by an
/// opening brace, until its closing brace is found. That closing brace is not
/// consumed here.
/// \param HasOpeningBrace If that level is started by an opening brace.
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param IfKind Passed through to parseStructuralElement() for elements that
/// take the default path.
/// \param NextLBracesType The type for left brace found in this level.
/// \returns true if a simple block, or false otherwise. (A simple block has a
/// single statement.) true is only returned when Style.RemoveBracesLLVM is set
/// and the level ended at its closing brace.
bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace,
                                     bool CanContainBracedList,
                                     IfStmtKind *IfKind,
                                     TokenType NextLBracesType) {
  // Brace type handed down to structural elements parsed at this level.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Always true when brace removal is disabled, so that the simple-block
  // check in the r_brace case below fails.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. Once a
    // label has been recorded, HasLabel is no longer passed on.
    auto ParseDefault = [this, HasOpeningBrace, IfKind, NextLevelLBracesType,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(IfKind, !HasOpeningBrace, NextLevelLBracesType,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      else if (FormatTok->Previous &&
               FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*UnindentWhitesmithsBraces=*/false,
                 CanContainBracedList,
                 /*NextLBracesType=*/NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace) {
        // End of this level; the brace itself is left for the caller.
        if (!Style.RemoveBracesLLVM)
          return false;
        // Not a simple block unless it ends in a real '}', holds exactly one
        // statement, has no label, and is not preceded by a comment or
        // preprocessor directive at either end.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective())
          return false;
        // A comment following the brace without a newline in between also
        // disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // Closing brace without an opening brace at this level: consume it and
      // end the line.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments to see what follows 'default', then rewind
      // to where we started.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Only the first label seen at this level may raise the line level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());
  return false;
}
586 
587 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
588   // We'll parse forward through the tokens until we hit
589   // a closing brace or eof - note that getNextToken() will
590   // parse macros, so this will magically work inside macro
591   // definitions, too.
592   unsigned StoredPosition = Tokens->getPosition();
593   FormatToken *Tok = FormatTok;
594   const FormatToken *PrevTok = Tok->Previous;
595   // Keep a stack of positions of lbrace tokens. We will
596   // update information about whether an lbrace starts a
597   // braced init list or a different block during the loop.
598   SmallVector<FormatToken *, 8> LBraceStack;
599   assert(Tok->is(tok::l_brace));
600   do {
601     // Get next non-comment token.
602     FormatToken *NextTok;
603     unsigned ReadTokens = 0;
604     do {
605       NextTok = Tokens->getNextToken();
606       ++ReadTokens;
607     } while (NextTok->is(tok::comment));
608 
609     switch (Tok->Tok.getKind()) {
610     case tok::l_brace:
611       if (Style.isJavaScript() && PrevTok) {
612         if (PrevTok->isOneOf(tok::colon, tok::less))
613           // A ':' indicates this code is in a type, or a braced list
614           // following a label in an object literal ({a: {b: 1}}).
615           // A '<' could be an object used in a comparison, but that is nonsense
616           // code (can never return true), so more likely it is a generic type
617           // argument (`X<{a: string; b: number}>`).
618           // The code below could be confused by semicolons between the
619           // individual members in a type member list, which would normally
620           // trigger BK_Block. In both cases, this must be parsed as an inline
621           // braced init.
622           Tok->setBlockKind(BK_BracedInit);
623         else if (PrevTok->is(tok::r_paren))
624           // `) { }` can only occur in function or method declarations in JS.
625           Tok->setBlockKind(BK_Block);
626       } else {
627         Tok->setBlockKind(BK_Unknown);
628       }
629       LBraceStack.push_back(Tok);
630       break;
631     case tok::r_brace:
632       if (LBraceStack.empty())
633         break;
634       if (LBraceStack.back()->is(BK_Unknown)) {
635         bool ProbablyBracedList = false;
636         if (Style.Language == FormatStyle::LK_Proto) {
637           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
638         } else {
639           // Skip NextTok over preprocessor lines, otherwise we may not
640           // properly diagnose the block as a braced intializer
641           // if the comma separator appears after the pp directive.
642           while (NextTok->is(tok::hash)) {
643             ScopedMacroState MacroState(*Line, Tokens, NextTok);
644             do {
645               NextTok = Tokens->getNextToken();
646               ++ReadTokens;
647             } while (NextTok->isNot(tok::eof));
648           }
649 
650           // Using OriginalColumn to distinguish between ObjC methods and
651           // binary operators is a bit hacky.
652           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
653                                   NextTok->OriginalColumn == 0;
654 
655           // Try to detect a braced list. Note that regardless how we mark inner
656           // braces here, we will overwrite the BlockKind later if we parse a
657           // braced list (where all blocks inside are by default braced lists),
658           // or when we explicitly detect blocks (for example while parsing
659           // lambdas).
660 
661           // If we already marked the opening brace as braced list, the closing
662           // must also be part of it.
663           ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);
664 
665           ProbablyBracedList = ProbablyBracedList ||
666                                (Style.isJavaScript() &&
667                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
668                                                  Keywords.kw_as));
669           ProbablyBracedList = ProbablyBracedList ||
670                                (Style.isCpp() && NextTok->is(tok::l_paren));
671 
672           // If there is a comma, semicolon or right paren after the closing
673           // brace, we assume this is a braced initializer list.
674           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
675           // braced list in JS.
676           ProbablyBracedList =
677               ProbablyBracedList ||
678               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
679                                tok::r_paren, tok::r_square, tok::l_brace,
680                                tok::ellipsis);
681 
682           ProbablyBracedList =
683               ProbablyBracedList ||
684               (NextTok->is(tok::identifier) &&
685                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
686 
687           ProbablyBracedList = ProbablyBracedList ||
688                                (NextTok->is(tok::semi) &&
689                                 (!ExpectClassBody || LBraceStack.size() != 1));
690 
691           ProbablyBracedList =
692               ProbablyBracedList ||
693               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
694 
695           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
696             // We can have an array subscript after a braced init
697             // list, but C++11 attributes are expected after blocks.
698             NextTok = Tokens->getNextToken();
699             ++ReadTokens;
700             ProbablyBracedList = NextTok->isNot(tok::l_square);
701           }
702         }
703         if (ProbablyBracedList) {
704           Tok->setBlockKind(BK_BracedInit);
705           LBraceStack.back()->setBlockKind(BK_BracedInit);
706         } else {
707           Tok->setBlockKind(BK_Block);
708           LBraceStack.back()->setBlockKind(BK_Block);
709         }
710       }
711       LBraceStack.pop_back();
712       break;
713     case tok::identifier:
714       if (!Tok->is(TT_StatementMacro))
715         break;
716       LLVM_FALLTHROUGH;
717     case tok::at:
718     case tok::semi:
719     case tok::kw_if:
720     case tok::kw_while:
721     case tok::kw_for:
722     case tok::kw_switch:
723     case tok::kw_try:
724     case tok::kw___try:
725       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
726         LBraceStack.back()->setBlockKind(BK_Block);
727       break;
728     default:
729       break;
730     }
731     PrevTok = Tok;
732     Tok = NextTok;
733   } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
734 
735   // Assume other blocks for all unclosed opening braces.
736   for (FormatToken *LBrace : LBraceStack)
737     if (LBrace->is(BK_Unknown))
738       LBrace->setBlockKind(BK_Block);
739 
740   FormatTok = Tokens->setPosition(StoredPosition);
741 }
742 
// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  seed ^= ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
748 
749 size_t UnwrappedLineParser::computePPHash() const {
750   size_t h = 0;
751   for (const auto &i : PPStack) {
752     hash_combine(h, size_t(i.Kind));
753     hash_combine(h, i.Line);
754   }
755   return h;
756 }
757 
/// Parses a block that starts at the current token, which must be '{' or a
/// macro block begin token, including the matching closing token.
/// \param MustBeDeclaration Declaration flag installed for the block's scope.
/// \param AddLevels Number of levels the block content is indented by.
/// \param MunchSemi If set, a ';' following the block is consumed as well.
/// \param UnindentWhitesmithsBraces If set, the line level is decremented
/// after the line holding the opening brace has been added.
/// \param CanContainBracedList Passed on to parseLevel().
/// \param NextLBracesType Passed on to parseLevel().
/// \returns The IfStmtKind that parseLevel() stored for the block content
/// (IfStmtKind::NotIf if it stored nothing).
UnwrappedLineParser::IfStmtKind
UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                bool MunchSemi, bool UnindentWhitesmithsBraces,
                                bool CanContainBracedList,
                                TokenType NextLBracesType) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Remember the opening token; FormatTok moves on below.
  FormatToken *Tok = FormatTok;
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Hash of the preprocessor stack at the opening brace; compared against the
  // hash at the closing brace further down.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Compute the index of the line that holds the opening brace, or
  // kInvalidIndex if no line has been collected.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock = parseLevel(
      /*HasOpeningBrace=*/true, CanContainBracedList, &IfKind, NextLBracesType);

  if (eof())
    return IfKind;

  // The level did not end at the expected closing token: restore the level
  // and leave the token for the caller.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // For a simple block, link the two braces to each other unless the token
  // before the closing brace is a non-optional '}'.
  if (SimpleBlock && Tok->is(tok::l_brace)) {
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      Tok->MatchingParen = FormatTok;
      FormatTok->MatchingParen = Tok;
    }
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Only cross-link the opening and closing lines when the preprocessor stack
  // hashed the same at both braces.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}
861 
862 static bool isGoogScope(const UnwrappedLine &Line) {
863   // FIXME: Closure-library specific stuff should not be hard-coded but be
864   // configurable.
865   if (Line.Tokens.size() < 4)
866     return false;
867   auto I = Line.Tokens.begin();
868   if (I->Tok->TokenText != "goog")
869     return false;
870   ++I;
871   if (I->Tok->isNot(tok::period))
872     return false;
873   ++I;
874   if (I->Tok->TokenText != "scope")
875     return false;
876   ++I;
877   return I->Tok->is(tok::l_paren);
878 }
879 
880 static bool isIIFE(const UnwrappedLine &Line,
881                    const AdditionalKeywords &Keywords) {
882   // Look for the start of an immediately invoked anonymous function.
883   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
884   // This is commonly done in JavaScript to create a new, anonymous scope.
885   // Example: (function() { ... })()
886   if (Line.Tokens.size() < 3)
887     return false;
888   auto I = Line.Tokens.begin();
889   if (I->Tok->isNot(tok::l_paren))
890     return false;
891   ++I;
892   if (I->Tok->isNot(Keywords.kw_function))
893     return false;
894   ++I;
895   return I->Tok->is(tok::l_paren);
896 }
897 
898 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
899                                    const FormatToken &InitialToken) {
900   tok::TokenKind Kind = InitialToken.Tok.getKind();
901   if (InitialToken.is(TT_NamespaceMacro))
902     Kind = tok::kw_namespace;
903 
904   switch (Kind) {
905   case tok::kw_namespace:
906     return Style.BraceWrapping.AfterNamespace;
907   case tok::kw_class:
908     return Style.BraceWrapping.AfterClass;
909   case tok::kw_union:
910     return Style.BraceWrapping.AfterUnion;
911   case tok::kw_struct:
912     return Style.BraceWrapping.AfterStruct;
913   case tok::kw_enum:
914     return Style.BraceWrapping.AfterEnum;
915   default:
916     return false;
917   }
918 }
919 
920 void UnwrappedLineParser::parseChildBlock(
921     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
922   FormatTok->setBlockKind(BK_Block);
923   nextToken();
924   {
925     bool SkipIndent = (Style.isJavaScript() &&
926                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
927     ScopedLineState LineState(*this);
928     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
929                                             /*MustBeDeclaration=*/false);
930     Line->Level += SkipIndent ? 0 : 1;
931     parseLevel(/*HasOpeningBrace=*/true, CanContainBracedList,
932                /*IfKind=*/nullptr, NextLBracesType);
933     flushComments(isOnNewLine(*FormatTok));
934     Line->Level -= SkipIndent ? 0 : 1;
935   }
936   nextToken();
937 }
938 
939 void UnwrappedLineParser::parsePPDirective() {
940   assert(FormatTok->is(tok::hash) && "'#' expected");
941   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
942 
943   nextToken();
944 
945   if (!FormatTok->Tok.getIdentifierInfo()) {
946     parsePPUnknown();
947     return;
948   }
949 
950   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
951   case tok::pp_define:
952     parsePPDefine();
953     return;
954   case tok::pp_if:
955     parsePPIf(/*IfDef=*/false);
956     break;
957   case tok::pp_ifdef:
958   case tok::pp_ifndef:
959     parsePPIf(/*IfDef=*/true);
960     break;
961   case tok::pp_else:
962     parsePPElse();
963     break;
964   case tok::pp_elifdef:
965   case tok::pp_elifndef:
966   case tok::pp_elif:
967     parsePPElIf();
968     break;
969   case tok::pp_endif:
970     parsePPEndIf();
971     break;
972   default:
973     parsePPUnknown();
974     break;
975   }
976 }
977 
978 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
979   size_t Line = CurrentLines->size();
980   if (CurrentLines == &PreprocessorDirectives)
981     Line += Lines.size();
982 
983   if (Unreachable ||
984       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
985     PPStack.push_back({PP_Unreachable, Line});
986   else
987     PPStack.push_back({PP_Conditional, Line});
988 }
989 
990 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
991   ++PPBranchLevel;
992   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
993   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
994     PPLevelBranchIndex.push_back(0);
995     PPLevelBranchCount.push_back(0);
996   }
997   PPChainBranchIndex.push(0);
998   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
999   conditionalCompilationCondition(Unreachable || Skip);
1000 }
1001 
1002 void UnwrappedLineParser::conditionalCompilationAlternative() {
1003   if (!PPStack.empty())
1004     PPStack.pop_back();
1005   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1006   if (!PPChainBranchIndex.empty())
1007     ++PPChainBranchIndex.top();
1008   conditionalCompilationCondition(
1009       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1010       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1011 }
1012 
1013 void UnwrappedLineParser::conditionalCompilationEnd() {
1014   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1015   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1016     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1017       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1018   }
1019   // Guard against #endif's without #if.
1020   if (PPBranchLevel > -1)
1021     --PPBranchLevel;
1022   if (!PPChainBranchIndex.empty())
1023     PPChainBranchIndex.pop();
1024   if (!PPStack.empty())
1025     PPStack.pop_back();
1026 }
1027 
1028 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1029   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1030   nextToken();
1031   bool Unreachable = false;
1032   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1033     Unreachable = true;
1034   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1035     Unreachable = true;
1036   conditionalCompilationStart(Unreachable);
1037   FormatToken *IfCondition = FormatTok;
1038   // If there's a #ifndef on the first line, and the only lines before it are
1039   // comments, it could be an include guard.
1040   bool MaybeIncludeGuard = IfNDef;
1041   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
1042     for (auto &Line : Lines) {
1043       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1044         MaybeIncludeGuard = false;
1045         IncludeGuard = IG_Rejected;
1046         break;
1047       }
1048     }
1049   --PPBranchLevel;
1050   parsePPUnknown();
1051   ++PPBranchLevel;
1052   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1053     IncludeGuard = IG_IfNdefed;
1054     IncludeGuardToken = IfCondition;
1055   }
1056 }
1057 
1058 void UnwrappedLineParser::parsePPElse() {
1059   // If a potential include guard has an #else, it's not an include guard.
1060   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1061     IncludeGuard = IG_Rejected;
1062   conditionalCompilationAlternative();
1063   if (PPBranchLevel > -1)
1064     --PPBranchLevel;
1065   parsePPUnknown();
1066   ++PPBranchLevel;
1067 }
1068 
1069 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1070 
1071 void UnwrappedLineParser::parsePPEndIf() {
1072   conditionalCompilationEnd();
1073   parsePPUnknown();
1074   // If the #endif of a potential include guard is the last thing in the file,
1075   // then we found an include guard.
1076   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1077       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1078     IncludeGuard = IG_Found;
1079 }
1080 
1081 void UnwrappedLineParser::parsePPDefine() {
1082   nextToken();
1083 
1084   if (!FormatTok->Tok.getIdentifierInfo()) {
1085     IncludeGuard = IG_Rejected;
1086     IncludeGuardToken = nullptr;
1087     parsePPUnknown();
1088     return;
1089   }
1090 
1091   if (IncludeGuard == IG_IfNdefed &&
1092       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1093     IncludeGuard = IG_Defined;
1094     IncludeGuardToken = nullptr;
1095     for (auto &Line : Lines) {
1096       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1097         IncludeGuard = IG_Rejected;
1098         break;
1099       }
1100     }
1101   }
1102 
1103   // In the context of a define, even keywords should be treated as normal
1104   // identifiers. Setting the kind to identifier is not enough, because we need
1105   // to treat additional keywords like __except as well, which are already
1106   // identifiers. Setting the identifier info to null interferes with include
1107   // guard processing above, and changes preprocessing nesting.
1108   FormatTok->Tok.setKind(tok::identifier);
1109   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1110   nextToken();
1111   if (FormatTok->Tok.getKind() == tok::l_paren &&
1112       !FormatTok->hasWhitespaceBefore())
1113     parseParens();
1114   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1115     Line->Level += PPBranchLevel + 1;
1116   addUnwrappedLine();
1117   ++Line->Level;
1118 
1119   // Errors during a preprocessor directive can only affect the layout of the
1120   // preprocessor directive, and thus we ignore them. An alternative approach
1121   // would be to use the same approach we use on the file level (no
1122   // re-indentation if there was a structural error) within the macro
1123   // definition.
1124   parseFile();
1125 }
1126 
1127 void UnwrappedLineParser::parsePPUnknown() {
1128   do {
1129     nextToken();
1130   } while (!eof());
1131   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1132     Line->Level += PPBranchLevel + 1;
1133   addUnwrappedLine();
1134 }
1135 
1136 // Here we exclude certain tokens that are not usually the first token in an
1137 // unwrapped line. This is used in attempt to distinguish macro calls without
1138 // trailing semicolons from other constructs split to several lines.
1139 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1140   // Semicolon can be a null-statement, l_square can be a start of a macro or
1141   // a C++11 attribute, but this doesn't seem to be common.
1142   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1143          Tok.isNot(TT_AttributeSquare) &&
1144          // Tokens that can only be used as binary operators and a part of
1145          // overloaded operator names.
1146          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1147          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1148          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1149          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1150          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1151          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1152          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1153          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1154          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1155          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1156          Tok.isNot(tok::lesslessequal) &&
1157          // Colon is used in labels, base class lists, initializer lists,
1158          // range-based for loops, ternary operator, but should never be the
1159          // first token in an unwrapped line.
1160          Tok.isNot(tok::colon) &&
1161          // 'noexcept' is a trailing annotation.
1162          Tok.isNot(tok::kw_noexcept);
1163 }
1164 
1165 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1166                           const FormatToken *FormatTok) {
1167   // FIXME: This returns true for C/C++ keywords like 'struct'.
1168   return FormatTok->is(tok::identifier) &&
1169          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1170           !FormatTok->isOneOf(
1171               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1172               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1173               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1174               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1175               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1176               Keywords.kw_instanceof, Keywords.kw_interface,
1177               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1178 }
1179 
1180 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1181                                  const FormatToken *FormatTok) {
1182   return FormatTok->Tok.isLiteral() ||
1183          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1184          mustBeJSIdent(Keywords, FormatTok);
1185 }
1186 
1187 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1188 // when encountered after a value (see mustBeJSIdentOrValue).
1189 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1190                            const FormatToken *FormatTok) {
1191   return FormatTok->isOneOf(
1192       tok::kw_return, Keywords.kw_yield,
1193       // conditionals
1194       tok::kw_if, tok::kw_else,
1195       // loops
1196       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1197       // switch/case
1198       tok::kw_switch, tok::kw_case,
1199       // exceptions
1200       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1201       // declaration
1202       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1203       Keywords.kw_async, Keywords.kw_function,
1204       // import/export
1205       Keywords.kw_import, tok::kw_export);
1206 }
1207 
1208 // Checks whether a token is a type in K&R C (aka C78).
1209 static bool isC78Type(const FormatToken &Tok) {
1210   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1211                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1212                      tok::identifier);
1213 }
1214 
1215 // This function checks whether a token starts the first parameter declaration
1216 // in a K&R C (aka C78) function definition, e.g.:
1217 //   int f(a, b)
1218 //   short a, b;
1219 //   {
1220 //      return a + b;
1221 //   }
1222 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1223                                const FormatToken *FuncName) {
1224   assert(Tok);
1225   assert(Next);
1226   assert(FuncName);
1227 
1228   if (FuncName->isNot(tok::identifier))
1229     return false;
1230 
1231   const FormatToken *Prev = FuncName->Previous;
1232   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1233     return false;
1234 
1235   if (!isC78Type(*Tok) &&
1236       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1237     return false;
1238 
1239   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1240     return false;
1241 
1242   Tok = Tok->Previous;
1243   if (!Tok || Tok->isNot(tok::r_paren))
1244     return false;
1245 
1246   Tok = Tok->Previous;
1247   if (!Tok || Tok->isNot(tok::identifier))
1248     return false;
1249 
1250   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1251 }
1252 
1253 void UnwrappedLineParser::parseModuleImport() {
1254   nextToken();
1255   while (!eof()) {
1256     if (FormatTok->is(tok::colon)) {
1257       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1258     }
1259     // Handle import <foo/bar.h> as we would an include statement.
1260     else if (FormatTok->is(tok::less)) {
1261       nextToken();
1262       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1263         // Mark tokens up to the trailing line comments as implicit string
1264         // literals.
1265         if (FormatTok->isNot(tok::comment) &&
1266             !FormatTok->TokenText.startswith("//"))
1267           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1268         nextToken();
1269       }
1270     }
1271     if (FormatTok->is(tok::semi)) {
1272       nextToken();
1273       break;
1274     }
1275     nextToken();
1276   }
1277 
1278   addUnwrappedLine();
1279 }
1280 
1281 // readTokenWithJavaScriptASI reads the next token and terminates the current
1282 // line if JavaScript Automatic Semicolon Insertion must
1283 // happen between the current token and the next token.
1284 //
1285 // This method is conservative - it cannot cover all edge cases of JavaScript,
1286 // but only aims to correctly handle certain well known cases. It *must not*
1287 // return true in speculative cases.
1288 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1289   FormatToken *Previous = FormatTok;
1290   readToken();
1291   FormatToken *Next = FormatTok;
1292 
1293   bool IsOnSameLine =
1294       CommentsBeforeNextToken.empty()
1295           ? Next->NewlinesBefore == 0
1296           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1297   if (IsOnSameLine)
1298     return;
1299 
1300   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1301   bool PreviousStartsTemplateExpr =
1302       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1303   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1304     // If the line contains an '@' sign, the previous token might be an
1305     // annotation, which can precede another identifier/value.
1306     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1307       return LineNode.Tok->is(tok::at);
1308     });
1309     if (HasAt)
1310       return;
1311   }
1312   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1313     return addUnwrappedLine();
1314   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1315   bool NextEndsTemplateExpr =
1316       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1317   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1318       (PreviousMustBeValue ||
1319        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1320                          tok::minusminus)))
1321     return addUnwrappedLine();
1322   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1323       isJSDeclOrStmt(Keywords, Next))
1324     return addUnwrappedLine();
1325 }
1326 
1327 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
1328                                                  bool IsTopLevel,
1329                                                  TokenType NextLBracesType,
1330                                                  bool *HasLabel) {
1331   if (Style.Language == FormatStyle::LK_TableGen &&
1332       FormatTok->is(tok::pp_include)) {
1333     nextToken();
1334     if (FormatTok->is(tok::string_literal))
1335       nextToken();
1336     addUnwrappedLine();
1337     return;
1338   }
1339   switch (FormatTok->Tok.getKind()) {
1340   case tok::kw_asm:
1341     nextToken();
1342     if (FormatTok->is(tok::l_brace)) {
1343       FormatTok->setFinalizedType(TT_InlineASMBrace);
1344       nextToken();
1345       while (FormatTok && FormatTok->isNot(tok::eof)) {
1346         if (FormatTok->is(tok::r_brace)) {
1347           FormatTok->setFinalizedType(TT_InlineASMBrace);
1348           nextToken();
1349           addUnwrappedLine();
1350           break;
1351         }
1352         FormatTok->Finalized = true;
1353         nextToken();
1354       }
1355     }
1356     break;
1357   case tok::kw_namespace:
1358     parseNamespace();
1359     return;
1360   case tok::kw_public:
1361   case tok::kw_protected:
1362   case tok::kw_private:
1363     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1364         Style.isCSharp())
1365       nextToken();
1366     else
1367       parseAccessSpecifier();
1368     return;
1369   case tok::kw_if:
1370     if (Style.isJavaScript() && Line->MustBeDeclaration)
1371       // field/method declaration.
1372       break;
1373     parseIfThenElse(IfKind);
1374     return;
1375   case tok::kw_for:
1376   case tok::kw_while:
1377     if (Style.isJavaScript() && Line->MustBeDeclaration)
1378       // field/method declaration.
1379       break;
1380     parseForOrWhileLoop();
1381     return;
1382   case tok::kw_do:
1383     if (Style.isJavaScript() && Line->MustBeDeclaration)
1384       // field/method declaration.
1385       break;
1386     parseDoWhile();
1387     return;
1388   case tok::kw_switch:
1389     if (Style.isJavaScript() && Line->MustBeDeclaration)
1390       // 'switch: string' field declaration.
1391       break;
1392     parseSwitch();
1393     return;
1394   case tok::kw_default:
1395     if (Style.isJavaScript() && Line->MustBeDeclaration)
1396       // 'default: string' field declaration.
1397       break;
1398     nextToken();
1399     if (FormatTok->is(tok::colon)) {
1400       parseLabel();
1401       return;
1402     }
1403     // e.g. "default void f() {}" in a Java interface.
1404     break;
1405   case tok::kw_case:
1406     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1407       // 'case: string' field declaration.
1408       nextToken();
1409       break;
1410     }
1411     parseCaseLabel();
1412     return;
1413   case tok::kw_try:
1414   case tok::kw___try:
1415     if (Style.isJavaScript() && Line->MustBeDeclaration)
1416       // field/method declaration.
1417       break;
1418     parseTryCatch();
1419     return;
1420   case tok::kw_extern:
1421     nextToken();
1422     if (FormatTok->is(tok::string_literal)) {
1423       nextToken();
1424       if (FormatTok->is(tok::l_brace)) {
1425         if (Style.BraceWrapping.AfterExternBlock)
1426           addUnwrappedLine();
1427         // Either we indent or for backwards compatibility we follow the
1428         // AfterExternBlock style.
1429         unsigned AddLevels =
1430             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1431                     (Style.BraceWrapping.AfterExternBlock &&
1432                      Style.IndentExternBlock ==
1433                          FormatStyle::IEBS_AfterExternBlock)
1434                 ? 1u
1435                 : 0u;
1436         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1437         addUnwrappedLine();
1438         return;
1439       }
1440     }
1441     break;
1442   case tok::kw_export:
1443     if (Style.isJavaScript()) {
1444       parseJavaScriptEs6ImportExport();
1445       return;
1446     }
1447     if (!Style.isCpp())
1448       break;
1449     // Handle C++ "(inline|export) namespace".
1450     LLVM_FALLTHROUGH;
1451   case tok::kw_inline:
1452     nextToken();
1453     if (FormatTok->is(tok::kw_namespace)) {
1454       parseNamespace();
1455       return;
1456     }
1457     break;
1458   case tok::identifier:
1459     if (FormatTok->is(TT_ForEachMacro)) {
1460       parseForOrWhileLoop();
1461       return;
1462     }
1463     if (FormatTok->is(TT_MacroBlockBegin)) {
1464       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1465                  /*MunchSemi=*/false);
1466       return;
1467     }
1468     if (FormatTok->is(Keywords.kw_import)) {
1469       if (Style.isJavaScript()) {
1470         parseJavaScriptEs6ImportExport();
1471         return;
1472       }
1473       if (Style.Language == FormatStyle::LK_Proto) {
1474         nextToken();
1475         if (FormatTok->is(tok::kw_public))
1476           nextToken();
1477         if (!FormatTok->is(tok::string_literal))
1478           return;
1479         nextToken();
1480         if (FormatTok->is(tok::semi))
1481           nextToken();
1482         addUnwrappedLine();
1483         return;
1484       }
1485       if (Style.isCpp()) {
1486         parseModuleImport();
1487         return;
1488       }
1489     }
1490     if (Style.isCpp() &&
1491         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1492                            Keywords.kw_slots, Keywords.kw_qslots)) {
1493       nextToken();
1494       if (FormatTok->is(tok::colon)) {
1495         nextToken();
1496         addUnwrappedLine();
1497         return;
1498       }
1499     }
1500     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1501       parseStatementMacro();
1502       return;
1503     }
1504     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1505       parseNamespace();
1506       return;
1507     }
1508     // In all other cases, parse the declaration.
1509     break;
1510   default:
1511     break;
1512   }
1513   do {
1514     const FormatToken *Previous = FormatTok->Previous;
1515     switch (FormatTok->Tok.getKind()) {
1516     case tok::at:
1517       nextToken();
1518       if (FormatTok->is(tok::l_brace)) {
1519         nextToken();
1520         parseBracedList();
1521         break;
1522       } else if (Style.Language == FormatStyle::LK_Java &&
1523                  FormatTok->is(Keywords.kw_interface)) {
1524         nextToken();
1525         break;
1526       }
1527       switch (FormatTok->Tok.getObjCKeywordID()) {
1528       case tok::objc_public:
1529       case tok::objc_protected:
1530       case tok::objc_package:
1531       case tok::objc_private:
1532         return parseAccessSpecifier();
1533       case tok::objc_interface:
1534       case tok::objc_implementation:
1535         return parseObjCInterfaceOrImplementation();
1536       case tok::objc_protocol:
1537         if (parseObjCProtocol())
1538           return;
1539         break;
1540       case tok::objc_end:
1541         return; // Handled by the caller.
1542       case tok::objc_optional:
1543       case tok::objc_required:
1544         nextToken();
1545         addUnwrappedLine();
1546         return;
1547       case tok::objc_autoreleasepool:
1548         nextToken();
1549         if (FormatTok->is(tok::l_brace)) {
1550           if (Style.BraceWrapping.AfterControlStatement ==
1551               FormatStyle::BWACS_Always)
1552             addUnwrappedLine();
1553           parseBlock();
1554         }
1555         addUnwrappedLine();
1556         return;
1557       case tok::objc_synchronized:
1558         nextToken();
1559         if (FormatTok->is(tok::l_paren))
1560           // Skip synchronization object
1561           parseParens();
1562         if (FormatTok->is(tok::l_brace)) {
1563           if (Style.BraceWrapping.AfterControlStatement ==
1564               FormatStyle::BWACS_Always)
1565             addUnwrappedLine();
1566           parseBlock();
1567         }
1568         addUnwrappedLine();
1569         return;
1570       case tok::objc_try:
1571         // This branch isn't strictly necessary (the kw_try case below would
1572         // do this too after the tok::at is parsed above).  But be explicit.
1573         parseTryCatch();
1574         return;
1575       default:
1576         break;
1577       }
1578       break;
1579     case tok::kw_concept:
1580       parseConcept();
1581       return;
1582     case tok::kw_requires: {
1583       if (Style.isCpp()) {
1584         bool ParsedClause = parseRequires();
1585         if (ParsedClause)
1586           return;
1587       } else {
1588         nextToken();
1589       }
1590       break;
1591     }
1592     case tok::kw_enum:
1593       // Ignore if this is part of "template <enum ...".
1594       if (Previous && Previous->is(tok::less)) {
1595         nextToken();
1596         break;
1597       }
1598 
1599       // parseEnum falls through and does not yet add an unwrapped line as an
1600       // enum definition can start a structural element.
1601       if (!parseEnum())
1602         break;
1603       // This only applies for C++.
1604       if (!Style.isCpp()) {
1605         addUnwrappedLine();
1606         return;
1607       }
1608       break;
1609     case tok::kw_typedef:
1610       nextToken();
1611       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1612                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1613                              Keywords.kw_CF_CLOSED_ENUM,
1614                              Keywords.kw_NS_CLOSED_ENUM))
1615         parseEnum();
1616       break;
1617     case tok::kw_struct:
1618     case tok::kw_union:
1619     case tok::kw_class:
1620       if (parseStructLike())
1621         return;
1622       break;
1623     case tok::period:
1624       nextToken();
1625       // In Java, classes have an implicit static member "class".
1626       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1627           FormatTok->is(tok::kw_class))
1628         nextToken();
1629       if (Style.isJavaScript() && FormatTok &&
1630           FormatTok->Tok.getIdentifierInfo())
1631         // JavaScript only has pseudo keywords, all keywords are allowed to
1632         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1633         nextToken();
1634       break;
1635     case tok::semi:
1636       nextToken();
1637       addUnwrappedLine();
1638       return;
1639     case tok::r_brace:
1640       addUnwrappedLine();
1641       return;
1642     case tok::l_paren: {
1643       parseParens();
1644       // Break the unwrapped line if a K&R C function definition has a parameter
1645       // declaration.
1646       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1647         break;
1648       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1649         addUnwrappedLine();
1650         return;
1651       }
1652       break;
1653     }
1654     case tok::kw_operator:
1655       nextToken();
1656       if (FormatTok->isBinaryOperator())
1657         nextToken();
1658       break;
1659     case tok::caret:
1660       nextToken();
1661       if (FormatTok->Tok.isAnyIdentifier() ||
1662           FormatTok->isSimpleTypeSpecifier())
1663         nextToken();
1664       if (FormatTok->is(tok::l_paren))
1665         parseParens();
1666       if (FormatTok->is(tok::l_brace))
1667         parseChildBlock();
1668       break;
1669     case tok::l_brace:
1670       if (NextLBracesType != TT_Unknown)
1671         FormatTok->setFinalizedType(NextLBracesType);
1672       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1673         // A block outside of parentheses must be the last part of a
1674         // structural element.
1675         // FIXME: Figure out cases where this is not true, and add projections
1676         // for them (the one we know is missing are lambdas).
1677         if (Style.Language == FormatStyle::LK_Java &&
1678             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1679           // If necessary, we could set the type to something different than
1680           // TT_FunctionLBrace.
1681           if (Style.BraceWrapping.AfterControlStatement ==
1682               FormatStyle::BWACS_Always)
1683             addUnwrappedLine();
1684         } else if (Style.BraceWrapping.AfterFunction) {
1685           addUnwrappedLine();
1686         }
1687         if (!Line->InPPDirective)
1688           FormatTok->setFinalizedType(TT_FunctionLBrace);
1689         parseBlock();
1690         addUnwrappedLine();
1691         return;
1692       }
1693       // Otherwise this was a braced init list, and the structural
1694       // element continues.
1695       break;
1696     case tok::kw_try:
1697       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1698         // field/method declaration.
1699         nextToken();
1700         break;
1701       }
1702       // We arrive here when parsing function-try blocks.
1703       if (Style.BraceWrapping.AfterFunction)
1704         addUnwrappedLine();
1705       parseTryCatch();
1706       return;
1707     case tok::identifier: {
1708       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1709           Line->MustBeDeclaration) {
1710         addUnwrappedLine();
1711         parseCSharpGenericTypeConstraint();
1712         break;
1713       }
1714       if (FormatTok->is(TT_MacroBlockEnd)) {
1715         addUnwrappedLine();
1716         return;
1717       }
1718 
1719       // Function declarations (as opposed to function expressions) are parsed
1720       // on their own unwrapped line by continuing this loop. Function
1721       // expressions (functions that are not on their own line) must not create
1722       // a new unwrapped line, so they are special cased below.
1723       size_t TokenCount = Line->Tokens.size();
1724       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1725           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1726                                                      Keywords.kw_async)))) {
1727         tryToParseJSFunction();
1728         break;
1729       }
1730       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1731           FormatTok->is(Keywords.kw_interface)) {
1732         if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
1737           unsigned StoredPosition = Tokens->getPosition();
1738           FormatToken *Next = Tokens->getNextToken();
1739           FormatTok = Tokens->setPosition(StoredPosition);
1740           if (!mustBeJSIdent(Keywords, Next)) {
1741             nextToken();
1742             break;
1743           }
1744         }
1745         parseRecord();
1746         addUnwrappedLine();
1747         return;
1748       }
1749 
1750       if (FormatTok->is(Keywords.kw_interface)) {
1751         if (parseStructLike())
1752           return;
1753         break;
1754       }
1755 
1756       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1757         parseStatementMacro();
1758         return;
1759       }
1760 
1761       // See if the following token should start a new unwrapped line.
1762       StringRef Text = FormatTok->TokenText;
1763 
1764       FormatToken *PreviousToken = FormatTok;
1765       nextToken();
1766 
1767       // JS doesn't have macros, and within classes colons indicate fields, not
1768       // labels.
1769       if (Style.isJavaScript())
1770         break;
1771 
1772       TokenCount = Line->Tokens.size();
1773       if (TokenCount == 1 ||
1774           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1775         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1776           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1777           parseLabel(!Style.IndentGotoLabels);
1778           if (HasLabel)
1779             *HasLabel = true;
1780           return;
1781         }
1782         // Recognize function-like macro usages without trailing semicolon as
1783         // well as free-standing macros like Q_OBJECT.
1784         bool FunctionLike = FormatTok->is(tok::l_paren);
1785         if (FunctionLike)
1786           parseParens();
1787 
1788         bool FollowedByNewline =
1789             CommentsBeforeNextToken.empty()
1790                 ? FormatTok->NewlinesBefore > 0
1791                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1792 
1793         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1794             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1795           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1796           addUnwrappedLine();
1797           return;
1798         }
1799       }
1800       break;
1801     }
1802     case tok::equal:
1803       if ((Style.isJavaScript() || Style.isCSharp()) &&
1804           FormatTok->is(TT_FatArrow)) {
1805         tryToParseChildBlock();
1806         break;
1807       }
1808 
1809       nextToken();
1810       if (FormatTok->is(tok::l_brace)) {
1811         // Block kind should probably be set to BK_BracedInit for any language.
1812         // C# needs this change to ensure that array initialisers and object
1813         // initialisers are indented the same way.
1814         if (Style.isCSharp())
1815           FormatTok->setBlockKind(BK_BracedInit);
1816         nextToken();
1817         parseBracedList();
1818       } else if (Style.Language == FormatStyle::LK_Proto &&
1819                  FormatTok->is(tok::less)) {
1820         nextToken();
1821         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1822                         /*ClosingBraceKind=*/tok::greater);
1823       }
1824       break;
1825     case tok::l_square:
1826       parseSquare();
1827       break;
1828     case tok::kw_new:
1829       parseNew();
1830       break;
1831     case tok::kw_case:
1832       if (Style.isJavaScript() && Line->MustBeDeclaration)
1833         // 'case: string' field declaration.
1834         break;
1835       parseCaseLabel();
1836       break;
1837     default:
1838       nextToken();
1839       break;
1840     }
1841   } while (!eof());
1842 }
1843 
1844 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1845   assert(FormatTok->is(tok::l_brace));
1846   if (!Style.isCSharp())
1847     return false;
1848   // See if it's a property accessor.
1849   if (FormatTok->Previous->isNot(tok::identifier))
1850     return false;
1851 
1852   // See if we are inside a property accessor.
1853   //
1854   // Record the current tokenPosition so that we can advance and
1855   // reset the current token. `Next` is not set yet so we need
1856   // another way to advance along the token stream.
1857   unsigned int StoredPosition = Tokens->getPosition();
1858   FormatToken *Tok = Tokens->getNextToken();
1859 
1860   // A trivial property accessor is of the form:
1861   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
1862   // Track these as they do not require line breaks to be introduced.
1863   bool HasSpecialAccessor = false;
1864   bool IsTrivialPropertyAccessor = true;
1865   while (!eof()) {
1866     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1867                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1868                      Keywords.kw_init, Keywords.kw_set)) {
1869       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
1870         HasSpecialAccessor = true;
1871       Tok = Tokens->getNextToken();
1872       continue;
1873     }
1874     if (Tok->isNot(tok::r_brace))
1875       IsTrivialPropertyAccessor = false;
1876     break;
1877   }
1878 
1879   if (!HasSpecialAccessor) {
1880     Tokens->setPosition(StoredPosition);
1881     return false;
1882   }
1883 
1884   // Try to parse the property accessor:
1885   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1886   Tokens->setPosition(StoredPosition);
1887   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1888     addUnwrappedLine();
1889   nextToken();
1890   do {
1891     switch (FormatTok->Tok.getKind()) {
1892     case tok::r_brace:
1893       nextToken();
1894       if (FormatTok->is(tok::equal)) {
1895         while (!eof() && FormatTok->isNot(tok::semi))
1896           nextToken();
1897         nextToken();
1898       }
1899       addUnwrappedLine();
1900       return true;
1901     case tok::l_brace:
1902       ++Line->Level;
1903       parseBlock(/*MustBeDeclaration=*/true);
1904       addUnwrappedLine();
1905       --Line->Level;
1906       break;
1907     case tok::equal:
1908       if (FormatTok->is(TT_FatArrow)) {
1909         ++Line->Level;
1910         do {
1911           nextToken();
1912         } while (!eof() && FormatTok->isNot(tok::semi));
1913         nextToken();
1914         addUnwrappedLine();
1915         --Line->Level;
1916         break;
1917       }
1918       nextToken();
1919       break;
1920     default:
1921       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
1922                              Keywords.kw_set) &&
1923           !IsTrivialPropertyAccessor) {
1924         // Non-trivial get/set needs to be on its own line.
1925         addUnwrappedLine();
1926       }
1927       nextToken();
1928     }
1929   } while (!eof());
1930 
1931   // Unreachable for well-formed code (paired '{' and '}').
1932   return true;
1933 }
1934 
/// Tries to parse a C++ lambda expression starting at the current `[`.
///
/// For non-C++ languages the current token is consumed and false is returned.
/// Otherwise the introducer is parsed and the tokens up to the body's `{` are
/// scanned. If the `{` is reached, the brace and the opening `[` are finalized
/// as TT_LambdaLBrace / TT_LambdaLSquare and the body is parsed as a child
/// block.
///
/// \returns false if the language is not C++, if the introducer could not be
/// parsed, or if the introducer is directly followed by `>`. Returns true in
/// all other cases, including when the scan stops at a token that cannot
/// belong to a lambda declarator; true therefore does not by itself mean that
/// a lambda was found.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  // Remember the `[` so it can be typed once the body brace has been found.
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // `[something] >` is not a lambda, but an array type in a template parameter
  // list.
  if (FormatTok->is(tok::greater))
    return false;

  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  // Walk over everything between the introducer and the body's `{`.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      // Not reachable: the loop condition already excludes `{` and the token
      // has not been advanced since it was checked.
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
      // One of these keywords right after `<` marks a template parameter list.
      assert(FormatTok->Previous);
      if (FormatTok->Previous->is(tok::less))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as a heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      // An operator outside of a trailing return type or template parameter
      // list: stop scanning without marking anything as a lambda.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      // Any other token (this includes the end of input) ends the scan without
      // marking anything as a lambda.
      return true;
    }
  }
  // The body brace was reached: type the lambda's tokens and parse the body.
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}
2044 
2045 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2046   const FormatToken *Previous = FormatTok->Previous;
2047   if (Previous &&
2048       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2049                          tok::kw_delete, tok::l_square) ||
2050        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
2051        Previous->isSimpleTypeSpecifier())) {
2052     nextToken();
2053     return false;
2054   }
2055   nextToken();
2056   if (FormatTok->is(tok::l_square))
2057     return false;
2058   parseSquare(/*LambdaIntroducer=*/true);
2059   return true;
2060 }
2061 
2062 void UnwrappedLineParser::tryToParseJSFunction() {
2063   assert(FormatTok->is(Keywords.kw_function) ||
2064          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2065   if (FormatTok->is(Keywords.kw_async))
2066     nextToken();
2067   // Consume "function".
2068   nextToken();
2069 
2070   // Consume * (generator function). Treat it like C++'s overloaded operators.
2071   if (FormatTok->is(tok::star)) {
2072     FormatTok->setFinalizedType(TT_OverloadedOperator);
2073     nextToken();
2074   }
2075 
2076   // Consume function name.
2077   if (FormatTok->is(tok::identifier))
2078     nextToken();
2079 
2080   if (FormatTok->isNot(tok::l_paren))
2081     return;
2082 
2083   // Parse formal parameter list.
2084   parseParens();
2085 
2086   if (FormatTok->is(tok::colon)) {
2087     // Parse a type definition.
2088     nextToken();
2089 
2090     // Eat the type declaration. For braced inline object types, balance braces,
2091     // otherwise just parse until finding an l_brace for the function body.
2092     if (FormatTok->is(tok::l_brace))
2093       tryToParseBracedList();
2094     else
2095       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2096         nextToken();
2097   }
2098 
2099   if (FormatTok->is(tok::semi))
2100     return;
2101 
2102   parseChildBlock();
2103 }
2104 
2105 bool UnwrappedLineParser::tryToParseBracedList() {
2106   if (FormatTok->is(BK_Unknown))
2107     calculateBraceTypes();
2108   assert(FormatTok->isNot(BK_Unknown));
2109   if (FormatTok->is(BK_Block))
2110     return false;
2111   nextToken();
2112   parseBracedList();
2113   return true;
2114 }
2115 
2116 bool UnwrappedLineParser::tryToParseChildBlock() {
2117   assert(Style.isJavaScript() || Style.isCSharp());
2118   assert(FormatTok->is(TT_FatArrow));
2119   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2120   // They always start an expression or a child block if followed by a curly
2121   // brace.
2122   nextToken();
2123   if (FormatTok->isNot(tok::l_brace))
2124     return false;
2125   parseChildBlock();
2126   return true;
2127 }
2128 
2129 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2130                                           bool IsEnum,
2131                                           tok::TokenKind ClosingBraceKind) {
2132   bool HasError = false;
2133 
2134   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2135   // replace this by using parseAssignmentExpression() inside.
2136   do {
2137     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2138         tryToParseChildBlock())
2139       continue;
2140     if (Style.isJavaScript()) {
2141       if (FormatTok->is(Keywords.kw_function) ||
2142           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2143         tryToParseJSFunction();
2144         continue;
2145       }
2146       if (FormatTok->is(tok::l_brace)) {
2147         // Could be a method inside of a braced list `{a() { return 1; }}`.
2148         if (tryToParseBracedList())
2149           continue;
2150         parseChildBlock();
2151       }
2152     }
2153     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2154       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2155         addUnwrappedLine();
2156       nextToken();
2157       return !HasError;
2158     }
2159     switch (FormatTok->Tok.getKind()) {
2160     case tok::l_square:
2161       if (Style.isCSharp())
2162         parseSquare();
2163       else
2164         tryToParseLambda();
2165       break;
2166     case tok::l_paren:
2167       parseParens();
2168       // JavaScript can just have free standing methods and getters/setters in
2169       // object literals. Detect them by a "{" following ")".
2170       if (Style.isJavaScript()) {
2171         if (FormatTok->is(tok::l_brace))
2172           parseChildBlock();
2173         break;
2174       }
2175       break;
2176     case tok::l_brace:
2177       // Assume there are no blocks inside a braced init list apart
2178       // from the ones we explicitly parse out (like lambdas).
2179       FormatTok->setBlockKind(BK_BracedInit);
2180       nextToken();
2181       parseBracedList();
2182       break;
2183     case tok::less:
2184       if (Style.Language == FormatStyle::LK_Proto) {
2185         nextToken();
2186         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2187                         /*ClosingBraceKind=*/tok::greater);
2188       } else {
2189         nextToken();
2190       }
2191       break;
2192     case tok::semi:
2193       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2194       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2195       // used for error recovery if we have otherwise determined that this is
2196       // a braced list.
2197       if (Style.isJavaScript()) {
2198         nextToken();
2199         break;
2200       }
2201       HasError = true;
2202       if (!ContinueOnSemicolons)
2203         return !HasError;
2204       nextToken();
2205       break;
2206     case tok::comma:
2207       nextToken();
2208       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2209         addUnwrappedLine();
2210       break;
2211     default:
2212       nextToken();
2213       break;
2214     }
2215   } while (!eof());
2216   return false;
2217 }
2218 
2219 /// \brief Parses a pair of parentheses (and everything between them).
2220 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2221 /// double ampersands. This only counts for the current parens scope.
2222 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2223   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2224   nextToken();
2225   do {
2226     switch (FormatTok->Tok.getKind()) {
2227     case tok::l_paren:
2228       parseParens();
2229       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2230         parseChildBlock();
2231       break;
2232     case tok::r_paren:
2233       nextToken();
2234       return;
2235     case tok::r_brace:
2236       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2237       return;
2238     case tok::l_square:
2239       tryToParseLambda();
2240       break;
2241     case tok::l_brace:
2242       if (!tryToParseBracedList())
2243         parseChildBlock();
2244       break;
2245     case tok::at:
2246       nextToken();
2247       if (FormatTok->is(tok::l_brace)) {
2248         nextToken();
2249         parseBracedList();
2250       }
2251       break;
2252     case tok::equal:
2253       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2254         tryToParseChildBlock();
2255       else
2256         nextToken();
2257       break;
2258     case tok::kw_class:
2259       if (Style.isJavaScript())
2260         parseRecord(/*ParseAsExpr=*/true);
2261       else
2262         nextToken();
2263       break;
2264     case tok::identifier:
2265       if (Style.isJavaScript() &&
2266           (FormatTok->is(Keywords.kw_function) ||
2267            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2268         tryToParseJSFunction();
2269       else
2270         nextToken();
2271       break;
2272     case tok::kw_requires: {
2273       auto RequiresToken = FormatTok;
2274       nextToken();
2275       parseRequiresExpression(RequiresToken);
2276       break;
2277     }
2278     case tok::ampamp:
2279       if (AmpAmpTokenType != TT_Unknown)
2280         FormatTok->setFinalizedType(AmpAmpTokenType);
2281       LLVM_FALLTHROUGH;
2282     default:
2283       nextToken();
2284       break;
2285     }
2286   } while (!eof());
2287 }
2288 
2289 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2290   if (!LambdaIntroducer) {
2291     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2292     if (tryToParseLambda())
2293       return;
2294   }
2295   do {
2296     switch (FormatTok->Tok.getKind()) {
2297     case tok::l_paren:
2298       parseParens();
2299       break;
2300     case tok::r_square:
2301       nextToken();
2302       return;
2303     case tok::r_brace:
2304       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2305       return;
2306     case tok::l_square:
2307       parseSquare();
2308       break;
2309     case tok::l_brace: {
2310       if (!tryToParseBracedList())
2311         parseChildBlock();
2312       break;
2313     }
2314     case tok::at:
2315       nextToken();
2316       if (FormatTok->is(tok::l_brace)) {
2317         nextToken();
2318         parseBracedList();
2319       }
2320       break;
2321     default:
2322       nextToken();
2323       break;
2324     }
2325   } while (!eof());
2326 }
2327 
2328 void UnwrappedLineParser::keepAncestorBraces() {
2329   if (!Style.RemoveBracesLLVM)
2330     return;
2331 
2332   const int MaxNestingLevels = 2;
2333   const int Size = NestedTooDeep.size();
2334   if (Size >= MaxNestingLevels)
2335     NestedTooDeep[Size - MaxNestingLevels] = true;
2336   NestedTooDeep.push_back(false);
2337 }
2338 
2339 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2340   for (const auto &Token : llvm::reverse(Line.Tokens))
2341     if (Token.Tok->isNot(tok::comment))
2342       return Token.Tok;
2343 
2344   return nullptr;
2345 }
2346 
2347 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2348   FormatToken *Tok = nullptr;
2349 
2350   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2351       PreprocessorDirectives.empty()) {
2352     Tok = getLastNonComment(*Line);
2353     assert(Tok);
2354     if (Tok->BraceCount < 0) {
2355       assert(Tok->BraceCount == -1);
2356       Tok = nullptr;
2357     } else {
2358       Tok->BraceCount = -1;
2359     }
2360   }
2361 
2362   addUnwrappedLine();
2363   ++Line->Level;
2364   parseStructuralElement();
2365 
2366   if (Tok) {
2367     assert(!Line->InPPDirective);
2368     Tok = nullptr;
2369     for (const auto &L : llvm::reverse(*CurrentLines)) {
2370       if (!L.InPPDirective && getLastNonComment(L)) {
2371         Tok = L.Tokens.back().Tok;
2372         break;
2373       }
2374     }
2375     assert(Tok);
2376     ++Tok->BraceCount;
2377   }
2378 
2379   if (CheckEOF && FormatTok->is(tok::eof))
2380     addUnwrappedLine();
2381 
2382   --Line->Level;
2383 }
2384 
2385 static void markOptionalBraces(FormatToken *LeftBrace) {
2386   if (!LeftBrace)
2387     return;
2388 
2389   assert(LeftBrace->is(tok::l_brace));
2390 
2391   FormatToken *RightBrace = LeftBrace->MatchingParen;
2392   if (!RightBrace) {
2393     assert(!LeftBrace->Optional);
2394     return;
2395   }
2396 
2397   assert(RightBrace->is(tok::r_brace));
2398   assert(RightBrace->MatchingParen == LeftBrace);
2399   assert(LeftBrace->Optional == RightBrace->Optional);
2400 
2401   LeftBrace->Optional = true;
2402   RightBrace->Optional = true;
2403 }
2404 
2405 void UnwrappedLineParser::handleAttributes() {
2406   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2407   if (FormatTok->is(TT_AttributeMacro))
2408     nextToken();
2409   handleCppAttributes();
2410 }
2411 
2412 bool UnwrappedLineParser::handleCppAttributes() {
2413   // Handle [[likely]] / [[unlikely]] attributes.
2414   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2415     parseSquare();
2416     return true;
2417   }
2418   return false;
2419 }
2420 
/// Parses an `if` statement together with its optional `else` branch; an
/// `else if` chain is handled by recursion.
///
/// When Style.RemoveBracesLLVM is set, this also decides whether the braces of
/// the if and else blocks may be removed and marks them as optional if so.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is set, receives the
/// kind of the parsed statement (IfOnly, IfElse or IfElseIf). It is not
/// written when Style.RemoveBracesLLVM is off.
/// \param KeepBraces If true, the braces of this statement are never marked
/// as optional.
/// \returns nullptr if Style.RemoveBracesLLVM is off; otherwise the left brace
/// of the if block, which is null when the if body is not braced.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces) {
  assert(FormatTok->is(tok::kw_if) && "'if' expected");
  nextToken();
  // Skip an optional `!` after `if` (presumably for `if !consteval`).
  if (FormatTok->is(tok::exclaim))
    nextToken();
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    // An optional `constexpr` or single identifier may precede the
    // parenthesized condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren))
      parseParens();
  }
  handleAttributes();

  bool NeedsUnwrappedLine = false;
  // Pushes an entry onto NestedTooDeep when Style.RemoveBracesLLVM is set.
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (FormatTok->is(tok::l_brace)) {
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    IfBlockKind = parseBlock();
    // Either break the line before a potential `else` right away, or remember
    // to finish the line later if no `else` follows.
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    parseUnbracedBody();
  }

  bool KeepIfBraces = false;
  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if the left brace has no MatchingParen, if this level
    // was flagged as nested too deep, or if the block was reported as an if
    // statement of kind IfOnly or IfElseIf.
    KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Reset the flag so that it reflects the else branch only.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (FormatTok->is(tok::l_brace)) {
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      if (parseBlock() == IfStmtKind::IfOnly)
        Kind = IfStmtKind::IfElseIf;
      addUnwrappedLine();
    } else if (FormatTok->is(tok::kw_if)) {
      // `else if`: a comment between `else` and `if` forces the nested `if`
      // onto its own, more deeply indented line.
      FormatToken *Previous = Tokens->getPreviousToken();
      const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take this statement's entry off the stack while the nested `if` is
        // parsed; it is pushed back right after the recursive call.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace =
          parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No `else`: an if block that itself was reported as IfElse keeps its
    // braces.
    if (Style.RemoveBracesLLVM)
      KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is bookkeeping for brace removal only.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  const bool KeepElseBraces =
      (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();

  // Drop the entry pushed by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: unlink the if block's brace pair.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2535 
2536 void UnwrappedLineParser::parseTryCatch() {
2537   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2538   nextToken();
2539   bool NeedsUnwrappedLine = false;
2540   if (FormatTok->is(tok::colon)) {
2541     // We are in a function try block, what comes is an initializer list.
2542     nextToken();
2543 
2544     // In case identifiers were removed by clang-tidy, what might follow is
2545     // multiple commas in sequence - before the first identifier.
2546     while (FormatTok->is(tok::comma))
2547       nextToken();
2548 
2549     while (FormatTok->is(tok::identifier)) {
2550       nextToken();
2551       if (FormatTok->is(tok::l_paren))
2552         parseParens();
2553       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2554           FormatTok->is(tok::l_brace)) {
2555         do {
2556           nextToken();
2557         } while (!FormatTok->is(tok::r_brace));
2558         nextToken();
2559       }
2560 
2561       // In case identifiers were removed by clang-tidy, what might follow is
2562       // multiple commas in sequence - after the first identifier.
2563       while (FormatTok->is(tok::comma))
2564         nextToken();
2565     }
2566   }
2567   // Parse try with resource.
2568   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2569     parseParens();
2570 
2571   keepAncestorBraces();
2572 
2573   if (FormatTok->is(tok::l_brace)) {
2574     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2575     parseBlock();
2576     if (Style.BraceWrapping.BeforeCatch)
2577       addUnwrappedLine();
2578     else
2579       NeedsUnwrappedLine = true;
2580   } else if (!FormatTok->is(tok::kw_catch)) {
2581     // The C++ standard requires a compound-statement after a try.
2582     // If there's none, we try to assume there's a structuralElement
2583     // and try to continue.
2584     addUnwrappedLine();
2585     ++Line->Level;
2586     parseStructuralElement();
2587     --Line->Level;
2588   }
2589   while (true) {
2590     if (FormatTok->is(tok::at))
2591       nextToken();
2592     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2593                              tok::kw___finally) ||
2594           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2595            FormatTok->is(Keywords.kw_finally)) ||
2596           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2597            FormatTok->isObjCAtKeyword(tok::objc_finally))))
2598       break;
2599     nextToken();
2600     while (FormatTok->isNot(tok::l_brace)) {
2601       if (FormatTok->is(tok::l_paren)) {
2602         parseParens();
2603         continue;
2604       }
2605       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2606         if (Style.RemoveBracesLLVM)
2607           NestedTooDeep.pop_back();
2608         return;
2609       }
2610       nextToken();
2611     }
2612     NeedsUnwrappedLine = false;
2613     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2614     parseBlock();
2615     if (Style.BraceWrapping.BeforeCatch)
2616       addUnwrappedLine();
2617     else
2618       NeedsUnwrappedLine = true;
2619   }
2620 
2621   if (Style.RemoveBracesLLVM)
2622     NestedTooDeep.pop_back();
2623 
2624   if (NeedsUnwrappedLine)
2625     addUnwrappedLine();
2626 }
2627 
2628 void UnwrappedLineParser::parseNamespace() {
2629   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2630          "'namespace' expected");
2631 
2632   const FormatToken &InitialToken = *FormatTok;
2633   nextToken();
2634   if (InitialToken.is(TT_NamespaceMacro)) {
2635     parseParens();
2636   } else {
2637     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2638                               tok::l_square, tok::period, tok::l_paren) ||
2639            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2640       if (FormatTok->is(tok::l_square))
2641         parseSquare();
2642       else if (FormatTok->is(tok::l_paren))
2643         parseParens();
2644       else
2645         nextToken();
2646   }
2647   if (FormatTok->is(tok::l_brace)) {
2648     if (ShouldBreakBeforeBrace(Style, InitialToken))
2649       addUnwrappedLine();
2650 
2651     unsigned AddLevels =
2652         Style.NamespaceIndentation == FormatStyle::NI_All ||
2653                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2654                  DeclarationScopeStack.size() > 1)
2655             ? 1u
2656             : 0u;
2657     bool ManageWhitesmithsBraces =
2658         AddLevels == 0u &&
2659         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2660 
2661     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2662     // the whole block.
2663     if (ManageWhitesmithsBraces)
2664       ++Line->Level;
2665 
2666     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2667                /*MunchSemi=*/true,
2668                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2669 
2670     // Munch the semicolon after a namespace. This is more common than one would
2671     // think. Putting the semicolon into its own line is very ugly.
2672     if (FormatTok->is(tok::semi))
2673       nextToken();
2674 
2675     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2676 
2677     if (ManageWhitesmithsBraces)
2678       --Line->Level;
2679   }
2680   // FIXME: Add error handling.
2681 }
2682 
2683 void UnwrappedLineParser::parseNew() {
2684   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2685   nextToken();
2686 
2687   if (Style.isCSharp()) {
2688     do {
2689       if (FormatTok->is(tok::l_brace))
2690         parseBracedList();
2691 
2692       if (FormatTok->isOneOf(tok::semi, tok::comma))
2693         return;
2694 
2695       nextToken();
2696     } while (!eof());
2697   }
2698 
2699   if (Style.Language != FormatStyle::LK_Java)
2700     return;
2701 
2702   // In Java, we can parse everything up to the parens, which aren't optional.
2703   do {
2704     // There should not be a ;, { or } before the new's open paren.
2705     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2706       return;
2707 
2708     // Consume the parens.
2709     if (FormatTok->is(tok::l_paren)) {
2710       parseParens();
2711 
2712       // If there is a class body of an anonymous class, consume that as child.
2713       if (FormatTok->is(tok::l_brace))
2714         parseChildBlock();
2715       return;
2716     }
2717     nextToken();
2718   } while (!eof());
2719 }
2720 
2721 void UnwrappedLineParser::parseForOrWhileLoop() {
2722   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2723          "'for', 'while' or foreach macro expected");
2724   nextToken();
2725   // JS' for await ( ...
2726   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2727     nextToken();
2728   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2729     nextToken();
2730   if (FormatTok->is(tok::l_paren))
2731     parseParens();
2732 
2733   keepAncestorBraces();
2734 
2735   if (FormatTok->is(tok::l_brace)) {
2736     FormatToken *LeftBrace = FormatTok;
2737     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2738     parseBlock();
2739     if (Style.RemoveBracesLLVM) {
2740       assert(!NestedTooDeep.empty());
2741       if (!NestedTooDeep.back())
2742         markOptionalBraces(LeftBrace);
2743     }
2744     addUnwrappedLine();
2745   } else {
2746     parseUnbracedBody();
2747   }
2748 
2749   if (Style.RemoveBracesLLVM)
2750     NestedTooDeep.pop_back();
2751 }
2752 
2753 void UnwrappedLineParser::parseDoWhile() {
2754   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2755   nextToken();
2756 
2757   keepAncestorBraces();
2758 
2759   if (FormatTok->is(tok::l_brace)) {
2760     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2761     parseBlock();
2762     if (Style.BraceWrapping.BeforeWhile)
2763       addUnwrappedLine();
2764   } else {
2765     parseUnbracedBody();
2766   }
2767 
2768   if (Style.RemoveBracesLLVM)
2769     NestedTooDeep.pop_back();
2770 
2771   // FIXME: Add error handling.
2772   if (!FormatTok->is(tok::kw_while)) {
2773     addUnwrappedLine();
2774     return;
2775   }
2776 
2777   // If in Whitesmiths mode, the line with the while() needs to be indented
2778   // to the same level as the block.
2779   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2780     ++Line->Level;
2781 
2782   nextToken();
2783   parseStructuralElement();
2784 }
2785 
2786 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2787   nextToken();
2788   unsigned OldLineLevel = Line->Level;
2789   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2790     --Line->Level;
2791   if (LeftAlignLabel)
2792     Line->Level = 0;
2793 
2794   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2795       FormatTok->is(tok::l_brace)) {
2796 
2797     CompoundStatementIndenter Indenter(this, Line->Level,
2798                                        Style.BraceWrapping.AfterCaseLabel,
2799                                        Style.BraceWrapping.IndentBraces);
2800     parseBlock();
2801     if (FormatTok->is(tok::kw_break)) {
2802       if (Style.BraceWrapping.AfterControlStatement ==
2803           FormatStyle::BWACS_Always) {
2804         addUnwrappedLine();
2805         if (!Style.IndentCaseBlocks &&
2806             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2807           ++Line->Level;
2808       }
2809       parseStructuralElement();
2810     }
2811     addUnwrappedLine();
2812   } else {
2813     if (FormatTok->is(tok::semi))
2814       nextToken();
2815     addUnwrappedLine();
2816   }
2817   Line->Level = OldLineLevel;
2818   if (FormatTok->isNot(tok::l_brace)) {
2819     parseStructuralElement();
2820     addUnwrappedLine();
2821   }
2822 }
2823 
2824 void UnwrappedLineParser::parseCaseLabel() {
2825   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2826 
2827   // FIXME: fix handling of complex expressions here.
2828   do {
2829     nextToken();
2830   } while (!eof() && !FormatTok->is(tok::colon));
2831   parseLabel();
2832 }
2833 
2834 void UnwrappedLineParser::parseSwitch() {
2835   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
2836   nextToken();
2837   if (FormatTok->is(tok::l_paren))
2838     parseParens();
2839 
2840   keepAncestorBraces();
2841 
2842   if (FormatTok->is(tok::l_brace)) {
2843     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2844     parseBlock();
2845     addUnwrappedLine();
2846   } else {
2847     addUnwrappedLine();
2848     ++Line->Level;
2849     parseStructuralElement();
2850     --Line->Level;
2851   }
2852 
2853   if (Style.RemoveBracesLLVM)
2854     NestedTooDeep.pop_back();
2855 }
2856 
2857 // Operators that can follow a C variable.
2858 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
2859   switch (kind) {
2860   case tok::ampamp:
2861   case tok::ampequal:
2862   case tok::arrow:
2863   case tok::caret:
2864   case tok::caretequal:
2865   case tok::comma:
2866   case tok::ellipsis:
2867   case tok::equal:
2868   case tok::equalequal:
2869   case tok::exclaim:
2870   case tok::exclaimequal:
2871   case tok::greater:
2872   case tok::greaterequal:
2873   case tok::greatergreater:
2874   case tok::greatergreaterequal:
2875   case tok::l_paren:
2876   case tok::l_square:
2877   case tok::less:
2878   case tok::lessequal:
2879   case tok::lessless:
2880   case tok::lesslessequal:
2881   case tok::minus:
2882   case tok::minusequal:
2883   case tok::minusminus:
2884   case tok::percent:
2885   case tok::percentequal:
2886   case tok::period:
2887   case tok::pipe:
2888   case tok::pipeequal:
2889   case tok::pipepipe:
2890   case tok::plus:
2891   case tok::plusequal:
2892   case tok::plusplus:
2893   case tok::question:
2894   case tok::r_brace:
2895   case tok::r_paren:
2896   case tok::r_square:
2897   case tok::semi:
2898   case tok::slash:
2899   case tok::slashequal:
2900   case tok::star:
2901   case tok::starequal:
2902     return true;
2903   default:
2904     return false;
2905   }
2906 }
2907 
2908 void UnwrappedLineParser::parseAccessSpecifier() {
2909   FormatToken *AccessSpecifierCandidate = FormatTok;
2910   nextToken();
2911   // Understand Qt's slots.
2912   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2913     nextToken();
2914   // Otherwise, we don't know what it is, and we'd better keep the next token.
2915   if (FormatTok->is(tok::colon)) {
2916     nextToken();
2917     addUnwrappedLine();
2918   } else if (!FormatTok->is(tok::coloncolon) &&
2919              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
2920     // Not a variable name nor namespace name.
2921     addUnwrappedLine();
2922   } else if (AccessSpecifierCandidate) {
2923     // Consider the access specifier to be a C identifier.
2924     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2925   }
2926 }
2927 
2928 /// \brief Parses a concept definition.
2929 /// \pre The current token has to be the concept keyword.
2930 ///
2931 /// Returns if either the concept has been completely parsed, or if it detects
2932 /// that the concept definition is incorrect.
2933 void UnwrappedLineParser::parseConcept() {
2934   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
2935   nextToken();
2936   if (!FormatTok->is(tok::identifier))
2937     return;
2938   nextToken();
2939   if (!FormatTok->is(tok::equal))
2940     return;
2941   nextToken();
2942   parseConstraintExpression();
2943   if (FormatTok->is(tok::semi))
2944     nextToken();
2945   addUnwrappedLine();
2946 }
2947 
2948 /// \brief Parses a requires, decides if it is a clause or an expression.
2949 /// \pre The current token has to be the requires keyword.
2950 /// \returns true if it parsed a clause.
2951 bool clang::format::UnwrappedLineParser::parseRequires() {
2952   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
2953   auto RequiresToken = FormatTok;
2954 
2955   // We try to guess if it is a requires clause, or a requires expression. For
2956   // that we first consume the keyword and check the next token.
2957   nextToken();
2958 
2959   switch (FormatTok->Tok.getKind()) {
2960   case tok::l_brace:
2961     // This can only be an expression, never a clause.
2962     parseRequiresExpression(RequiresToken);
2963     return false;
2964   case tok::l_paren:
2965     // Clauses and expression can start with a paren, it's unclear what we have.
2966     break;
2967   default:
2968     // All other tokens can only be a clause.
2969     parseRequiresClause(RequiresToken);
2970     return true;
2971   }
2972 
2973   // Looking forward we would have to decide if there are function declaration
2974   // like arguments to the requires expression:
2975   // requires (T t) {
2976   // Or there is a constraint expression for the requires clause:
2977   // requires (C<T> && ...
2978 
2979   // But first let's look behind.
2980   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
2981 
2982   if (!PreviousNonComment ||
2983       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
2984     // If there is no token, or an expression left brace, we are a requires
2985     // clause within a requires expression.
2986     parseRequiresClause(RequiresToken);
2987     return true;
2988   }
2989 
2990   switch (PreviousNonComment->Tok.getKind()) {
2991   case tok::greater:
2992   case tok::r_paren:
2993   case tok::kw_noexcept:
2994   case tok::kw_const:
2995     // This is a requires clause.
2996     parseRequiresClause(RequiresToken);
2997     return true;
2998   case tok::amp:
2999   case tok::ampamp: {
3000     // This can be either:
3001     // if (... && requires (T t) ...)
3002     // Or
3003     // void member(...) && requires (C<T> ...
3004     // We check the one token before that for a const:
3005     // void member(...) const && requires (C<T> ...
3006     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3007     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3008       parseRequiresClause(RequiresToken);
3009       return true;
3010     }
3011     break;
3012   }
3013   default:
3014     // It's an expression.
3015     parseRequiresExpression(RequiresToken);
3016     return false;
3017   }
3018 
3019   // Now we look forward and try to check if the paren content is a parameter
3020   // list. The parameters can be cv-qualified and contain references or
3021   // pointers.
3022   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3023   // of stuff: typename, const, *, &, &&, ::, identifiers.
3024 
3025   int NextTokenOffset = 1;
3026   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3027   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3028     ++NextTokenOffset;
3029     NextToken = Tokens->peekNextToken(NextTokenOffset);
3030   };
3031 
3032   bool FoundType = false;
3033   bool LastWasColonColon = false;
3034   int OpenAngles = 0;
3035 
3036   for (; NextTokenOffset < 50; PeekNext()) {
3037     switch (NextToken->Tok.getKind()) {
3038     case tok::kw_volatile:
3039     case tok::kw_const:
3040     case tok::comma:
3041       parseRequiresExpression(RequiresToken);
3042       return false;
3043     case tok::r_paren:
3044     case tok::pipepipe:
3045       parseRequiresClause(RequiresToken);
3046       return true;
3047     case tok::eof:
3048       // Break out of the loop.
3049       NextTokenOffset = 50;
3050       break;
3051     case tok::coloncolon:
3052       LastWasColonColon = true;
3053       break;
3054     case tok::identifier:
3055       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3056         parseRequiresExpression(RequiresToken);
3057         return false;
3058       }
3059       FoundType = true;
3060       LastWasColonColon = false;
3061       break;
3062     case tok::less:
3063       ++OpenAngles;
3064       break;
3065     case tok::greater:
3066       --OpenAngles;
3067       break;
3068     default:
3069       if (NextToken->isSimpleTypeSpecifier()) {
3070         parseRequiresExpression(RequiresToken);
3071         return false;
3072       }
3073       break;
3074     }
3075   }
3076 
3077   // This seems to be a complicated expression, just assume it's a clause.
3078   parseRequiresClause(RequiresToken);
3079   return true;
3080 }
3081 
3082 /// \brief Parses a requires clause.
3083 /// \param RequiresToken The requires keyword token, which starts this clause.
3084 /// \pre We need to be on the next token after the requires keyword.
3085 /// \sa parseRequiresExpression
3086 ///
3087 /// Returns if it either has finished parsing the clause, or it detects, that
3088 /// the clause is incorrect.
3089 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3090   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3091   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3092 
3093   // If there is no previous token, we are within a requires expression,
3094   // otherwise we will always have the template or function declaration in front
3095   // of it.
3096   bool InRequiresExpression =
3097       !RequiresToken->Previous ||
3098       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3099 
3100   RequiresToken->setFinalizedType(InRequiresExpression
3101                                       ? TT_RequiresClauseInARequiresExpression
3102                                       : TT_RequiresClause);
3103 
3104   parseConstraintExpression();
3105 
3106   if (!InRequiresExpression)
3107     FormatTok->Previous->ClosesRequiresClause = true;
3108 }
3109 
3110 /// \brief Parses a requires expression.
3111 /// \param RequiresToken The requires keyword token, which starts this clause.
3112 /// \pre We need to be on the next token after the requires keyword.
3113 /// \sa parseRequiresClause
3114 ///
3115 /// Returns if it either has finished parsing the expression, or it detects,
3116 /// that the expression is incorrect.
3117 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3118   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3119   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3120 
3121   RequiresToken->setFinalizedType(TT_RequiresExpression);
3122 
3123   if (FormatTok->is(tok::l_paren)) {
3124     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3125     parseParens();
3126   }
3127 
3128   if (FormatTok->is(tok::l_brace)) {
3129     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3130     parseChildBlock(/*CanContainBracedList=*/false,
3131                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3132   }
3133 }
3134 
3135 /// \brief Parses a constraint expression.
3136 ///
3137 /// This is either the definition of a concept, or the body of a requires
3138 /// clause. It returns, when the parsing is complete, or the expression is
3139 /// incorrect.
3140 void UnwrappedLineParser::parseConstraintExpression() {
3141   // The special handling for lambdas is needed since tryToParseLambda() eats a
3142   // token and if a requires expression is the last part of a requires clause
3143   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3144   // not set on the correct token. Thus we need to be aware if we even expect a
3145   // lambda to be possible.
3146   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3147   bool LambdaNextTimeAllowed = true;
3148   do {
3149     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3150 
3151     switch (FormatTok->Tok.getKind()) {
3152     case tok::kw_requires: {
3153       auto RequiresToken = FormatTok;
3154       nextToken();
3155       parseRequiresExpression(RequiresToken);
3156       break;
3157     }
3158 
3159     case tok::l_paren:
3160       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3161       break;
3162 
3163     case tok::l_square:
3164       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3165         return;
3166       break;
3167 
3168     case tok::kw_const:
3169     case tok::semi:
3170     case tok::kw_class:
3171     case tok::kw_struct:
3172     case tok::kw_union:
3173       return;
3174 
3175     case tok::l_brace:
3176       // Potential function body.
3177       return;
3178 
3179     case tok::ampamp:
3180     case tok::pipepipe:
3181       FormatTok->setFinalizedType(TT_BinaryOperator);
3182       nextToken();
3183       LambdaNextTimeAllowed = true;
3184       break;
3185 
3186     case tok::comma:
3187     case tok::comment:
3188       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3189       nextToken();
3190       break;
3191 
3192     case tok::kw_sizeof:
3193     case tok::greater:
3194     case tok::greaterequal:
3195     case tok::greatergreater:
3196     case tok::less:
3197     case tok::lessequal:
3198     case tok::lessless:
3199     case tok::equalequal:
3200     case tok::exclaim:
3201     case tok::exclaimequal:
3202     case tok::plus:
3203     case tok::minus:
3204     case tok::star:
3205     case tok::slash:
3206     case tok::kw_decltype:
3207       LambdaNextTimeAllowed = true;
3208       // Just eat them.
3209       nextToken();
3210       break;
3211 
3212     case tok::numeric_constant:
3213     case tok::coloncolon:
3214     case tok::kw_true:
3215     case tok::kw_false:
3216       // Just eat them.
3217       nextToken();
3218       break;
3219 
3220     case tok::kw_static_cast:
3221     case tok::kw_const_cast:
3222     case tok::kw_reinterpret_cast:
3223     case tok::kw_dynamic_cast:
3224       nextToken();
3225       if (!FormatTok->is(tok::less))
3226         return;
3227 
3228       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3229                       /*ClosingBraceKind=*/tok::greater);
3230       break;
3231 
3232     case tok::kw_bool:
3233       // bool is only allowed if it is directly followed by a paren for a cast:
3234       // concept C = bool(...);
3235       // and bool is the only type, all other types as cast must be inside a
3236       // cast to bool an thus are handled by the other cases.
3237       nextToken();
3238       if (FormatTok->isNot(tok::l_paren))
3239         return;
3240       parseParens();
3241       break;
3242 
3243     default:
3244       if (!FormatTok->Tok.getIdentifierInfo()) {
3245         // Identifiers are part of the default case, we check for more then
3246         // tok::identifier to handle builtin type traits.
3247         return;
3248       }
3249 
3250       // We need to differentiate identifiers for a template deduction guide,
3251       // variables, or function return types (the constraint expression has
3252       // ended before that), and basically all other cases. But it's easier to
3253       // check the other way around.
3254       assert(FormatTok->Previous);
3255       switch (FormatTok->Previous->Tok.getKind()) {
3256       case tok::coloncolon:  // Nested identifier.
3257       case tok::ampamp:      // Start of a function or variable for the
3258       case tok::pipepipe:    // constraint expression.
3259       case tok::kw_requires: // Initial identifier of a requires clause.
3260       case tok::equal:       // Initial identifier of a concept declaration.
3261         break;
3262       default:
3263         return;
3264       }
3265 
3266       // Read identifier with optional template declaration.
3267       nextToken();
3268       if (FormatTok->is(tok::less))
3269         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3270                         /*ClosingBraceKind=*/tok::greater);
3271       break;
3272     }
3273   } while (!eof());
3274 }
3275 
3276 bool UnwrappedLineParser::parseEnum() {
3277   const FormatToken &InitialToken = *FormatTok;
3278 
3279   // Won't be 'enum' for NS_ENUMs.
3280   if (FormatTok->is(tok::kw_enum))
3281     nextToken();
3282 
3283   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3284   // declarations. An "enum" keyword followed by a colon would be a syntax
3285   // error and thus assume it is just an identifier.
3286   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3287     return false;
3288 
3289   // In protobuf, "enum" can be used as a field name.
3290   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3291     return false;
3292 
3293   // Eat up enum class ...
3294   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3295     nextToken();
3296 
3297   while (FormatTok->Tok.getIdentifierInfo() ||
3298          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3299                             tok::greater, tok::comma, tok::question)) {
3300     nextToken();
3301     // We can have macros or attributes in between 'enum' and the enum name.
3302     if (FormatTok->is(tok::l_paren))
3303       parseParens();
3304     if (FormatTok->is(tok::identifier)) {
3305       nextToken();
3306       // If there are two identifiers in a row, this is likely an elaborate
3307       // return type. In Java, this can be "implements", etc.
3308       if (Style.isCpp() && FormatTok->is(tok::identifier))
3309         return false;
3310     }
3311   }
3312 
3313   // Just a declaration or something is wrong.
3314   if (FormatTok->isNot(tok::l_brace))
3315     return true;
3316   FormatTok->setFinalizedType(TT_EnumLBrace);
3317   FormatTok->setBlockKind(BK_Block);
3318 
3319   if (Style.Language == FormatStyle::LK_Java) {
3320     // Java enums are different.
3321     parseJavaEnumBody();
3322     return true;
3323   }
3324   if (Style.Language == FormatStyle::LK_Proto) {
3325     parseBlock(/*MustBeDeclaration=*/true);
3326     return true;
3327   }
3328 
3329   if (!Style.AllowShortEnumsOnASingleLine &&
3330       ShouldBreakBeforeBrace(Style, InitialToken))
3331     addUnwrappedLine();
3332   // Parse enum body.
3333   nextToken();
3334   if (!Style.AllowShortEnumsOnASingleLine) {
3335     addUnwrappedLine();
3336     Line->Level += 1;
3337   }
3338   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3339                                    /*IsEnum=*/true);
3340   if (!Style.AllowShortEnumsOnASingleLine)
3341     Line->Level -= 1;
3342   if (HasError) {
3343     if (FormatTok->is(tok::semi))
3344       nextToken();
3345     addUnwrappedLine();
3346   }
3347   return true;
3348 
3349   // There is no addUnwrappedLine() here so that we fall through to parsing a
3350   // structural element afterwards. Thus, in "enum A {} n, m;",
3351   // "} n, m;" will end up in one unwrapped line.
3352 }
3353 
3354 bool UnwrappedLineParser::parseStructLike() {
3355   // parseRecord falls through and does not yet add an unwrapped line as a
3356   // record declaration or definition can start a structural element.
3357   parseRecord();
3358   // This does not apply to Java, JavaScript and C#.
3359   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3360       Style.isCSharp()) {
3361     if (FormatTok->is(tok::semi))
3362       nextToken();
3363     addUnwrappedLine();
3364     return true;
3365   }
3366   return false;
3367 }
3368 
3369 namespace {
3370 // A class used to set and restore the Token position when peeking
3371 // ahead in the token source.
3372 class ScopedTokenPosition {
3373   unsigned StoredPosition;
3374   FormatTokenSource *Tokens;
3375 
3376 public:
3377   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3378     assert(Tokens && "Tokens expected to not be null");
3379     StoredPosition = Tokens->getPosition();
3380   }
3381 
3382   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3383 };
3384 } // namespace
3385 
3386 // Look to see if we have [[ by looking ahead, if
3387 // its not then rewind to the original position.
3388 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3389   ScopedTokenPosition AutoPosition(Tokens);
3390   FormatToken *Tok = Tokens->getNextToken();
3391   // We already read the first [ check for the second.
3392   if (!Tok->is(tok::l_square))
3393     return false;
3394   // Double check that the attribute is just something
3395   // fairly simple.
3396   while (Tok->isNot(tok::eof)) {
3397     if (Tok->is(tok::r_square))
3398       break;
3399     Tok = Tokens->getNextToken();
3400   }
3401   if (Tok->is(tok::eof))
3402     return false;
3403   Tok = Tokens->getNextToken();
3404   if (!Tok->is(tok::r_square))
3405     return false;
3406   Tok = Tokens->getNextToken();
3407   if (Tok->is(tok::semi))
3408     return false;
3409   return true;
3410 }
3411 
3412 void UnwrappedLineParser::parseJavaEnumBody() {
3413   // Determine whether the enum is simple, i.e. does not have a semicolon or
3414   // constants with class bodies. Simple enums can be formatted like braced
3415   // lists, contracted to a single line, etc.
3416   unsigned StoredPosition = Tokens->getPosition();
3417   bool IsSimple = true;
3418   FormatToken *Tok = Tokens->getNextToken();
3419   while (!Tok->is(tok::eof)) {
3420     if (Tok->is(tok::r_brace))
3421       break;
3422     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3423       IsSimple = false;
3424       break;
3425     }
3426     // FIXME: This will also mark enums with braces in the arguments to enum
3427     // constants as "not simple". This is probably fine in practice, though.
3428     Tok = Tokens->getNextToken();
3429   }
3430   FormatTok = Tokens->setPosition(StoredPosition);
3431 
3432   if (IsSimple) {
3433     nextToken();
3434     parseBracedList();
3435     addUnwrappedLine();
3436     return;
3437   }
3438 
3439   // Parse the body of a more complex enum.
3440   // First add a line for everything up to the "{".
3441   nextToken();
3442   addUnwrappedLine();
3443   ++Line->Level;
3444 
3445   // Parse the enum constants.
3446   while (FormatTok) {
3447     if (FormatTok->is(tok::l_brace)) {
3448       // Parse the constant's class body.
3449       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3450                  /*MunchSemi=*/false);
3451     } else if (FormatTok->is(tok::l_paren)) {
3452       parseParens();
3453     } else if (FormatTok->is(tok::comma)) {
3454       nextToken();
3455       addUnwrappedLine();
3456     } else if (FormatTok->is(tok::semi)) {
3457       nextToken();
3458       addUnwrappedLine();
3459       break;
3460     } else if (FormatTok->is(tok::r_brace)) {
3461       addUnwrappedLine();
3462       break;
3463     } else {
3464       nextToken();
3465     }
3466   }
3467 
3468   // Parse the class body after the enum's ";" if any.
3469   parseLevel(/*HasOpeningBrace=*/true, /*CanContainBracedList=*/true);
3470   nextToken();
3471   --Line->Level;
3472   addUnwrappedLine();
3473 }
3474 
3475 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3476   const FormatToken &InitialToken = *FormatTok;
3477   nextToken();
3478 
3479   // The actual identifier can be a nested name specifier, and in macros
3480   // it is often token-pasted.
3481   // An [[attribute]] can be before the identifier.
3482   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3483                             tok::kw___attribute, tok::kw___declspec,
3484                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3485          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3486           FormatTok->isOneOf(tok::period, tok::comma))) {
3487     if (Style.isJavaScript() &&
3488         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3489       // JavaScript/TypeScript supports inline object types in
3490       // extends/implements positions:
3491       //     class Foo implements {bar: number} { }
3492       nextToken();
3493       if (FormatTok->is(tok::l_brace)) {
3494         tryToParseBracedList();
3495         continue;
3496       }
3497     }
3498     bool IsNonMacroIdentifier =
3499         FormatTok->is(tok::identifier) &&
3500         FormatTok->TokenText != FormatTok->TokenText.upper();
3501     nextToken();
3502     // We can have macros or attributes in between 'class' and the class name.
3503     if (!IsNonMacroIdentifier) {
3504       if (FormatTok->is(tok::l_paren)) {
3505         parseParens();
3506       } else if (FormatTok->is(TT_AttributeSquare)) {
3507         parseSquare();
3508         // Consume the closing TT_AttributeSquare.
3509         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3510           nextToken();
3511       }
3512     }
3513   }
3514 
3515   // Note that parsing away template declarations here leads to incorrectly
3516   // accepting function declarations as record declarations.
3517   // In general, we cannot solve this problem. Consider:
3518   // class A<int> B() {}
3519   // which can be a function definition or a class definition when B() is a
3520   // macro. If we find enough real-world cases where this is a problem, we
3521   // can parse for the 'template' keyword in the beginning of the statement,
3522   // and thus rule out the record production in case there is no template
3523   // (this would still leave us with an ambiguity between template function
3524   // and class declarations).
3525   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3526     do {
3527       if (FormatTok->is(tok::l_brace)) {
3528         calculateBraceTypes(/*ExpectClassBody=*/true);
3529         if (!tryToParseBracedList())
3530           break;
3531       }
3532       if (FormatTok->is(tok::l_square)) {
3533         FormatToken *Previous = FormatTok->Previous;
3534         if (!Previous ||
3535             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3536           // Don't try parsing a lambda if we had a closing parenthesis before,
3537           // it was probably a pointer to an array: int (*)[].
3538           if (!tryToParseLambda())
3539             continue;
3540         } else {
3541           parseSquare();
3542           continue;
3543         }
3544       }
3545       if (FormatTok->is(tok::semi))
3546         return;
3547       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3548         addUnwrappedLine();
3549         nextToken();
3550         parseCSharpGenericTypeConstraint();
3551         break;
3552       }
3553       nextToken();
3554     } while (!eof());
3555   }
3556 
3557   auto GetBraceType = [](const FormatToken &RecordTok) {
3558     switch (RecordTok.Tok.getKind()) {
3559     case tok::kw_class:
3560       return TT_ClassLBrace;
3561     case tok::kw_struct:
3562       return TT_StructLBrace;
3563     case tok::kw_union:
3564       return TT_UnionLBrace;
3565     default:
3566       // Useful for e.g. interface.
3567       return TT_RecordLBrace;
3568     }
3569   };
3570   if (FormatTok->is(tok::l_brace)) {
3571     FormatTok->setFinalizedType(GetBraceType(InitialToken));
3572     if (ParseAsExpr) {
3573       parseChildBlock();
3574     } else {
3575       if (ShouldBreakBeforeBrace(Style, InitialToken))
3576         addUnwrappedLine();
3577 
3578       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3579       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3580     }
3581   }
3582   // There is no addUnwrappedLine() here so that we fall through to parsing a
3583   // structural element afterwards. Thus, in "class A {} n, m;",
3584   // "} n, m;" will end up in one unwrapped line.
3585 }
3586 
3587 void UnwrappedLineParser::parseObjCMethod() {
3588   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3589          "'(' or identifier expected.");
3590   do {
3591     if (FormatTok->is(tok::semi)) {
3592       nextToken();
3593       addUnwrappedLine();
3594       return;
3595     } else if (FormatTok->is(tok::l_brace)) {
3596       if (Style.BraceWrapping.AfterFunction)
3597         addUnwrappedLine();
3598       parseBlock();
3599       addUnwrappedLine();
3600       return;
3601     } else {
3602       nextToken();
3603     }
3604   } while (!eof());
3605 }
3606 
3607 void UnwrappedLineParser::parseObjCProtocolList() {
3608   assert(FormatTok->is(tok::less) && "'<' expected.");
3609   do {
3610     nextToken();
3611     // Early exit in case someone forgot a close angle.
3612     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3613         FormatTok->isObjCAtKeyword(tok::objc_end))
3614       return;
3615   } while (!eof() && FormatTok->isNot(tok::greater));
3616   nextToken(); // Skip '>'.
3617 }
3618 
3619 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3620   do {
3621     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3622       nextToken();
3623       addUnwrappedLine();
3624       break;
3625     }
3626     if (FormatTok->is(tok::l_brace)) {
3627       parseBlock();
3628       // In ObjC interfaces, nothing should be following the "}".
3629       addUnwrappedLine();
3630     } else if (FormatTok->is(tok::r_brace)) {
3631       // Ignore stray "}". parseStructuralElement doesn't consume them.
3632       nextToken();
3633       addUnwrappedLine();
3634     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3635       nextToken();
3636       parseObjCMethod();
3637     } else {
3638       parseStructuralElement();
3639     }
3640   } while (!eof());
3641 }
3642 
3643 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3644   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3645          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3646   nextToken();
3647   nextToken(); // interface name
3648 
3649   // @interface can be followed by a lightweight generic
3650   // specialization list, then either a base class or a category.
3651   if (FormatTok->is(tok::less))
3652     parseObjCLightweightGenerics();
3653   if (FormatTok->is(tok::colon)) {
3654     nextToken();
3655     nextToken(); // base class name
3656     // The base class can also have lightweight generics applied to it.
3657     if (FormatTok->is(tok::less))
3658       parseObjCLightweightGenerics();
3659   } else if (FormatTok->is(tok::l_paren))
3660     // Skip category, if present.
3661     parseParens();
3662 
3663   if (FormatTok->is(tok::less))
3664     parseObjCProtocolList();
3665 
3666   if (FormatTok->is(tok::l_brace)) {
3667     if (Style.BraceWrapping.AfterObjCDeclaration)
3668       addUnwrappedLine();
3669     parseBlock(/*MustBeDeclaration=*/true);
3670   }
3671 
3672   // With instance variables, this puts '}' on its own line.  Without instance
3673   // variables, this ends the @interface line.
3674   addUnwrappedLine();
3675 
3676   parseObjCUntilAtEnd();
3677 }
3678 
3679 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3680   assert(FormatTok->is(tok::less));
3681   // Unlike protocol lists, generic parameterizations support
3682   // nested angles:
3683   //
3684   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3685   //     NSObject <NSCopying, NSSecureCoding>
3686   //
3687   // so we need to count how many open angles we have left.
3688   unsigned NumOpenAngles = 1;
3689   do {
3690     nextToken();
3691     // Early exit in case someone forgot a close angle.
3692     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3693         FormatTok->isObjCAtKeyword(tok::objc_end))
3694       break;
3695     if (FormatTok->is(tok::less))
3696       ++NumOpenAngles;
3697     else if (FormatTok->is(tok::greater)) {
3698       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3699       --NumOpenAngles;
3700     }
3701   } while (!eof() && NumOpenAngles != 0);
3702   nextToken(); // Skip '>'.
3703 }
3704 
3705 // Returns true for the declaration/definition form of @protocol,
3706 // false for the expression form.
3707 bool UnwrappedLineParser::parseObjCProtocol() {
3708   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3709   nextToken();
3710 
3711   if (FormatTok->is(tok::l_paren))
3712     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3713     return false;
3714 
3715   // The definition/declaration form,
3716   // @protocol Foo
3717   // - (int)someMethod;
3718   // @end
3719 
3720   nextToken(); // protocol name
3721 
3722   if (FormatTok->is(tok::less))
3723     parseObjCProtocolList();
3724 
3725   // Check for protocol declaration.
3726   if (FormatTok->is(tok::semi)) {
3727     nextToken();
3728     addUnwrappedLine();
3729     return true;
3730   }
3731 
3732   addUnwrappedLine();
3733   parseObjCUntilAtEnd();
3734   return true;
3735 }
3736 
3737 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3738   bool IsImport = FormatTok->is(Keywords.kw_import);
3739   assert(IsImport || FormatTok->is(tok::kw_export));
3740   nextToken();
3741 
3742   // Consume the "default" in "export default class/function".
3743   if (FormatTok->is(tok::kw_default))
3744     nextToken();
3745 
3746   // Consume "async function", "function" and "default function", so that these
3747   // get parsed as free-standing JS functions, i.e. do not require a trailing
3748   // semicolon.
3749   if (FormatTok->is(Keywords.kw_async))
3750     nextToken();
3751   if (FormatTok->is(Keywords.kw_function)) {
3752     nextToken();
3753     return;
3754   }
3755 
3756   // For imports, `export *`, `export {...}`, consume the rest of the line up
3757   // to the terminating `;`. For everything else, just return and continue
3758   // parsing the structural element, i.e. the declaration or expression for
3759   // `export default`.
3760   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3761       !FormatTok->isStringLiteral())
3762     return;
3763 
3764   while (!eof()) {
3765     if (FormatTok->is(tok::semi))
3766       return;
3767     if (Line->Tokens.empty()) {
3768       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3769       // import statement should terminate.
3770       return;
3771     }
3772     if (FormatTok->is(tok::l_brace)) {
3773       FormatTok->setBlockKind(BK_Block);
3774       nextToken();
3775       parseBracedList();
3776     } else {
3777       nextToken();
3778     }
3779   }
3780 }
3781 
3782 void UnwrappedLineParser::parseStatementMacro() {
3783   nextToken();
3784   if (FormatTok->is(tok::l_paren))
3785     parseParens();
3786   if (FormatTok->is(tok::semi))
3787     nextToken();
3788   addUnwrappedLine();
3789 }
3790 
3791 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3792                                                  StringRef Prefix = "") {
3793   llvm::dbgs() << Prefix << "Line(" << Line.Level
3794                << ", FSC=" << Line.FirstStartColumn << ")"
3795                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3796   for (const auto &Node : Line.Tokens) {
3797     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3798                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3799                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3800   }
3801   for (const auto &Node : Line.Tokens)
3802     for (const auto &ChildNode : Node.Children)
3803       printDebugInfo(ChildNode, "\nChild: ");
3804 
3805   llvm::dbgs() << "\n";
3806 }
3807 
3808 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3809   if (Line->Tokens.empty())
3810     return;
3811   LLVM_DEBUG({
3812     if (CurrentLines == &Lines)
3813       printDebugInfo(*Line);
3814   });
3815 
3816   // If this line closes a block when in Whitesmiths mode, remember that
3817   // information so that the level can be decreased after the line is added.
3818   // This has to happen after the addition of the line since the line itself
3819   // needs to be indented.
3820   bool ClosesWhitesmithsBlock =
3821       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3822       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3823 
3824   CurrentLines->push_back(std::move(*Line));
3825   Line->Tokens.clear();
3826   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3827   Line->FirstStartColumn = 0;
3828 
3829   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3830     --Line->Level;
3831   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3832     CurrentLines->append(
3833         std::make_move_iterator(PreprocessorDirectives.begin()),
3834         std::make_move_iterator(PreprocessorDirectives.end()));
3835     PreprocessorDirectives.clear();
3836   }
3837   // Disconnect the current token from the last token on the previous line.
3838   FormatTok->Previous = nullptr;
3839 }
3840 
3841 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
3842 
3843 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3844   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3845          FormatTok.NewlinesBefore > 0;
3846 }
3847 
3848 // Checks if \p FormatTok is a line comment that continues the line comment
3849 // section on \p Line.
3850 static bool
3851 continuesLineCommentSection(const FormatToken &FormatTok,
3852                             const UnwrappedLine &Line,
3853                             const llvm::Regex &CommentPragmasRegex) {
3854   if (Line.Tokens.empty())
3855     return false;
3856 
3857   StringRef IndentContent = FormatTok.TokenText;
3858   if (FormatTok.TokenText.startswith("//") ||
3859       FormatTok.TokenText.startswith("/*"))
3860     IndentContent = FormatTok.TokenText.substr(2);
3861   if (CommentPragmasRegex.match(IndentContent))
3862     return false;
3863 
3864   // If Line starts with a line comment, then FormatTok continues the comment
3865   // section if its original column is greater or equal to the original start
3866   // column of the line.
3867   //
3868   // Define the min column token of a line as follows: if a line ends in '{' or
3869   // contains a '{' followed by a line comment, then the min column token is
3870   // that '{'. Otherwise, the min column token of the line is the first token of
3871   // the line.
3872   //
3873   // If Line starts with a token other than a line comment, then FormatTok
3874   // continues the comment section if its original column is greater than the
3875   // original start column of the min column token of the line.
3876   //
3877   // For example, the second line comment continues the first in these cases:
3878   //
3879   // // first line
3880   // // second line
3881   //
3882   // and:
3883   //
3884   // // first line
3885   //  // second line
3886   //
3887   // and:
3888   //
3889   // int i; // first line
3890   //  // second line
3891   //
3892   // and:
3893   //
3894   // do { // first line
3895   //      // second line
3896   //   int i;
3897   // } while (true);
3898   //
3899   // and:
3900   //
3901   // enum {
3902   //   a, // first line
3903   //    // second line
3904   //   b
3905   // };
3906   //
3907   // The second line comment doesn't continue the first in these cases:
3908   //
3909   //   // first line
3910   //  // second line
3911   //
3912   // and:
3913   //
3914   // int i; // first line
3915   // // second line
3916   //
3917   // and:
3918   //
3919   // do { // first line
3920   //   // second line
3921   //   int i;
3922   // } while (true);
3923   //
3924   // and:
3925   //
3926   // enum {
3927   //   a, // first line
3928   //   // second line
3929   // };
3930   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3931 
3932   // Scan for '{//'. If found, use the column of '{' as a min column for line
3933   // comment section continuation.
3934   const FormatToken *PreviousToken = nullptr;
3935   for (const UnwrappedLineNode &Node : Line.Tokens) {
3936     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3937         isLineComment(*Node.Tok)) {
3938       MinColumnToken = PreviousToken;
3939       break;
3940     }
3941     PreviousToken = Node.Tok;
3942 
3943     // Grab the last newline preceding a token in this unwrapped line.
3944     if (Node.Tok->NewlinesBefore > 0)
3945       MinColumnToken = Node.Tok;
3946   }
3947   if (PreviousToken && PreviousToken->is(tok::l_brace))
3948     MinColumnToken = PreviousToken;
3949 
3950   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3951                               MinColumnToken);
3952 }
3953 
3954 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3955   bool JustComments = Line->Tokens.empty();
3956   for (FormatToken *Tok : CommentsBeforeNextToken) {
3957     // Line comments that belong to the same line comment section are put on the
3958     // same line since later we might want to reflow content between them.
3959     // Additional fine-grained breaking of line comment sections is controlled
3960     // by the class BreakableLineCommentSection in case it is desirable to keep
3961     // several line comment sections in the same unwrapped line.
3962     //
3963     // FIXME: Consider putting separate line comment sections as children to the
3964     // unwrapped line instead.
3965     Tok->ContinuesLineCommentSection =
3966         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3967     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3968       addUnwrappedLine();
3969     pushToken(Tok);
3970   }
3971   if (NewlineBeforeNext && JustComments)
3972     addUnwrappedLine();
3973   CommentsBeforeNextToken.clear();
3974 }
3975 
3976 void UnwrappedLineParser::nextToken(int LevelDifference) {
3977   if (eof())
3978     return;
3979   flushComments(isOnNewLine(*FormatTok));
3980   pushToken(FormatTok);
3981   FormatToken *Previous = FormatTok;
3982   if (!Style.isJavaScript())
3983     readToken(LevelDifference);
3984   else
3985     readTokenWithJavaScriptASI();
3986   FormatTok->Previous = Previous;
3987 }
3988 
3989 void UnwrappedLineParser::distributeComments(
3990     const SmallVectorImpl<FormatToken *> &Comments,
3991     const FormatToken *NextTok) {
3992   // Whether or not a line comment token continues a line is controlled by
3993   // the method continuesLineCommentSection, with the following caveat:
3994   //
3995   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3996   // that each comment line from the trail is aligned with the next token, if
3997   // the next token exists. If a trail exists, the beginning of the maximal
3998   // trail is marked as a start of a new comment section.
3999   //
4000   // For example in this code:
4001   //
4002   // int a; // line about a
4003   //   // line 1 about b
4004   //   // line 2 about b
4005   //   int b;
4006   //
4007   // the two lines about b form a maximal trail, so there are two sections, the
4008   // first one consisting of the single comment "// line about a" and the
4009   // second one consisting of the next two comments.
4010   if (Comments.empty())
4011     return;
4012   bool ShouldPushCommentsInCurrentLine = true;
4013   bool HasTrailAlignedWithNextToken = false;
4014   unsigned StartOfTrailAlignedWithNextToken = 0;
4015   if (NextTok) {
4016     // We are skipping the first element intentionally.
4017     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4018       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4019         HasTrailAlignedWithNextToken = true;
4020         StartOfTrailAlignedWithNextToken = i;
4021       }
4022     }
4023   }
4024   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4025     FormatToken *FormatTok = Comments[i];
4026     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4027       FormatTok->ContinuesLineCommentSection = false;
4028     } else {
4029       FormatTok->ContinuesLineCommentSection =
4030           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4031     }
4032     if (!FormatTok->ContinuesLineCommentSection &&
4033         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
4034       ShouldPushCommentsInCurrentLine = false;
4035     if (ShouldPushCommentsInCurrentLine)
4036       pushToken(FormatTok);
4037     else
4038       CommentsBeforeNextToken.push_back(FormatTok);
4039   }
4040 }
4041 
4042 void UnwrappedLineParser::readToken(int LevelDifference) {
4043   SmallVector<FormatToken *, 1> Comments;
4044   bool PreviousWasComment = false;
4045   bool FirstNonCommentOnLine = false;
4046   do {
4047     FormatTok = Tokens->getNextToken();
4048     assert(FormatTok);
4049     while (FormatTok->getType() == TT_ConflictStart ||
4050            FormatTok->getType() == TT_ConflictEnd ||
4051            FormatTok->getType() == TT_ConflictAlternative) {
4052       if (FormatTok->getType() == TT_ConflictStart)
4053         conditionalCompilationStart(/*Unreachable=*/false);
4054       else if (FormatTok->getType() == TT_ConflictAlternative)
4055         conditionalCompilationAlternative();
4056       else if (FormatTok->getType() == TT_ConflictEnd)
4057         conditionalCompilationEnd();
4058       FormatTok = Tokens->getNextToken();
4059       FormatTok->MustBreakBefore = true;
4060     }
4061 
4062     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4063                                       const FormatToken &Tok,
4064                                       bool PreviousWasComment) {
4065       auto IsFirstOnLine = [](const FormatToken &Tok) {
4066         return Tok.HasUnescapedNewline || Tok.IsFirst;
4067       };
4068 
4069       // Consider preprocessor directives preceded by block comments as first
4070       // on line.
4071       if (PreviousWasComment)
4072         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4073       return IsFirstOnLine(Tok);
4074     };
4075 
4076     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4077         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4078     PreviousWasComment = FormatTok->is(tok::comment);
4079 
4080     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4081            FirstNonCommentOnLine) {
4082       distributeComments(Comments, FormatTok);
4083       Comments.clear();
4084       // If there is an unfinished unwrapped line, we flush the preprocessor
4085       // directives only after that unwrapped line was finished later.
4086       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4087       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4088       assert((LevelDifference >= 0 ||
4089               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4090              "LevelDifference makes Line->Level negative");
4091       Line->Level += LevelDifference;
4092       // Comments stored before the preprocessor directive need to be output
4093       // before the preprocessor directive, at the same level as the
4094       // preprocessor directive, as we consider them to apply to the directive.
4095       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4096           PPBranchLevel > 0)
4097         Line->Level += PPBranchLevel;
4098       flushComments(isOnNewLine(*FormatTok));
4099       parsePPDirective();
4100       PreviousWasComment = FormatTok->is(tok::comment);
4101       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4102           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4103     }
4104 
4105     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4106         !Line->InPPDirective)
4107       continue;
4108 
4109     if (!FormatTok->is(tok::comment)) {
4110       distributeComments(Comments, FormatTok);
4111       Comments.clear();
4112       return;
4113     }
4114 
4115     Comments.push_back(FormatTok);
4116   } while (!eof());
4117 
4118   distributeComments(Comments, nullptr);
4119   Comments.clear();
4120 }
4121 
4122 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4123   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4124   if (MustBreakBeforeNextToken) {
4125     Line->Tokens.back().Tok->MustBreakBefore = true;
4126     MustBreakBeforeNextToken = false;
4127   }
4128 }
4129 
4130 } // end namespace format
4131 } // end namespace clang
4132