1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 #include <utility>
23 
24 #define DEBUG_TYPE "format-parser"
25 
26 namespace clang {
27 namespace format {
28 
29 class FormatTokenSource {
30 public:
31   virtual ~FormatTokenSource() {}
32 
33   // Returns the next token in the token stream.
34   virtual FormatToken *getNextToken() = 0;
35 
36   // Returns the token preceding the token returned by the last call to
37   // getNextToken() in the token stream, or nullptr if no such token exists.
38   virtual FormatToken *getPreviousToken() = 0;
39 
40   // Returns the token that would be returned by the next call to
41   // getNextToken().
42   virtual FormatToken *peekNextToken() = 0;
43 
44   // Returns the token that would be returned after the next N calls to
45   // getNextToken(). N needs to be greater than zero, and small enough that
46   // there are still tokens. Check for tok::eof with N-1 before calling it with
47   // N.
48   virtual FormatToken *peekNextToken(int N) = 0;
49 
50   // Returns whether we are at the end of the file.
51   // This can be different from whether getNextToken() returned an eof token
52   // when the FormatTokenSource is a view on a part of the token stream.
53   virtual bool isEOF() = 0;
54 
55   // Gets the current position in the token stream, to be used by setPosition().
56   virtual unsigned getPosition() = 0;
57 
58   // Resets the token stream to the state it was in when getPosition() returned
59   // Position, and return the token at that position in the stream.
60   virtual FormatToken *setPosition(unsigned Position) = 0;
61 };
62 
63 namespace {
64 
65 class ScopedDeclarationState {
66 public:
67   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
68                          bool MustBeDeclaration)
69       : Line(Line), Stack(Stack) {
70     Line.MustBeDeclaration = MustBeDeclaration;
71     Stack.push_back(MustBeDeclaration);
72   }
73   ~ScopedDeclarationState() {
74     Stack.pop_back();
75     if (!Stack.empty())
76       Line.MustBeDeclaration = Stack.back();
77     else
78       Line.MustBeDeclaration = true;
79   }
80 
81 private:
82   UnwrappedLine &Line;
83   llvm::BitVector &Stack;
84 };
85 
86 static bool isLineComment(const FormatToken &FormatTok) {
87   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
88 }
89 
90 // Checks if \p FormatTok is a line comment that continues the line comment
91 // \p Previous. The original column of \p MinColumnToken is used to determine
92 // whether \p FormatTok is indented enough to the right to continue \p Previous.
93 static bool continuesLineComment(const FormatToken &FormatTok,
94                                  const FormatToken *Previous,
95                                  const FormatToken *MinColumnToken) {
96   if (!Previous || !MinColumnToken)
97     return false;
98   unsigned MinContinueColumn =
99       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
100   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
101          isLineComment(*Previous) &&
102          FormatTok.OriginalColumn >= MinContinueColumn;
103 }
104 
105 class ScopedMacroState : public FormatTokenSource {
106 public:
107   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
108                    FormatToken *&ResetToken)
109       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
110         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
111         Token(nullptr), PreviousToken(nullptr) {
112     FakeEOF.Tok.startToken();
113     FakeEOF.Tok.setKind(tok::eof);
114     TokenSource = this;
115     Line.Level = 0;
116     Line.InPPDirective = true;
117   }
118 
119   ~ScopedMacroState() override {
120     TokenSource = PreviousTokenSource;
121     ResetToken = Token;
122     Line.InPPDirective = false;
123     Line.Level = PreviousLineLevel;
124   }
125 
126   FormatToken *getNextToken() override {
127     // The \c UnwrappedLineParser guards against this by never calling
128     // \c getNextToken() after it has encountered the first eof token.
129     assert(!eof());
130     PreviousToken = Token;
131     Token = PreviousTokenSource->getNextToken();
132     if (eof())
133       return &FakeEOF;
134     return Token;
135   }
136 
137   FormatToken *getPreviousToken() override {
138     return PreviousTokenSource->getPreviousToken();
139   }
140 
141   FormatToken *peekNextToken() override {
142     if (eof())
143       return &FakeEOF;
144     return PreviousTokenSource->peekNextToken();
145   }
146 
147   FormatToken *peekNextToken(int N) override {
148     assert(N > 0);
149     if (eof())
150       return &FakeEOF;
151     return PreviousTokenSource->peekNextToken(N);
152   }
153 
154   bool isEOF() override { return PreviousTokenSource->isEOF(); }
155 
156   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
157 
158   FormatToken *setPosition(unsigned Position) override {
159     PreviousToken = nullptr;
160     Token = PreviousTokenSource->setPosition(Position);
161     return Token;
162   }
163 
164 private:
165   bool eof() {
166     return Token && Token->HasUnescapedNewline &&
167            !continuesLineComment(*Token, PreviousToken,
168                                  /*MinColumnToken=*/PreviousToken);
169   }
170 
171   FormatToken FakeEOF;
172   UnwrappedLine &Line;
173   FormatTokenSource *&TokenSource;
174   FormatToken *&ResetToken;
175   unsigned PreviousLineLevel;
176   FormatTokenSource *PreviousTokenSource;
177 
178   FormatToken *Token;
179   FormatToken *PreviousToken;
180 };
181 
182 } // end anonymous namespace
183 
184 class ScopedLineState {
185 public:
186   ScopedLineState(UnwrappedLineParser &Parser,
187                   bool SwitchToPreprocessorLines = false)
188       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
189     if (SwitchToPreprocessorLines)
190       Parser.CurrentLines = &Parser.PreprocessorDirectives;
191     else if (!Parser.Line->Tokens.empty())
192       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
193     PreBlockLine = std::move(Parser.Line);
194     Parser.Line = std::make_unique<UnwrappedLine>();
195     Parser.Line->Level = PreBlockLine->Level;
196     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
197   }
198 
199   ~ScopedLineState() {
200     if (!Parser.Line->Tokens.empty())
201       Parser.addUnwrappedLine();
202     assert(Parser.Line->Tokens.empty());
203     Parser.Line = std::move(PreBlockLine);
204     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
205       Parser.MustBreakBeforeNextToken = true;
206     Parser.CurrentLines = OriginalLines;
207   }
208 
209 private:
210   UnwrappedLineParser &Parser;
211 
212   std::unique_ptr<UnwrappedLine> PreBlockLine;
213   SmallVectorImpl<UnwrappedLine> *OriginalLines;
214 };
215 
216 class CompoundStatementIndenter {
217 public:
218   CompoundStatementIndenter(UnwrappedLineParser *Parser,
219                             const FormatStyle &Style, unsigned &LineLevel)
220       : CompoundStatementIndenter(Parser, LineLevel,
221                                   Style.BraceWrapping.AfterControlStatement,
222                                   Style.BraceWrapping.IndentBraces) {}
223   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
224                             bool WrapBrace, bool IndentBrace)
225       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
226     if (WrapBrace)
227       Parser->addUnwrappedLine();
228     if (IndentBrace)
229       ++LineLevel;
230   }
231   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
232 
233 private:
234   unsigned &LineLevel;
235   unsigned OldLineLevel;
236 };
237 
238 namespace {
239 
240 class IndexedTokenSource : public FormatTokenSource {
241 public:
242   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
243       : Tokens(Tokens), Position(-1) {}
244 
245   FormatToken *getNextToken() override {
246     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
247       LLVM_DEBUG({
248         llvm::dbgs() << "Next ";
249         dbgToken(Position);
250       });
251       return Tokens[Position];
252     }
253     ++Position;
254     LLVM_DEBUG({
255       llvm::dbgs() << "Next ";
256       dbgToken(Position);
257     });
258     return Tokens[Position];
259   }
260 
261   FormatToken *getPreviousToken() override {
262     return Position > 0 ? Tokens[Position - 1] : nullptr;
263   }
264 
265   FormatToken *peekNextToken() override {
266     int Next = Position + 1;
267     LLVM_DEBUG({
268       llvm::dbgs() << "Peeking ";
269       dbgToken(Next);
270     });
271     return Tokens[Next];
272   }
273 
274   FormatToken *peekNextToken(int N) override {
275     assert(N > 0);
276     int Next = Position + N;
277     LLVM_DEBUG({
278       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
279       dbgToken(Next);
280     });
281     return Tokens[Next];
282   }
283 
284   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
285 
286   unsigned getPosition() override {
287     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
288     assert(Position >= 0);
289     return Position;
290   }
291 
292   FormatToken *setPosition(unsigned P) override {
293     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
294     Position = P;
295     return Tokens[Position];
296   }
297 
298   void reset() { Position = -1; }
299 
300 private:
301   void dbgToken(int Position, llvm::StringRef Indent = "") {
302     FormatToken *Tok = Tokens[Position];
303     llvm::dbgs() << Indent << "[" << Position
304                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
305                  << ", Macro: " << !!Tok->MacroCtx << "\n";
306   }
307 
308   ArrayRef<FormatToken *> Tokens;
309   int Position;
310 };
311 
312 } // end anonymous namespace
313 
314 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
315                                          const AdditionalKeywords &Keywords,
316                                          unsigned FirstStartColumn,
317                                          ArrayRef<FormatToken *> Tokens,
318                                          UnwrappedLineConsumer &Callback)
319     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
320       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
321       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
322       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
323       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
324                        ? IG_Rejected
325                        : IG_Inited),
326       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
327 
328 void UnwrappedLineParser::reset() {
329   PPBranchLevel = -1;
330   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
331                      ? IG_Rejected
332                      : IG_Inited;
333   IncludeGuardToken = nullptr;
334   Line.reset(new UnwrappedLine);
335   CommentsBeforeNextToken.clear();
336   FormatTok = nullptr;
337   MustBreakBeforeNextToken = false;
338   PreprocessorDirectives.clear();
339   CurrentLines = &Lines;
340   DeclarationScopeStack.clear();
341   NestedTooDeep.clear();
342   PPStack.clear();
343   Line->FirstStartColumn = FirstStartColumn;
344 }
345 
346 void UnwrappedLineParser::parse() {
347   IndexedTokenSource TokenSource(AllTokens);
348   Line->FirstStartColumn = FirstStartColumn;
349   do {
350     LLVM_DEBUG(llvm::dbgs() << "----\n");
351     reset();
352     Tokens = &TokenSource;
353     TokenSource.reset();
354 
355     readToken();
356     parseFile();
357 
358     // If we found an include guard then all preprocessor directives (other than
359     // the guard) are over-indented by one.
360     if (IncludeGuard == IG_Found)
361       for (auto &Line : Lines)
362         if (Line.InPPDirective && Line.Level > 0)
363           --Line.Level;
364 
365     // Create line with eof token.
366     pushToken(FormatTok);
367     addUnwrappedLine();
368 
369     for (const UnwrappedLine &Line : Lines)
370       Callback.consumeUnwrappedLine(Line);
371 
372     Callback.finishRun();
373     Lines.clear();
374     while (!PPLevelBranchIndex.empty() &&
375            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
376       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
377       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
378     }
379     if (!PPLevelBranchIndex.empty()) {
380       ++PPLevelBranchIndex.back();
381       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
382       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
383     }
384   } while (!PPLevelBranchIndex.empty());
385 }
386 
387 void UnwrappedLineParser::parseFile() {
388   // The top-level context in a file always has declarations, except for pre-
389   // processor directives and JavaScript files.
390   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
391   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
392                                           MustBeDeclaration);
393   if (Style.Language == FormatStyle::LK_TextProto)
394     parseBracedList();
395   else
396     parseLevel(/*HasOpeningBrace=*/false, /*CanContainBracedList=*/true);
397   // Make sure to format the remaining tokens.
398   //
399   // LK_TextProto is special since its top-level is parsed as the body of a
400   // braced list, which does not necessarily have natural line separators such
401   // as a semicolon. Comments after the last entry that have been determined to
402   // not belong to that line, as in:
403   //   key: value
404   //   // endfile comment
405   // do not have a chance to be put on a line of their own until this point.
406   // Here we add this newline before end-of-file comments.
407   if (Style.Language == FormatStyle::LK_TextProto &&
408       !CommentsBeforeNextToken.empty())
409     addUnwrappedLine();
410   flushComments(true);
411   addUnwrappedLine();
412 }
413 
414 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
415   do {
416     switch (FormatTok->Tok.getKind()) {
417     case tok::l_brace:
418       return;
419     default:
420       if (FormatTok->is(Keywords.kw_where)) {
421         addUnwrappedLine();
422         nextToken();
423         parseCSharpGenericTypeConstraint();
424         break;
425       }
426       nextToken();
427       break;
428     }
429   } while (!eof());
430 }
431 
432 void UnwrappedLineParser::parseCSharpAttribute() {
433   int UnpairedSquareBrackets = 1;
434   do {
435     switch (FormatTok->Tok.getKind()) {
436     case tok::r_square:
437       nextToken();
438       --UnpairedSquareBrackets;
439       if (UnpairedSquareBrackets == 0) {
440         addUnwrappedLine();
441         return;
442       }
443       break;
444     case tok::l_square:
445       ++UnpairedSquareBrackets;
446       nextToken();
447       break;
448     default:
449       nextToken();
450       break;
451     }
452   } while (!eof());
453 }
454 
455 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
456   if (!Lines.empty() && Lines.back().InPPDirective)
457     return true;
458 
459   const FormatToken *Previous = Tokens->getPreviousToken();
460   return Previous && Previous->is(tok::comment) &&
461          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
462 }
/// \brief Parses a level, that is, the run of structural elements found at one
/// nesting depth: either the top level of the input or the contents between an
/// opening brace and its matching closing brace.
///
/// Parsing continues until end of file or, when \p HasOpeningBrace is set,
/// until the closing brace of the level is reached (that brace is left for the
/// caller to consume). Without an opening brace, a stray closing brace is
/// consumed and placed on a line of its own.
///
/// \param HasOpeningBrace If that level is started by an opening brace.
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param IfKind Passed through to parseStructuralElement() for elements
/// handled by the default path.
/// \param NextLBracesType The type for left brace found in this level.
/// \returns true if a simple block, or false otherwise. (A simple block has a
/// single statement.) true is only ever returned when Style.RemoveBracesLLVM
/// is set, the level has an opening brace and ends at a real '}', exactly one
/// statement was counted, no label was seen, the level is neither opened nor
/// closed directly after a comment or preprocessor directive, and the token
/// after the '}' is not a comment on the same line.
bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace,
                                     bool CanContainBracedList,
                                     IfStmtKind *IfKind,
                                     TokenType NextLBracesType) {
  // Braces nested inside a compound requirement are typed as braced lists;
  // everything else leaves the type of nested braces open.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Forced to true when brace removal is disabled, which skips the lookup; the
  // value is only consulted on the RemoveBracesLLVM path below.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    // Attribute macros are skipped without starting a structural element.
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block delimiters are dispatched as if they were braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. Once
    // HasLabel has been set, nullptr is passed instead of its address.
    auto ParseDefault = [this, HasOpeningBrace, IfKind, NextLevelLBracesType,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(IfKind, !HasOpeningBrace, NextLevelLBracesType,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment at this level goes on a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      else if (FormatTok->Previous &&
               FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*UnindentWhitesmithsBraces=*/false,
                 CanContainBracedList,
                 /*NextLBracesType=*/NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace) {
        // End of this level; the closing token itself is not consumed here.
        if (!Style.RemoveBracesLLVM)
          return false;
        // kind may be r_brace because of a TT_MacroBlockEnd token, so the
        // actual token kind is checked again here.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective())
          return false;
        // A comment following the '}' on the same line also disqualifies the
        // block from being reported as simple.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // Closing brace without a matching opening brace at this level: consume
      // it and give it its own line.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look past any comments to the token after 'default', then rewind to
      // the stored position.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // The level is raised at most once per parseLevel() call, on the first
      // switch label encountered.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        // parseCSharpAttribute() expects the opening '[' to be consumed.
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());
  return false;
}
586 
587 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
588   // We'll parse forward through the tokens until we hit
589   // a closing brace or eof - note that getNextToken() will
590   // parse macros, so this will magically work inside macro
591   // definitions, too.
592   unsigned StoredPosition = Tokens->getPosition();
593   FormatToken *Tok = FormatTok;
594   const FormatToken *PrevTok = Tok->Previous;
595   // Keep a stack of positions of lbrace tokens. We will
596   // update information about whether an lbrace starts a
597   // braced init list or a different block during the loop.
598   SmallVector<FormatToken *, 8> LBraceStack;
599   assert(Tok->is(tok::l_brace));
600   do {
601     // Get next non-comment token.
602     FormatToken *NextTok;
603     unsigned ReadTokens = 0;
604     do {
605       NextTok = Tokens->getNextToken();
606       ++ReadTokens;
607     } while (NextTok->is(tok::comment));
608 
609     switch (Tok->Tok.getKind()) {
610     case tok::l_brace:
611       if (Style.isJavaScript() && PrevTok) {
612         if (PrevTok->isOneOf(tok::colon, tok::less))
613           // A ':' indicates this code is in a type, or a braced list
614           // following a label in an object literal ({a: {b: 1}}).
615           // A '<' could be an object used in a comparison, but that is nonsense
616           // code (can never return true), so more likely it is a generic type
617           // argument (`X<{a: string; b: number}>`).
618           // The code below could be confused by semicolons between the
619           // individual members in a type member list, which would normally
620           // trigger BK_Block. In both cases, this must be parsed as an inline
621           // braced init.
622           Tok->setBlockKind(BK_BracedInit);
623         else if (PrevTok->is(tok::r_paren))
624           // `) { }` can only occur in function or method declarations in JS.
625           Tok->setBlockKind(BK_Block);
626       } else {
627         Tok->setBlockKind(BK_Unknown);
628       }
629       LBraceStack.push_back(Tok);
630       break;
631     case tok::r_brace:
632       if (LBraceStack.empty())
633         break;
634       if (LBraceStack.back()->is(BK_Unknown)) {
635         bool ProbablyBracedList = false;
636         if (Style.Language == FormatStyle::LK_Proto) {
637           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
638         } else {
639           // Skip NextTok over preprocessor lines, otherwise we may not
640           // properly diagnose the block as a braced intializer
641           // if the comma separator appears after the pp directive.
642           while (NextTok->is(tok::hash)) {
643             ScopedMacroState MacroState(*Line, Tokens, NextTok);
644             do {
645               NextTok = Tokens->getNextToken();
646               ++ReadTokens;
647             } while (NextTok->isNot(tok::eof));
648           }
649 
650           // Using OriginalColumn to distinguish between ObjC methods and
651           // binary operators is a bit hacky.
652           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
653                                   NextTok->OriginalColumn == 0;
654 
655           // Try to detect a braced list. Note that regardless how we mark inner
656           // braces here, we will overwrite the BlockKind later if we parse a
657           // braced list (where all blocks inside are by default braced lists),
658           // or when we explicitly detect blocks (for example while parsing
659           // lambdas).
660 
661           // If we already marked the opening brace as braced list, the closing
662           // must also be part of it.
663           ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);
664 
665           ProbablyBracedList = ProbablyBracedList ||
666                                (Style.isJavaScript() &&
667                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
668                                                  Keywords.kw_as));
669           ProbablyBracedList = ProbablyBracedList ||
670                                (Style.isCpp() && NextTok->is(tok::l_paren));
671 
672           // If there is a comma, semicolon or right paren after the closing
673           // brace, we assume this is a braced initializer list.
674           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
675           // braced list in JS.
676           ProbablyBracedList =
677               ProbablyBracedList ||
678               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
679                                tok::r_paren, tok::r_square, tok::l_brace,
680                                tok::ellipsis);
681 
682           ProbablyBracedList =
683               ProbablyBracedList ||
684               (NextTok->is(tok::identifier) &&
685                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
686 
687           ProbablyBracedList = ProbablyBracedList ||
688                                (NextTok->is(tok::semi) &&
689                                 (!ExpectClassBody || LBraceStack.size() != 1));
690 
691           ProbablyBracedList =
692               ProbablyBracedList ||
693               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
694 
695           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
696             // We can have an array subscript after a braced init
697             // list, but C++11 attributes are expected after blocks.
698             NextTok = Tokens->getNextToken();
699             ++ReadTokens;
700             ProbablyBracedList = NextTok->isNot(tok::l_square);
701           }
702         }
703         if (ProbablyBracedList) {
704           Tok->setBlockKind(BK_BracedInit);
705           LBraceStack.back()->setBlockKind(BK_BracedInit);
706         } else {
707           Tok->setBlockKind(BK_Block);
708           LBraceStack.back()->setBlockKind(BK_Block);
709         }
710       }
711       LBraceStack.pop_back();
712       break;
713     case tok::identifier:
714       if (!Tok->is(TT_StatementMacro))
715         break;
716       LLVM_FALLTHROUGH;
717     case tok::at:
718     case tok::semi:
719     case tok::kw_if:
720     case tok::kw_while:
721     case tok::kw_for:
722     case tok::kw_switch:
723     case tok::kw_try:
724     case tok::kw___try:
725       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
726         LBraceStack.back()->setBlockKind(BK_Block);
727       break;
728     default:
729       break;
730     }
731     PrevTok = Tok;
732     Tok = NextTok;
733   } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
734 
735   // Assume other blocks for all unclosed opening braces.
736   for (FormatToken *LBrace : LBraceStack)
737     if (LBrace->is(BK_Unknown))
738       LBrace->setBlockKind(BK_Block);
739 
740   FormatTok = Tokens->setPosition(StoredPosition);
741 }
742 
// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed = seed ^ Mixed;
}
748 
749 size_t UnwrappedLineParser::computePPHash() const {
750   size_t h = 0;
751   for (const auto &i : PPStack) {
752     hash_combine(h, size_t(i.Kind));
753     hash_combine(h, i.Line);
754   }
755   return h;
756 }
757 
/// Parses a block that starts at the current token, which must be '{' or a
/// macro-block-begin token, up to and including its closing token.
///
/// \param MustBeDeclaration Declaration state installed for the block's
/// contents.
/// \param AddLevels Number of levels by which the contents are indented
/// relative to the line holding the opening token.
/// \param MunchSemi If true, a ';' directly following the block is consumed.
/// \param UnindentWhitesmithsBraces If true, the line level is decreased by
/// one after the opening line has been emitted.
/// \param CanContainBracedList Forwarded to parseLevel().
/// \param NextLBracesType Forwarded to parseLevel().
/// \returns the IfStmtKind passed to parseLevel() for the block's contents;
/// it is initialized to IfStmtKind::NotIf.
UnwrappedLineParser::IfStmtKind
UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                bool MunchSemi, bool UnindentWhitesmithsBraces,
                                bool CanContainBracedList,
                                TokenType NextLBracesType) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Remember the opening token; FormatTok moves on while the body is parsed.
  FormatToken *Tok = FormatTok;
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Hash of the preprocessor branch stack at the opening token; compared with
  // the hash at the closing token further down.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  // A macro block opener may be followed by a parenthesized argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Index of the line that holds the opening token within CurrentLines, or
  // kInvalidIndex if CurrentLines is empty after that line has been emitted.
  // The number of collected preprocessor directive lines is subtracted when
  // lines are being added to the top-level list.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  // In Whitesmiths mode the level was already raised above.
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock = parseLevel(
      /*HasOpeningBrace=*/true, CanContainBracedList, &IfKind, NextLBracesType);

  // Input ended before the block was closed.
  if (eof())
    return IfKind;

  // The level ended on a token that does not close this kind of block:
  // restore the level and mark that token as a block.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // For a simple block, link the opening and closing braces to each other,
  // unless the token before the closing brace is a non-optional '}'.
  if (SimpleBlock && Tok->is(tok::l_brace)) {
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      Tok->MatchingParen = FormatTok;
      FormatTok->MatchingParen = Tok;
    }
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  // NOTE(review): AddLevels is unsigned, so -AddLevels wraps around before it
  // is converted to the LevelDifference parameter; presumably that parameter
  // is signed and receives the negative difference - confirm against
  // nextToken()'s declaration.
  nextToken(/*LevelDifference=*/-AddLevels);

  // A macro block closer may be followed by a parenthesized argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Opening and closing lines are only cross-referenced when the preprocessor
  // branch stack hashes to the same value at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}
861 
862 static bool isGoogScope(const UnwrappedLine &Line) {
863   // FIXME: Closure-library specific stuff should not be hard-coded but be
864   // configurable.
865   if (Line.Tokens.size() < 4)
866     return false;
867   auto I = Line.Tokens.begin();
868   if (I->Tok->TokenText != "goog")
869     return false;
870   ++I;
871   if (I->Tok->isNot(tok::period))
872     return false;
873   ++I;
874   if (I->Tok->TokenText != "scope")
875     return false;
876   ++I;
877   return I->Tok->is(tok::l_paren);
878 }
879 
880 static bool isIIFE(const UnwrappedLine &Line,
881                    const AdditionalKeywords &Keywords) {
882   // Look for the start of an immediately invoked anonymous function.
883   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
884   // This is commonly done in JavaScript to create a new, anonymous scope.
885   // Example: (function() { ... })()
886   if (Line.Tokens.size() < 3)
887     return false;
888   auto I = Line.Tokens.begin();
889   if (I->Tok->isNot(tok::l_paren))
890     return false;
891   ++I;
892   if (I->Tok->isNot(Keywords.kw_function))
893     return false;
894   ++I;
895   return I->Tok->is(tok::l_paren);
896 }
897 
898 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
899                                    const FormatToken &InitialToken) {
900   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
901     return Style.BraceWrapping.AfterNamespace;
902   if (InitialToken.is(tok::kw_class))
903     return Style.BraceWrapping.AfterClass;
904   if (InitialToken.is(tok::kw_union))
905     return Style.BraceWrapping.AfterUnion;
906   if (InitialToken.is(tok::kw_struct))
907     return Style.BraceWrapping.AfterStruct;
908   if (InitialToken.is(tok::kw_enum))
909     return Style.BraceWrapping.AfterEnum;
910   return false;
911 }
912 
913 void UnwrappedLineParser::parseChildBlock(
914     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
915   FormatTok->setBlockKind(BK_Block);
916   nextToken();
917   {
918     bool SkipIndent = (Style.isJavaScript() &&
919                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
920     ScopedLineState LineState(*this);
921     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
922                                             /*MustBeDeclaration=*/false);
923     Line->Level += SkipIndent ? 0 : 1;
924     parseLevel(/*HasOpeningBrace=*/true, CanContainBracedList,
925                /*IfKind=*/nullptr, NextLBracesType);
926     flushComments(isOnNewLine(*FormatTok));
927     Line->Level -= SkipIndent ? 0 : 1;
928   }
929   nextToken();
930 }
931 
932 void UnwrappedLineParser::parsePPDirective() {
933   assert(FormatTok->is(tok::hash) && "'#' expected");
934   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
935 
936   nextToken();
937 
938   if (!FormatTok->Tok.getIdentifierInfo()) {
939     parsePPUnknown();
940     return;
941   }
942 
943   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
944   case tok::pp_define:
945     parsePPDefine();
946     return;
947   case tok::pp_if:
948     parsePPIf(/*IfDef=*/false);
949     break;
950   case tok::pp_ifdef:
951   case tok::pp_ifndef:
952     parsePPIf(/*IfDef=*/true);
953     break;
954   case tok::pp_else:
955     parsePPElse();
956     break;
957   case tok::pp_elifdef:
958   case tok::pp_elifndef:
959   case tok::pp_elif:
960     parsePPElIf();
961     break;
962   case tok::pp_endif:
963     parsePPEndIf();
964     break;
965   default:
966     parsePPUnknown();
967     break;
968   }
969 }
970 
971 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
972   size_t Line = CurrentLines->size();
973   if (CurrentLines == &PreprocessorDirectives)
974     Line += Lines.size();
975 
976   if (Unreachable ||
977       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
978     PPStack.push_back({PP_Unreachable, Line});
979   else
980     PPStack.push_back({PP_Conditional, Line});
981 }
982 
983 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
984   ++PPBranchLevel;
985   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
986   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
987     PPLevelBranchIndex.push_back(0);
988     PPLevelBranchCount.push_back(0);
989   }
990   PPChainBranchIndex.push(0);
991   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
992   conditionalCompilationCondition(Unreachable || Skip);
993 }
994 
995 void UnwrappedLineParser::conditionalCompilationAlternative() {
996   if (!PPStack.empty())
997     PPStack.pop_back();
998   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
999   if (!PPChainBranchIndex.empty())
1000     ++PPChainBranchIndex.top();
1001   conditionalCompilationCondition(
1002       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1003       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1004 }
1005 
1006 void UnwrappedLineParser::conditionalCompilationEnd() {
1007   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1008   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1009     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1010       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1011   }
1012   // Guard against #endif's without #if.
1013   if (PPBranchLevel > -1)
1014     --PPBranchLevel;
1015   if (!PPChainBranchIndex.empty())
1016     PPChainBranchIndex.pop();
1017   if (!PPStack.empty())
1018     PPStack.pop_back();
1019 }
1020 
1021 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1022   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1023   nextToken();
1024   bool Unreachable = false;
1025   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1026     Unreachable = true;
1027   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1028     Unreachable = true;
1029   conditionalCompilationStart(Unreachable);
1030   FormatToken *IfCondition = FormatTok;
1031   // If there's a #ifndef on the first line, and the only lines before it are
1032   // comments, it could be an include guard.
1033   bool MaybeIncludeGuard = IfNDef;
1034   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
1035     for (auto &Line : Lines) {
1036       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1037         MaybeIncludeGuard = false;
1038         IncludeGuard = IG_Rejected;
1039         break;
1040       }
1041     }
1042   --PPBranchLevel;
1043   parsePPUnknown();
1044   ++PPBranchLevel;
1045   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1046     IncludeGuard = IG_IfNdefed;
1047     IncludeGuardToken = IfCondition;
1048   }
1049 }
1050 
1051 void UnwrappedLineParser::parsePPElse() {
1052   // If a potential include guard has an #else, it's not an include guard.
1053   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1054     IncludeGuard = IG_Rejected;
1055   conditionalCompilationAlternative();
1056   if (PPBranchLevel > -1)
1057     --PPBranchLevel;
1058   parsePPUnknown();
1059   ++PPBranchLevel;
1060 }
1061 
// #elif, #elifdef and #elifndef are handled exactly like #else.
void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1063 
1064 void UnwrappedLineParser::parsePPEndIf() {
1065   conditionalCompilationEnd();
1066   parsePPUnknown();
1067   // If the #endif of a potential include guard is the last thing in the file,
1068   // then we found an include guard.
1069   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1070       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1071     IncludeGuard = IG_Found;
1072 }
1073 
1074 void UnwrappedLineParser::parsePPDefine() {
1075   nextToken();
1076 
1077   if (!FormatTok->Tok.getIdentifierInfo()) {
1078     IncludeGuard = IG_Rejected;
1079     IncludeGuardToken = nullptr;
1080     parsePPUnknown();
1081     return;
1082   }
1083 
1084   if (IncludeGuard == IG_IfNdefed &&
1085       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1086     IncludeGuard = IG_Defined;
1087     IncludeGuardToken = nullptr;
1088     for (auto &Line : Lines) {
1089       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1090         IncludeGuard = IG_Rejected;
1091         break;
1092       }
1093     }
1094   }
1095 
1096   // In the context of a define, even keywords should be treated as normal
1097   // identifiers. Setting the kind to identifier is not enough, because we need
1098   // to treat additional keywords like __except as well, which are already
1099   // identifiers. Setting the identifier info to null interferes with include
1100   // guard processing above, and changes preprocessing nesting.
1101   FormatTok->Tok.setKind(tok::identifier);
1102   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1103   nextToken();
1104   if (FormatTok->Tok.getKind() == tok::l_paren &&
1105       !FormatTok->hasWhitespaceBefore())
1106     parseParens();
1107   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1108     Line->Level += PPBranchLevel + 1;
1109   addUnwrappedLine();
1110   ++Line->Level;
1111 
1112   // Errors during a preprocessor directive can only affect the layout of the
1113   // preprocessor directive, and thus we ignore them. An alternative approach
1114   // would be to use the same approach we use on the file level (no
1115   // re-indentation if there was a structural error) within the macro
1116   // definition.
1117   parseFile();
1118 }
1119 
1120 void UnwrappedLineParser::parsePPUnknown() {
1121   do {
1122     nextToken();
1123   } while (!eof());
1124   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1125     Line->Level += PPBranchLevel + 1;
1126   addUnwrappedLine();
1127 }
1128 
1129 // Here we exclude certain tokens that are not usually the first token in an
1130 // unwrapped line. This is used in attempt to distinguish macro calls without
1131 // trailing semicolons from other constructs split to several lines.
1132 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1133   // Semicolon can be a null-statement, l_square can be a start of a macro or
1134   // a C++11 attribute, but this doesn't seem to be common.
1135   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1136          Tok.isNot(TT_AttributeSquare) &&
1137          // Tokens that can only be used as binary operators and a part of
1138          // overloaded operator names.
1139          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1140          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1141          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1142          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1143          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1144          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1145          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1146          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1147          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1148          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1149          Tok.isNot(tok::lesslessequal) &&
1150          // Colon is used in labels, base class lists, initializer lists,
1151          // range-based for loops, ternary operator, but should never be the
1152          // first token in an unwrapped line.
1153          Tok.isNot(tok::colon) &&
1154          // 'noexcept' is a trailing annotation.
1155          Tok.isNot(tok::kw_noexcept);
1156 }
1157 
1158 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1159                           const FormatToken *FormatTok) {
1160   // FIXME: This returns true for C/C++ keywords like 'struct'.
1161   return FormatTok->is(tok::identifier) &&
1162          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1163           !FormatTok->isOneOf(
1164               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1165               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1166               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1167               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1168               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1169               Keywords.kw_instanceof, Keywords.kw_interface,
1170               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1171 }
1172 
1173 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1174                                  const FormatToken *FormatTok) {
1175   return FormatTok->Tok.isLiteral() ||
1176          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1177          mustBeJSIdent(Keywords, FormatTok);
1178 }
1179 
1180 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1181 // when encountered after a value (see mustBeJSIdentOrValue).
1182 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1183                            const FormatToken *FormatTok) {
1184   return FormatTok->isOneOf(
1185       tok::kw_return, Keywords.kw_yield,
1186       // conditionals
1187       tok::kw_if, tok::kw_else,
1188       // loops
1189       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1190       // switch/case
1191       tok::kw_switch, tok::kw_case,
1192       // exceptions
1193       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1194       // declaration
1195       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1196       Keywords.kw_async, Keywords.kw_function,
1197       // import/export
1198       Keywords.kw_import, tok::kw_export);
1199 }
1200 
1201 // Checks whether a token is a type in K&R C (aka C78).
1202 static bool isC78Type(const FormatToken &Tok) {
1203   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1204                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1205                      tok::identifier);
1206 }
1207 
1208 // This function checks whether a token starts the first parameter declaration
1209 // in a K&R C (aka C78) function definition, e.g.:
1210 //   int f(a, b)
1211 //   short a, b;
1212 //   {
1213 //      return a + b;
1214 //   }
1215 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1216                                const FormatToken *FuncName) {
1217   assert(Tok);
1218   assert(Next);
1219   assert(FuncName);
1220 
1221   if (FuncName->isNot(tok::identifier))
1222     return false;
1223 
1224   const FormatToken *Prev = FuncName->Previous;
1225   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1226     return false;
1227 
1228   if (!isC78Type(*Tok) &&
1229       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1230     return false;
1231 
1232   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1233     return false;
1234 
1235   Tok = Tok->Previous;
1236   if (!Tok || Tok->isNot(tok::r_paren))
1237     return false;
1238 
1239   Tok = Tok->Previous;
1240   if (!Tok || Tok->isNot(tok::identifier))
1241     return false;
1242 
1243   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1244 }
1245 
1246 void UnwrappedLineParser::parseModuleImport() {
1247   nextToken();
1248   while (!eof()) {
1249     if (FormatTok->is(tok::colon)) {
1250       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1251     }
1252     // Handle import <foo/bar.h> as we would an include statement.
1253     else if (FormatTok->is(tok::less)) {
1254       nextToken();
1255       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1256         // Mark tokens up to the trailing line comments as implicit string
1257         // literals.
1258         if (FormatTok->isNot(tok::comment) &&
1259             !FormatTok->TokenText.startswith("//"))
1260           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1261         nextToken();
1262       }
1263     }
1264     if (FormatTok->is(tok::semi)) {
1265       nextToken();
1266       break;
1267     }
1268     nextToken();
1269   }
1270 
1271   addUnwrappedLine();
1272 }
1273 
1274 // readTokenWithJavaScriptASI reads the next token and terminates the current
1275 // line if JavaScript Automatic Semicolon Insertion must
1276 // happen between the current token and the next token.
1277 //
1278 // This method is conservative - it cannot cover all edge cases of JavaScript,
1279 // but only aims to correctly handle certain well known cases. It *must not*
1280 // return true in speculative cases.
1281 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1282   FormatToken *Previous = FormatTok;
1283   readToken();
1284   FormatToken *Next = FormatTok;
1285 
1286   bool IsOnSameLine =
1287       CommentsBeforeNextToken.empty()
1288           ? Next->NewlinesBefore == 0
1289           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1290   if (IsOnSameLine)
1291     return;
1292 
1293   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1294   bool PreviousStartsTemplateExpr =
1295       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1296   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1297     // If the line contains an '@' sign, the previous token might be an
1298     // annotation, which can precede another identifier/value.
1299     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1300       return LineNode.Tok->is(tok::at);
1301     });
1302     if (HasAt)
1303       return;
1304   }
1305   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1306     return addUnwrappedLine();
1307   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1308   bool NextEndsTemplateExpr =
1309       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1310   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1311       (PreviousMustBeValue ||
1312        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1313                          tok::minusminus)))
1314     return addUnwrappedLine();
1315   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1316       isJSDeclOrStmt(Keywords, Next))
1317     return addUnwrappedLine();
1318 }
1319 
1320 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
1321                                                  bool IsTopLevel,
1322                                                  TokenType NextLBracesType,
1323                                                  bool *HasLabel) {
1324   if (Style.Language == FormatStyle::LK_TableGen &&
1325       FormatTok->is(tok::pp_include)) {
1326     nextToken();
1327     if (FormatTok->is(tok::string_literal))
1328       nextToken();
1329     addUnwrappedLine();
1330     return;
1331   }
1332   switch (FormatTok->Tok.getKind()) {
1333   case tok::kw_asm:
1334     nextToken();
1335     if (FormatTok->is(tok::l_brace)) {
1336       FormatTok->setFinalizedType(TT_InlineASMBrace);
1337       nextToken();
1338       while (FormatTok && FormatTok->isNot(tok::eof)) {
1339         if (FormatTok->is(tok::r_brace)) {
1340           FormatTok->setFinalizedType(TT_InlineASMBrace);
1341           nextToken();
1342           addUnwrappedLine();
1343           break;
1344         }
1345         FormatTok->Finalized = true;
1346         nextToken();
1347       }
1348     }
1349     break;
1350   case tok::kw_namespace:
1351     parseNamespace();
1352     return;
1353   case tok::kw_public:
1354   case tok::kw_protected:
1355   case tok::kw_private:
1356     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1357         Style.isCSharp())
1358       nextToken();
1359     else
1360       parseAccessSpecifier();
1361     return;
1362   case tok::kw_if:
1363     if (Style.isJavaScript() && Line->MustBeDeclaration)
1364       // field/method declaration.
1365       break;
1366     parseIfThenElse(IfKind);
1367     return;
1368   case tok::kw_for:
1369   case tok::kw_while:
1370     if (Style.isJavaScript() && Line->MustBeDeclaration)
1371       // field/method declaration.
1372       break;
1373     parseForOrWhileLoop();
1374     return;
1375   case tok::kw_do:
1376     if (Style.isJavaScript() && Line->MustBeDeclaration)
1377       // field/method declaration.
1378       break;
1379     parseDoWhile();
1380     return;
1381   case tok::kw_switch:
1382     if (Style.isJavaScript() && Line->MustBeDeclaration)
1383       // 'switch: string' field declaration.
1384       break;
1385     parseSwitch();
1386     return;
1387   case tok::kw_default:
1388     if (Style.isJavaScript() && Line->MustBeDeclaration)
1389       // 'default: string' field declaration.
1390       break;
1391     nextToken();
1392     if (FormatTok->is(tok::colon)) {
1393       parseLabel();
1394       return;
1395     }
1396     // e.g. "default void f() {}" in a Java interface.
1397     break;
1398   case tok::kw_case:
1399     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1400       // 'case: string' field declaration.
1401       nextToken();
1402       break;
1403     }
1404     parseCaseLabel();
1405     return;
1406   case tok::kw_try:
1407   case tok::kw___try:
1408     if (Style.isJavaScript() && Line->MustBeDeclaration)
1409       // field/method declaration.
1410       break;
1411     parseTryCatch();
1412     return;
1413   case tok::kw_extern:
1414     nextToken();
1415     if (FormatTok->is(tok::string_literal)) {
1416       nextToken();
1417       if (FormatTok->is(tok::l_brace)) {
1418         if (Style.BraceWrapping.AfterExternBlock)
1419           addUnwrappedLine();
1420         // Either we indent or for backwards compatibility we follow the
1421         // AfterExternBlock style.
1422         unsigned AddLevels =
1423             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1424                     (Style.BraceWrapping.AfterExternBlock &&
1425                      Style.IndentExternBlock ==
1426                          FormatStyle::IEBS_AfterExternBlock)
1427                 ? 1u
1428                 : 0u;
1429         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1430         addUnwrappedLine();
1431         return;
1432       }
1433     }
1434     break;
1435   case tok::kw_export:
1436     if (Style.isJavaScript()) {
1437       parseJavaScriptEs6ImportExport();
1438       return;
1439     }
1440     if (!Style.isCpp())
1441       break;
1442     // Handle C++ "(inline|export) namespace".
1443     LLVM_FALLTHROUGH;
1444   case tok::kw_inline:
1445     nextToken();
1446     if (FormatTok->is(tok::kw_namespace)) {
1447       parseNamespace();
1448       return;
1449     }
1450     break;
1451   case tok::identifier:
1452     if (FormatTok->is(TT_ForEachMacro)) {
1453       parseForOrWhileLoop();
1454       return;
1455     }
1456     if (FormatTok->is(TT_MacroBlockBegin)) {
1457       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1458                  /*MunchSemi=*/false);
1459       return;
1460     }
1461     if (FormatTok->is(Keywords.kw_import)) {
1462       if (Style.isJavaScript()) {
1463         parseJavaScriptEs6ImportExport();
1464         return;
1465       }
1466       if (Style.Language == FormatStyle::LK_Proto) {
1467         nextToken();
1468         if (FormatTok->is(tok::kw_public))
1469           nextToken();
1470         if (!FormatTok->is(tok::string_literal))
1471           return;
1472         nextToken();
1473         if (FormatTok->is(tok::semi))
1474           nextToken();
1475         addUnwrappedLine();
1476         return;
1477       }
1478       if (Style.isCpp()) {
1479         parseModuleImport();
1480         return;
1481       }
1482     }
1483     if (Style.isCpp() &&
1484         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1485                            Keywords.kw_slots, Keywords.kw_qslots)) {
1486       nextToken();
1487       if (FormatTok->is(tok::colon)) {
1488         nextToken();
1489         addUnwrappedLine();
1490         return;
1491       }
1492     }
1493     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1494       parseStatementMacro();
1495       return;
1496     }
1497     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1498       parseNamespace();
1499       return;
1500     }
1501     // In all other cases, parse the declaration.
1502     break;
1503   default:
1504     break;
1505   }
1506   do {
1507     const FormatToken *Previous = FormatTok->Previous;
1508     switch (FormatTok->Tok.getKind()) {
1509     case tok::at:
1510       nextToken();
1511       if (FormatTok->is(tok::l_brace)) {
1512         nextToken();
1513         parseBracedList();
1514         break;
1515       } else if (Style.Language == FormatStyle::LK_Java &&
1516                  FormatTok->is(Keywords.kw_interface)) {
1517         nextToken();
1518         break;
1519       }
1520       switch (FormatTok->Tok.getObjCKeywordID()) {
1521       case tok::objc_public:
1522       case tok::objc_protected:
1523       case tok::objc_package:
1524       case tok::objc_private:
1525         return parseAccessSpecifier();
1526       case tok::objc_interface:
1527       case tok::objc_implementation:
1528         return parseObjCInterfaceOrImplementation();
1529       case tok::objc_protocol:
1530         if (parseObjCProtocol())
1531           return;
1532         break;
1533       case tok::objc_end:
1534         return; // Handled by the caller.
1535       case tok::objc_optional:
1536       case tok::objc_required:
1537         nextToken();
1538         addUnwrappedLine();
1539         return;
1540       case tok::objc_autoreleasepool:
1541         nextToken();
1542         if (FormatTok->is(tok::l_brace)) {
1543           if (Style.BraceWrapping.AfterControlStatement ==
1544               FormatStyle::BWACS_Always)
1545             addUnwrappedLine();
1546           parseBlock();
1547         }
1548         addUnwrappedLine();
1549         return;
1550       case tok::objc_synchronized:
1551         nextToken();
1552         if (FormatTok->is(tok::l_paren))
1553           // Skip synchronization object
1554           parseParens();
1555         if (FormatTok->is(tok::l_brace)) {
1556           if (Style.BraceWrapping.AfterControlStatement ==
1557               FormatStyle::BWACS_Always)
1558             addUnwrappedLine();
1559           parseBlock();
1560         }
1561         addUnwrappedLine();
1562         return;
1563       case tok::objc_try:
1564         // This branch isn't strictly necessary (the kw_try case below would
1565         // do this too after the tok::at is parsed above).  But be explicit.
1566         parseTryCatch();
1567         return;
1568       default:
1569         break;
1570       }
1571       break;
1572     case tok::kw_concept:
1573       parseConcept();
1574       return;
1575     case tok::kw_requires: {
1576       if (Style.isCpp()) {
1577         bool ParsedClause = parseRequires();
1578         if (ParsedClause)
1579           return;
1580       } else {
1581         nextToken();
1582       }
1583       break;
1584     }
1585     case tok::kw_enum:
1586       // Ignore if this is part of "template <enum ...".
1587       if (Previous && Previous->is(tok::less)) {
1588         nextToken();
1589         break;
1590       }
1591 
1592       // parseEnum falls through and does not yet add an unwrapped line as an
1593       // enum definition can start a structural element.
1594       if (!parseEnum())
1595         break;
1596       // This only applies for C++.
1597       if (!Style.isCpp()) {
1598         addUnwrappedLine();
1599         return;
1600       }
1601       break;
1602     case tok::kw_typedef:
1603       nextToken();
1604       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1605                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1606                              Keywords.kw_CF_CLOSED_ENUM,
1607                              Keywords.kw_NS_CLOSED_ENUM))
1608         parseEnum();
1609       break;
1610     case tok::kw_struct:
1611     case tok::kw_union:
1612     case tok::kw_class:
1613       if (parseStructLike())
1614         return;
1615       break;
1616     case tok::period:
1617       nextToken();
1618       // In Java, classes have an implicit static member "class".
1619       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1620           FormatTok->is(tok::kw_class))
1621         nextToken();
1622       if (Style.isJavaScript() && FormatTok &&
1623           FormatTok->Tok.getIdentifierInfo())
1624         // JavaScript only has pseudo keywords, all keywords are allowed to
1625         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1626         nextToken();
1627       break;
1628     case tok::semi:
1629       nextToken();
1630       addUnwrappedLine();
1631       return;
1632     case tok::r_brace:
1633       addUnwrappedLine();
1634       return;
1635     case tok::l_paren: {
1636       parseParens();
1637       // Break the unwrapped line if a K&R C function definition has a parameter
1638       // declaration.
1639       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1640         break;
1641       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1642         addUnwrappedLine();
1643         return;
1644       }
1645       break;
1646     }
1647     case tok::kw_operator:
1648       nextToken();
1649       if (FormatTok->isBinaryOperator())
1650         nextToken();
1651       break;
1652     case tok::caret:
1653       nextToken();
1654       if (FormatTok->Tok.isAnyIdentifier() ||
1655           FormatTok->isSimpleTypeSpecifier())
1656         nextToken();
1657       if (FormatTok->is(tok::l_paren))
1658         parseParens();
1659       if (FormatTok->is(tok::l_brace))
1660         parseChildBlock();
1661       break;
1662     case tok::l_brace:
1663       if (NextLBracesType != TT_Unknown)
1664         FormatTok->setFinalizedType(NextLBracesType);
1665       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1666         // A block outside of parentheses must be the last part of a
1667         // structural element.
1668         // FIXME: Figure out cases where this is not true, and add projections
1669         // for them (the one we know is missing are lambdas).
1670         if (Style.Language == FormatStyle::LK_Java &&
1671             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1672           // If necessary, we could set the type to something different than
1673           // TT_FunctionLBrace.
1674           if (Style.BraceWrapping.AfterControlStatement ==
1675               FormatStyle::BWACS_Always)
1676             addUnwrappedLine();
1677         } else if (Style.BraceWrapping.AfterFunction) {
1678           addUnwrappedLine();
1679         }
1680         if (!Line->InPPDirective)
1681           FormatTok->setFinalizedType(TT_FunctionLBrace);
1682         parseBlock();
1683         addUnwrappedLine();
1684         return;
1685       }
1686       // Otherwise this was a braced init list, and the structural
1687       // element continues.
1688       break;
1689     case tok::kw_try:
1690       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1691         // field/method declaration.
1692         nextToken();
1693         break;
1694       }
1695       // We arrive here when parsing function-try blocks.
1696       if (Style.BraceWrapping.AfterFunction)
1697         addUnwrappedLine();
1698       parseTryCatch();
1699       return;
1700     case tok::identifier: {
1701       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1702           Line->MustBeDeclaration) {
1703         addUnwrappedLine();
1704         parseCSharpGenericTypeConstraint();
1705         break;
1706       }
1707       if (FormatTok->is(TT_MacroBlockEnd)) {
1708         addUnwrappedLine();
1709         return;
1710       }
1711 
1712       // Function declarations (as opposed to function expressions) are parsed
1713       // on their own unwrapped line by continuing this loop. Function
1714       // expressions (functions that are not on their own line) must not create
1715       // a new unwrapped line, so they are special cased below.
1716       size_t TokenCount = Line->Tokens.size();
1717       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1718           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1719                                                      Keywords.kw_async)))) {
1720         tryToParseJSFunction();
1721         break;
1722       }
1723       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1724           FormatTok->is(Keywords.kw_interface)) {
1725         if (Style.isJavaScript()) {
1726           // In JavaScript/TypeScript, "interface" can be used as a standalone
1727           // identifier, e.g. in `var interface = 1;`. If "interface" is
1728           // followed by another identifier, it is very like to be an actual
1729           // interface declaration.
1730           unsigned StoredPosition = Tokens->getPosition();
1731           FormatToken *Next = Tokens->getNextToken();
1732           FormatTok = Tokens->setPosition(StoredPosition);
1733           if (!mustBeJSIdent(Keywords, Next)) {
1734             nextToken();
1735             break;
1736           }
1737         }
1738         parseRecord();
1739         addUnwrappedLine();
1740         return;
1741       }
1742 
1743       if (FormatTok->is(Keywords.kw_interface)) {
1744         if (parseStructLike())
1745           return;
1746         break;
1747       }
1748 
1749       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1750         parseStatementMacro();
1751         return;
1752       }
1753 
1754       // See if the following token should start a new unwrapped line.
1755       StringRef Text = FormatTok->TokenText;
1756 
1757       FormatToken *PreviousToken = FormatTok;
1758       nextToken();
1759 
1760       // JS doesn't have macros, and within classes colons indicate fields, not
1761       // labels.
1762       if (Style.isJavaScript())
1763         break;
1764 
1765       TokenCount = Line->Tokens.size();
1766       if (TokenCount == 1 ||
1767           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1768         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1769           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1770           parseLabel(!Style.IndentGotoLabels);
1771           if (HasLabel)
1772             *HasLabel = true;
1773           return;
1774         }
1775         // Recognize function-like macro usages without trailing semicolon as
1776         // well as free-standing macros like Q_OBJECT.
1777         bool FunctionLike = FormatTok->is(tok::l_paren);
1778         if (FunctionLike)
1779           parseParens();
1780 
1781         bool FollowedByNewline =
1782             CommentsBeforeNextToken.empty()
1783                 ? FormatTok->NewlinesBefore > 0
1784                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1785 
1786         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1787             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1788           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1789           addUnwrappedLine();
1790           return;
1791         }
1792       }
1793       break;
1794     }
1795     case tok::equal:
1796       if ((Style.isJavaScript() || Style.isCSharp()) &&
1797           FormatTok->is(TT_FatArrow)) {
1798         tryToParseChildBlock();
1799         break;
1800       }
1801 
1802       nextToken();
1803       if (FormatTok->is(tok::l_brace)) {
1804         // Block kind should probably be set to BK_BracedInit for any language.
1805         // C# needs this change to ensure that array initialisers and object
1806         // initialisers are indented the same way.
1807         if (Style.isCSharp())
1808           FormatTok->setBlockKind(BK_BracedInit);
1809         nextToken();
1810         parseBracedList();
1811       } else if (Style.Language == FormatStyle::LK_Proto &&
1812                  FormatTok->is(tok::less)) {
1813         nextToken();
1814         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1815                         /*ClosingBraceKind=*/tok::greater);
1816       }
1817       break;
1818     case tok::l_square:
1819       parseSquare();
1820       break;
1821     case tok::kw_new:
1822       parseNew();
1823       break;
1824     case tok::kw_case:
1825       if (Style.isJavaScript() && Line->MustBeDeclaration)
1826         // 'case: string' field declaration.
1827         break;
1828       parseCaseLabel();
1829       break;
1830     default:
1831       nextToken();
1832       break;
1833     }
1834   } while (!eof());
1835 }
1836 
1837 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1838   assert(FormatTok->is(tok::l_brace));
1839   if (!Style.isCSharp())
1840     return false;
1841   // See if it's a property accessor.
1842   if (FormatTok->Previous->isNot(tok::identifier))
1843     return false;
1844 
1845   // See if we are inside a property accessor.
1846   //
1847   // Record the current tokenPosition so that we can advance and
1848   // reset the current token. `Next` is not set yet so we need
1849   // another way to advance along the token stream.
1850   unsigned int StoredPosition = Tokens->getPosition();
1851   FormatToken *Tok = Tokens->getNextToken();
1852 
1853   // A trivial property accessor is of the form:
1854   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
1855   // Track these as they do not require line breaks to be introduced.
1856   bool HasSpecialAccessor = false;
1857   bool IsTrivialPropertyAccessor = true;
1858   while (!eof()) {
1859     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1860                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1861                      Keywords.kw_init, Keywords.kw_set)) {
1862       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
1863         HasSpecialAccessor = true;
1864       Tok = Tokens->getNextToken();
1865       continue;
1866     }
1867     if (Tok->isNot(tok::r_brace))
1868       IsTrivialPropertyAccessor = false;
1869     break;
1870   }
1871 
1872   if (!HasSpecialAccessor) {
1873     Tokens->setPosition(StoredPosition);
1874     return false;
1875   }
1876 
1877   // Try to parse the property accessor:
1878   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1879   Tokens->setPosition(StoredPosition);
1880   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1881     addUnwrappedLine();
1882   nextToken();
1883   do {
1884     switch (FormatTok->Tok.getKind()) {
1885     case tok::r_brace:
1886       nextToken();
1887       if (FormatTok->is(tok::equal)) {
1888         while (!eof() && FormatTok->isNot(tok::semi))
1889           nextToken();
1890         nextToken();
1891       }
1892       addUnwrappedLine();
1893       return true;
1894     case tok::l_brace:
1895       ++Line->Level;
1896       parseBlock(/*MustBeDeclaration=*/true);
1897       addUnwrappedLine();
1898       --Line->Level;
1899       break;
1900     case tok::equal:
1901       if (FormatTok->is(TT_FatArrow)) {
1902         ++Line->Level;
1903         do {
1904           nextToken();
1905         } while (!eof() && FormatTok->isNot(tok::semi));
1906         nextToken();
1907         addUnwrappedLine();
1908         --Line->Level;
1909         break;
1910       }
1911       nextToken();
1912       break;
1913     default:
1914       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
1915                              Keywords.kw_set) &&
1916           !IsTrivialPropertyAccessor) {
1917         // Non-trivial get/set needs to be on its own line.
1918         addUnwrappedLine();
1919       }
1920       nextToken();
1921     }
1922   } while (!eof());
1923 
1924   // Unreachable for well-formed code (paired '{' and '}').
1925   return true;
1926 }
1927 
/// Tries to parse a C++ lambda expression starting at the current '['.
///
/// For languages other than C++ this consumes a single token and gives up.
/// Otherwise the introducer is parsed with tryToParseLambdaIntroducer() and
/// the following tokens are scanned up to the '{' of the body. When the brace
/// is reached, it is finalized as TT_LambdaLBrace, the opening bracket as
/// TT_LambdaLSquare, and the body is parsed as a child block.
///
/// \returns false if the language is not C++, if the introducer was rejected,
/// or if the introducer is directly followed by '>'. Returns true otherwise,
/// including when the scan stops at a token that is not accepted between the
/// introducer and the body; in that case the brackets are not marked as a
/// lambda.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  // Remember the '[' so it can be typed once the body brace has been found.
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // `[something] >` is not a lambda, but an array type in a template parameter
  // list.
  if (FormatTok->is(tok::greater))
    return false;

  // Operator-like tokens are only accepted after a `->` or inside what looks
  // like a template parameter list (see the operator cases below).
  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      // Not reachable: the loop condition already excludes '{'.
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
      // One of these keywords right after '<' is taken as the start of a
      // template parameter list.
      assert(FormatTok->Previous);
      if (FormatTok->Previous->is(tok::less))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      // Outside of those two contexts the token ends the scan without the
      // brackets being marked as a lambda.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      // Any other token (including end of input) ends the scan.
      return true;
    }
  }
  // The body brace was reached: type both ends and parse the body.
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}
2037 
2038 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2039   const FormatToken *Previous = FormatTok->Previous;
2040   if (Previous &&
2041       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2042                          tok::kw_delete, tok::l_square) ||
2043        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
2044        Previous->isSimpleTypeSpecifier())) {
2045     nextToken();
2046     return false;
2047   }
2048   nextToken();
2049   if (FormatTok->is(tok::l_square))
2050     return false;
2051   parseSquare(/*LambdaIntroducer=*/true);
2052   return true;
2053 }
2054 
2055 void UnwrappedLineParser::tryToParseJSFunction() {
2056   assert(FormatTok->is(Keywords.kw_function) ||
2057          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2058   if (FormatTok->is(Keywords.kw_async))
2059     nextToken();
2060   // Consume "function".
2061   nextToken();
2062 
2063   // Consume * (generator function). Treat it like C++'s overloaded operators.
2064   if (FormatTok->is(tok::star)) {
2065     FormatTok->setFinalizedType(TT_OverloadedOperator);
2066     nextToken();
2067   }
2068 
2069   // Consume function name.
2070   if (FormatTok->is(tok::identifier))
2071     nextToken();
2072 
2073   if (FormatTok->isNot(tok::l_paren))
2074     return;
2075 
2076   // Parse formal parameter list.
2077   parseParens();
2078 
2079   if (FormatTok->is(tok::colon)) {
2080     // Parse a type definition.
2081     nextToken();
2082 
2083     // Eat the type declaration. For braced inline object types, balance braces,
2084     // otherwise just parse until finding an l_brace for the function body.
2085     if (FormatTok->is(tok::l_brace))
2086       tryToParseBracedList();
2087     else
2088       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2089         nextToken();
2090   }
2091 
2092   if (FormatTok->is(tok::semi))
2093     return;
2094 
2095   parseChildBlock();
2096 }
2097 
2098 bool UnwrappedLineParser::tryToParseBracedList() {
2099   if (FormatTok->is(BK_Unknown))
2100     calculateBraceTypes();
2101   assert(FormatTok->isNot(BK_Unknown));
2102   if (FormatTok->is(BK_Block))
2103     return false;
2104   nextToken();
2105   parseBracedList();
2106   return true;
2107 }
2108 
2109 bool UnwrappedLineParser::tryToParseChildBlock() {
2110   assert(Style.isJavaScript() || Style.isCSharp());
2111   assert(FormatTok->is(TT_FatArrow));
2112   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2113   // They always start an expression or a child block if followed by a curly
2114   // brace.
2115   nextToken();
2116   if (FormatTok->isNot(tok::l_brace))
2117     return false;
2118   parseChildBlock();
2119   return true;
2120 }
2121 
2122 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2123                                           bool IsEnum,
2124                                           tok::TokenKind ClosingBraceKind) {
2125   bool HasError = false;
2126 
2127   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2128   // replace this by using parseAssignmentExpression() inside.
2129   do {
2130     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2131         tryToParseChildBlock())
2132       continue;
2133     if (Style.isJavaScript()) {
2134       if (FormatTok->is(Keywords.kw_function) ||
2135           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2136         tryToParseJSFunction();
2137         continue;
2138       }
2139       if (FormatTok->is(tok::l_brace)) {
2140         // Could be a method inside of a braced list `{a() { return 1; }}`.
2141         if (tryToParseBracedList())
2142           continue;
2143         parseChildBlock();
2144       }
2145     }
2146     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2147       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2148         addUnwrappedLine();
2149       nextToken();
2150       return !HasError;
2151     }
2152     switch (FormatTok->Tok.getKind()) {
2153     case tok::l_square:
2154       if (Style.isCSharp())
2155         parseSquare();
2156       else
2157         tryToParseLambda();
2158       break;
2159     case tok::l_paren:
2160       parseParens();
2161       // JavaScript can just have free standing methods and getters/setters in
2162       // object literals. Detect them by a "{" following ")".
2163       if (Style.isJavaScript()) {
2164         if (FormatTok->is(tok::l_brace))
2165           parseChildBlock();
2166         break;
2167       }
2168       break;
2169     case tok::l_brace:
2170       // Assume there are no blocks inside a braced init list apart
2171       // from the ones we explicitly parse out (like lambdas).
2172       FormatTok->setBlockKind(BK_BracedInit);
2173       nextToken();
2174       parseBracedList();
2175       break;
2176     case tok::less:
2177       if (Style.Language == FormatStyle::LK_Proto) {
2178         nextToken();
2179         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2180                         /*ClosingBraceKind=*/tok::greater);
2181       } else {
2182         nextToken();
2183       }
2184       break;
2185     case tok::semi:
2186       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2187       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2188       // used for error recovery if we have otherwise determined that this is
2189       // a braced list.
2190       if (Style.isJavaScript()) {
2191         nextToken();
2192         break;
2193       }
2194       HasError = true;
2195       if (!ContinueOnSemicolons)
2196         return !HasError;
2197       nextToken();
2198       break;
2199     case tok::comma:
2200       nextToken();
2201       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2202         addUnwrappedLine();
2203       break;
2204     default:
2205       nextToken();
2206       break;
2207     }
2208   } while (!eof());
2209   return false;
2210 }
2211 
2212 /// \brief Parses a pair of parentheses (and everything between them).
2213 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2214 /// double ampersands. This only counts for the current parens scope.
2215 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2216   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2217   nextToken();
2218   do {
2219     switch (FormatTok->Tok.getKind()) {
2220     case tok::l_paren:
2221       parseParens();
2222       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2223         parseChildBlock();
2224       break;
2225     case tok::r_paren:
2226       nextToken();
2227       return;
2228     case tok::r_brace:
2229       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2230       return;
2231     case tok::l_square:
2232       tryToParseLambda();
2233       break;
2234     case tok::l_brace:
2235       if (!tryToParseBracedList())
2236         parseChildBlock();
2237       break;
2238     case tok::at:
2239       nextToken();
2240       if (FormatTok->is(tok::l_brace)) {
2241         nextToken();
2242         parseBracedList();
2243       }
2244       break;
2245     case tok::equal:
2246       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2247         tryToParseChildBlock();
2248       else
2249         nextToken();
2250       break;
2251     case tok::kw_class:
2252       if (Style.isJavaScript())
2253         parseRecord(/*ParseAsExpr=*/true);
2254       else
2255         nextToken();
2256       break;
2257     case tok::identifier:
2258       if (Style.isJavaScript() &&
2259           (FormatTok->is(Keywords.kw_function) ||
2260            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2261         tryToParseJSFunction();
2262       else
2263         nextToken();
2264       break;
2265     case tok::kw_requires: {
2266       auto RequiresToken = FormatTok;
2267       nextToken();
2268       parseRequiresExpression(RequiresToken);
2269       break;
2270     }
2271     case tok::ampamp:
2272       if (AmpAmpTokenType != TT_Unknown)
2273         FormatTok->setFinalizedType(AmpAmpTokenType);
2274       LLVM_FALLTHROUGH;
2275     default:
2276       nextToken();
2277       break;
2278     }
2279   } while (!eof());
2280 }
2281 
2282 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2283   if (!LambdaIntroducer) {
2284     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2285     if (tryToParseLambda())
2286       return;
2287   }
2288   do {
2289     switch (FormatTok->Tok.getKind()) {
2290     case tok::l_paren:
2291       parseParens();
2292       break;
2293     case tok::r_square:
2294       nextToken();
2295       return;
2296     case tok::r_brace:
2297       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2298       return;
2299     case tok::l_square:
2300       parseSquare();
2301       break;
2302     case tok::l_brace: {
2303       if (!tryToParseBracedList())
2304         parseChildBlock();
2305       break;
2306     }
2307     case tok::at:
2308       nextToken();
2309       if (FormatTok->is(tok::l_brace)) {
2310         nextToken();
2311         parseBracedList();
2312       }
2313       break;
2314     default:
2315       nextToken();
2316       break;
2317     }
2318   } while (!eof());
2319 }
2320 
2321 void UnwrappedLineParser::keepAncestorBraces() {
2322   if (!Style.RemoveBracesLLVM)
2323     return;
2324 
2325   const int MaxNestingLevels = 2;
2326   const int Size = NestedTooDeep.size();
2327   if (Size >= MaxNestingLevels)
2328     NestedTooDeep[Size - MaxNestingLevels] = true;
2329   NestedTooDeep.push_back(false);
2330 }
2331 
2332 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2333   for (const auto &Token : llvm::reverse(Line.Tokens))
2334     if (Token.Tok->isNot(tok::comment))
2335       return Token.Tok;
2336 
2337   return nullptr;
2338 }
2339 
/// Parses the single statement that forms the body of a control statement
/// written without braces. The body is parsed as one structural element on
/// its own unwrapped line, one level deeper than the current line.
///
/// When Style.InsertBraces is set (and no preprocessor directive is
/// involved), the tokens around the body are tagged through their BraceCount
/// fields. NOTE(review): presumably a BraceCount of -1 asks for an inserted
/// '{' after the token and a positive count for that many inserted '}' -
/// confirm against the code that consumes BraceCount.
///
/// \param CheckEOF If true and the body is followed by end of input, the
/// pending unwrapped line is added before the level is restored.
void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
  FormatToken *Tok = nullptr;

  if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
      PreprocessorDirectives.empty()) {
    // Tag the last non-comment token of the line that precedes the body,
    // unless it has already been tagged.
    Tok = getLastNonComment(*Line);
    assert(Tok);
    if (Tok->BraceCount < 0) {
      assert(Tok->BraceCount == -1);
      Tok = nullptr;
    } else {
      Tok->BraceCount = -1;
    }
  }

  addUnwrappedLine();
  ++Line->Level;
  parseStructuralElement();

  if (Tok) {
    assert(!Line->InPPDirective);
    Tok = nullptr;
    // Find the most recently added line that is not part of a preprocessor
    // directive and has at least one non-comment token. Note that it is the
    // line's very last token - which may be a trailing comment - whose
    // count is bumped.
    for (const auto &L : llvm::reverse(*CurrentLines)) {
      if (!L.InPPDirective && getLastNonComment(L)) {
        Tok = L.Tokens.back().Tok;
        break;
      }
    }
    assert(Tok);
    ++Tok->BraceCount;
  }

  if (CheckEOF && FormatTok->is(tok::eof))
    addUnwrappedLine();

  --Line->Level;
}
2377 
2378 static void markOptionalBraces(FormatToken *LeftBrace) {
2379   if (!LeftBrace)
2380     return;
2381 
2382   assert(LeftBrace->is(tok::l_brace));
2383 
2384   FormatToken *RightBrace = LeftBrace->MatchingParen;
2385   if (!RightBrace) {
2386     assert(!LeftBrace->Optional);
2387     return;
2388   }
2389 
2390   assert(RightBrace->is(tok::r_brace));
2391   assert(RightBrace->MatchingParen == LeftBrace);
2392   assert(LeftBrace->Optional == RightBrace->Optional);
2393 
2394   LeftBrace->Optional = true;
2395   RightBrace->Optional = true;
2396 }
2397 
2398 void UnwrappedLineParser::handleAttributes() {
2399   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2400   if (FormatTok->is(TT_AttributeMacro))
2401     nextToken();
2402   handleCppAttributes();
2403 }
2404 
2405 bool UnwrappedLineParser::handleCppAttributes() {
2406   // Handle [[likely]] / [[unlikely]] attributes.
2407   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2408     parseSquare();
2409     return true;
2410   }
2411   return false;
2412 }
2413 
/// Parses an `if` statement including an optional `else` part, recursing for
/// `else if` chains. When Style.RemoveBracesLLVM is set, it also decides
/// whether the braces of the if/else bodies are marked as optional.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is set, receives the
/// kind of statement parsed (IfOnly, IfElse or IfElseIf). It is not written
/// when Style.RemoveBracesLLVM is off.
/// \param KeepBraces If true, no braces are marked optional by this call;
/// the flag is also forwarded to the recursive call for an `else if`.
/// \returns the left brace of the `if` body when Style.RemoveBracesLLVM is
/// set (nullptr if the body is unbraced), and always nullptr otherwise.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces) {
  assert(FormatTok->is(tok::kw_if) && "'if' expected");
  nextToken();
  // Optional '!' in front of the condition keyword (as in `if !consteval`).
  if (FormatTok->is(tok::exclaim))
    nextToken();
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    // Skip `constexpr` or a single identifier before the parenthesized
    // condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren))
      parseParens();
  }
  handleAttributes();

  // Set when the line holding the closing brace of the if body is still
  // open because an `else` may continue it.
  bool NeedsUnwrappedLine = false;
  // Pushes this statement's entry onto NestedTooDeep (only when
  // Style.RemoveBracesLLVM is set).
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (FormatTok->is(tok::l_brace)) {
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    IfBlockKind = parseBlock();
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    parseUnbracedBody();
  }

  bool KeepIfBraces = false;
  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // The if braces are kept when the brace is unmatched, when this level
    // was flagged in NestedTooDeep, or when the block was reported as an
    // `if` without `else` or as an `if`/`else if` chain.
    KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Reset the flag so that it only reflects the else part from here on.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (FormatTok->is(tok::l_brace)) {
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      if (parseBlock() == IfStmtKind::IfOnly)
        Kind = IfStmtKind::IfElseIf;
      addUnwrappedLine();
    } else if (FormatTok->is(tok::kw_if)) {
      // `else if`: when a comment sits between the two keywords, the nested
      // `if` starts a new, deeper unwrapped line.
      FormatToken *Previous = Tokens->getPreviousToken();
      const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take this statement's entry off the stack for the duration of the
        // recursive call and restore it afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace =
          parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No else part: a nested if/else block also forces the braces to stay.
    if (Style.RemoveBracesLLVM)
      KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below only concerns brace removal.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  const bool KeepElseBraces =
      (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();

  // Pop the entry pushed by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay: unlink the pair so that the if brace shows up as
    // unmatched from now on (see the MatchingParen checks above).
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2528 
2529 void UnwrappedLineParser::parseTryCatch() {
2530   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2531   nextToken();
2532   bool NeedsUnwrappedLine = false;
2533   if (FormatTok->is(tok::colon)) {
2534     // We are in a function try block, what comes is an initializer list.
2535     nextToken();
2536 
2537     // In case identifiers were removed by clang-tidy, what might follow is
2538     // multiple commas in sequence - before the first identifier.
2539     while (FormatTok->is(tok::comma))
2540       nextToken();
2541 
2542     while (FormatTok->is(tok::identifier)) {
2543       nextToken();
2544       if (FormatTok->is(tok::l_paren))
2545         parseParens();
2546       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2547           FormatTok->is(tok::l_brace)) {
2548         do {
2549           nextToken();
2550         } while (!FormatTok->is(tok::r_brace));
2551         nextToken();
2552       }
2553 
2554       // In case identifiers were removed by clang-tidy, what might follow is
2555       // multiple commas in sequence - after the first identifier.
2556       while (FormatTok->is(tok::comma))
2557         nextToken();
2558     }
2559   }
2560   // Parse try with resource.
2561   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2562     parseParens();
2563 
2564   keepAncestorBraces();
2565 
2566   if (FormatTok->is(tok::l_brace)) {
2567     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2568     parseBlock();
2569     if (Style.BraceWrapping.BeforeCatch)
2570       addUnwrappedLine();
2571     else
2572       NeedsUnwrappedLine = true;
2573   } else if (!FormatTok->is(tok::kw_catch)) {
2574     // The C++ standard requires a compound-statement after a try.
2575     // If there's none, we try to assume there's a structuralElement
2576     // and try to continue.
2577     addUnwrappedLine();
2578     ++Line->Level;
2579     parseStructuralElement();
2580     --Line->Level;
2581   }
2582   while (true) {
2583     if (FormatTok->is(tok::at))
2584       nextToken();
2585     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2586                              tok::kw___finally) ||
2587           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2588            FormatTok->is(Keywords.kw_finally)) ||
2589           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2590            FormatTok->isObjCAtKeyword(tok::objc_finally))))
2591       break;
2592     nextToken();
2593     while (FormatTok->isNot(tok::l_brace)) {
2594       if (FormatTok->is(tok::l_paren)) {
2595         parseParens();
2596         continue;
2597       }
2598       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2599         if (Style.RemoveBracesLLVM)
2600           NestedTooDeep.pop_back();
2601         return;
2602       }
2603       nextToken();
2604     }
2605     NeedsUnwrappedLine = false;
2606     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2607     parseBlock();
2608     if (Style.BraceWrapping.BeforeCatch)
2609       addUnwrappedLine();
2610     else
2611       NeedsUnwrappedLine = true;
2612   }
2613 
2614   if (Style.RemoveBracesLLVM)
2615     NestedTooDeep.pop_back();
2616 
2617   if (NeedsUnwrappedLine)
2618     addUnwrappedLine();
2619 }
2620 
2621 void UnwrappedLineParser::parseNamespace() {
2622   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2623          "'namespace' expected");
2624 
2625   const FormatToken &InitialToken = *FormatTok;
2626   nextToken();
2627   if (InitialToken.is(TT_NamespaceMacro)) {
2628     parseParens();
2629   } else {
2630     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2631                               tok::l_square, tok::period, tok::l_paren) ||
2632            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2633       if (FormatTok->is(tok::l_square))
2634         parseSquare();
2635       else if (FormatTok->is(tok::l_paren))
2636         parseParens();
2637       else
2638         nextToken();
2639   }
2640   if (FormatTok->is(tok::l_brace)) {
2641     if (ShouldBreakBeforeBrace(Style, InitialToken))
2642       addUnwrappedLine();
2643 
2644     unsigned AddLevels =
2645         Style.NamespaceIndentation == FormatStyle::NI_All ||
2646                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2647                  DeclarationScopeStack.size() > 1)
2648             ? 1u
2649             : 0u;
2650     bool ManageWhitesmithsBraces =
2651         AddLevels == 0u &&
2652         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2653 
2654     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2655     // the whole block.
2656     if (ManageWhitesmithsBraces)
2657       ++Line->Level;
2658 
2659     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2660                /*MunchSemi=*/true,
2661                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2662 
2663     // Munch the semicolon after a namespace. This is more common than one would
2664     // think. Putting the semicolon into its own line is very ugly.
2665     if (FormatTok->is(tok::semi))
2666       nextToken();
2667 
2668     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2669 
2670     if (ManageWhitesmithsBraces)
2671       --Line->Level;
2672   }
2673   // FIXME: Add error handling.
2674 }
2675 
2676 void UnwrappedLineParser::parseNew() {
2677   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2678   nextToken();
2679 
2680   if (Style.isCSharp()) {
2681     do {
2682       if (FormatTok->is(tok::l_brace))
2683         parseBracedList();
2684 
2685       if (FormatTok->isOneOf(tok::semi, tok::comma))
2686         return;
2687 
2688       nextToken();
2689     } while (!eof());
2690   }
2691 
2692   if (Style.Language != FormatStyle::LK_Java)
2693     return;
2694 
2695   // In Java, we can parse everything up to the parens, which aren't optional.
2696   do {
2697     // There should not be a ;, { or } before the new's open paren.
2698     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2699       return;
2700 
2701     // Consume the parens.
2702     if (FormatTok->is(tok::l_paren)) {
2703       parseParens();
2704 
2705       // If there is a class body of an anonymous class, consume that as child.
2706       if (FormatTok->is(tok::l_brace))
2707         parseChildBlock();
2708       return;
2709     }
2710     nextToken();
2711   } while (!eof());
2712 }
2713 
2714 void UnwrappedLineParser::parseForOrWhileLoop() {
2715   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2716          "'for', 'while' or foreach macro expected");
2717   nextToken();
2718   // JS' for await ( ...
2719   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2720     nextToken();
2721   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2722     nextToken();
2723   if (FormatTok->is(tok::l_paren))
2724     parseParens();
2725 
2726   keepAncestorBraces();
2727 
2728   if (FormatTok->is(tok::l_brace)) {
2729     FormatToken *LeftBrace = FormatTok;
2730     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2731     parseBlock();
2732     if (Style.RemoveBracesLLVM) {
2733       assert(!NestedTooDeep.empty());
2734       if (!NestedTooDeep.back())
2735         markOptionalBraces(LeftBrace);
2736     }
2737     addUnwrappedLine();
2738   } else {
2739     parseUnbracedBody();
2740   }
2741 
2742   if (Style.RemoveBracesLLVM)
2743     NestedTooDeep.pop_back();
2744 }
2745 
2746 void UnwrappedLineParser::parseDoWhile() {
2747   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2748   nextToken();
2749 
2750   keepAncestorBraces();
2751 
2752   if (FormatTok->is(tok::l_brace)) {
2753     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2754     parseBlock();
2755     if (Style.BraceWrapping.BeforeWhile)
2756       addUnwrappedLine();
2757   } else {
2758     parseUnbracedBody();
2759   }
2760 
2761   if (Style.RemoveBracesLLVM)
2762     NestedTooDeep.pop_back();
2763 
2764   // FIXME: Add error handling.
2765   if (!FormatTok->is(tok::kw_while)) {
2766     addUnwrappedLine();
2767     return;
2768   }
2769 
2770   // If in Whitesmiths mode, the line with the while() needs to be indented
2771   // to the same level as the block.
2772   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2773     ++Line->Level;
2774 
2775   nextToken();
2776   parseStructuralElement();
2777 }
2778 
2779 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2780   nextToken();
2781   unsigned OldLineLevel = Line->Level;
2782   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2783     --Line->Level;
2784   if (LeftAlignLabel)
2785     Line->Level = 0;
2786 
2787   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2788       FormatTok->is(tok::l_brace)) {
2789 
2790     CompoundStatementIndenter Indenter(this, Line->Level,
2791                                        Style.BraceWrapping.AfterCaseLabel,
2792                                        Style.BraceWrapping.IndentBraces);
2793     parseBlock();
2794     if (FormatTok->is(tok::kw_break)) {
2795       if (Style.BraceWrapping.AfterControlStatement ==
2796           FormatStyle::BWACS_Always) {
2797         addUnwrappedLine();
2798         if (!Style.IndentCaseBlocks &&
2799             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2800           ++Line->Level;
2801       }
2802       parseStructuralElement();
2803     }
2804     addUnwrappedLine();
2805   } else {
2806     if (FormatTok->is(tok::semi))
2807       nextToken();
2808     addUnwrappedLine();
2809   }
2810   Line->Level = OldLineLevel;
2811   if (FormatTok->isNot(tok::l_brace)) {
2812     parseStructuralElement();
2813     addUnwrappedLine();
2814   }
2815 }
2816 
2817 void UnwrappedLineParser::parseCaseLabel() {
2818   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2819 
2820   // FIXME: fix handling of complex expressions here.
2821   do {
2822     nextToken();
2823   } while (!eof() && !FormatTok->is(tok::colon));
2824   parseLabel();
2825 }
2826 
2827 void UnwrappedLineParser::parseSwitch() {
2828   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
2829   nextToken();
2830   if (FormatTok->is(tok::l_paren))
2831     parseParens();
2832 
2833   keepAncestorBraces();
2834 
2835   if (FormatTok->is(tok::l_brace)) {
2836     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2837     parseBlock();
2838     addUnwrappedLine();
2839   } else {
2840     addUnwrappedLine();
2841     ++Line->Level;
2842     parseStructuralElement();
2843     --Line->Level;
2844   }
2845 
2846   if (Style.RemoveBracesLLVM)
2847     NestedTooDeep.pop_back();
2848 }
2849 
2850 // Operators that can follow a C variable.
2851 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
2852   switch (kind) {
2853   case tok::ampamp:
2854   case tok::ampequal:
2855   case tok::arrow:
2856   case tok::caret:
2857   case tok::caretequal:
2858   case tok::comma:
2859   case tok::ellipsis:
2860   case tok::equal:
2861   case tok::equalequal:
2862   case tok::exclaim:
2863   case tok::exclaimequal:
2864   case tok::greater:
2865   case tok::greaterequal:
2866   case tok::greatergreater:
2867   case tok::greatergreaterequal:
2868   case tok::l_paren:
2869   case tok::l_square:
2870   case tok::less:
2871   case tok::lessequal:
2872   case tok::lessless:
2873   case tok::lesslessequal:
2874   case tok::minus:
2875   case tok::minusequal:
2876   case tok::minusminus:
2877   case tok::percent:
2878   case tok::percentequal:
2879   case tok::period:
2880   case tok::pipe:
2881   case tok::pipeequal:
2882   case tok::pipepipe:
2883   case tok::plus:
2884   case tok::plusequal:
2885   case tok::plusplus:
2886   case tok::question:
2887   case tok::r_brace:
2888   case tok::r_paren:
2889   case tok::r_square:
2890   case tok::semi:
2891   case tok::slash:
2892   case tok::slashequal:
2893   case tok::star:
2894   case tok::starequal:
2895     return true;
2896   default:
2897     return false;
2898   }
2899 }
2900 
2901 void UnwrappedLineParser::parseAccessSpecifier() {
2902   FormatToken *AccessSpecifierCandidate = FormatTok;
2903   nextToken();
2904   // Understand Qt's slots.
2905   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2906     nextToken();
2907   // Otherwise, we don't know what it is, and we'd better keep the next token.
2908   if (FormatTok->is(tok::colon)) {
2909     nextToken();
2910     addUnwrappedLine();
2911   } else if (!FormatTok->is(tok::coloncolon) &&
2912              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
2913     // Not a variable name nor namespace name.
2914     addUnwrappedLine();
2915   } else if (AccessSpecifierCandidate) {
2916     // Consider the access specifier to be a C identifier.
2917     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2918   }
2919 }
2920 
2921 /// \brief Parses a concept definition.
2922 /// \pre The current token has to be the concept keyword.
2923 ///
2924 /// Returns if either the concept has been completely parsed, or if it detects
2925 /// that the concept definition is incorrect.
2926 void UnwrappedLineParser::parseConcept() {
2927   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
2928   nextToken();
2929   if (!FormatTok->is(tok::identifier))
2930     return;
2931   nextToken();
2932   if (!FormatTok->is(tok::equal))
2933     return;
2934   nextToken();
2935   parseConstraintExpression();
2936   if (FormatTok->is(tok::semi))
2937     nextToken();
2938   addUnwrappedLine();
2939 }
2940 
2941 /// \brief Parses a requires, decides if it is a clause or an expression.
2942 /// \pre The current token has to be the requires keyword.
2943 /// \returns true if it parsed a clause.
2944 bool clang::format::UnwrappedLineParser::parseRequires() {
2945   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
2946   auto RequiresToken = FormatTok;
2947 
2948   // We try to guess if it is a requires clause, or a requires expression. For
2949   // that we first consume the keyword and check the next token.
2950   nextToken();
2951 
2952   switch (FormatTok->Tok.getKind()) {
2953   case tok::l_brace:
2954     // This can only be an expression, never a clause.
2955     parseRequiresExpression(RequiresToken);
2956     return false;
2957   case tok::l_paren:
2958     // Clauses and expression can start with a paren, it's unclear what we have.
2959     break;
2960   default:
2961     // All other tokens can only be a clause.
2962     parseRequiresClause(RequiresToken);
2963     return true;
2964   }
2965 
2966   // Looking forward we would have to decide if there are function declaration
2967   // like arguments to the requires expression:
2968   // requires (T t) {
2969   // Or there is a constraint expression for the requires clause:
2970   // requires (C<T> && ...
2971 
2972   // But first let's look behind.
2973   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
2974 
2975   if (!PreviousNonComment ||
2976       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
2977     // If there is no token, or an expression left brace, we are a requires
2978     // clause within a requires expression.
2979     parseRequiresClause(RequiresToken);
2980     return true;
2981   }
2982 
2983   switch (PreviousNonComment->Tok.getKind()) {
2984   case tok::greater:
2985   case tok::r_paren:
2986   case tok::kw_noexcept:
2987   case tok::kw_const:
2988     // This is a requires clause.
2989     parseRequiresClause(RequiresToken);
2990     return true;
2991   case tok::amp:
2992   case tok::ampamp: {
2993     // This can be either:
2994     // if (... && requires (T t) ...)
2995     // Or
2996     // void member(...) && requires (C<T> ...
2997     // We check the one token before that for a const:
2998     // void member(...) const && requires (C<T> ...
2999     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3000     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3001       parseRequiresClause(RequiresToken);
3002       return true;
3003     }
3004     break;
3005   }
3006   default:
3007     // It's an expression.
3008     parseRequiresExpression(RequiresToken);
3009     return false;
3010   }
3011 
3012   // Now we look forward and try to check if the paren content is a parameter
3013   // list. The parameters can be cv-qualified and contain references or
3014   // pointers.
3015   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3016   // of stuff: typename, const, *, &, &&, ::, identifiers.
3017 
3018   int NextTokenOffset = 1;
3019   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3020   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3021     ++NextTokenOffset;
3022     NextToken = Tokens->peekNextToken(NextTokenOffset);
3023   };
3024 
3025   bool FoundType = false;
3026   bool LastWasColonColon = false;
3027   int OpenAngles = 0;
3028 
3029   for (; NextTokenOffset < 50; PeekNext()) {
3030     switch (NextToken->Tok.getKind()) {
3031     case tok::kw_volatile:
3032     case tok::kw_const:
3033     case tok::comma:
3034       parseRequiresExpression(RequiresToken);
3035       return false;
3036     case tok::r_paren:
3037     case tok::pipepipe:
3038       parseRequiresClause(RequiresToken);
3039       return true;
3040     case tok::eof:
3041       // Break out of the loop.
3042       NextTokenOffset = 50;
3043       break;
3044     case tok::coloncolon:
3045       LastWasColonColon = true;
3046       break;
3047     case tok::identifier:
3048       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3049         parseRequiresExpression(RequiresToken);
3050         return false;
3051       }
3052       FoundType = true;
3053       LastWasColonColon = false;
3054       break;
3055     case tok::less:
3056       ++OpenAngles;
3057       break;
3058     case tok::greater:
3059       --OpenAngles;
3060       break;
3061     default:
3062       if (NextToken->isSimpleTypeSpecifier()) {
3063         parseRequiresExpression(RequiresToken);
3064         return false;
3065       }
3066       break;
3067     }
3068   }
3069 
3070   // This seems to be a complicated expression, just assume it's a clause.
3071   parseRequiresClause(RequiresToken);
3072   return true;
3073 }
3074 
3075 /// \brief Parses a requires clause.
3076 /// \param RequiresToken The requires keyword token, which starts this clause.
3077 /// \pre We need to be on the next token after the requires keyword.
3078 /// \sa parseRequiresExpression
3079 ///
3080 /// Returns if it either has finished parsing the clause, or it detects, that
3081 /// the clause is incorrect.
3082 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3083   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3084   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3085 
3086   // If there is no previous token, we are within a requires expression,
3087   // otherwise we will always have the template or function declaration in front
3088   // of it.
3089   bool InRequiresExpression =
3090       !RequiresToken->Previous ||
3091       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3092 
3093   RequiresToken->setFinalizedType(InRequiresExpression
3094                                       ? TT_RequiresClauseInARequiresExpression
3095                                       : TT_RequiresClause);
3096 
3097   parseConstraintExpression();
3098 
3099   if (!InRequiresExpression)
3100     FormatTok->Previous->ClosesRequiresClause = true;
3101 }
3102 
3103 /// \brief Parses a requires expression.
3104 /// \param RequiresToken The requires keyword token, which starts this clause.
3105 /// \pre We need to be on the next token after the requires keyword.
3106 /// \sa parseRequiresClause
3107 ///
3108 /// Returns if it either has finished parsing the expression, or it detects,
3109 /// that the expression is incorrect.
3110 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3111   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3112   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3113 
3114   RequiresToken->setFinalizedType(TT_RequiresExpression);
3115 
3116   if (FormatTok->is(tok::l_paren)) {
3117     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3118     parseParens();
3119   }
3120 
3121   if (FormatTok->is(tok::l_brace)) {
3122     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3123     parseChildBlock(/*CanContainBracedList=*/false,
3124                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3125   }
3126 }
3127 
/// \brief Parses a constraint expression.
///
/// This is either the definition of a concept, or the body of a requires
/// clause. It returns, when the parsing is complete, or the expression is
/// incorrect.
///
/// The expression is consumed token by token in a loop that runs until the
/// end of the input; each case either consumes one syntactic piece and
/// continues, or returns because the current token cannot be part of the
/// constraint expression.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;
  do {
    // Take this iteration's permission and reset the flag for the next one;
    // only the cases below that set it again allow a lambda afterwards.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A requires inside a constraint expression is parsed as a requires
      // expression.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      break;

    case tok::l_square:
      // Either a lambda (if one is allowed at this point and it parses), or
      // the constraint expression ends here.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    // These tokens end the constraint expression.
    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      // Conjunction/disjunction of constraints; a lambda may follow.
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Commas and comments carry the current lambda permission over to the
      // next token.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
    case tok::kw_decltype:
      // A lambda is allowed after any of these tokens.
      LambdaNextTimeAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // A named cast must be followed by its '<...>' part, which is parsed
      // like a braced list closed by '>'.
      nextToken();
      if (!FormatTok->is(tok::less))
        return;

      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    case tok::kw_bool:
      // bool is only allowed if it is directly followed by a paren for a cast:
      // concept C = bool(...);
      // and bool is the only type, all other types as cast must be inside a
      // cast to bool and thus are handled by the other cases.
      nextToken();
      if (FormatTok->isNot(tok::l_paren))
        return;
      parseParens();
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less))
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      break;
    }
  } while (!eof());
}
3268 
3269 bool UnwrappedLineParser::parseEnum() {
3270   const FormatToken &InitialToken = *FormatTok;
3271 
3272   // Won't be 'enum' for NS_ENUMs.
3273   if (FormatTok->is(tok::kw_enum))
3274     nextToken();
3275 
3276   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3277   // declarations. An "enum" keyword followed by a colon would be a syntax
3278   // error and thus assume it is just an identifier.
3279   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3280     return false;
3281 
3282   // In protobuf, "enum" can be used as a field name.
3283   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3284     return false;
3285 
3286   // Eat up enum class ...
3287   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3288     nextToken();
3289 
3290   while (FormatTok->Tok.getIdentifierInfo() ||
3291          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3292                             tok::greater, tok::comma, tok::question)) {
3293     nextToken();
3294     // We can have macros or attributes in between 'enum' and the enum name.
3295     if (FormatTok->is(tok::l_paren))
3296       parseParens();
3297     if (FormatTok->is(tok::identifier)) {
3298       nextToken();
3299       // If there are two identifiers in a row, this is likely an elaborate
3300       // return type. In Java, this can be "implements", etc.
3301       if (Style.isCpp() && FormatTok->is(tok::identifier))
3302         return false;
3303     }
3304   }
3305 
3306   // Just a declaration or something is wrong.
3307   if (FormatTok->isNot(tok::l_brace))
3308     return true;
3309   FormatTok->setFinalizedType(TT_EnumLBrace);
3310   FormatTok->setBlockKind(BK_Block);
3311 
3312   if (Style.Language == FormatStyle::LK_Java) {
3313     // Java enums are different.
3314     parseJavaEnumBody();
3315     return true;
3316   }
3317   if (Style.Language == FormatStyle::LK_Proto) {
3318     parseBlock(/*MustBeDeclaration=*/true);
3319     return true;
3320   }
3321 
3322   if (!Style.AllowShortEnumsOnASingleLine &&
3323       ShouldBreakBeforeBrace(Style, InitialToken))
3324     addUnwrappedLine();
3325   // Parse enum body.
3326   nextToken();
3327   if (!Style.AllowShortEnumsOnASingleLine) {
3328     addUnwrappedLine();
3329     Line->Level += 1;
3330   }
3331   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3332                                    /*IsEnum=*/true);
3333   if (!Style.AllowShortEnumsOnASingleLine)
3334     Line->Level -= 1;
3335   if (HasError) {
3336     if (FormatTok->is(tok::semi))
3337       nextToken();
3338     addUnwrappedLine();
3339   }
3340   return true;
3341 
3342   // There is no addUnwrappedLine() here so that we fall through to parsing a
3343   // structural element afterwards. Thus, in "enum A {} n, m;",
3344   // "} n, m;" will end up in one unwrapped line.
3345 }
3346 
3347 bool UnwrappedLineParser::parseStructLike() {
3348   // parseRecord falls through and does not yet add an unwrapped line as a
3349   // record declaration or definition can start a structural element.
3350   parseRecord();
3351   // This does not apply to Java, JavaScript and C#.
3352   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3353       Style.isCSharp()) {
3354     if (FormatTok->is(tok::semi))
3355       nextToken();
3356     addUnwrappedLine();
3357     return true;
3358   }
3359   return false;
3360 }
3361 
3362 namespace {
3363 // A class used to set and restore the Token position when peeking
3364 // ahead in the token source.
3365 class ScopedTokenPosition {
3366   unsigned StoredPosition;
3367   FormatTokenSource *Tokens;
3368 
3369 public:
3370   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3371     assert(Tokens && "Tokens expected to not be null");
3372     StoredPosition = Tokens->getPosition();
3373   }
3374 
3375   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3376 };
3377 } // namespace
3378 
3379 // Look to see if we have [[ by looking ahead, if
3380 // its not then rewind to the original position.
3381 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3382   ScopedTokenPosition AutoPosition(Tokens);
3383   FormatToken *Tok = Tokens->getNextToken();
3384   // We already read the first [ check for the second.
3385   if (!Tok->is(tok::l_square))
3386     return false;
3387   // Double check that the attribute is just something
3388   // fairly simple.
3389   while (Tok->isNot(tok::eof)) {
3390     if (Tok->is(tok::r_square))
3391       break;
3392     Tok = Tokens->getNextToken();
3393   }
3394   if (Tok->is(tok::eof))
3395     return false;
3396   Tok = Tokens->getNextToken();
3397   if (!Tok->is(tok::r_square))
3398     return false;
3399   Tok = Tokens->getNextToken();
3400   if (Tok->is(tok::semi))
3401     return false;
3402   return true;
3403 }
3404 
3405 void UnwrappedLineParser::parseJavaEnumBody() {
3406   // Determine whether the enum is simple, i.e. does not have a semicolon or
3407   // constants with class bodies. Simple enums can be formatted like braced
3408   // lists, contracted to a single line, etc.
3409   unsigned StoredPosition = Tokens->getPosition();
3410   bool IsSimple = true;
3411   FormatToken *Tok = Tokens->getNextToken();
3412   while (!Tok->is(tok::eof)) {
3413     if (Tok->is(tok::r_brace))
3414       break;
3415     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3416       IsSimple = false;
3417       break;
3418     }
3419     // FIXME: This will also mark enums with braces in the arguments to enum
3420     // constants as "not simple". This is probably fine in practice, though.
3421     Tok = Tokens->getNextToken();
3422   }
3423   FormatTok = Tokens->setPosition(StoredPosition);
3424 
3425   if (IsSimple) {
3426     nextToken();
3427     parseBracedList();
3428     addUnwrappedLine();
3429     return;
3430   }
3431 
3432   // Parse the body of a more complex enum.
3433   // First add a line for everything up to the "{".
3434   nextToken();
3435   addUnwrappedLine();
3436   ++Line->Level;
3437 
3438   // Parse the enum constants.
3439   while (FormatTok) {
3440     if (FormatTok->is(tok::l_brace)) {
3441       // Parse the constant's class body.
3442       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3443                  /*MunchSemi=*/false);
3444     } else if (FormatTok->is(tok::l_paren)) {
3445       parseParens();
3446     } else if (FormatTok->is(tok::comma)) {
3447       nextToken();
3448       addUnwrappedLine();
3449     } else if (FormatTok->is(tok::semi)) {
3450       nextToken();
3451       addUnwrappedLine();
3452       break;
3453     } else if (FormatTok->is(tok::r_brace)) {
3454       addUnwrappedLine();
3455       break;
3456     } else {
3457       nextToken();
3458     }
3459   }
3460 
3461   // Parse the class body after the enum's ";" if any.
3462   parseLevel(/*HasOpeningBrace=*/true, /*CanContainBracedList=*/true);
3463   nextToken();
3464   --Line->Level;
3465   addUnwrappedLine();
3466 }
3467 
3468 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3469   const FormatToken &InitialToken = *FormatTok;
3470   nextToken();
3471 
3472   // The actual identifier can be a nested name specifier, and in macros
3473   // it is often token-pasted.
3474   // An [[attribute]] can be before the identifier.
3475   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3476                             tok::kw___attribute, tok::kw___declspec,
3477                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3478          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3479           FormatTok->isOneOf(tok::period, tok::comma))) {
3480     if (Style.isJavaScript() &&
3481         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3482       // JavaScript/TypeScript supports inline object types in
3483       // extends/implements positions:
3484       //     class Foo implements {bar: number} { }
3485       nextToken();
3486       if (FormatTok->is(tok::l_brace)) {
3487         tryToParseBracedList();
3488         continue;
3489       }
3490     }
3491     bool IsNonMacroIdentifier =
3492         FormatTok->is(tok::identifier) &&
3493         FormatTok->TokenText != FormatTok->TokenText.upper();
3494     nextToken();
3495     // We can have macros or attributes in between 'class' and the class name.
3496     if (!IsNonMacroIdentifier) {
3497       if (FormatTok->is(tok::l_paren)) {
3498         parseParens();
3499       } else if (FormatTok->is(TT_AttributeSquare)) {
3500         parseSquare();
3501         // Consume the closing TT_AttributeSquare.
3502         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3503           nextToken();
3504       }
3505     }
3506   }
3507 
3508   // Note that parsing away template declarations here leads to incorrectly
3509   // accepting function declarations as record declarations.
3510   // In general, we cannot solve this problem. Consider:
3511   // class A<int> B() {}
3512   // which can be a function definition or a class definition when B() is a
3513   // macro. If we find enough real-world cases where this is a problem, we
3514   // can parse for the 'template' keyword in the beginning of the statement,
3515   // and thus rule out the record production in case there is no template
3516   // (this would still leave us with an ambiguity between template function
3517   // and class declarations).
3518   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3519     do {
3520       if (FormatTok->is(tok::l_brace)) {
3521         calculateBraceTypes(/*ExpectClassBody=*/true);
3522         if (!tryToParseBracedList())
3523           break;
3524       }
3525       if (FormatTok->is(tok::l_square)) {
3526         FormatToken *Previous = FormatTok->Previous;
3527         if (!Previous ||
3528             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3529           // Don't try parsing a lambda if we had a closing parenthesis before,
3530           // it was probably a pointer to an array: int (*)[].
3531           if (!tryToParseLambda())
3532             continue;
3533         } else {
3534           parseSquare();
3535           continue;
3536         }
3537       }
3538       if (FormatTok->is(tok::semi))
3539         return;
3540       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3541         addUnwrappedLine();
3542         nextToken();
3543         parseCSharpGenericTypeConstraint();
3544         break;
3545       }
3546       nextToken();
3547     } while (!eof());
3548   }
3549 
3550   auto GetBraceType = [](const FormatToken &RecordTok) {
3551     switch (RecordTok.Tok.getKind()) {
3552     case tok::kw_class:
3553       return TT_ClassLBrace;
3554     case tok::kw_struct:
3555       return TT_StructLBrace;
3556     case tok::kw_union:
3557       return TT_UnionLBrace;
3558     default:
3559       // Useful for e.g. interface.
3560       return TT_RecordLBrace;
3561     }
3562   };
3563   if (FormatTok->is(tok::l_brace)) {
3564     FormatTok->setFinalizedType(GetBraceType(InitialToken));
3565     if (ParseAsExpr) {
3566       parseChildBlock();
3567     } else {
3568       if (ShouldBreakBeforeBrace(Style, InitialToken))
3569         addUnwrappedLine();
3570 
3571       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3572       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3573     }
3574   }
3575   // There is no addUnwrappedLine() here so that we fall through to parsing a
3576   // structural element afterwards. Thus, in "class A {} n, m;",
3577   // "} n, m;" will end up in one unwrapped line.
3578 }
3579 
3580 void UnwrappedLineParser::parseObjCMethod() {
3581   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3582          "'(' or identifier expected.");
3583   do {
3584     if (FormatTok->is(tok::semi)) {
3585       nextToken();
3586       addUnwrappedLine();
3587       return;
3588     } else if (FormatTok->is(tok::l_brace)) {
3589       if (Style.BraceWrapping.AfterFunction)
3590         addUnwrappedLine();
3591       parseBlock();
3592       addUnwrappedLine();
3593       return;
3594     } else {
3595       nextToken();
3596     }
3597   } while (!eof());
3598 }
3599 
3600 void UnwrappedLineParser::parseObjCProtocolList() {
3601   assert(FormatTok->is(tok::less) && "'<' expected.");
3602   do {
3603     nextToken();
3604     // Early exit in case someone forgot a close angle.
3605     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3606         FormatTok->isObjCAtKeyword(tok::objc_end))
3607       return;
3608   } while (!eof() && FormatTok->isNot(tok::greater));
3609   nextToken(); // Skip '>'.
3610 }
3611 
3612 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3613   do {
3614     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3615       nextToken();
3616       addUnwrappedLine();
3617       break;
3618     }
3619     if (FormatTok->is(tok::l_brace)) {
3620       parseBlock();
3621       // In ObjC interfaces, nothing should be following the "}".
3622       addUnwrappedLine();
3623     } else if (FormatTok->is(tok::r_brace)) {
3624       // Ignore stray "}". parseStructuralElement doesn't consume them.
3625       nextToken();
3626       addUnwrappedLine();
3627     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3628       nextToken();
3629       parseObjCMethod();
3630     } else {
3631       parseStructuralElement();
3632     }
3633   } while (!eof());
3634 }
3635 
3636 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3637   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3638          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3639   nextToken();
3640   nextToken(); // interface name
3641 
3642   // @interface can be followed by a lightweight generic
3643   // specialization list, then either a base class or a category.
3644   if (FormatTok->is(tok::less))
3645     parseObjCLightweightGenerics();
3646   if (FormatTok->is(tok::colon)) {
3647     nextToken();
3648     nextToken(); // base class name
3649     // The base class can also have lightweight generics applied to it.
3650     if (FormatTok->is(tok::less))
3651       parseObjCLightweightGenerics();
3652   } else if (FormatTok->is(tok::l_paren))
3653     // Skip category, if present.
3654     parseParens();
3655 
3656   if (FormatTok->is(tok::less))
3657     parseObjCProtocolList();
3658 
3659   if (FormatTok->is(tok::l_brace)) {
3660     if (Style.BraceWrapping.AfterObjCDeclaration)
3661       addUnwrappedLine();
3662     parseBlock(/*MustBeDeclaration=*/true);
3663   }
3664 
3665   // With instance variables, this puts '}' on its own line.  Without instance
3666   // variables, this ends the @interface line.
3667   addUnwrappedLine();
3668 
3669   parseObjCUntilAtEnd();
3670 }
3671 
3672 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3673   assert(FormatTok->is(tok::less));
3674   // Unlike protocol lists, generic parameterizations support
3675   // nested angles:
3676   //
3677   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3678   //     NSObject <NSCopying, NSSecureCoding>
3679   //
3680   // so we need to count how many open angles we have left.
3681   unsigned NumOpenAngles = 1;
3682   do {
3683     nextToken();
3684     // Early exit in case someone forgot a close angle.
3685     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3686         FormatTok->isObjCAtKeyword(tok::objc_end))
3687       break;
3688     if (FormatTok->is(tok::less))
3689       ++NumOpenAngles;
3690     else if (FormatTok->is(tok::greater)) {
3691       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3692       --NumOpenAngles;
3693     }
3694   } while (!eof() && NumOpenAngles != 0);
3695   nextToken(); // Skip '>'.
3696 }
3697 
3698 // Returns true for the declaration/definition form of @protocol,
3699 // false for the expression form.
3700 bool UnwrappedLineParser::parseObjCProtocol() {
3701   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3702   nextToken();
3703 
3704   if (FormatTok->is(tok::l_paren))
3705     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3706     return false;
3707 
3708   // The definition/declaration form,
3709   // @protocol Foo
3710   // - (int)someMethod;
3711   // @end
3712 
3713   nextToken(); // protocol name
3714 
3715   if (FormatTok->is(tok::less))
3716     parseObjCProtocolList();
3717 
3718   // Check for protocol declaration.
3719   if (FormatTok->is(tok::semi)) {
3720     nextToken();
3721     addUnwrappedLine();
3722     return true;
3723   }
3724 
3725   addUnwrappedLine();
3726   parseObjCUntilAtEnd();
3727   return true;
3728 }
3729 
3730 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3731   bool IsImport = FormatTok->is(Keywords.kw_import);
3732   assert(IsImport || FormatTok->is(tok::kw_export));
3733   nextToken();
3734 
3735   // Consume the "default" in "export default class/function".
3736   if (FormatTok->is(tok::kw_default))
3737     nextToken();
3738 
3739   // Consume "async function", "function" and "default function", so that these
3740   // get parsed as free-standing JS functions, i.e. do not require a trailing
3741   // semicolon.
3742   if (FormatTok->is(Keywords.kw_async))
3743     nextToken();
3744   if (FormatTok->is(Keywords.kw_function)) {
3745     nextToken();
3746     return;
3747   }
3748 
3749   // For imports, `export *`, `export {...}`, consume the rest of the line up
3750   // to the terminating `;`. For everything else, just return and continue
3751   // parsing the structural element, i.e. the declaration or expression for
3752   // `export default`.
3753   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3754       !FormatTok->isStringLiteral())
3755     return;
3756 
3757   while (!eof()) {
3758     if (FormatTok->is(tok::semi))
3759       return;
3760     if (Line->Tokens.empty()) {
3761       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3762       // import statement should terminate.
3763       return;
3764     }
3765     if (FormatTok->is(tok::l_brace)) {
3766       FormatTok->setBlockKind(BK_Block);
3767       nextToken();
3768       parseBracedList();
3769     } else {
3770       nextToken();
3771     }
3772   }
3773 }
3774 
3775 void UnwrappedLineParser::parseStatementMacro() {
3776   nextToken();
3777   if (FormatTok->is(tok::l_paren))
3778     parseParens();
3779   if (FormatTok->is(tok::semi))
3780     nextToken();
3781   addUnwrappedLine();
3782 }
3783 
3784 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3785                                                  StringRef Prefix = "") {
3786   llvm::dbgs() << Prefix << "Line(" << Line.Level
3787                << ", FSC=" << Line.FirstStartColumn << ")"
3788                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3789   for (const auto &Node : Line.Tokens) {
3790     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3791                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3792                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3793   }
3794   for (const auto &Node : Line.Tokens)
3795     for (const auto &ChildNode : Node.Children)
3796       printDebugInfo(ChildNode, "\nChild: ");
3797 
3798   llvm::dbgs() << "\n";
3799 }
3800 
3801 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3802   if (Line->Tokens.empty())
3803     return;
3804   LLVM_DEBUG({
3805     if (CurrentLines == &Lines)
3806       printDebugInfo(*Line);
3807   });
3808 
3809   // If this line closes a block when in Whitesmiths mode, remember that
3810   // information so that the level can be decreased after the line is added.
3811   // This has to happen after the addition of the line since the line itself
3812   // needs to be indented.
3813   bool ClosesWhitesmithsBlock =
3814       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3815       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3816 
3817   CurrentLines->push_back(std::move(*Line));
3818   Line->Tokens.clear();
3819   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3820   Line->FirstStartColumn = 0;
3821 
3822   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3823     --Line->Level;
3824   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3825     CurrentLines->append(
3826         std::make_move_iterator(PreprocessorDirectives.begin()),
3827         std::make_move_iterator(PreprocessorDirectives.end()));
3828     PreprocessorDirectives.clear();
3829   }
3830   // Disconnect the current token from the last token on the previous line.
3831   FormatTok->Previous = nullptr;
3832 }
3833 
3834 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
3835 
3836 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3837   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3838          FormatTok.NewlinesBefore > 0;
3839 }
3840 
3841 // Checks if \p FormatTok is a line comment that continues the line comment
3842 // section on \p Line.
3843 static bool
3844 continuesLineCommentSection(const FormatToken &FormatTok,
3845                             const UnwrappedLine &Line,
3846                             const llvm::Regex &CommentPragmasRegex) {
3847   if (Line.Tokens.empty())
3848     return false;
3849 
3850   StringRef IndentContent = FormatTok.TokenText;
3851   if (FormatTok.TokenText.startswith("//") ||
3852       FormatTok.TokenText.startswith("/*"))
3853     IndentContent = FormatTok.TokenText.substr(2);
3854   if (CommentPragmasRegex.match(IndentContent))
3855     return false;
3856 
3857   // If Line starts with a line comment, then FormatTok continues the comment
3858   // section if its original column is greater or equal to the original start
3859   // column of the line.
3860   //
3861   // Define the min column token of a line as follows: if a line ends in '{' or
3862   // contains a '{' followed by a line comment, then the min column token is
3863   // that '{'. Otherwise, the min column token of the line is the first token of
3864   // the line.
3865   //
3866   // If Line starts with a token other than a line comment, then FormatTok
3867   // continues the comment section if its original column is greater than the
3868   // original start column of the min column token of the line.
3869   //
3870   // For example, the second line comment continues the first in these cases:
3871   //
3872   // // first line
3873   // // second line
3874   //
3875   // and:
3876   //
3877   // // first line
3878   //  // second line
3879   //
3880   // and:
3881   //
3882   // int i; // first line
3883   //  // second line
3884   //
3885   // and:
3886   //
3887   // do { // first line
3888   //      // second line
3889   //   int i;
3890   // } while (true);
3891   //
3892   // and:
3893   //
3894   // enum {
3895   //   a, // first line
3896   //    // second line
3897   //   b
3898   // };
3899   //
3900   // The second line comment doesn't continue the first in these cases:
3901   //
3902   //   // first line
3903   //  // second line
3904   //
3905   // and:
3906   //
3907   // int i; // first line
3908   // // second line
3909   //
3910   // and:
3911   //
3912   // do { // first line
3913   //   // second line
3914   //   int i;
3915   // } while (true);
3916   //
3917   // and:
3918   //
3919   // enum {
3920   //   a, // first line
3921   //   // second line
3922   // };
3923   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3924 
3925   // Scan for '{//'. If found, use the column of '{' as a min column for line
3926   // comment section continuation.
3927   const FormatToken *PreviousToken = nullptr;
3928   for (const UnwrappedLineNode &Node : Line.Tokens) {
3929     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3930         isLineComment(*Node.Tok)) {
3931       MinColumnToken = PreviousToken;
3932       break;
3933     }
3934     PreviousToken = Node.Tok;
3935 
3936     // Grab the last newline preceding a token in this unwrapped line.
3937     if (Node.Tok->NewlinesBefore > 0)
3938       MinColumnToken = Node.Tok;
3939   }
3940   if (PreviousToken && PreviousToken->is(tok::l_brace))
3941     MinColumnToken = PreviousToken;
3942 
3943   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3944                               MinColumnToken);
3945 }
3946 
3947 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3948   bool JustComments = Line->Tokens.empty();
3949   for (FormatToken *Tok : CommentsBeforeNextToken) {
3950     // Line comments that belong to the same line comment section are put on the
3951     // same line since later we might want to reflow content between them.
3952     // Additional fine-grained breaking of line comment sections is controlled
3953     // by the class BreakableLineCommentSection in case it is desirable to keep
3954     // several line comment sections in the same unwrapped line.
3955     //
3956     // FIXME: Consider putting separate line comment sections as children to the
3957     // unwrapped line instead.
3958     Tok->ContinuesLineCommentSection =
3959         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3960     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3961       addUnwrappedLine();
3962     pushToken(Tok);
3963   }
3964   if (NewlineBeforeNext && JustComments)
3965     addUnwrappedLine();
3966   CommentsBeforeNextToken.clear();
3967 }
3968 
3969 void UnwrappedLineParser::nextToken(int LevelDifference) {
3970   if (eof())
3971     return;
3972   flushComments(isOnNewLine(*FormatTok));
3973   pushToken(FormatTok);
3974   FormatToken *Previous = FormatTok;
3975   if (!Style.isJavaScript())
3976     readToken(LevelDifference);
3977   else
3978     readTokenWithJavaScriptASI();
3979   FormatTok->Previous = Previous;
3980 }
3981 
3982 void UnwrappedLineParser::distributeComments(
3983     const SmallVectorImpl<FormatToken *> &Comments,
3984     const FormatToken *NextTok) {
3985   // Whether or not a line comment token continues a line is controlled by
3986   // the method continuesLineCommentSection, with the following caveat:
3987   //
3988   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3989   // that each comment line from the trail is aligned with the next token, if
3990   // the next token exists. If a trail exists, the beginning of the maximal
3991   // trail is marked as a start of a new comment section.
3992   //
3993   // For example in this code:
3994   //
3995   // int a; // line about a
3996   //   // line 1 about b
3997   //   // line 2 about b
3998   //   int b;
3999   //
4000   // the two lines about b form a maximal trail, so there are two sections, the
4001   // first one consisting of the single comment "// line about a" and the
4002   // second one consisting of the next two comments.
4003   if (Comments.empty())
4004     return;
4005   bool ShouldPushCommentsInCurrentLine = true;
4006   bool HasTrailAlignedWithNextToken = false;
4007   unsigned StartOfTrailAlignedWithNextToken = 0;
4008   if (NextTok) {
4009     // We are skipping the first element intentionally.
4010     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4011       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4012         HasTrailAlignedWithNextToken = true;
4013         StartOfTrailAlignedWithNextToken = i;
4014       }
4015     }
4016   }
4017   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4018     FormatToken *FormatTok = Comments[i];
4019     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4020       FormatTok->ContinuesLineCommentSection = false;
4021     } else {
4022       FormatTok->ContinuesLineCommentSection =
4023           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4024     }
4025     if (!FormatTok->ContinuesLineCommentSection &&
4026         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
4027       ShouldPushCommentsInCurrentLine = false;
4028     if (ShouldPushCommentsInCurrentLine)
4029       pushToken(FormatTok);
4030     else
4031       CommentsBeforeNextToken.push_back(FormatTok);
4032   }
4033 }
4034 
// Advances FormatTok to the next token from the token source that is not a
// comment, or to the eof token.
//
// On the way, conflict marker tokens are turned into conditional compilation
// events, preprocessor directives that start a line are parsed in place, and
// tokens inside unreachable preprocessor branches are skipped. Comments read
// before the resulting token are collected and handed to distributeComments().
//
// \p LevelDifference is added to the line level used for the lines of
// preprocessor directives encountered here.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments read since the last non-comment token or directive.
  SmallVector<FormatToken *, 1> Comments;
  bool PreviousWasComment = false;
  bool FirstNonCommentOnLine = false;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // Conflict markers are not kept as tokens; they only drive the conditional
    // compilation state. The token following a marker must start a new line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart)
        conditionalCompilationStart(/*Unreachable=*/false);
      else if (FormatTok->getType() == TT_ConflictAlternative)
        conditionalCompilationAlternative();
      else if (FormatTok->getType() == TT_ConflictEnd)
        conditionalCompilationEnd();
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // A token is the first non-comment token on its line if it is first on the
    // line itself, or if it directly follows a comment and the
    // "first on line" state carried over from that comment is set.
    auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
                                      const FormatToken &Tok,
                                      bool PreviousWasComment) {
      auto IsFirstOnLine = [](const FormatToken &Tok) {
        return Tok.HasUnescapedNewline || Tok.IsFirst;
      };

      // Consider preprocessor directives preceded by block comments as first
      // on line.
      if (PreviousWasComment)
        return FirstNonCommentOnLine || IsFirstOnLine(Tok);
      return IsFirstOnLine(Tok);
    };

    // The state must be updated before PreviousWasComment is overwritten, as
    // it depends on whether the token read before this one was a comment.
    FirstNonCommentOnLine = IsFirstNonCommentOnLine(
        FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    PreviousWasComment = FormatTok->is(tok::comment);

    // Handle consecutive preprocessor directives. This is a loop because the
    // token current after parsePPDirective() may be the '#' of another one.
    while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
           FirstNonCommentOnLine) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      // NOTE(review): ScopedLineState presumably saves the current line state
      // here and restores it at the end of this loop iteration -- confirm
      // against its definition.
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
      // Re-evaluate the state for the token that is current after the
      // directive.
      PreviousWasComment = FormatTok->is(tok::comment);
      FirstNonCommentOnLine = IsFirstNonCommentOnLine(
          FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    }

    // Drop tokens in unreachable preprocessor branches; 'continue' jumps to
    // the loop condition, so reading stops if the dropped token was eof.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective)
      continue;

    // A non-comment token ends the search; the comments collected so far are
    // distributed relative to it.
    if (!FormatTok->is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // Only reached when the loop ended on an eof token that was skipped above;
  // the remaining comments have no following token.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
4114 
4115 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4116   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4117   if (MustBreakBeforeNextToken) {
4118     Line->Tokens.back().Tok->MustBreakBefore = true;
4119     MustBreakBeforeNextToken = false;
4120   }
4121 }
4122 
4123 } // end namespace format
4124 } // end namespace clang
4125