1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 #include <utility>
23 
24 #define DEBUG_TYPE "format-parser"
25 
26 namespace clang {
27 namespace format {
28 
29 class FormatTokenSource {
30 public:
31   virtual ~FormatTokenSource() {}
32 
33   // Returns the next token in the token stream.
34   virtual FormatToken *getNextToken() = 0;
35 
36   // Returns the token preceding the token returned by the last call to
37   // getNextToken() in the token stream, or nullptr if no such token exists.
38   virtual FormatToken *getPreviousToken() = 0;
39 
40   // Returns the token that would be returned by the next call to
41   // getNextToken().
42   virtual FormatToken *peekNextToken() = 0;
43 
44   // Returns the token that would be returned after the next N calls to
45   // getNextToken(). N needs to be greater than zero, and small enough that
46   // there are still tokens. Check for tok::eof with N-1 before calling it with
47   // N.
48   virtual FormatToken *peekNextToken(int N) = 0;
49 
50   // Returns whether we are at the end of the file.
51   // This can be different from whether getNextToken() returned an eof token
52   // when the FormatTokenSource is a view on a part of the token stream.
53   virtual bool isEOF() = 0;
54 
55   // Gets the current position in the token stream, to be used by setPosition().
56   virtual unsigned getPosition() = 0;
57 
58   // Resets the token stream to the state it was in when getPosition() returned
59   // Position, and return the token at that position in the stream.
60   virtual FormatToken *setPosition(unsigned Position) = 0;
61 };
62 
63 namespace {
64 
65 class ScopedDeclarationState {
66 public:
67   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
68                          bool MustBeDeclaration)
69       : Line(Line), Stack(Stack) {
70     Line.MustBeDeclaration = MustBeDeclaration;
71     Stack.push_back(MustBeDeclaration);
72   }
73   ~ScopedDeclarationState() {
74     Stack.pop_back();
75     if (!Stack.empty())
76       Line.MustBeDeclaration = Stack.back();
77     else
78       Line.MustBeDeclaration = true;
79   }
80 
81 private:
82   UnwrappedLine &Line;
83   llvm::BitVector &Stack;
84 };
85 
86 static bool isLineComment(const FormatToken &FormatTok) {
87   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
88 }
89 
90 // Checks if \p FormatTok is a line comment that continues the line comment
91 // \p Previous. The original column of \p MinColumnToken is used to determine
92 // whether \p FormatTok is indented enough to the right to continue \p Previous.
93 static bool continuesLineComment(const FormatToken &FormatTok,
94                                  const FormatToken *Previous,
95                                  const FormatToken *MinColumnToken) {
96   if (!Previous || !MinColumnToken)
97     return false;
98   unsigned MinContinueColumn =
99       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
100   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
101          isLineComment(*Previous) &&
102          FormatTok.OriginalColumn >= MinContinueColumn;
103 }
104 
105 class ScopedMacroState : public FormatTokenSource {
106 public:
107   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
108                    FormatToken *&ResetToken)
109       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
110         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
111         Token(nullptr), PreviousToken(nullptr) {
112     FakeEOF.Tok.startToken();
113     FakeEOF.Tok.setKind(tok::eof);
114     TokenSource = this;
115     Line.Level = 0;
116     Line.InPPDirective = true;
117   }
118 
119   ~ScopedMacroState() override {
120     TokenSource = PreviousTokenSource;
121     ResetToken = Token;
122     Line.InPPDirective = false;
123     Line.Level = PreviousLineLevel;
124   }
125 
126   FormatToken *getNextToken() override {
127     // The \c UnwrappedLineParser guards against this by never calling
128     // \c getNextToken() after it has encountered the first eof token.
129     assert(!eof());
130     PreviousToken = Token;
131     Token = PreviousTokenSource->getNextToken();
132     if (eof())
133       return &FakeEOF;
134     return Token;
135   }
136 
137   FormatToken *getPreviousToken() override {
138     return PreviousTokenSource->getPreviousToken();
139   }
140 
141   FormatToken *peekNextToken() override {
142     if (eof())
143       return &FakeEOF;
144     return PreviousTokenSource->peekNextToken();
145   }
146 
147   FormatToken *peekNextToken(int N) override {
148     assert(N > 0);
149     if (eof())
150       return &FakeEOF;
151     return PreviousTokenSource->peekNextToken(N);
152   }
153 
154   bool isEOF() override { return PreviousTokenSource->isEOF(); }
155 
156   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
157 
158   FormatToken *setPosition(unsigned Position) override {
159     PreviousToken = nullptr;
160     Token = PreviousTokenSource->setPosition(Position);
161     return Token;
162   }
163 
164 private:
165   bool eof() {
166     return Token && Token->HasUnescapedNewline &&
167            !continuesLineComment(*Token, PreviousToken,
168                                  /*MinColumnToken=*/PreviousToken);
169   }
170 
171   FormatToken FakeEOF;
172   UnwrappedLine &Line;
173   FormatTokenSource *&TokenSource;
174   FormatToken *&ResetToken;
175   unsigned PreviousLineLevel;
176   FormatTokenSource *PreviousTokenSource;
177 
178   FormatToken *Token;
179   FormatToken *PreviousToken;
180 };
181 
182 } // end anonymous namespace
183 
184 class ScopedLineState {
185 public:
186   ScopedLineState(UnwrappedLineParser &Parser,
187                   bool SwitchToPreprocessorLines = false)
188       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
189     if (SwitchToPreprocessorLines)
190       Parser.CurrentLines = &Parser.PreprocessorDirectives;
191     else if (!Parser.Line->Tokens.empty())
192       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
193     PreBlockLine = std::move(Parser.Line);
194     Parser.Line = std::make_unique<UnwrappedLine>();
195     Parser.Line->Level = PreBlockLine->Level;
196     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
197   }
198 
199   ~ScopedLineState() {
200     if (!Parser.Line->Tokens.empty())
201       Parser.addUnwrappedLine();
202     assert(Parser.Line->Tokens.empty());
203     Parser.Line = std::move(PreBlockLine);
204     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
205       Parser.MustBreakBeforeNextToken = true;
206     Parser.CurrentLines = OriginalLines;
207   }
208 
209 private:
210   UnwrappedLineParser &Parser;
211 
212   std::unique_ptr<UnwrappedLine> PreBlockLine;
213   SmallVectorImpl<UnwrappedLine> *OriginalLines;
214 };
215 
216 class CompoundStatementIndenter {
217 public:
218   CompoundStatementIndenter(UnwrappedLineParser *Parser,
219                             const FormatStyle &Style, unsigned &LineLevel)
220       : CompoundStatementIndenter(Parser, LineLevel,
221                                   Style.BraceWrapping.AfterControlStatement,
222                                   Style.BraceWrapping.IndentBraces) {}
223   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
224                             bool WrapBrace, bool IndentBrace)
225       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
226     if (WrapBrace)
227       Parser->addUnwrappedLine();
228     if (IndentBrace)
229       ++LineLevel;
230   }
231   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
232 
233 private:
234   unsigned &LineLevel;
235   unsigned OldLineLevel;
236 };
237 
238 namespace {
239 
240 class IndexedTokenSource : public FormatTokenSource {
241 public:
242   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
243       : Tokens(Tokens), Position(-1) {}
244 
245   FormatToken *getNextToken() override {
246     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
247       LLVM_DEBUG({
248         llvm::dbgs() << "Next ";
249         dbgToken(Position);
250       });
251       return Tokens[Position];
252     }
253     ++Position;
254     LLVM_DEBUG({
255       llvm::dbgs() << "Next ";
256       dbgToken(Position);
257     });
258     return Tokens[Position];
259   }
260 
261   FormatToken *getPreviousToken() override {
262     return Position > 0 ? Tokens[Position - 1] : nullptr;
263   }
264 
265   FormatToken *peekNextToken() override {
266     int Next = Position + 1;
267     LLVM_DEBUG({
268       llvm::dbgs() << "Peeking ";
269       dbgToken(Next);
270     });
271     return Tokens[Next];
272   }
273 
274   FormatToken *peekNextToken(int N) override {
275     assert(N > 0);
276     int Next = Position + N;
277     LLVM_DEBUG({
278       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
279       dbgToken(Next);
280     });
281     return Tokens[Next];
282   }
283 
284   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
285 
286   unsigned getPosition() override {
287     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
288     assert(Position >= 0);
289     return Position;
290   }
291 
292   FormatToken *setPosition(unsigned P) override {
293     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
294     Position = P;
295     return Tokens[Position];
296   }
297 
298   void reset() { Position = -1; }
299 
300 private:
301   void dbgToken(int Position, llvm::StringRef Indent = "") {
302     FormatToken *Tok = Tokens[Position];
303     llvm::dbgs() << Indent << "[" << Position
304                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
305                  << ", Macro: " << !!Tok->MacroCtx << "\n";
306   }
307 
308   ArrayRef<FormatToken *> Tokens;
309   int Position;
310 };
311 
312 } // end anonymous namespace
313 
314 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
315                                          const AdditionalKeywords &Keywords,
316                                          unsigned FirstStartColumn,
317                                          ArrayRef<FormatToken *> Tokens,
318                                          UnwrappedLineConsumer &Callback)
319     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
320       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
321       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
322       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
323       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
324                        ? IG_Rejected
325                        : IG_Inited),
326       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
327 
328 void UnwrappedLineParser::reset() {
329   PPBranchLevel = -1;
330   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
331                      ? IG_Rejected
332                      : IG_Inited;
333   IncludeGuardToken = nullptr;
334   Line.reset(new UnwrappedLine);
335   CommentsBeforeNextToken.clear();
336   FormatTok = nullptr;
337   MustBreakBeforeNextToken = false;
338   PreprocessorDirectives.clear();
339   CurrentLines = &Lines;
340   DeclarationScopeStack.clear();
341   NestedTooDeep.clear();
342   PPStack.clear();
343   Line->FirstStartColumn = FirstStartColumn;
344 }
345 
346 void UnwrappedLineParser::parse() {
347   IndexedTokenSource TokenSource(AllTokens);
348   Line->FirstStartColumn = FirstStartColumn;
349   do {
350     LLVM_DEBUG(llvm::dbgs() << "----\n");
351     reset();
352     Tokens = &TokenSource;
353     TokenSource.reset();
354 
355     readToken();
356     parseFile();
357 
358     // If we found an include guard then all preprocessor directives (other than
359     // the guard) are over-indented by one.
360     if (IncludeGuard == IG_Found)
361       for (auto &Line : Lines)
362         if (Line.InPPDirective && Line.Level > 0)
363           --Line.Level;
364 
365     // Create line with eof token.
366     pushToken(FormatTok);
367     addUnwrappedLine();
368 
369     for (const UnwrappedLine &Line : Lines)
370       Callback.consumeUnwrappedLine(Line);
371 
372     Callback.finishRun();
373     Lines.clear();
374     while (!PPLevelBranchIndex.empty() &&
375            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
376       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
377       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
378     }
379     if (!PPLevelBranchIndex.empty()) {
380       ++PPLevelBranchIndex.back();
381       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
382       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
383     }
384   } while (!PPLevelBranchIndex.empty());
385 }
386 
387 void UnwrappedLineParser::parseFile() {
388   // The top-level context in a file always has declarations, except for pre-
389   // processor directives and JavaScript files.
390   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
391   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
392                                           MustBeDeclaration);
393   if (Style.Language == FormatStyle::LK_TextProto)
394     parseBracedList();
395   else
396     parseLevel(/*HasOpeningBrace=*/false, /*CanContainBracedList=*/true);
397   // Make sure to format the remaining tokens.
398   //
399   // LK_TextProto is special since its top-level is parsed as the body of a
400   // braced list, which does not necessarily have natural line separators such
401   // as a semicolon. Comments after the last entry that have been determined to
402   // not belong to that line, as in:
403   //   key: value
404   //   // endfile comment
405   // do not have a chance to be put on a line of their own until this point.
406   // Here we add this newline before end-of-file comments.
407   if (Style.Language == FormatStyle::LK_TextProto &&
408       !CommentsBeforeNextToken.empty())
409     addUnwrappedLine();
410   flushComments(true);
411   addUnwrappedLine();
412 }
413 
414 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
415   do {
416     switch (FormatTok->Tok.getKind()) {
417     case tok::l_brace:
418       return;
419     default:
420       if (FormatTok->is(Keywords.kw_where)) {
421         addUnwrappedLine();
422         nextToken();
423         parseCSharpGenericTypeConstraint();
424         break;
425       }
426       nextToken();
427       break;
428     }
429   } while (!eof());
430 }
431 
432 void UnwrappedLineParser::parseCSharpAttribute() {
433   int UnpairedSquareBrackets = 1;
434   do {
435     switch (FormatTok->Tok.getKind()) {
436     case tok::r_square:
437       nextToken();
438       --UnpairedSquareBrackets;
439       if (UnpairedSquareBrackets == 0) {
440         addUnwrappedLine();
441         return;
442       }
443       break;
444     case tok::l_square:
445       ++UnpairedSquareBrackets;
446       nextToken();
447       break;
448     default:
449       nextToken();
450       break;
451     }
452   } while (!eof());
453 }
454 
455 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
456   if (!Lines.empty() && Lines.back().InPPDirective)
457     return true;
458 
459   const FormatToken *Previous = Tokens->getPreviousToken();
460   return Previous && Previous->is(tok::comment) &&
461          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
462 }
463 /// \brief Parses a level, that is ???.
464 /// \param HasOpeningBrace If that level is started by an opening brace.
465 /// \param CanContainBracedList If the content can contain (at any level) a
466 /// braced list.
467 /// \param NextLBracesType The type for left brace found in this level.
468 /// \returns true if a simple block, or false otherwise. (A simple block has a
469 /// single statement.)
470 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace,
471                                      bool CanContainBracedList,
472                                      IfStmtKind *IfKind,
473                                      TokenType NextLBracesType) {
474   auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
475                                   ? TT_BracedListLBrace
476                                   : TT_Unknown;
477   const bool IsPrecededByCommentOrPPDirective =
478       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
479   bool HasLabel = false;
480   unsigned StatementCount = 0;
481   bool SwitchLabelEncountered = false;
482   do {
483     if (FormatTok->getType() == TT_AttributeMacro) {
484       nextToken();
485       continue;
486     }
487     tok::TokenKind kind = FormatTok->Tok.getKind();
488     if (FormatTok->getType() == TT_MacroBlockBegin)
489       kind = tok::l_brace;
490     else if (FormatTok->getType() == TT_MacroBlockEnd)
491       kind = tok::r_brace;
492 
493     auto ParseDefault = [this, HasOpeningBrace, IfKind, NextLevelLBracesType,
494                          &HasLabel, &StatementCount] {
495       parseStructuralElement(IfKind, !HasOpeningBrace, NextLevelLBracesType,
496                              HasLabel ? nullptr : &HasLabel);
497       ++StatementCount;
498       assert(StatementCount > 0 && "StatementCount overflow!");
499     };
500 
501     switch (kind) {
502     case tok::comment:
503       nextToken();
504       addUnwrappedLine();
505       break;
506     case tok::l_brace:
507       if (NextLBracesType != TT_Unknown)
508         FormatTok->setFinalizedType(NextLBracesType);
509       else if (FormatTok->Previous &&
510                FormatTok->Previous->ClosesRequiresClause) {
511         // We need the 'default' case here to correctly parse a function
512         // l_brace.
513         ParseDefault();
514         continue;
515       }
516       if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
517           tryToParseBracedList())
518         continue;
519       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
520                  /*MunchSemi=*/true, /*UnindentWhitesmithBraces=*/false,
521                  CanContainBracedList,
522                  /*NextLBracesType=*/NextLBracesType);
523       ++StatementCount;
524       assert(StatementCount > 0 && "StatementCount overflow!");
525       addUnwrappedLine();
526       break;
527     case tok::r_brace:
528       if (HasOpeningBrace) {
529         if (!Style.RemoveBracesLLVM)
530           return false;
531         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
532             IsPrecededByCommentOrPPDirective ||
533             precededByCommentOrPPDirective())
534           return false;
535         const FormatToken *Next = Tokens->peekNextToken();
536         return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
537       }
538       nextToken();
539       addUnwrappedLine();
540       break;
541     case tok::kw_default: {
542       unsigned StoredPosition = Tokens->getPosition();
543       FormatToken *Next;
544       do {
545         Next = Tokens->getNextToken();
546         assert(Next);
547       } while (Next->is(tok::comment));
548       FormatTok = Tokens->setPosition(StoredPosition);
549       if (Next->isNot(tok::colon)) {
550         // default not followed by ':' is not a case label; treat it like
551         // an identifier.
552         parseStructuralElement();
553         break;
554       }
555       // Else, if it is 'default:', fall through to the case handling.
556       LLVM_FALLTHROUGH;
557     }
558     case tok::kw_case:
559       if (Style.isJavaScript() && Line->MustBeDeclaration) {
560         // A 'case: string' style field declaration.
561         parseStructuralElement();
562         break;
563       }
564       if (!SwitchLabelEncountered &&
565           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
566         ++Line->Level;
567       SwitchLabelEncountered = true;
568       parseStructuralElement();
569       break;
570     case tok::l_square:
571       if (Style.isCSharp()) {
572         nextToken();
573         parseCSharpAttribute();
574         break;
575       }
576       if (handleCppAttributes())
577         break;
578       LLVM_FALLTHROUGH;
579     default:
580       ParseDefault();
581       break;
582     }
583   } while (!eof());
584   return false;
585 }
586 
587 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
588   // We'll parse forward through the tokens until we hit
589   // a closing brace or eof - note that getNextToken() will
590   // parse macros, so this will magically work inside macro
591   // definitions, too.
592   unsigned StoredPosition = Tokens->getPosition();
593   FormatToken *Tok = FormatTok;
594   const FormatToken *PrevTok = Tok->Previous;
595   // Keep a stack of positions of lbrace tokens. We will
596   // update information about whether an lbrace starts a
597   // braced init list or a different block during the loop.
598   SmallVector<FormatToken *, 8> LBraceStack;
599   assert(Tok->is(tok::l_brace));
600   do {
601     // Get next non-comment token.
602     FormatToken *NextTok;
603     unsigned ReadTokens = 0;
604     do {
605       NextTok = Tokens->getNextToken();
606       ++ReadTokens;
607     } while (NextTok->is(tok::comment));
608 
609     switch (Tok->Tok.getKind()) {
610     case tok::l_brace:
611       if (Style.isJavaScript() && PrevTok) {
612         if (PrevTok->isOneOf(tok::colon, tok::less))
613           // A ':' indicates this code is in a type, or a braced list
614           // following a label in an object literal ({a: {b: 1}}).
615           // A '<' could be an object used in a comparison, but that is nonsense
616           // code (can never return true), so more likely it is a generic type
617           // argument (`X<{a: string; b: number}>`).
618           // The code below could be confused by semicolons between the
619           // individual members in a type member list, which would normally
620           // trigger BK_Block. In both cases, this must be parsed as an inline
621           // braced init.
622           Tok->setBlockKind(BK_BracedInit);
623         else if (PrevTok->is(tok::r_paren))
624           // `) { }` can only occur in function or method declarations in JS.
625           Tok->setBlockKind(BK_Block);
626       } else {
627         Tok->setBlockKind(BK_Unknown);
628       }
629       LBraceStack.push_back(Tok);
630       break;
631     case tok::r_brace:
632       if (LBraceStack.empty())
633         break;
634       if (LBraceStack.back()->is(BK_Unknown)) {
635         bool ProbablyBracedList = false;
636         if (Style.Language == FormatStyle::LK_Proto) {
637           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
638         } else {
639           // Skip NextTok over preprocessor lines, otherwise we may not
640           // properly diagnose the block as a braced intializer
641           // if the comma separator appears after the pp directive.
642           while (NextTok->is(tok::hash)) {
643             ScopedMacroState MacroState(*Line, Tokens, NextTok);
644             do {
645               NextTok = Tokens->getNextToken();
646               ++ReadTokens;
647             } while (NextTok->isNot(tok::eof));
648           }
649 
650           // Using OriginalColumn to distinguish between ObjC methods and
651           // binary operators is a bit hacky.
652           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
653                                   NextTok->OriginalColumn == 0;
654 
655           // Try to detect a braced list. Note that regardless how we mark inner
656           // braces here, we will overwrite the BlockKind later if we parse a
657           // braced list (where all blocks inside are by default braced lists),
658           // or when we explicitly detect blocks (for example while parsing
659           // lambdas).
660 
661           // If we already marked the opening brace as braced list, the closing
662           // must also be part of it.
663           ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);
664 
665           ProbablyBracedList = ProbablyBracedList ||
666                                (Style.isJavaScript() &&
667                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
668                                                  Keywords.kw_as));
669           ProbablyBracedList = ProbablyBracedList ||
670                                (Style.isCpp() && NextTok->is(tok::l_paren));
671 
672           // If there is a comma, semicolon or right paren after the closing
673           // brace, we assume this is a braced initializer list.
674           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
675           // braced list in JS.
676           ProbablyBracedList =
677               ProbablyBracedList ||
678               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
679                                tok::r_paren, tok::r_square, tok::l_brace,
680                                tok::ellipsis);
681 
682           ProbablyBracedList =
683               ProbablyBracedList ||
684               (NextTok->is(tok::identifier) &&
685                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
686 
687           ProbablyBracedList = ProbablyBracedList ||
688                                (NextTok->is(tok::semi) &&
689                                 (!ExpectClassBody || LBraceStack.size() != 1));
690 
691           ProbablyBracedList =
692               ProbablyBracedList ||
693               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
694 
695           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
696             // We can have an array subscript after a braced init
697             // list, but C++11 attributes are expected after blocks.
698             NextTok = Tokens->getNextToken();
699             ++ReadTokens;
700             ProbablyBracedList = NextTok->isNot(tok::l_square);
701           }
702         }
703         if (ProbablyBracedList) {
704           Tok->setBlockKind(BK_BracedInit);
705           LBraceStack.back()->setBlockKind(BK_BracedInit);
706         } else {
707           Tok->setBlockKind(BK_Block);
708           LBraceStack.back()->setBlockKind(BK_Block);
709         }
710       }
711       LBraceStack.pop_back();
712       break;
713     case tok::identifier:
714       if (!Tok->is(TT_StatementMacro))
715         break;
716       LLVM_FALLTHROUGH;
717     case tok::at:
718     case tok::semi:
719     case tok::kw_if:
720     case tok::kw_while:
721     case tok::kw_for:
722     case tok::kw_switch:
723     case tok::kw_try:
724     case tok::kw___try:
725       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
726         LBraceStack.back()->setBlockKind(BK_Block);
727       break;
728     default:
729       break;
730     }
731     PrevTok = Tok;
732     Tok = NextTok;
733   } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
734 
735   // Assume other blocks for all unclosed opening braces.
736   for (FormatToken *LBrace : LBraceStack)
737     if (LBrace->is(BK_Unknown))
738       LBrace->setBlockKind(BK_Block);
739 
740   FormatTok = Tokens->setPosition(StoredPosition);
741 }
742 
// Mixes the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
748 
749 size_t UnwrappedLineParser::computePPHash() const {
750   size_t h = 0;
751   for (const auto &i : PPStack) {
752     hash_combine(h, size_t(i.Kind));
753     hash_combine(h, i.Line);
754   }
755   return h;
756 }
757 
758 UnwrappedLineParser::IfStmtKind
759 UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
760                                 bool MunchSemi, bool UnindentWhitesmithsBraces,
761                                 bool CanContainBracedList,
762                                 TokenType NextLBracesType) {
763   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
764          "'{' or macro block token expected");
765   FormatToken *Tok = FormatTok;
766   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
767   FormatTok->setBlockKind(BK_Block);
768 
769   // For Whitesmiths mode, jump to the next level prior to skipping over the
770   // braces.
771   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
772     ++Line->Level;
773 
774   size_t PPStartHash = computePPHash();
775 
776   unsigned InitialLevel = Line->Level;
777   nextToken(/*LevelDifference=*/AddLevels);
778 
779   if (MacroBlock && FormatTok->is(tok::l_paren))
780     parseParens();
781 
782   size_t NbPreprocessorDirectives =
783       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
784   addUnwrappedLine();
785   size_t OpeningLineIndex =
786       CurrentLines->empty()
787           ? (UnwrappedLine::kInvalidIndex)
788           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
789 
790   // Whitesmiths is weird here. The brace needs to be indented for the namespace
791   // block, but the block itself may not be indented depending on the style
792   // settings. This allows the format to back up one level in those cases.
793   if (UnindentWhitesmithsBraces)
794     --Line->Level;
795 
796   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
797                                           MustBeDeclaration);
798   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
799     Line->Level += AddLevels;
800 
801   IfStmtKind IfKind = IfStmtKind::NotIf;
802   const bool SimpleBlock = parseLevel(
803       /*HasOpeningBrace=*/true, CanContainBracedList, &IfKind, NextLBracesType);
804 
805   if (eof())
806     return IfKind;
807 
808   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
809                  : !FormatTok->is(tok::r_brace)) {
810     Line->Level = InitialLevel;
811     FormatTok->setBlockKind(BK_Block);
812     return IfKind;
813   }
814 
815   if (SimpleBlock && Tok->is(tok::l_brace)) {
816     assert(FormatTok->is(tok::r_brace));
817     const FormatToken *Previous = Tokens->getPreviousToken();
818     assert(Previous);
819     if (Previous->isNot(tok::r_brace) || Previous->Optional) {
820       Tok->MatchingParen = FormatTok;
821       FormatTok->MatchingParen = Tok;
822     }
823   }
824 
825   size_t PPEndHash = computePPHash();
826 
827   // Munch the closing brace.
828   nextToken(/*LevelDifference=*/-AddLevels);
829 
830   if (MacroBlock && FormatTok->is(tok::l_paren))
831     parseParens();
832 
833   if (FormatTok->is(tok::kw_noexcept)) {
834     // A noexcept in a requires expression.
835     nextToken();
836   }
837 
838   if (FormatTok->is(tok::arrow)) {
839     // Following the } or noexcept we can find a trailing return type arrow
840     // as part of an implicit conversion constraint.
841     nextToken();
842     parseStructuralElement();
843   }
844 
845   if (MunchSemi && FormatTok->is(tok::semi))
846     nextToken();
847 
848   Line->Level = InitialLevel;
849 
850   if (PPStartHash == PPEndHash) {
851     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
852     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
853       // Update the opening line to add the forward reference as well
854       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
855           CurrentLines->size() - 1;
856     }
857   }
858 
859   return IfKind;
860 }
861 
862 static bool isGoogScope(const UnwrappedLine &Line) {
863   // FIXME: Closure-library specific stuff should not be hard-coded but be
864   // configurable.
865   if (Line.Tokens.size() < 4)
866     return false;
867   auto I = Line.Tokens.begin();
868   if (I->Tok->TokenText != "goog")
869     return false;
870   ++I;
871   if (I->Tok->isNot(tok::period))
872     return false;
873   ++I;
874   if (I->Tok->TokenText != "scope")
875     return false;
876   ++I;
877   return I->Tok->is(tok::l_paren);
878 }
879 
880 static bool isIIFE(const UnwrappedLine &Line,
881                    const AdditionalKeywords &Keywords) {
882   // Look for the start of an immediately invoked anonymous function.
883   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
884   // This is commonly done in JavaScript to create a new, anonymous scope.
885   // Example: (function() { ... })()
886   if (Line.Tokens.size() < 3)
887     return false;
888   auto I = Line.Tokens.begin();
889   if (I->Tok->isNot(tok::l_paren))
890     return false;
891   ++I;
892   if (I->Tok->isNot(Keywords.kw_function))
893     return false;
894   ++I;
895   return I->Tok->is(tok::l_paren);
896 }
897 
898 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
899                                    const FormatToken &InitialToken) {
900   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
901     return Style.BraceWrapping.AfterNamespace;
902   if (InitialToken.is(tok::kw_class))
903     return Style.BraceWrapping.AfterClass;
904   if (InitialToken.is(tok::kw_union))
905     return Style.BraceWrapping.AfterUnion;
906   if (InitialToken.is(tok::kw_struct))
907     return Style.BraceWrapping.AfterStruct;
908   if (InitialToken.is(tok::kw_enum))
909     return Style.BraceWrapping.AfterEnum;
910   return false;
911 }
912 
913 void UnwrappedLineParser::parseChildBlock(
914     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
915   FormatTok->setBlockKind(BK_Block);
916   nextToken();
917   {
918     bool SkipIndent = (Style.isJavaScript() &&
919                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
920     ScopedLineState LineState(*this);
921     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
922                                             /*MustBeDeclaration=*/false);
923     Line->Level += SkipIndent ? 0 : 1;
924     parseLevel(/*HasOpeningBrace=*/true, CanContainBracedList,
925                /*IfKind=*/nullptr, NextLBracesType);
926     flushComments(isOnNewLine(*FormatTok));
927     Line->Level -= SkipIndent ? 0 : 1;
928   }
929   nextToken();
930 }
931 
932 void UnwrappedLineParser::parsePPDirective() {
933   assert(FormatTok->is(tok::hash) && "'#' expected");
934   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
935 
936   nextToken();
937 
938   if (!FormatTok->Tok.getIdentifierInfo()) {
939     parsePPUnknown();
940     return;
941   }
942 
943   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
944   case tok::pp_define:
945     parsePPDefine();
946     return;
947   case tok::pp_if:
948     parsePPIf(/*IfDef=*/false);
949     break;
950   case tok::pp_ifdef:
951   case tok::pp_ifndef:
952     parsePPIf(/*IfDef=*/true);
953     break;
954   case tok::pp_else:
955     parsePPElse();
956     break;
957   case tok::pp_elifdef:
958   case tok::pp_elifndef:
959   case tok::pp_elif:
960     parsePPElIf();
961     break;
962   case tok::pp_endif:
963     parsePPEndIf();
964     break;
965   default:
966     parsePPUnknown();
967     break;
968   }
969 }
970 
971 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
972   size_t Line = CurrentLines->size();
973   if (CurrentLines == &PreprocessorDirectives)
974     Line += Lines.size();
975 
976   if (Unreachable ||
977       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
978     PPStack.push_back({PP_Unreachable, Line});
979   else
980     PPStack.push_back({PP_Conditional, Line});
981 }
982 
983 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
984   ++PPBranchLevel;
985   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
986   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
987     PPLevelBranchIndex.push_back(0);
988     PPLevelBranchCount.push_back(0);
989   }
990   PPChainBranchIndex.push(0);
991   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
992   conditionalCompilationCondition(Unreachable || Skip);
993 }
994 
995 void UnwrappedLineParser::conditionalCompilationAlternative() {
996   if (!PPStack.empty())
997     PPStack.pop_back();
998   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
999   if (!PPChainBranchIndex.empty())
1000     ++PPChainBranchIndex.top();
1001   conditionalCompilationCondition(
1002       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1003       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1004 }
1005 
1006 void UnwrappedLineParser::conditionalCompilationEnd() {
1007   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1008   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1009     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1010       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1011   }
1012   // Guard against #endif's without #if.
1013   if (PPBranchLevel > -1)
1014     --PPBranchLevel;
1015   if (!PPChainBranchIndex.empty())
1016     PPChainBranchIndex.pop();
1017   if (!PPStack.empty())
1018     PPStack.pop_back();
1019 }
1020 
1021 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1022   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1023   nextToken();
1024   bool Unreachable = false;
1025   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1026     Unreachable = true;
1027   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1028     Unreachable = true;
1029   conditionalCompilationStart(Unreachable);
1030   FormatToken *IfCondition = FormatTok;
1031   // If there's a #ifndef on the first line, and the only lines before it are
1032   // comments, it could be an include guard.
1033   bool MaybeIncludeGuard = IfNDef;
1034   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
1035     for (auto &Line : Lines) {
1036       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1037         MaybeIncludeGuard = false;
1038         IncludeGuard = IG_Rejected;
1039         break;
1040       }
1041     }
1042   --PPBranchLevel;
1043   parsePPUnknown();
1044   ++PPBranchLevel;
1045   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1046     IncludeGuard = IG_IfNdefed;
1047     IncludeGuardToken = IfCondition;
1048   }
1049 }
1050 
1051 void UnwrappedLineParser::parsePPElse() {
1052   // If a potential include guard has an #else, it's not an include guard.
1053   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1054     IncludeGuard = IG_Rejected;
1055   conditionalCompilationAlternative();
1056   if (PPBranchLevel > -1)
1057     --PPBranchLevel;
1058   parsePPUnknown();
1059   ++PPBranchLevel;
1060 }
1061 
1062 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1063 
1064 void UnwrappedLineParser::parsePPEndIf() {
1065   conditionalCompilationEnd();
1066   parsePPUnknown();
1067   // If the #endif of a potential include guard is the last thing in the file,
1068   // then we found an include guard.
1069   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1070       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1071     IncludeGuard = IG_Found;
1072 }
1073 
1074 void UnwrappedLineParser::parsePPDefine() {
1075   nextToken();
1076 
1077   if (!FormatTok->Tok.getIdentifierInfo()) {
1078     IncludeGuard = IG_Rejected;
1079     IncludeGuardToken = nullptr;
1080     parsePPUnknown();
1081     return;
1082   }
1083 
1084   if (IncludeGuard == IG_IfNdefed &&
1085       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1086     IncludeGuard = IG_Defined;
1087     IncludeGuardToken = nullptr;
1088     for (auto &Line : Lines) {
1089       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1090         IncludeGuard = IG_Rejected;
1091         break;
1092       }
1093     }
1094   }
1095 
1096   // In the context of a define, even keywords should be treated as normal
1097   // identifiers. Setting the kind to identifier is not enough, because we need
1098   // to treat additional keywords like __except as well, which are already
1099   // identifiers. Setting the identifier info to null interferes with include
1100   // guard processing above, and changes preprocessing nesting.
1101   FormatTok->Tok.setKind(tok::identifier);
1102   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1103   nextToken();
1104   if (FormatTok->Tok.getKind() == tok::l_paren &&
1105       !FormatTok->hasWhitespaceBefore())
1106     parseParens();
1107   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1108     Line->Level += PPBranchLevel + 1;
1109   addUnwrappedLine();
1110   ++Line->Level;
1111 
1112   // Errors during a preprocessor directive can only affect the layout of the
1113   // preprocessor directive, and thus we ignore them. An alternative approach
1114   // would be to use the same approach we use on the file level (no
1115   // re-indentation if there was a structural error) within the macro
1116   // definition.
1117   parseFile();
1118 }
1119 
1120 void UnwrappedLineParser::parsePPUnknown() {
1121   do {
1122     nextToken();
1123   } while (!eof());
1124   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1125     Line->Level += PPBranchLevel + 1;
1126   addUnwrappedLine();
1127 }
1128 
1129 // Here we exclude certain tokens that are not usually the first token in an
1130 // unwrapped line. This is used in attempt to distinguish macro calls without
1131 // trailing semicolons from other constructs split to several lines.
1132 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1133   // Semicolon can be a null-statement, l_square can be a start of a macro or
1134   // a C++11 attribute, but this doesn't seem to be common.
1135   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1136          Tok.isNot(TT_AttributeSquare) &&
1137          // Tokens that can only be used as binary operators and a part of
1138          // overloaded operator names.
1139          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1140          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1141          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1142          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1143          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1144          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1145          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1146          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1147          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1148          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1149          Tok.isNot(tok::lesslessequal) &&
1150          // Colon is used in labels, base class lists, initializer lists,
1151          // range-based for loops, ternary operator, but should never be the
1152          // first token in an unwrapped line.
1153          Tok.isNot(tok::colon) &&
1154          // 'noexcept' is a trailing annotation.
1155          Tok.isNot(tok::kw_noexcept);
1156 }
1157 
1158 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1159                           const FormatToken *FormatTok) {
1160   // FIXME: This returns true for C/C++ keywords like 'struct'.
1161   return FormatTok->is(tok::identifier) &&
1162          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1163           !FormatTok->isOneOf(
1164               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1165               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1166               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1167               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1168               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1169               Keywords.kw_instanceof, Keywords.kw_interface,
1170               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1171 }
1172 
1173 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1174                                  const FormatToken *FormatTok) {
1175   return FormatTok->Tok.isLiteral() ||
1176          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1177          mustBeJSIdent(Keywords, FormatTok);
1178 }
1179 
1180 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1181 // when encountered after a value (see mustBeJSIdentOrValue).
1182 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1183                            const FormatToken *FormatTok) {
1184   return FormatTok->isOneOf(
1185       tok::kw_return, Keywords.kw_yield,
1186       // conditionals
1187       tok::kw_if, tok::kw_else,
1188       // loops
1189       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1190       // switch/case
1191       tok::kw_switch, tok::kw_case,
1192       // exceptions
1193       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1194       // declaration
1195       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1196       Keywords.kw_async, Keywords.kw_function,
1197       // import/export
1198       Keywords.kw_import, tok::kw_export);
1199 }
1200 
1201 // Checks whether a token is a type in K&R C (aka C78).
1202 static bool isC78Type(const FormatToken &Tok) {
1203   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1204                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1205                      tok::identifier);
1206 }
1207 
1208 // This function checks whether a token starts the first parameter declaration
1209 // in a K&R C (aka C78) function definition, e.g.:
1210 //   int f(a, b)
1211 //   short a, b;
1212 //   {
1213 //      return a + b;
1214 //   }
1215 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1216                                const FormatToken *FuncName) {
1217   assert(Tok);
1218   assert(Next);
1219   assert(FuncName);
1220 
1221   if (FuncName->isNot(tok::identifier))
1222     return false;
1223 
1224   const FormatToken *Prev = FuncName->Previous;
1225   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1226     return false;
1227 
1228   if (!isC78Type(*Tok) &&
1229       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1230     return false;
1231 
1232   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1233     return false;
1234 
1235   Tok = Tok->Previous;
1236   if (!Tok || Tok->isNot(tok::r_paren))
1237     return false;
1238 
1239   Tok = Tok->Previous;
1240   if (!Tok || Tok->isNot(tok::identifier))
1241     return false;
1242 
1243   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1244 }
1245 
1246 void UnwrappedLineParser::parseModuleImport() {
1247   nextToken();
1248   while (!eof()) {
1249     if (FormatTok->is(tok::colon)) {
1250       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1251     }
1252     // Handle import <foo/bar.h> as we would an include statement.
1253     else if (FormatTok->is(tok::less)) {
1254       nextToken();
1255       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1256         // Mark tokens up to the trailing line comments as implicit string
1257         // literals.
1258         if (FormatTok->isNot(tok::comment) &&
1259             !FormatTok->TokenText.startswith("//"))
1260           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1261         nextToken();
1262       }
1263     }
1264     if (FormatTok->is(tok::semi)) {
1265       nextToken();
1266       break;
1267     }
1268     nextToken();
1269   }
1270 
1271   addUnwrappedLine();
1272 }
1273 
1274 // readTokenWithJavaScriptASI reads the next token and terminates the current
1275 // line if JavaScript Automatic Semicolon Insertion must
1276 // happen between the current token and the next token.
1277 //
1278 // This method is conservative - it cannot cover all edge cases of JavaScript,
1279 // but only aims to correctly handle certain well known cases. It *must not*
1280 // return true in speculative cases.
1281 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1282   FormatToken *Previous = FormatTok;
1283   readToken();
1284   FormatToken *Next = FormatTok;
1285 
1286   bool IsOnSameLine =
1287       CommentsBeforeNextToken.empty()
1288           ? Next->NewlinesBefore == 0
1289           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1290   if (IsOnSameLine)
1291     return;
1292 
1293   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1294   bool PreviousStartsTemplateExpr =
1295       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1296   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1297     // If the line contains an '@' sign, the previous token might be an
1298     // annotation, which can precede another identifier/value.
1299     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1300       return LineNode.Tok->is(tok::at);
1301     });
1302     if (HasAt)
1303       return;
1304   }
1305   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1306     return addUnwrappedLine();
1307   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1308   bool NextEndsTemplateExpr =
1309       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1310   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1311       (PreviousMustBeValue ||
1312        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1313                          tok::minusminus)))
1314     return addUnwrappedLine();
1315   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1316       isJSDeclOrStmt(Keywords, Next))
1317     return addUnwrappedLine();
1318 }
1319 
1320 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
1321                                                  bool IsTopLevel,
1322                                                  TokenType NextLBracesType,
1323                                                  bool *HasLabel) {
1324   if (Style.Language == FormatStyle::LK_TableGen &&
1325       FormatTok->is(tok::pp_include)) {
1326     nextToken();
1327     if (FormatTok->is(tok::string_literal))
1328       nextToken();
1329     addUnwrappedLine();
1330     return;
1331   }
1332   switch (FormatTok->Tok.getKind()) {
1333   case tok::kw_asm:
1334     nextToken();
1335     if (FormatTok->is(tok::l_brace)) {
1336       FormatTok->setFinalizedType(TT_InlineASMBrace);
1337       nextToken();
1338       while (FormatTok && FormatTok->isNot(tok::eof)) {
1339         if (FormatTok->is(tok::r_brace)) {
1340           FormatTok->setFinalizedType(TT_InlineASMBrace);
1341           nextToken();
1342           addUnwrappedLine();
1343           break;
1344         }
1345         FormatTok->Finalized = true;
1346         nextToken();
1347       }
1348     }
1349     break;
1350   case tok::kw_namespace:
1351     parseNamespace();
1352     return;
1353   case tok::kw_public:
1354   case tok::kw_protected:
1355   case tok::kw_private:
1356     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1357         Style.isCSharp())
1358       nextToken();
1359     else
1360       parseAccessSpecifier();
1361     return;
1362   case tok::kw_if:
1363     if (Style.isJavaScript() && Line->MustBeDeclaration)
1364       // field/method declaration.
1365       break;
1366     parseIfThenElse(IfKind);
1367     return;
1368   case tok::kw_for:
1369   case tok::kw_while:
1370     if (Style.isJavaScript() && Line->MustBeDeclaration)
1371       // field/method declaration.
1372       break;
1373     parseForOrWhileLoop();
1374     return;
1375   case tok::kw_do:
1376     if (Style.isJavaScript() && Line->MustBeDeclaration)
1377       // field/method declaration.
1378       break;
1379     parseDoWhile();
1380     return;
1381   case tok::kw_switch:
1382     if (Style.isJavaScript() && Line->MustBeDeclaration)
1383       // 'switch: string' field declaration.
1384       break;
1385     parseSwitch();
1386     return;
1387   case tok::kw_default:
1388     if (Style.isJavaScript() && Line->MustBeDeclaration)
1389       // 'default: string' field declaration.
1390       break;
1391     nextToken();
1392     if (FormatTok->is(tok::colon)) {
1393       parseLabel();
1394       return;
1395     }
1396     // e.g. "default void f() {}" in a Java interface.
1397     break;
1398   case tok::kw_case:
1399     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1400       // 'case: string' field declaration.
1401       nextToken();
1402       break;
1403     }
1404     parseCaseLabel();
1405     return;
1406   case tok::kw_try:
1407   case tok::kw___try:
1408     if (Style.isJavaScript() && Line->MustBeDeclaration)
1409       // field/method declaration.
1410       break;
1411     parseTryCatch();
1412     return;
1413   case tok::kw_extern:
1414     nextToken();
1415     if (FormatTok->is(tok::string_literal)) {
1416       nextToken();
1417       if (FormatTok->is(tok::l_brace)) {
1418         if (Style.BraceWrapping.AfterExternBlock)
1419           addUnwrappedLine();
1420         // Either we indent or for backwards compatibility we follow the
1421         // AfterExternBlock style.
1422         unsigned AddLevels =
1423             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1424                     (Style.BraceWrapping.AfterExternBlock &&
1425                      Style.IndentExternBlock ==
1426                          FormatStyle::IEBS_AfterExternBlock)
1427                 ? 1u
1428                 : 0u;
1429         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1430         addUnwrappedLine();
1431         return;
1432       }
1433     }
1434     break;
1435   case tok::kw_export:
1436     if (Style.isJavaScript()) {
1437       parseJavaScriptEs6ImportExport();
1438       return;
1439     }
1440     if (!Style.isCpp())
1441       break;
1442     // Handle C++ "(inline|export) namespace".
1443     LLVM_FALLTHROUGH;
1444   case tok::kw_inline:
1445     nextToken();
1446     if (FormatTok->is(tok::kw_namespace)) {
1447       parseNamespace();
1448       return;
1449     }
1450     break;
1451   case tok::identifier:
1452     if (FormatTok->is(TT_ForEachMacro)) {
1453       parseForOrWhileLoop();
1454       return;
1455     }
1456     if (FormatTok->is(TT_MacroBlockBegin)) {
1457       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1458                  /*MunchSemi=*/false);
1459       return;
1460     }
1461     if (FormatTok->is(Keywords.kw_import)) {
1462       if (Style.isJavaScript()) {
1463         parseJavaScriptEs6ImportExport();
1464         return;
1465       }
1466       if (Style.Language == FormatStyle::LK_Proto) {
1467         nextToken();
1468         if (FormatTok->is(tok::kw_public))
1469           nextToken();
1470         if (!FormatTok->is(tok::string_literal))
1471           return;
1472         nextToken();
1473         if (FormatTok->is(tok::semi))
1474           nextToken();
1475         addUnwrappedLine();
1476         return;
1477       }
1478       if (Style.isCpp()) {
1479         parseModuleImport();
1480         return;
1481       }
1482     }
1483     if (Style.isCpp() &&
1484         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1485                            Keywords.kw_slots, Keywords.kw_qslots)) {
1486       nextToken();
1487       if (FormatTok->is(tok::colon)) {
1488         nextToken();
1489         addUnwrappedLine();
1490         return;
1491       }
1492     }
1493     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1494       parseStatementMacro();
1495       return;
1496     }
1497     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1498       parseNamespace();
1499       return;
1500     }
1501     // In all other cases, parse the declaration.
1502     break;
1503   default:
1504     break;
1505   }
1506   do {
1507     const FormatToken *Previous = FormatTok->Previous;
1508     switch (FormatTok->Tok.getKind()) {
1509     case tok::at:
1510       nextToken();
1511       if (FormatTok->is(tok::l_brace)) {
1512         nextToken();
1513         parseBracedList();
1514         break;
1515       } else if (Style.Language == FormatStyle::LK_Java &&
1516                  FormatTok->is(Keywords.kw_interface)) {
1517         nextToken();
1518         break;
1519       }
1520       switch (FormatTok->Tok.getObjCKeywordID()) {
1521       case tok::objc_public:
1522       case tok::objc_protected:
1523       case tok::objc_package:
1524       case tok::objc_private:
1525         return parseAccessSpecifier();
1526       case tok::objc_interface:
1527       case tok::objc_implementation:
1528         return parseObjCInterfaceOrImplementation();
1529       case tok::objc_protocol:
1530         if (parseObjCProtocol())
1531           return;
1532         break;
1533       case tok::objc_end:
1534         return; // Handled by the caller.
1535       case tok::objc_optional:
1536       case tok::objc_required:
1537         nextToken();
1538         addUnwrappedLine();
1539         return;
1540       case tok::objc_autoreleasepool:
1541         nextToken();
1542         if (FormatTok->is(tok::l_brace)) {
1543           if (Style.BraceWrapping.AfterControlStatement ==
1544               FormatStyle::BWACS_Always)
1545             addUnwrappedLine();
1546           parseBlock();
1547         }
1548         addUnwrappedLine();
1549         return;
1550       case tok::objc_synchronized:
1551         nextToken();
1552         if (FormatTok->is(tok::l_paren))
1553           // Skip synchronization object
1554           parseParens();
1555         if (FormatTok->is(tok::l_brace)) {
1556           if (Style.BraceWrapping.AfterControlStatement ==
1557               FormatStyle::BWACS_Always)
1558             addUnwrappedLine();
1559           parseBlock();
1560         }
1561         addUnwrappedLine();
1562         return;
1563       case tok::objc_try:
1564         // This branch isn't strictly necessary (the kw_try case below would
1565         // do this too after the tok::at is parsed above).  But be explicit.
1566         parseTryCatch();
1567         return;
1568       default:
1569         break;
1570       }
1571       break;
1572     case tok::kw_concept:
1573       parseConcept();
1574       return;
1575     case tok::kw_requires: {
1576       if (Style.isCpp()) {
1577         bool ParsedClause = parseRequires();
1578         if (ParsedClause)
1579           return;
1580       } else {
1581         nextToken();
1582       }
1583       break;
1584     }
1585     case tok::kw_enum:
1586       // Ignore if this is part of "template <enum ...".
1587       if (Previous && Previous->is(tok::less)) {
1588         nextToken();
1589         break;
1590       }
1591 
1592       // parseEnum falls through and does not yet add an unwrapped line as an
1593       // enum definition can start a structural element.
1594       if (!parseEnum())
1595         break;
1596       // This only applies for C++.
1597       if (!Style.isCpp()) {
1598         addUnwrappedLine();
1599         return;
1600       }
1601       break;
1602     case tok::kw_typedef:
1603       nextToken();
1604       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1605                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1606                              Keywords.kw_CF_CLOSED_ENUM,
1607                              Keywords.kw_NS_CLOSED_ENUM))
1608         parseEnum();
1609       break;
1610     case tok::kw_struct:
1611     case tok::kw_union:
1612     case tok::kw_class:
1613       if (parseStructLike())
1614         return;
1615       break;
1616     case tok::period:
1617       nextToken();
1618       // In Java, classes have an implicit static member "class".
1619       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1620           FormatTok->is(tok::kw_class))
1621         nextToken();
1622       if (Style.isJavaScript() && FormatTok &&
1623           FormatTok->Tok.getIdentifierInfo())
1624         // JavaScript only has pseudo keywords, all keywords are allowed to
1625         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1626         nextToken();
1627       break;
1628     case tok::semi:
1629       nextToken();
1630       addUnwrappedLine();
1631       return;
1632     case tok::r_brace:
1633       addUnwrappedLine();
1634       return;
1635     case tok::l_paren: {
1636       parseParens();
1637       // Break the unwrapped line if a K&R C function definition has a parameter
1638       // declaration.
1639       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1640         break;
1641       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1642         addUnwrappedLine();
1643         return;
1644       }
1645       break;
1646     }
1647     case tok::kw_operator:
1648       nextToken();
1649       if (FormatTok->isBinaryOperator())
1650         nextToken();
1651       break;
1652     case tok::caret:
1653       nextToken();
1654       if (FormatTok->Tok.isAnyIdentifier() ||
1655           FormatTok->isSimpleTypeSpecifier())
1656         nextToken();
1657       if (FormatTok->is(tok::l_paren))
1658         parseParens();
1659       if (FormatTok->is(tok::l_brace))
1660         parseChildBlock();
1661       break;
1662     case tok::l_brace:
1663       if (NextLBracesType != TT_Unknown)
1664         FormatTok->setFinalizedType(NextLBracesType);
1665       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1666         // A block outside of parentheses must be the last part of a
1667         // structural element.
1668         // FIXME: Figure out cases where this is not true, and add projections
1669         // for them (the one we know is missing are lambdas).
1670         if (Style.Language == FormatStyle::LK_Java &&
1671             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1672           // If necessary, we could set the type to something different than
1673           // TT_FunctionLBrace.
1674           if (Style.BraceWrapping.AfterControlStatement ==
1675               FormatStyle::BWACS_Always)
1676             addUnwrappedLine();
1677         } else if (Style.BraceWrapping.AfterFunction) {
1678           addUnwrappedLine();
1679         }
1680         if (!Line->InPPDirective)
1681           FormatTok->setFinalizedType(TT_FunctionLBrace);
1682         parseBlock();
1683         addUnwrappedLine();
1684         return;
1685       }
1686       // Otherwise this was a braced init list, and the structural
1687       // element continues.
1688       break;
1689     case tok::kw_try:
1690       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1691         // field/method declaration.
1692         nextToken();
1693         break;
1694       }
1695       // We arrive here when parsing function-try blocks.
1696       if (Style.BraceWrapping.AfterFunction)
1697         addUnwrappedLine();
1698       parseTryCatch();
1699       return;
1700     case tok::identifier: {
1701       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1702           Line->MustBeDeclaration) {
1703         addUnwrappedLine();
1704         parseCSharpGenericTypeConstraint();
1705         break;
1706       }
1707       if (FormatTok->is(TT_MacroBlockEnd)) {
1708         addUnwrappedLine();
1709         return;
1710       }
1711 
1712       // Function declarations (as opposed to function expressions) are parsed
1713       // on their own unwrapped line by continuing this loop. Function
1714       // expressions (functions that are not on their own line) must not create
1715       // a new unwrapped line, so they are special cased below.
1716       size_t TokenCount = Line->Tokens.size();
1717       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1718           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1719                                                      Keywords.kw_async)))) {
1720         tryToParseJSFunction();
1721         break;
1722       }
1723       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1724           FormatTok->is(Keywords.kw_interface)) {
1725         if (Style.isJavaScript()) {
1726           // In JavaScript/TypeScript, "interface" can be used as a standalone
1727           // identifier, e.g. in `var interface = 1;`. If "interface" is
1728           // followed by another identifier, it is very like to be an actual
1729           // interface declaration.
1730           unsigned StoredPosition = Tokens->getPosition();
1731           FormatToken *Next = Tokens->getNextToken();
1732           FormatTok = Tokens->setPosition(StoredPosition);
1733           if (!mustBeJSIdent(Keywords, Next)) {
1734             nextToken();
1735             break;
1736           }
1737         }
1738         parseRecord();
1739         addUnwrappedLine();
1740         return;
1741       }
1742 
1743       if (FormatTok->is(Keywords.kw_interface)) {
1744         if (parseStructLike())
1745           return;
1746         break;
1747       }
1748 
1749       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1750         parseStatementMacro();
1751         return;
1752       }
1753 
1754       // See if the following token should start a new unwrapped line.
1755       StringRef Text = FormatTok->TokenText;
1756 
1757       FormatToken *PreviousToken = FormatTok;
1758       nextToken();
1759 
1760       // JS doesn't have macros, and within classes colons indicate fields, not
1761       // labels.
1762       if (Style.isJavaScript())
1763         break;
1764 
1765       TokenCount = Line->Tokens.size();
1766       if (TokenCount == 1 ||
1767           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1768         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1769           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1770           parseLabel(!Style.IndentGotoLabels);
1771           if (HasLabel)
1772             *HasLabel = true;
1773           return;
1774         }
1775         // Recognize function-like macro usages without trailing semicolon as
1776         // well as free-standing macros like Q_OBJECT.
1777         bool FunctionLike = FormatTok->is(tok::l_paren);
1778         if (FunctionLike)
1779           parseParens();
1780 
1781         bool FollowedByNewline =
1782             CommentsBeforeNextToken.empty()
1783                 ? FormatTok->NewlinesBefore > 0
1784                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1785 
1786         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1787             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1788           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1789           addUnwrappedLine();
1790           return;
1791         }
1792       }
1793       break;
1794     }
1795     case tok::equal:
1796       if ((Style.isJavaScript() || Style.isCSharp()) &&
1797           FormatTok->is(TT_FatArrow)) {
1798         tryToParseChildBlock();
1799         break;
1800       }
1801 
1802       nextToken();
1803       if (FormatTok->is(tok::l_brace)) {
1804         // Block kind should probably be set to BK_BracedInit for any language.
1805         // C# needs this change to ensure that array initialisers and object
1806         // initialisers are indented the same way.
1807         if (Style.isCSharp())
1808           FormatTok->setBlockKind(BK_BracedInit);
1809         nextToken();
1810         parseBracedList();
1811       } else if (Style.Language == FormatStyle::LK_Proto &&
1812                  FormatTok->is(tok::less)) {
1813         nextToken();
1814         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1815                         /*ClosingBraceKind=*/tok::greater);
1816       }
1817       break;
1818     case tok::l_square:
1819       parseSquare();
1820       break;
1821     case tok::kw_new:
1822       parseNew();
1823       break;
1824     case tok::kw_case:
1825       if (Style.isJavaScript() && Line->MustBeDeclaration)
1826         // 'case: string' field declaration.
1827         break;
1828       parseCaseLabel();
1829       break;
1830     default:
1831       nextToken();
1832       break;
1833     }
1834   } while (!eof());
1835 }
1836 
1837 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1838   assert(FormatTok->is(tok::l_brace));
1839   if (!Style.isCSharp())
1840     return false;
1841   // See if it's a property accessor.
1842   if (FormatTok->Previous->isNot(tok::identifier))
1843     return false;
1844 
1845   // See if we are inside a property accessor.
1846   //
1847   // Record the current tokenPosition so that we can advance and
1848   // reset the current token. `Next` is not set yet so we need
1849   // another way to advance along the token stream.
1850   unsigned int StoredPosition = Tokens->getPosition();
1851   FormatToken *Tok = Tokens->getNextToken();
1852 
1853   // A trivial property accessor is of the form:
1854   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
1855   // Track these as they do not require line breaks to be introduced.
1856   bool HasSpecialAccessor = false;
1857   bool IsTrivialPropertyAccessor = true;
1858   while (!eof()) {
1859     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1860                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1861                      Keywords.kw_init, Keywords.kw_set)) {
1862       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
1863         HasSpecialAccessor = true;
1864       Tok = Tokens->getNextToken();
1865       continue;
1866     }
1867     if (Tok->isNot(tok::r_brace))
1868       IsTrivialPropertyAccessor = false;
1869     break;
1870   }
1871 
1872   if (!HasSpecialAccessor) {
1873     Tokens->setPosition(StoredPosition);
1874     return false;
1875   }
1876 
1877   // Try to parse the property accessor:
1878   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1879   Tokens->setPosition(StoredPosition);
1880   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1881     addUnwrappedLine();
1882   nextToken();
1883   do {
1884     switch (FormatTok->Tok.getKind()) {
1885     case tok::r_brace:
1886       nextToken();
1887       if (FormatTok->is(tok::equal)) {
1888         while (!eof() && FormatTok->isNot(tok::semi))
1889           nextToken();
1890         nextToken();
1891       }
1892       addUnwrappedLine();
1893       return true;
1894     case tok::l_brace:
1895       ++Line->Level;
1896       parseBlock(/*MustBeDeclaration=*/true);
1897       addUnwrappedLine();
1898       --Line->Level;
1899       break;
1900     case tok::equal:
1901       if (FormatTok->is(TT_FatArrow)) {
1902         ++Line->Level;
1903         do {
1904           nextToken();
1905         } while (!eof() && FormatTok->isNot(tok::semi));
1906         nextToken();
1907         addUnwrappedLine();
1908         --Line->Level;
1909         break;
1910       }
1911       nextToken();
1912       break;
1913     default:
1914       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
1915                              Keywords.kw_set) &&
1916           !IsTrivialPropertyAccessor) {
1917         // Non-trivial get/set needs to be on its own line.
1918         addUnwrappedLine();
1919       }
1920       nextToken();
1921     }
1922   } while (!eof());
1923 
1924   // Unreachable for well-formed code (paired '{' and '}').
1925   return true;
1926 }
1927 
1928 bool UnwrappedLineParser::tryToParseLambda() {
1929   if (!Style.isCpp()) {
1930     nextToken();
1931     return false;
1932   }
1933   assert(FormatTok->is(tok::l_square));
1934   FormatToken &LSquare = *FormatTok;
1935   if (!tryToParseLambdaIntroducer())
1936     return false;
1937 
1938   bool SeenArrow = false;
1939   bool InTemplateParameterList = false;
1940 
1941   while (FormatTok->isNot(tok::l_brace)) {
1942     if (FormatTok->isSimpleTypeSpecifier()) {
1943       nextToken();
1944       continue;
1945     }
1946     switch (FormatTok->Tok.getKind()) {
1947     case tok::l_brace:
1948       break;
1949     case tok::l_paren:
1950       parseParens();
1951       break;
1952     case tok::l_square:
1953       parseSquare();
1954       break;
1955     case tok::kw_class:
1956     case tok::kw_template:
1957     case tok::kw_typename:
1958       assert(FormatTok->Previous);
1959       if (FormatTok->Previous->is(tok::less))
1960         InTemplateParameterList = true;
1961       nextToken();
1962       break;
1963     case tok::amp:
1964     case tok::star:
1965     case tok::kw_const:
1966     case tok::comma:
1967     case tok::less:
1968     case tok::greater:
1969     case tok::identifier:
1970     case tok::numeric_constant:
1971     case tok::coloncolon:
1972     case tok::kw_mutable:
1973     case tok::kw_noexcept:
1974       nextToken();
1975       break;
1976     // Specialization of a template with an integer parameter can contain
1977     // arithmetic, logical, comparison and ternary operators.
1978     //
1979     // FIXME: This also accepts sequences of operators that are not in the scope
1980     // of a template argument list.
1981     //
1982     // In a C++ lambda a template type can only occur after an arrow. We use
1983     // this as an heuristic to distinguish between Objective-C expressions
1984     // followed by an `a->b` expression, such as:
1985     // ([obj func:arg] + a->b)
1986     // Otherwise the code below would parse as a lambda.
1987     //
1988     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1989     // explicit template lists: []<bool b = true && false>(U &&u){}
1990     case tok::plus:
1991     case tok::minus:
1992     case tok::exclaim:
1993     case tok::tilde:
1994     case tok::slash:
1995     case tok::percent:
1996     case tok::lessless:
1997     case tok::pipe:
1998     case tok::pipepipe:
1999     case tok::ampamp:
2000     case tok::caret:
2001     case tok::equalequal:
2002     case tok::exclaimequal:
2003     case tok::greaterequal:
2004     case tok::lessequal:
2005     case tok::question:
2006     case tok::colon:
2007     case tok::ellipsis:
2008     case tok::kw_true:
2009     case tok::kw_false:
2010       if (SeenArrow || InTemplateParameterList) {
2011         nextToken();
2012         break;
2013       }
2014       return true;
2015     case tok::arrow:
2016       // This might or might not actually be a lambda arrow (this could be an
2017       // ObjC method invocation followed by a dereferencing arrow). We might
2018       // reset this back to TT_Unknown in TokenAnnotator.
2019       FormatTok->setFinalizedType(TT_LambdaArrow);
2020       SeenArrow = true;
2021       nextToken();
2022       break;
2023     default:
2024       return true;
2025     }
2026   }
2027   FormatTok->setFinalizedType(TT_LambdaLBrace);
2028   LSquare.setFinalizedType(TT_LambdaLSquare);
2029   parseChildBlock();
2030   return true;
2031 }
2032 
2033 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2034   const FormatToken *Previous = FormatTok->Previous;
2035   if (Previous &&
2036       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2037                          tok::kw_delete, tok::l_square) ||
2038        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
2039        Previous->isSimpleTypeSpecifier())) {
2040     nextToken();
2041     return false;
2042   }
2043   nextToken();
2044   if (FormatTok->is(tok::l_square))
2045     return false;
2046   parseSquare(/*LambdaIntroducer=*/true);
2047   return true;
2048 }
2049 
2050 void UnwrappedLineParser::tryToParseJSFunction() {
2051   assert(FormatTok->is(Keywords.kw_function) ||
2052          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2053   if (FormatTok->is(Keywords.kw_async))
2054     nextToken();
2055   // Consume "function".
2056   nextToken();
2057 
2058   // Consume * (generator function). Treat it like C++'s overloaded operators.
2059   if (FormatTok->is(tok::star)) {
2060     FormatTok->setFinalizedType(TT_OverloadedOperator);
2061     nextToken();
2062   }
2063 
2064   // Consume function name.
2065   if (FormatTok->is(tok::identifier))
2066     nextToken();
2067 
2068   if (FormatTok->isNot(tok::l_paren))
2069     return;
2070 
2071   // Parse formal parameter list.
2072   parseParens();
2073 
2074   if (FormatTok->is(tok::colon)) {
2075     // Parse a type definition.
2076     nextToken();
2077 
2078     // Eat the type declaration. For braced inline object types, balance braces,
2079     // otherwise just parse until finding an l_brace for the function body.
2080     if (FormatTok->is(tok::l_brace))
2081       tryToParseBracedList();
2082     else
2083       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2084         nextToken();
2085   }
2086 
2087   if (FormatTok->is(tok::semi))
2088     return;
2089 
2090   parseChildBlock();
2091 }
2092 
2093 bool UnwrappedLineParser::tryToParseBracedList() {
2094   if (FormatTok->is(BK_Unknown))
2095     calculateBraceTypes();
2096   assert(FormatTok->isNot(BK_Unknown));
2097   if (FormatTok->is(BK_Block))
2098     return false;
2099   nextToken();
2100   parseBracedList();
2101   return true;
2102 }
2103 
2104 bool UnwrappedLineParser::tryToParseChildBlock() {
2105   assert(Style.isJavaScript() || Style.isCSharp());
2106   assert(FormatTok->is(TT_FatArrow));
2107   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2108   // They always start an expression or a child block if followed by a curly
2109   // brace.
2110   nextToken();
2111   if (FormatTok->isNot(tok::l_brace))
2112     return false;
2113   parseChildBlock();
2114   return true;
2115 }
2116 
2117 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2118                                           bool IsEnum,
2119                                           tok::TokenKind ClosingBraceKind) {
2120   bool HasError = false;
2121 
2122   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2123   // replace this by using parseAssignmentExpression() inside.
2124   do {
2125     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2126         tryToParseChildBlock())
2127       continue;
2128     if (Style.isJavaScript()) {
2129       if (FormatTok->is(Keywords.kw_function) ||
2130           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2131         tryToParseJSFunction();
2132         continue;
2133       }
2134       if (FormatTok->is(tok::l_brace)) {
2135         // Could be a method inside of a braced list `{a() { return 1; }}`.
2136         if (tryToParseBracedList())
2137           continue;
2138         parseChildBlock();
2139       }
2140     }
2141     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2142       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2143         addUnwrappedLine();
2144       nextToken();
2145       return !HasError;
2146     }
2147     switch (FormatTok->Tok.getKind()) {
2148     case tok::l_square:
2149       if (Style.isCSharp())
2150         parseSquare();
2151       else
2152         tryToParseLambda();
2153       break;
2154     case tok::l_paren:
2155       parseParens();
2156       // JavaScript can just have free standing methods and getters/setters in
2157       // object literals. Detect them by a "{" following ")".
2158       if (Style.isJavaScript()) {
2159         if (FormatTok->is(tok::l_brace))
2160           parseChildBlock();
2161         break;
2162       }
2163       break;
2164     case tok::l_brace:
2165       // Assume there are no blocks inside a braced init list apart
2166       // from the ones we explicitly parse out (like lambdas).
2167       FormatTok->setBlockKind(BK_BracedInit);
2168       nextToken();
2169       parseBracedList();
2170       break;
2171     case tok::less:
2172       if (Style.Language == FormatStyle::LK_Proto) {
2173         nextToken();
2174         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2175                         /*ClosingBraceKind=*/tok::greater);
2176       } else {
2177         nextToken();
2178       }
2179       break;
2180     case tok::semi:
2181       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2182       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2183       // used for error recovery if we have otherwise determined that this is
2184       // a braced list.
2185       if (Style.isJavaScript()) {
2186         nextToken();
2187         break;
2188       }
2189       HasError = true;
2190       if (!ContinueOnSemicolons)
2191         return !HasError;
2192       nextToken();
2193       break;
2194     case tok::comma:
2195       nextToken();
2196       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2197         addUnwrappedLine();
2198       break;
2199     default:
2200       nextToken();
2201       break;
2202     }
2203   } while (!eof());
2204   return false;
2205 }
2206 
2207 /// \brief Parses a pair of parentheses (and everything between them).
2208 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2209 /// double ampersands. This only counts for the current parens scope.
2210 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2211   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2212   nextToken();
2213   do {
2214     switch (FormatTok->Tok.getKind()) {
2215     case tok::l_paren:
2216       parseParens();
2217       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2218         parseChildBlock();
2219       break;
2220     case tok::r_paren:
2221       nextToken();
2222       return;
2223     case tok::r_brace:
2224       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2225       return;
2226     case tok::l_square:
2227       tryToParseLambda();
2228       break;
2229     case tok::l_brace:
2230       if (!tryToParseBracedList())
2231         parseChildBlock();
2232       break;
2233     case tok::at:
2234       nextToken();
2235       if (FormatTok->is(tok::l_brace)) {
2236         nextToken();
2237         parseBracedList();
2238       }
2239       break;
2240     case tok::equal:
2241       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2242         tryToParseChildBlock();
2243       else
2244         nextToken();
2245       break;
2246     case tok::kw_class:
2247       if (Style.isJavaScript())
2248         parseRecord(/*ParseAsExpr=*/true);
2249       else
2250         nextToken();
2251       break;
2252     case tok::identifier:
2253       if (Style.isJavaScript() &&
2254           (FormatTok->is(Keywords.kw_function) ||
2255            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2256         tryToParseJSFunction();
2257       else
2258         nextToken();
2259       break;
2260     case tok::kw_requires: {
2261       auto RequiresToken = FormatTok;
2262       nextToken();
2263       parseRequiresExpression(RequiresToken);
2264       break;
2265     }
2266     case tok::ampamp:
2267       if (AmpAmpTokenType != TT_Unknown)
2268         FormatTok->setFinalizedType(AmpAmpTokenType);
2269       LLVM_FALLTHROUGH;
2270     default:
2271       nextToken();
2272       break;
2273     }
2274   } while (!eof());
2275 }
2276 
2277 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2278   if (!LambdaIntroducer) {
2279     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2280     if (tryToParseLambda())
2281       return;
2282   }
2283   do {
2284     switch (FormatTok->Tok.getKind()) {
2285     case tok::l_paren:
2286       parseParens();
2287       break;
2288     case tok::r_square:
2289       nextToken();
2290       return;
2291     case tok::r_brace:
2292       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2293       return;
2294     case tok::l_square:
2295       parseSquare();
2296       break;
2297     case tok::l_brace: {
2298       if (!tryToParseBracedList())
2299         parseChildBlock();
2300       break;
2301     }
2302     case tok::at:
2303       nextToken();
2304       if (FormatTok->is(tok::l_brace)) {
2305         nextToken();
2306         parseBracedList();
2307       }
2308       break;
2309     default:
2310       nextToken();
2311       break;
2312     }
2313   } while (!eof());
2314 }
2315 
2316 void UnwrappedLineParser::keepAncestorBraces() {
2317   if (!Style.RemoveBracesLLVM)
2318     return;
2319 
2320   const int MaxNestingLevels = 2;
2321   const int Size = NestedTooDeep.size();
2322   if (Size >= MaxNestingLevels)
2323     NestedTooDeep[Size - MaxNestingLevels] = true;
2324   NestedTooDeep.push_back(false);
2325 }
2326 
2327 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2328   for (const auto &Token : llvm::reverse(Line.Tokens))
2329     if (Token.Tok->isNot(tok::comment))
2330       return Token.Tok;
2331 
2332   return nullptr;
2333 }
2334 
2335 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2336   FormatToken *Tok = nullptr;
2337 
2338   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2339       PreprocessorDirectives.empty()) {
2340     Tok = getLastNonComment(*Line);
2341     assert(Tok);
2342     if (Tok->BraceCount < 0) {
2343       assert(Tok->BraceCount == -1);
2344       Tok = nullptr;
2345     } else {
2346       Tok->BraceCount = -1;
2347     }
2348   }
2349 
2350   addUnwrappedLine();
2351   ++Line->Level;
2352   parseStructuralElement();
2353 
2354   if (Tok) {
2355     assert(!Line->InPPDirective);
2356     Tok = nullptr;
2357     for (const auto &L : llvm::reverse(*CurrentLines)) {
2358       if (!L.InPPDirective && getLastNonComment(L)) {
2359         Tok = L.Tokens.back().Tok;
2360         break;
2361       }
2362     }
2363     assert(Tok);
2364     ++Tok->BraceCount;
2365   }
2366 
2367   if (CheckEOF && FormatTok->is(tok::eof))
2368     addUnwrappedLine();
2369 
2370   --Line->Level;
2371 }
2372 
2373 static void markOptionalBraces(FormatToken *LeftBrace) {
2374   if (!LeftBrace)
2375     return;
2376 
2377   assert(LeftBrace->is(tok::l_brace));
2378 
2379   FormatToken *RightBrace = LeftBrace->MatchingParen;
2380   if (!RightBrace) {
2381     assert(!LeftBrace->Optional);
2382     return;
2383   }
2384 
2385   assert(RightBrace->is(tok::r_brace));
2386   assert(RightBrace->MatchingParen == LeftBrace);
2387   assert(LeftBrace->Optional == RightBrace->Optional);
2388 
2389   LeftBrace->Optional = true;
2390   RightBrace->Optional = true;
2391 }
2392 
2393 void UnwrappedLineParser::handleAttributes() {
2394   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2395   if (FormatTok->is(TT_AttributeMacro))
2396     nextToken();
2397   handleCppAttributes();
2398 }
2399 
2400 bool UnwrappedLineParser::handleCppAttributes() {
2401   // Handle [[likely]] / [[unlikely]] attributes.
2402   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2403     parseSquare();
2404     return true;
2405   }
2406   return false;
2407 }
2408 
2409 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2410                                                   bool KeepBraces) {
2411   assert(FormatTok->is(tok::kw_if) && "'if' expected");
2412   nextToken();
2413   if (FormatTok->is(tok::exclaim))
2414     nextToken();
2415   if (FormatTok->is(tok::kw_consteval)) {
2416     nextToken();
2417   } else {
2418     if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2419       nextToken();
2420     if (FormatTok->is(tok::l_paren))
2421       parseParens();
2422   }
2423   handleAttributes();
2424 
2425   bool NeedsUnwrappedLine = false;
2426   keepAncestorBraces();
2427 
2428   FormatToken *IfLeftBrace = nullptr;
2429   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2430 
2431   if (FormatTok->is(tok::l_brace)) {
2432     IfLeftBrace = FormatTok;
2433     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2434     IfBlockKind = parseBlock();
2435     if (Style.BraceWrapping.BeforeElse)
2436       addUnwrappedLine();
2437     else
2438       NeedsUnwrappedLine = true;
2439   } else {
2440     parseUnbracedBody();
2441   }
2442 
2443   bool KeepIfBraces = false;
2444   if (Style.RemoveBracesLLVM) {
2445     assert(!NestedTooDeep.empty());
2446     KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2447                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2448                    IfBlockKind == IfStmtKind::IfElseIf;
2449   }
2450 
2451   FormatToken *ElseLeftBrace = nullptr;
2452   IfStmtKind Kind = IfStmtKind::IfOnly;
2453 
2454   if (FormatTok->is(tok::kw_else)) {
2455     if (Style.RemoveBracesLLVM) {
2456       NestedTooDeep.back() = false;
2457       Kind = IfStmtKind::IfElse;
2458     }
2459     nextToken();
2460     handleAttributes();
2461     if (FormatTok->is(tok::l_brace)) {
2462       ElseLeftBrace = FormatTok;
2463       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2464       if (parseBlock() == IfStmtKind::IfOnly)
2465         Kind = IfStmtKind::IfElseIf;
2466       addUnwrappedLine();
2467     } else if (FormatTok->is(tok::kw_if)) {
2468       FormatToken *Previous = Tokens->getPreviousToken();
2469       const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
2470       if (IsPrecededByComment) {
2471         addUnwrappedLine();
2472         ++Line->Level;
2473       }
2474       bool TooDeep = true;
2475       if (Style.RemoveBracesLLVM) {
2476         Kind = IfStmtKind::IfElseIf;
2477         TooDeep = NestedTooDeep.pop_back_val();
2478       }
2479       ElseLeftBrace =
2480           parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
2481       if (Style.RemoveBracesLLVM)
2482         NestedTooDeep.push_back(TooDeep);
2483       if (IsPrecededByComment)
2484         --Line->Level;
2485     } else {
2486       parseUnbracedBody(/*CheckEOF=*/true);
2487     }
2488   } else {
2489     if (Style.RemoveBracesLLVM)
2490       KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2491     if (NeedsUnwrappedLine)
2492       addUnwrappedLine();
2493   }
2494 
2495   if (!Style.RemoveBracesLLVM)
2496     return nullptr;
2497 
2498   assert(!NestedTooDeep.empty());
2499   const bool KeepElseBraces =
2500       (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();
2501 
2502   NestedTooDeep.pop_back();
2503 
2504   if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
2505     markOptionalBraces(IfLeftBrace);
2506     markOptionalBraces(ElseLeftBrace);
2507   } else if (IfLeftBrace) {
2508     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2509     if (IfRightBrace) {
2510       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2511       assert(!IfLeftBrace->Optional);
2512       assert(!IfRightBrace->Optional);
2513       IfLeftBrace->MatchingParen = nullptr;
2514       IfRightBrace->MatchingParen = nullptr;
2515     }
2516   }
2517 
2518   if (IfKind)
2519     *IfKind = Kind;
2520 
2521   return IfLeftBrace;
2522 }
2523 
2524 void UnwrappedLineParser::parseTryCatch() {
2525   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2526   nextToken();
2527   bool NeedsUnwrappedLine = false;
2528   if (FormatTok->is(tok::colon)) {
2529     // We are in a function try block, what comes is an initializer list.
2530     nextToken();
2531 
2532     // In case identifiers were removed by clang-tidy, what might follow is
2533     // multiple commas in sequence - before the first identifier.
2534     while (FormatTok->is(tok::comma))
2535       nextToken();
2536 
2537     while (FormatTok->is(tok::identifier)) {
2538       nextToken();
2539       if (FormatTok->is(tok::l_paren))
2540         parseParens();
2541       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2542           FormatTok->is(tok::l_brace)) {
2543         do {
2544           nextToken();
2545         } while (!FormatTok->is(tok::r_brace));
2546         nextToken();
2547       }
2548 
2549       // In case identifiers were removed by clang-tidy, what might follow is
2550       // multiple commas in sequence - after the first identifier.
2551       while (FormatTok->is(tok::comma))
2552         nextToken();
2553     }
2554   }
2555   // Parse try with resource.
2556   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2557     parseParens();
2558 
2559   keepAncestorBraces();
2560 
2561   if (FormatTok->is(tok::l_brace)) {
2562     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2563     parseBlock();
2564     if (Style.BraceWrapping.BeforeCatch)
2565       addUnwrappedLine();
2566     else
2567       NeedsUnwrappedLine = true;
2568   } else if (!FormatTok->is(tok::kw_catch)) {
2569     // The C++ standard requires a compound-statement after a try.
2570     // If there's none, we try to assume there's a structuralElement
2571     // and try to continue.
2572     addUnwrappedLine();
2573     ++Line->Level;
2574     parseStructuralElement();
2575     --Line->Level;
2576   }
2577   while (true) {
2578     if (FormatTok->is(tok::at))
2579       nextToken();
2580     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2581                              tok::kw___finally) ||
2582           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2583            FormatTok->is(Keywords.kw_finally)) ||
2584           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2585            FormatTok->isObjCAtKeyword(tok::objc_finally))))
2586       break;
2587     nextToken();
2588     while (FormatTok->isNot(tok::l_brace)) {
2589       if (FormatTok->is(tok::l_paren)) {
2590         parseParens();
2591         continue;
2592       }
2593       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2594         if (Style.RemoveBracesLLVM)
2595           NestedTooDeep.pop_back();
2596         return;
2597       }
2598       nextToken();
2599     }
2600     NeedsUnwrappedLine = false;
2601     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2602     parseBlock();
2603     if (Style.BraceWrapping.BeforeCatch)
2604       addUnwrappedLine();
2605     else
2606       NeedsUnwrappedLine = true;
2607   }
2608 
2609   if (Style.RemoveBracesLLVM)
2610     NestedTooDeep.pop_back();
2611 
2612   if (NeedsUnwrappedLine)
2613     addUnwrappedLine();
2614 }
2615 
2616 void UnwrappedLineParser::parseNamespace() {
2617   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2618          "'namespace' expected");
2619 
2620   const FormatToken &InitialToken = *FormatTok;
2621   nextToken();
2622   if (InitialToken.is(TT_NamespaceMacro)) {
2623     parseParens();
2624   } else {
2625     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2626                               tok::l_square, tok::period, tok::l_paren) ||
2627            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2628       if (FormatTok->is(tok::l_square))
2629         parseSquare();
2630       else if (FormatTok->is(tok::l_paren))
2631         parseParens();
2632       else
2633         nextToken();
2634   }
2635   if (FormatTok->is(tok::l_brace)) {
2636     if (ShouldBreakBeforeBrace(Style, InitialToken))
2637       addUnwrappedLine();
2638 
2639     unsigned AddLevels =
2640         Style.NamespaceIndentation == FormatStyle::NI_All ||
2641                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2642                  DeclarationScopeStack.size() > 1)
2643             ? 1u
2644             : 0u;
2645     bool ManageWhitesmithsBraces =
2646         AddLevels == 0u &&
2647         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2648 
2649     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2650     // the whole block.
2651     if (ManageWhitesmithsBraces)
2652       ++Line->Level;
2653 
2654     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2655                /*MunchSemi=*/true,
2656                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2657 
2658     // Munch the semicolon after a namespace. This is more common than one would
2659     // think. Putting the semicolon into its own line is very ugly.
2660     if (FormatTok->is(tok::semi))
2661       nextToken();
2662 
2663     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2664 
2665     if (ManageWhitesmithsBraces)
2666       --Line->Level;
2667   }
2668   // FIXME: Add error handling.
2669 }
2670 
2671 void UnwrappedLineParser::parseNew() {
2672   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2673   nextToken();
2674 
2675   if (Style.isCSharp()) {
2676     do {
2677       if (FormatTok->is(tok::l_brace))
2678         parseBracedList();
2679 
2680       if (FormatTok->isOneOf(tok::semi, tok::comma))
2681         return;
2682 
2683       nextToken();
2684     } while (!eof());
2685   }
2686 
2687   if (Style.Language != FormatStyle::LK_Java)
2688     return;
2689 
2690   // In Java, we can parse everything up to the parens, which aren't optional.
2691   do {
2692     // There should not be a ;, { or } before the new's open paren.
2693     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2694       return;
2695 
2696     // Consume the parens.
2697     if (FormatTok->is(tok::l_paren)) {
2698       parseParens();
2699 
2700       // If there is a class body of an anonymous class, consume that as child.
2701       if (FormatTok->is(tok::l_brace))
2702         parseChildBlock();
2703       return;
2704     }
2705     nextToken();
2706   } while (!eof());
2707 }
2708 
2709 void UnwrappedLineParser::parseForOrWhileLoop() {
2710   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2711          "'for', 'while' or foreach macro expected");
2712   nextToken();
2713   // JS' for await ( ...
2714   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2715     nextToken();
2716   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2717     nextToken();
2718   if (FormatTok->is(tok::l_paren))
2719     parseParens();
2720 
2721   keepAncestorBraces();
2722 
2723   if (FormatTok->is(tok::l_brace)) {
2724     FormatToken *LeftBrace = FormatTok;
2725     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2726     parseBlock();
2727     if (Style.RemoveBracesLLVM) {
2728       assert(!NestedTooDeep.empty());
2729       if (!NestedTooDeep.back())
2730         markOptionalBraces(LeftBrace);
2731     }
2732     addUnwrappedLine();
2733   } else {
2734     parseUnbracedBody();
2735   }
2736 
2737   if (Style.RemoveBracesLLVM)
2738     NestedTooDeep.pop_back();
2739 }
2740 
2741 void UnwrappedLineParser::parseDoWhile() {
2742   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2743   nextToken();
2744 
2745   keepAncestorBraces();
2746 
2747   if (FormatTok->is(tok::l_brace)) {
2748     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2749     parseBlock();
2750     if (Style.BraceWrapping.BeforeWhile)
2751       addUnwrappedLine();
2752   } else {
2753     parseUnbracedBody();
2754   }
2755 
2756   if (Style.RemoveBracesLLVM)
2757     NestedTooDeep.pop_back();
2758 
2759   // FIXME: Add error handling.
2760   if (!FormatTok->is(tok::kw_while)) {
2761     addUnwrappedLine();
2762     return;
2763   }
2764 
2765   // If in Whitesmiths mode, the line with the while() needs to be indented
2766   // to the same level as the block.
2767   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2768     ++Line->Level;
2769 
2770   nextToken();
2771   parseStructuralElement();
2772 }
2773 
2774 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2775   nextToken();
2776   unsigned OldLineLevel = Line->Level;
2777   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2778     --Line->Level;
2779   if (LeftAlignLabel)
2780     Line->Level = 0;
2781 
2782   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2783       FormatTok->is(tok::l_brace)) {
2784 
2785     CompoundStatementIndenter Indenter(this, Line->Level,
2786                                        Style.BraceWrapping.AfterCaseLabel,
2787                                        Style.BraceWrapping.IndentBraces);
2788     parseBlock();
2789     if (FormatTok->is(tok::kw_break)) {
2790       if (Style.BraceWrapping.AfterControlStatement ==
2791           FormatStyle::BWACS_Always) {
2792         addUnwrappedLine();
2793         if (!Style.IndentCaseBlocks &&
2794             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2795           ++Line->Level;
2796       }
2797       parseStructuralElement();
2798     }
2799     addUnwrappedLine();
2800   } else {
2801     if (FormatTok->is(tok::semi))
2802       nextToken();
2803     addUnwrappedLine();
2804   }
2805   Line->Level = OldLineLevel;
2806   if (FormatTok->isNot(tok::l_brace)) {
2807     parseStructuralElement();
2808     addUnwrappedLine();
2809   }
2810 }
2811 
2812 void UnwrappedLineParser::parseCaseLabel() {
2813   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2814 
2815   // FIXME: fix handling of complex expressions here.
2816   do {
2817     nextToken();
2818   } while (!eof() && !FormatTok->is(tok::colon));
2819   parseLabel();
2820 }
2821 
2822 void UnwrappedLineParser::parseSwitch() {
2823   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
2824   nextToken();
2825   if (FormatTok->is(tok::l_paren))
2826     parseParens();
2827 
2828   keepAncestorBraces();
2829 
2830   if (FormatTok->is(tok::l_brace)) {
2831     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2832     parseBlock();
2833     addUnwrappedLine();
2834   } else {
2835     addUnwrappedLine();
2836     ++Line->Level;
2837     parseStructuralElement();
2838     --Line->Level;
2839   }
2840 
2841   if (Style.RemoveBracesLLVM)
2842     NestedTooDeep.pop_back();
2843 }
2844 
2845 // Operators that can follow a C variable.
2846 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
2847   switch (kind) {
2848   case tok::ampamp:
2849   case tok::ampequal:
2850   case tok::arrow:
2851   case tok::caret:
2852   case tok::caretequal:
2853   case tok::comma:
2854   case tok::ellipsis:
2855   case tok::equal:
2856   case tok::equalequal:
2857   case tok::exclaim:
2858   case tok::exclaimequal:
2859   case tok::greater:
2860   case tok::greaterequal:
2861   case tok::greatergreater:
2862   case tok::greatergreaterequal:
2863   case tok::l_paren:
2864   case tok::l_square:
2865   case tok::less:
2866   case tok::lessequal:
2867   case tok::lessless:
2868   case tok::lesslessequal:
2869   case tok::minus:
2870   case tok::minusequal:
2871   case tok::minusminus:
2872   case tok::percent:
2873   case tok::percentequal:
2874   case tok::period:
2875   case tok::pipe:
2876   case tok::pipeequal:
2877   case tok::pipepipe:
2878   case tok::plus:
2879   case tok::plusequal:
2880   case tok::plusplus:
2881   case tok::question:
2882   case tok::r_brace:
2883   case tok::r_paren:
2884   case tok::r_square:
2885   case tok::semi:
2886   case tok::slash:
2887   case tok::slashequal:
2888   case tok::star:
2889   case tok::starequal:
2890     return true;
2891   default:
2892     return false;
2893   }
2894 }
2895 
2896 void UnwrappedLineParser::parseAccessSpecifier() {
2897   FormatToken *AccessSpecifierCandidate = FormatTok;
2898   nextToken();
2899   // Understand Qt's slots.
2900   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2901     nextToken();
2902   // Otherwise, we don't know what it is, and we'd better keep the next token.
2903   if (FormatTok->is(tok::colon)) {
2904     nextToken();
2905     addUnwrappedLine();
2906   } else if (!FormatTok->is(tok::coloncolon) &&
2907              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
2908     // Not a variable name nor namespace name.
2909     addUnwrappedLine();
2910   } else if (AccessSpecifierCandidate) {
2911     // Consider the access specifier to be a C identifier.
2912     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2913   }
2914 }
2915 
2916 /// \brief Parses a concept definition.
2917 /// \pre The current token has to be the concept keyword.
2918 ///
2919 /// Returns if either the concept has been completely parsed, or if it detects
2920 /// that the concept definition is incorrect.
2921 void UnwrappedLineParser::parseConcept() {
2922   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
2923   nextToken();
2924   if (!FormatTok->is(tok::identifier))
2925     return;
2926   nextToken();
2927   if (!FormatTok->is(tok::equal))
2928     return;
2929   nextToken();
2930   parseConstraintExpression();
2931   if (FormatTok->is(tok::semi))
2932     nextToken();
2933   addUnwrappedLine();
2934 }
2935 
2936 /// \brief Parses a requires, decides if it is a clause or an expression.
2937 /// \pre The current token has to be the requires keyword.
2938 /// \returns true if it parsed a clause.
2939 bool clang::format::UnwrappedLineParser::parseRequires() {
2940   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
2941   auto RequiresToken = FormatTok;
2942 
2943   // We try to guess if it is a requires clause, or a requires expression. For
2944   // that we first consume the keyword and check the next token.
2945   nextToken();
2946 
2947   switch (FormatTok->Tok.getKind()) {
2948   case tok::l_brace:
2949     // This can only be an expression, never a clause.
2950     parseRequiresExpression(RequiresToken);
2951     return false;
2952   case tok::l_paren:
2953     // Clauses and expression can start with a paren, it's unclear what we have.
2954     break;
2955   default:
2956     // All other tokens can only be a clause.
2957     parseRequiresClause(RequiresToken);
2958     return true;
2959   }
2960 
2961   // Looking forward we would have to decide if there are function declaration
2962   // like arguments to the requires expression:
2963   // requires (T t) {
2964   // Or there is a constraint expression for the requires clause:
2965   // requires (C<T> && ...
2966 
2967   // But first let's look behind.
2968   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
2969 
2970   if (!PreviousNonComment ||
2971       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
2972     // If there is no token, or an expression left brace, we are a requires
2973     // clause within a requires expression.
2974     parseRequiresClause(RequiresToken);
2975     return true;
2976   }
2977 
2978   switch (PreviousNonComment->Tok.getKind()) {
2979   case tok::greater:
2980   case tok::r_paren:
2981   case tok::kw_noexcept:
2982   case tok::kw_const:
2983     // This is a requires clause.
2984     parseRequiresClause(RequiresToken);
2985     return true;
2986   case tok::amp:
2987   case tok::ampamp: {
2988     // This can be either:
2989     // if (... && requires (T t) ...)
2990     // Or
2991     // void member(...) && requires (C<T> ...
2992     // We check the one token before that for a const:
2993     // void member(...) const && requires (C<T> ...
2994     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
2995     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
2996       parseRequiresClause(RequiresToken);
2997       return true;
2998     }
2999     break;
3000   }
3001   default:
3002     // It's an expression.
3003     parseRequiresExpression(RequiresToken);
3004     return false;
3005   }
3006 
3007   // Now we look forward and try to check if the paren content is a parameter
3008   // list. The parameters can be cv-qualified and contain references or
3009   // pointers.
3010   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3011   // of stuff: typename, const, *, &, &&, ::, identifiers.
3012 
3013   int NextTokenOffset = 1;
3014   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3015   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3016     ++NextTokenOffset;
3017     NextToken = Tokens->peekNextToken(NextTokenOffset);
3018   };
3019 
3020   bool FoundType = false;
3021   bool LastWasColonColon = false;
3022   int OpenAngles = 0;
3023 
3024   for (; NextTokenOffset < 50; PeekNext()) {
3025     switch (NextToken->Tok.getKind()) {
3026     case tok::kw_volatile:
3027     case tok::kw_const:
3028     case tok::comma:
3029       parseRequiresExpression(RequiresToken);
3030       return false;
3031     case tok::r_paren:
3032     case tok::pipepipe:
3033       parseRequiresClause(RequiresToken);
3034       return true;
3035     case tok::eof:
3036       // Break out of the loop.
3037       NextTokenOffset = 50;
3038       break;
3039     case tok::coloncolon:
3040       LastWasColonColon = true;
3041       break;
3042     case tok::identifier:
3043       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3044         parseRequiresExpression(RequiresToken);
3045         return false;
3046       }
3047       FoundType = true;
3048       LastWasColonColon = false;
3049       break;
3050     case tok::less:
3051       ++OpenAngles;
3052       break;
3053     case tok::greater:
3054       --OpenAngles;
3055       break;
3056     default:
3057       if (NextToken->isSimpleTypeSpecifier()) {
3058         parseRequiresExpression(RequiresToken);
3059         return false;
3060       }
3061       break;
3062     }
3063   }
3064 
3065   // This seems to be a complicated expression, just assume it's a clause.
3066   parseRequiresClause(RequiresToken);
3067   return true;
3068 }
3069 
3070 /// \brief Parses a requires clause.
3071 /// \param RequiresToken The requires keyword token, which starts this clause.
3072 /// \pre We need to be on the next token after the requires keyword.
3073 /// \sa parseRequiresExpression
3074 ///
3075 /// Returns if it either has finished parsing the clause, or it detects, that
3076 /// the clause is incorrect.
3077 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3078   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3079   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3080 
3081   // If there is no previous token, we are within a requires expression,
3082   // otherwise we will always have the template or function declaration in front
3083   // of it.
3084   bool InRequiresExpression =
3085       !RequiresToken->Previous ||
3086       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3087 
3088   RequiresToken->setFinalizedType(InRequiresExpression
3089                                       ? TT_RequiresClauseInARequiresExpression
3090                                       : TT_RequiresClause);
3091 
3092   parseConstraintExpression();
3093 
3094   if (!InRequiresExpression)
3095     FormatTok->Previous->ClosesRequiresClause = true;
3096 }
3097 
3098 /// \brief Parses a requires expression.
3099 /// \param RequiresToken The requires keyword token, which starts this clause.
3100 /// \pre We need to be on the next token after the requires keyword.
3101 /// \sa parseRequiresClause
3102 ///
3103 /// Returns if it either has finished parsing the expression, or it detects,
3104 /// that the expression is incorrect.
3105 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3106   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3107   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3108 
3109   RequiresToken->setFinalizedType(TT_RequiresExpression);
3110 
3111   if (FormatTok->is(tok::l_paren)) {
3112     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3113     parseParens();
3114   }
3115 
3116   if (FormatTok->is(tok::l_brace)) {
3117     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3118     parseChildBlock(/*CanContainBracedList=*/false,
3119                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3120   }
3121 }
3122 
/// \brief Parses a constraint expression.
///
/// This is either the definition of a concept, or the body of a requires
/// clause. It returns, when the parsing is complete, or the expression is
/// incorrect.
///
/// Tokens are consumed one construct at a time in a loop that runs until the
/// end of input; the function returns at the first token that cannot continue
/// the constraint expression, leaving that token as the current token.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;
  do {
    // Take this iteration's permission and reset it for the next one; only
    // the cases below that set LambdaNextTimeAllowed re-enable it.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested requires inside a constraint expression is always parsed as
      // a requires expression.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      break;

    case tok::l_square:
      // Stop unless a lambda is allowed here and could actually be parsed.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      // These tokens end the constraint expression.
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Carry this iteration's lambda permission over to the next token.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
    case tok::kw_decltype:
      LambdaNextTimeAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // A named cast has to be followed by its '<...>' part, which is parsed
      // as a list closed by '>'.
      nextToken();
      if (!FormatTok->is(tok::less))
        return;

      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    case tok::kw_bool:
      // bool is only allowed if it is directly followed by a paren for a cast:
      // concept C = bool(...);
      // and bool is the only type, all other types as cast must be inside a
      // cast to bool and thus are handled by the other cases.
      nextToken();
      if (FormatTok->isNot(tok::l_paren))
        return;
      parseParens();
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less))
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      break;
    }
  } while (!eof());
}
3263 
3264 bool UnwrappedLineParser::parseEnum() {
3265   const FormatToken &InitialToken = *FormatTok;
3266 
3267   // Won't be 'enum' for NS_ENUMs.
3268   if (FormatTok->is(tok::kw_enum))
3269     nextToken();
3270 
3271   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3272   // declarations. An "enum" keyword followed by a colon would be a syntax
3273   // error and thus assume it is just an identifier.
3274   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3275     return false;
3276 
3277   // In protobuf, "enum" can be used as a field name.
3278   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3279     return false;
3280 
3281   // Eat up enum class ...
3282   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3283     nextToken();
3284 
3285   while (FormatTok->Tok.getIdentifierInfo() ||
3286          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3287                             tok::greater, tok::comma, tok::question)) {
3288     nextToken();
3289     // We can have macros or attributes in between 'enum' and the enum name.
3290     if (FormatTok->is(tok::l_paren))
3291       parseParens();
3292     if (FormatTok->is(tok::identifier)) {
3293       nextToken();
3294       // If there are two identifiers in a row, this is likely an elaborate
3295       // return type. In Java, this can be "implements", etc.
3296       if (Style.isCpp() && FormatTok->is(tok::identifier))
3297         return false;
3298     }
3299   }
3300 
3301   // Just a declaration or something is wrong.
3302   if (FormatTok->isNot(tok::l_brace))
3303     return true;
3304   FormatTok->setFinalizedType(TT_EnumLBrace);
3305   FormatTok->setBlockKind(BK_Block);
3306 
3307   if (Style.Language == FormatStyle::LK_Java) {
3308     // Java enums are different.
3309     parseJavaEnumBody();
3310     return true;
3311   }
3312   if (Style.Language == FormatStyle::LK_Proto) {
3313     parseBlock(/*MustBeDeclaration=*/true);
3314     return true;
3315   }
3316 
3317   if (!Style.AllowShortEnumsOnASingleLine &&
3318       ShouldBreakBeforeBrace(Style, InitialToken))
3319     addUnwrappedLine();
3320   // Parse enum body.
3321   nextToken();
3322   if (!Style.AllowShortEnumsOnASingleLine) {
3323     addUnwrappedLine();
3324     Line->Level += 1;
3325   }
3326   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3327                                    /*IsEnum=*/true);
3328   if (!Style.AllowShortEnumsOnASingleLine)
3329     Line->Level -= 1;
3330   if (HasError) {
3331     if (FormatTok->is(tok::semi))
3332       nextToken();
3333     addUnwrappedLine();
3334   }
3335   return true;
3336 
3337   // There is no addUnwrappedLine() here so that we fall through to parsing a
3338   // structural element afterwards. Thus, in "enum A {} n, m;",
3339   // "} n, m;" will end up in one unwrapped line.
3340 }
3341 
3342 bool UnwrappedLineParser::parseStructLike() {
3343   // parseRecord falls through and does not yet add an unwrapped line as a
3344   // record declaration or definition can start a structural element.
3345   parseRecord();
3346   // This does not apply to Java, JavaScript and C#.
3347   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3348       Style.isCSharp()) {
3349     if (FormatTok->is(tok::semi))
3350       nextToken();
3351     addUnwrappedLine();
3352     return true;
3353   }
3354   return false;
3355 }
3356 
3357 namespace {
3358 // A class used to set and restore the Token position when peeking
3359 // ahead in the token source.
3360 class ScopedTokenPosition {
3361   unsigned StoredPosition;
3362   FormatTokenSource *Tokens;
3363 
3364 public:
3365   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3366     assert(Tokens && "Tokens expected to not be null");
3367     StoredPosition = Tokens->getPosition();
3368   }
3369 
3370   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3371 };
3372 } // namespace
3373 
3374 // Look to see if we have [[ by looking ahead, if
3375 // its not then rewind to the original position.
3376 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3377   ScopedTokenPosition AutoPosition(Tokens);
3378   FormatToken *Tok = Tokens->getNextToken();
3379   // We already read the first [ check for the second.
3380   if (!Tok->is(tok::l_square))
3381     return false;
3382   // Double check that the attribute is just something
3383   // fairly simple.
3384   while (Tok->isNot(tok::eof)) {
3385     if (Tok->is(tok::r_square))
3386       break;
3387     Tok = Tokens->getNextToken();
3388   }
3389   if (Tok->is(tok::eof))
3390     return false;
3391   Tok = Tokens->getNextToken();
3392   if (!Tok->is(tok::r_square))
3393     return false;
3394   Tok = Tokens->getNextToken();
3395   if (Tok->is(tok::semi))
3396     return false;
3397   return true;
3398 }
3399 
3400 void UnwrappedLineParser::parseJavaEnumBody() {
3401   // Determine whether the enum is simple, i.e. does not have a semicolon or
3402   // constants with class bodies. Simple enums can be formatted like braced
3403   // lists, contracted to a single line, etc.
3404   unsigned StoredPosition = Tokens->getPosition();
3405   bool IsSimple = true;
3406   FormatToken *Tok = Tokens->getNextToken();
3407   while (!Tok->is(tok::eof)) {
3408     if (Tok->is(tok::r_brace))
3409       break;
3410     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3411       IsSimple = false;
3412       break;
3413     }
3414     // FIXME: This will also mark enums with braces in the arguments to enum
3415     // constants as "not simple". This is probably fine in practice, though.
3416     Tok = Tokens->getNextToken();
3417   }
3418   FormatTok = Tokens->setPosition(StoredPosition);
3419 
3420   if (IsSimple) {
3421     nextToken();
3422     parseBracedList();
3423     addUnwrappedLine();
3424     return;
3425   }
3426 
3427   // Parse the body of a more complex enum.
3428   // First add a line for everything up to the "{".
3429   nextToken();
3430   addUnwrappedLine();
3431   ++Line->Level;
3432 
3433   // Parse the enum constants.
3434   while (FormatTok) {
3435     if (FormatTok->is(tok::l_brace)) {
3436       // Parse the constant's class body.
3437       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3438                  /*MunchSemi=*/false);
3439     } else if (FormatTok->is(tok::l_paren)) {
3440       parseParens();
3441     } else if (FormatTok->is(tok::comma)) {
3442       nextToken();
3443       addUnwrappedLine();
3444     } else if (FormatTok->is(tok::semi)) {
3445       nextToken();
3446       addUnwrappedLine();
3447       break;
3448     } else if (FormatTok->is(tok::r_brace)) {
3449       addUnwrappedLine();
3450       break;
3451     } else {
3452       nextToken();
3453     }
3454   }
3455 
3456   // Parse the class body after the enum's ";" if any.
3457   parseLevel(/*HasOpeningBrace=*/true, /*CanContainBracedList=*/true);
3458   nextToken();
3459   --Line->Level;
3460   addUnwrappedLine();
3461 }
3462 
/// Parses a record (class, struct, union or a similar construct such as an
/// interface): the name with any attributes or macros around it, an optional
/// base-clause or template part, and the body if there is one.
///
/// The current token is the record keyword; it is consumed first.
///
/// \param ParseAsExpr If true, the body is parsed with parseChildBlock()
/// instead of parseBlock().
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier counts as a macro candidate when its text equals its own
    // upper-cased form.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip over the base-clause / template part until the record body, a
    // semicolon or the end of input is reached.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A brace that is not a braced list ends the scan; it is handled as
        // the record body below.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            break;
        }
      }
      // A semicolon means there is no body to parse.
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // C# generic type constraints start on a line of their own.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }

  // Maps the record keyword to the token type used for its opening brace.
  auto GetBraceType = [](const FormatToken &RecordTok) {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return TT_ClassLBrace;
    case tok::kw_struct:
      return TT_StructLBrace;
    case tok::kw_union:
      return TT_UnionLBrace;
    default:
      // Useful for e.g. interface.
      return TT_RecordLBrace;
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(GetBraceType(InitialToken));
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With IndentAccessModifiers the body is parsed with two extra levels
      // instead of one.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3571 
3572 void UnwrappedLineParser::parseObjCMethod() {
3573   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3574          "'(' or identifier expected.");
3575   do {
3576     if (FormatTok->is(tok::semi)) {
3577       nextToken();
3578       addUnwrappedLine();
3579       return;
3580     } else if (FormatTok->is(tok::l_brace)) {
3581       if (Style.BraceWrapping.AfterFunction)
3582         addUnwrappedLine();
3583       parseBlock();
3584       addUnwrappedLine();
3585       return;
3586     } else {
3587       nextToken();
3588     }
3589   } while (!eof());
3590 }
3591 
3592 void UnwrappedLineParser::parseObjCProtocolList() {
3593   assert(FormatTok->is(tok::less) && "'<' expected.");
3594   do {
3595     nextToken();
3596     // Early exit in case someone forgot a close angle.
3597     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3598         FormatTok->isObjCAtKeyword(tok::objc_end))
3599       return;
3600   } while (!eof() && FormatTok->isNot(tok::greater));
3601   nextToken(); // Skip '>'.
3602 }
3603 
3604 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3605   do {
3606     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3607       nextToken();
3608       addUnwrappedLine();
3609       break;
3610     }
3611     if (FormatTok->is(tok::l_brace)) {
3612       parseBlock();
3613       // In ObjC interfaces, nothing should be following the "}".
3614       addUnwrappedLine();
3615     } else if (FormatTok->is(tok::r_brace)) {
3616       // Ignore stray "}". parseStructuralElement doesn't consume them.
3617       nextToken();
3618       addUnwrappedLine();
3619     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3620       nextToken();
3621       parseObjCMethod();
3622     } else {
3623       parseStructuralElement();
3624     }
3625   } while (!eof());
3626 }
3627 
3628 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3629   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3630          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3631   nextToken();
3632   nextToken(); // interface name
3633 
3634   // @interface can be followed by a lightweight generic
3635   // specialization list, then either a base class or a category.
3636   if (FormatTok->is(tok::less))
3637     parseObjCLightweightGenerics();
3638   if (FormatTok->is(tok::colon)) {
3639     nextToken();
3640     nextToken(); // base class name
3641     // The base class can also have lightweight generics applied to it.
3642     if (FormatTok->is(tok::less))
3643       parseObjCLightweightGenerics();
3644   } else if (FormatTok->is(tok::l_paren))
3645     // Skip category, if present.
3646     parseParens();
3647 
3648   if (FormatTok->is(tok::less))
3649     parseObjCProtocolList();
3650 
3651   if (FormatTok->is(tok::l_brace)) {
3652     if (Style.BraceWrapping.AfterObjCDeclaration)
3653       addUnwrappedLine();
3654     parseBlock(/*MustBeDeclaration=*/true);
3655   }
3656 
3657   // With instance variables, this puts '}' on its own line.  Without instance
3658   // variables, this ends the @interface line.
3659   addUnwrappedLine();
3660 
3661   parseObjCUntilAtEnd();
3662 }
3663 
3664 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3665   assert(FormatTok->is(tok::less));
3666   // Unlike protocol lists, generic parameterizations support
3667   // nested angles:
3668   //
3669   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3670   //     NSObject <NSCopying, NSSecureCoding>
3671   //
3672   // so we need to count how many open angles we have left.
3673   unsigned NumOpenAngles = 1;
3674   do {
3675     nextToken();
3676     // Early exit in case someone forgot a close angle.
3677     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3678         FormatTok->isObjCAtKeyword(tok::objc_end))
3679       break;
3680     if (FormatTok->is(tok::less))
3681       ++NumOpenAngles;
3682     else if (FormatTok->is(tok::greater)) {
3683       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3684       --NumOpenAngles;
3685     }
3686   } while (!eof() && NumOpenAngles != 0);
3687   nextToken(); // Skip '>'.
3688 }
3689 
3690 // Returns true for the declaration/definition form of @protocol,
3691 // false for the expression form.
3692 bool UnwrappedLineParser::parseObjCProtocol() {
3693   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3694   nextToken();
3695 
3696   if (FormatTok->is(tok::l_paren))
3697     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3698     return false;
3699 
3700   // The definition/declaration form,
3701   // @protocol Foo
3702   // - (int)someMethod;
3703   // @end
3704 
3705   nextToken(); // protocol name
3706 
3707   if (FormatTok->is(tok::less))
3708     parseObjCProtocolList();
3709 
3710   // Check for protocol declaration.
3711   if (FormatTok->is(tok::semi)) {
3712     nextToken();
3713     addUnwrappedLine();
3714     return true;
3715   }
3716 
3717   addUnwrappedLine();
3718   parseObjCUntilAtEnd();
3719   return true;
3720 }
3721 
3722 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3723   bool IsImport = FormatTok->is(Keywords.kw_import);
3724   assert(IsImport || FormatTok->is(tok::kw_export));
3725   nextToken();
3726 
3727   // Consume the "default" in "export default class/function".
3728   if (FormatTok->is(tok::kw_default))
3729     nextToken();
3730 
3731   // Consume "async function", "function" and "default function", so that these
3732   // get parsed as free-standing JS functions, i.e. do not require a trailing
3733   // semicolon.
3734   if (FormatTok->is(Keywords.kw_async))
3735     nextToken();
3736   if (FormatTok->is(Keywords.kw_function)) {
3737     nextToken();
3738     return;
3739   }
3740 
3741   // For imports, `export *`, `export {...}`, consume the rest of the line up
3742   // to the terminating `;`. For everything else, just return and continue
3743   // parsing the structural element, i.e. the declaration or expression for
3744   // `export default`.
3745   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3746       !FormatTok->isStringLiteral())
3747     return;
3748 
3749   while (!eof()) {
3750     if (FormatTok->is(tok::semi))
3751       return;
3752     if (Line->Tokens.empty()) {
3753       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3754       // import statement should terminate.
3755       return;
3756     }
3757     if (FormatTok->is(tok::l_brace)) {
3758       FormatTok->setBlockKind(BK_Block);
3759       nextToken();
3760       parseBracedList();
3761     } else {
3762       nextToken();
3763     }
3764   }
3765 }
3766 
3767 void UnwrappedLineParser::parseStatementMacro() {
3768   nextToken();
3769   if (FormatTok->is(tok::l_paren))
3770     parseParens();
3771   if (FormatTok->is(tok::semi))
3772     nextToken();
3773   addUnwrappedLine();
3774 }
3775 
3776 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3777                                                  StringRef Prefix = "") {
3778   llvm::dbgs() << Prefix << "Line(" << Line.Level
3779                << ", FSC=" << Line.FirstStartColumn << ")"
3780                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3781   for (const auto &Node : Line.Tokens) {
3782     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3783                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3784                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3785   }
3786   for (const auto &Node : Line.Tokens)
3787     for (const auto &ChildNode : Node.Children)
3788       printDebugInfo(ChildNode, "\nChild: ");
3789 
3790   llvm::dbgs() << "\n";
3791 }
3792 
3793 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3794   if (Line->Tokens.empty())
3795     return;
3796   LLVM_DEBUG({
3797     if (CurrentLines == &Lines)
3798       printDebugInfo(*Line);
3799   });
3800 
3801   // If this line closes a block when in Whitesmiths mode, remember that
3802   // information so that the level can be decreased after the line is added.
3803   // This has to happen after the addition of the line since the line itself
3804   // needs to be indented.
3805   bool ClosesWhitesmithsBlock =
3806       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3807       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3808 
3809   CurrentLines->push_back(std::move(*Line));
3810   Line->Tokens.clear();
3811   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3812   Line->FirstStartColumn = 0;
3813 
3814   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3815     --Line->Level;
3816   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3817     CurrentLines->append(
3818         std::make_move_iterator(PreprocessorDirectives.begin()),
3819         std::make_move_iterator(PreprocessorDirectives.end()));
3820     PreprocessorDirectives.clear();
3821   }
3822   // Disconnect the current token from the last token on the previous line.
3823   FormatTok->Previous = nullptr;
3824 }
3825 
3826 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
3827 
3828 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3829   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3830          FormatTok.NewlinesBefore > 0;
3831 }
3832 
// Checks if \p FormatTok is a line comment that continues the line comment
// section on \p Line.
//
// Returns false right away if \p Line has no tokens, or if the text of
// \p FormatTok (without a leading "//" or "/*") matches
// \p CommentPragmasRegex. Otherwise the decision is delegated to
// continuesLineComment(), which is given the last token of \p Line as the
// previous token and the "min column token" computed below.
static bool
continuesLineCommentSection(const FormatToken &FormatTok,
                            const UnwrappedLine &Line,
                            const llvm::Regex &CommentPragmasRegex) {
  if (Line.Tokens.empty())
    return false;

  // Strip the two-character comment introducer, if present, so that the
  // pragma regex is matched against the comment's content only.
  StringRef IndentContent = FormatTok.TokenText;
  if (FormatTok.TokenText.startswith("//") ||
      FormatTok.TokenText.startswith("/*"))
    IndentContent = FormatTok.TokenText.substr(2);
  if (CommentPragmasRegex.match(IndentContent))
    return false;

  // If Line starts with a line comment, then FormatTok continues the comment
  // section if its original column is greater or equal to the original start
  // column of the line.
  //
  // Define the min column token of a line as follows: if a line ends in '{' or
  // contains a '{' followed by a line comment, then the min column token is
  // that '{'. Otherwise, the min column token of the line is the last token of
  // the line that is preceded by a newline, or the first token of the line if
  // there is no such token.
  //
  // If Line starts with a token other than a line comment, then FormatTok
  // continues the comment section if its original column is greater than the
  // original start column of the min column token of the line.
  //
  // For example, the second line comment continues the first in these cases:
  //
  // // first line
  // // second line
  //
  // and:
  //
  // // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  //  // second line
  //
  // and:
  //
  // do { // first line
  //      // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //    // second line
  //   b
  // };
  //
  // The second line comment doesn't continue the first in these cases:
  //
  //   // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  // // second line
  //
  // and:
  //
  // do { // first line
  //   // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //   // second line
  // };
  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;

  // Scan for '{//'. If found, use the column of '{' as a min column for line
  // comment section continuation. The scan stops at the first such '{'; tokens
  // after it are not looked at.
  const FormatToken *PreviousToken = nullptr;
  for (const UnwrappedLineNode &Node : Line.Tokens) {
    if (PreviousToken && PreviousToken->is(tok::l_brace) &&
        isLineComment(*Node.Tok)) {
      MinColumnToken = PreviousToken;
      break;
    }
    PreviousToken = Node.Tok;

    // Grab the last newline preceding a token in this unwrapped line.
    if (Node.Tok->NewlinesBefore > 0)
      MinColumnToken = Node.Tok;
  }
  // Covers the case of a line that ends in '{' (and, redundantly, the '{//'
  // case found above, where PreviousToken still points at that '{').
  if (PreviousToken && PreviousToken->is(tok::l_brace))
    MinColumnToken = PreviousToken;

  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
                              MinColumnToken);
}
3938 
3939 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3940   bool JustComments = Line->Tokens.empty();
3941   for (FormatToken *Tok : CommentsBeforeNextToken) {
3942     // Line comments that belong to the same line comment section are put on the
3943     // same line since later we might want to reflow content between them.
3944     // Additional fine-grained breaking of line comment sections is controlled
3945     // by the class BreakableLineCommentSection in case it is desirable to keep
3946     // several line comment sections in the same unwrapped line.
3947     //
3948     // FIXME: Consider putting separate line comment sections as children to the
3949     // unwrapped line instead.
3950     Tok->ContinuesLineCommentSection =
3951         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3952     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3953       addUnwrappedLine();
3954     pushToken(Tok);
3955   }
3956   if (NewlineBeforeNext && JustComments)
3957     addUnwrappedLine();
3958   CommentsBeforeNextToken.clear();
3959 }
3960 
3961 void UnwrappedLineParser::nextToken(int LevelDifference) {
3962   if (eof())
3963     return;
3964   flushComments(isOnNewLine(*FormatTok));
3965   pushToken(FormatTok);
3966   FormatToken *Previous = FormatTok;
3967   if (!Style.isJavaScript())
3968     readToken(LevelDifference);
3969   else
3970     readTokenWithJavaScriptASI();
3971   FormatTok->Previous = Previous;
3972 }
3973 
3974 void UnwrappedLineParser::distributeComments(
3975     const SmallVectorImpl<FormatToken *> &Comments,
3976     const FormatToken *NextTok) {
3977   // Whether or not a line comment token continues a line is controlled by
3978   // the method continuesLineCommentSection, with the following caveat:
3979   //
3980   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3981   // that each comment line from the trail is aligned with the next token, if
3982   // the next token exists. If a trail exists, the beginning of the maximal
3983   // trail is marked as a start of a new comment section.
3984   //
3985   // For example in this code:
3986   //
3987   // int a; // line about a
3988   //   // line 1 about b
3989   //   // line 2 about b
3990   //   int b;
3991   //
3992   // the two lines about b form a maximal trail, so there are two sections, the
3993   // first one consisting of the single comment "// line about a" and the
3994   // second one consisting of the next two comments.
3995   if (Comments.empty())
3996     return;
3997   bool ShouldPushCommentsInCurrentLine = true;
3998   bool HasTrailAlignedWithNextToken = false;
3999   unsigned StartOfTrailAlignedWithNextToken = 0;
4000   if (NextTok) {
4001     // We are skipping the first element intentionally.
4002     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4003       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4004         HasTrailAlignedWithNextToken = true;
4005         StartOfTrailAlignedWithNextToken = i;
4006       }
4007     }
4008   }
4009   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4010     FormatToken *FormatTok = Comments[i];
4011     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4012       FormatTok->ContinuesLineCommentSection = false;
4013     } else {
4014       FormatTok->ContinuesLineCommentSection =
4015           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4016     }
4017     if (!FormatTok->ContinuesLineCommentSection &&
4018         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
4019       ShouldPushCommentsInCurrentLine = false;
4020     if (ShouldPushCommentsInCurrentLine)
4021       pushToken(FormatTok);
4022     else
4023       CommentsBeforeNextToken.push_back(FormatTok);
4024   }
4025 }
4026 
/// Reads tokens from the token source until FormatTok is a non-comment token
/// that should be handed to the parser, or until the end of the file.
///
/// Along the way this function
///  - turns conflict marker tokens into conditional compilation state,
///  - parses preprocessor directives that start a line,
///  - skips tokens while the top of PPStack is PP_Unreachable (outside of a
///    preprocessor directive), and
///  - collects comment tokens and passes them to distributeComments().
///
/// \param LevelDifference added to the line level used while parsing a
/// preprocessor directive encountered here.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments read since the last non-comment token.
  SmallVector<FormatToken *, 1> Comments;
  bool PreviousWasComment = false;
  bool FirstNonCommentOnLine = false;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // Conflict markers are not returned as tokens; they are mapped onto the
    // conditional compilation handlers, and the token following a marker is
    // forced to start on a new line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart)
        conditionalCompilationStart(/*Unreachable=*/false);
      else if (FormatTok->getType() == TT_ConflictAlternative)
        conditionalCompilationAlternative();
      else if (FormatTok->getType() == TT_ConflictEnd)
        conditionalCompilationEnd();
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Computes whether Tok is the first non-comment token on its line, given
    // the previous value of that flag and whether the token before Tok was a
    // comment.
    auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
                                      const FormatToken &Tok,
                                      bool PreviousWasComment) {
      auto IsFirstOnLine = [](const FormatToken &Tok) {
        return Tok.HasUnescapedNewline || Tok.IsFirst;
      };

      // Consider preprocessor directives preceded by block comments as first
      // on line.
      if (PreviousWasComment)
        return FirstNonCommentOnLine || IsFirstOnLine(Tok);
      return IsFirstOnLine(Tok);
    };

    FirstNonCommentOnLine = IsFirstNonCommentOnLine(
        FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    PreviousWasComment = FormatTok->is(tok::comment);

    // A '#' that is the first non-comment token on its line starts a
    // preprocessor directive, unless we are already inside one. This is a
    // loop because parsePPDirective() may leave FormatTok on the '#' of the
    // next directive.
    while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
           FirstNonCommentOnLine) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
      // Re-evaluate the flags for the token that follows the directive.
      PreviousWasComment = FormatTok->is(tok::comment);
      FirstNonCommentOnLine = IsFirstNonCommentOnLine(
          FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    }

    // Tokens in an unreachable preprocessor branch are dropped: they are
    // neither returned nor recorded as comments.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective)
      continue;

    // A non-comment token ends the search; the comments collected so far are
    // distributed relative to it.
    if (!FormatTok->is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // The loop ended at end of file without returning above: distribute any
  // remaining comments with no following token.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
4106 
4107 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4108   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4109   if (MustBreakBeforeNextToken) {
4110     Line->Tokens.back().Tok->MustBreakBefore = true;
4111     MustBreakBeforeNextToken = false;
4112   }
4113 }
4114 
4115 } // end namespace format
4116 } // end namespace clang
4117