1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 #include <utility>
23 
24 #define DEBUG_TYPE "format-parser"
25 
26 namespace clang {
27 namespace format {
28 
29 class FormatTokenSource {
30 public:
31   virtual ~FormatTokenSource() {}
32 
33   // Returns the next token in the token stream.
34   virtual FormatToken *getNextToken() = 0;
35 
36   // Returns the token preceding the token returned by the last call to
37   // getNextToken() in the token stream, or nullptr if no such token exists.
38   virtual FormatToken *getPreviousToken() = 0;
39 
40   // Returns the token that would be returned by the next call to
41   // getNextToken().
42   virtual FormatToken *peekNextToken() = 0;
43 
44   // Returns the token that would be returned after the next N calls to
45   // getNextToken(). N needs to be greater than zero, and small enough that
46   // there are still tokens. Check for tok::eof with N-1 before calling it with
47   // N.
48   virtual FormatToken *peekNextToken(int N) = 0;
49 
50   // Returns whether we are at the end of the file.
51   // This can be different from whether getNextToken() returned an eof token
52   // when the FormatTokenSource is a view on a part of the token stream.
53   virtual bool isEOF() = 0;
54 
55   // Gets the current position in the token stream, to be used by setPosition().
56   virtual unsigned getPosition() = 0;
57 
58   // Resets the token stream to the state it was in when getPosition() returned
59   // Position, and return the token at that position in the stream.
60   virtual FormatToken *setPosition(unsigned Position) = 0;
61 };
62 
63 namespace {
64 
65 class ScopedDeclarationState {
66 public:
67   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
68                          bool MustBeDeclaration)
69       : Line(Line), Stack(Stack) {
70     Line.MustBeDeclaration = MustBeDeclaration;
71     Stack.push_back(MustBeDeclaration);
72   }
73   ~ScopedDeclarationState() {
74     Stack.pop_back();
75     if (!Stack.empty())
76       Line.MustBeDeclaration = Stack.back();
77     else
78       Line.MustBeDeclaration = true;
79   }
80 
81 private:
82   UnwrappedLine &Line;
83   llvm::BitVector &Stack;
84 };
85 
86 static bool isLineComment(const FormatToken &FormatTok) {
87   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
88 }
89 
90 // Checks if \p FormatTok is a line comment that continues the line comment
91 // \p Previous. The original column of \p MinColumnToken is used to determine
92 // whether \p FormatTok is indented enough to the right to continue \p Previous.
93 static bool continuesLineComment(const FormatToken &FormatTok,
94                                  const FormatToken *Previous,
95                                  const FormatToken *MinColumnToken) {
96   if (!Previous || !MinColumnToken)
97     return false;
98   unsigned MinContinueColumn =
99       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
100   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
101          isLineComment(*Previous) &&
102          FormatTok.OriginalColumn >= MinContinueColumn;
103 }
104 
105 class ScopedMacroState : public FormatTokenSource {
106 public:
107   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
108                    FormatToken *&ResetToken)
109       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
110         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
111         Token(nullptr), PreviousToken(nullptr) {
112     FakeEOF.Tok.startToken();
113     FakeEOF.Tok.setKind(tok::eof);
114     TokenSource = this;
115     Line.Level = 0;
116     Line.InPPDirective = true;
117   }
118 
119   ~ScopedMacroState() override {
120     TokenSource = PreviousTokenSource;
121     ResetToken = Token;
122     Line.InPPDirective = false;
123     Line.Level = PreviousLineLevel;
124   }
125 
126   FormatToken *getNextToken() override {
127     // The \c UnwrappedLineParser guards against this by never calling
128     // \c getNextToken() after it has encountered the first eof token.
129     assert(!eof());
130     PreviousToken = Token;
131     Token = PreviousTokenSource->getNextToken();
132     if (eof())
133       return &FakeEOF;
134     return Token;
135   }
136 
137   FormatToken *getPreviousToken() override {
138     return PreviousTokenSource->getPreviousToken();
139   }
140 
141   FormatToken *peekNextToken() override {
142     if (eof())
143       return &FakeEOF;
144     return PreviousTokenSource->peekNextToken();
145   }
146 
147   FormatToken *peekNextToken(int N) override {
148     assert(N > 0);
149     if (eof())
150       return &FakeEOF;
151     return PreviousTokenSource->peekNextToken(N);
152   }
153 
154   bool isEOF() override { return PreviousTokenSource->isEOF(); }
155 
156   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
157 
158   FormatToken *setPosition(unsigned Position) override {
159     PreviousToken = nullptr;
160     Token = PreviousTokenSource->setPosition(Position);
161     return Token;
162   }
163 
164 private:
165   bool eof() {
166     return Token && Token->HasUnescapedNewline &&
167            !continuesLineComment(*Token, PreviousToken,
168                                  /*MinColumnToken=*/PreviousToken);
169   }
170 
171   FormatToken FakeEOF;
172   UnwrappedLine &Line;
173   FormatTokenSource *&TokenSource;
174   FormatToken *&ResetToken;
175   unsigned PreviousLineLevel;
176   FormatTokenSource *PreviousTokenSource;
177 
178   FormatToken *Token;
179   FormatToken *PreviousToken;
180 };
181 
182 } // end anonymous namespace
183 
184 class ScopedLineState {
185 public:
186   ScopedLineState(UnwrappedLineParser &Parser,
187                   bool SwitchToPreprocessorLines = false)
188       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
189     if (SwitchToPreprocessorLines)
190       Parser.CurrentLines = &Parser.PreprocessorDirectives;
191     else if (!Parser.Line->Tokens.empty())
192       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
193     PreBlockLine = std::move(Parser.Line);
194     Parser.Line = std::make_unique<UnwrappedLine>();
195     Parser.Line->Level = PreBlockLine->Level;
196     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
197   }
198 
199   ~ScopedLineState() {
200     if (!Parser.Line->Tokens.empty())
201       Parser.addUnwrappedLine();
202     assert(Parser.Line->Tokens.empty());
203     Parser.Line = std::move(PreBlockLine);
204     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
205       Parser.MustBreakBeforeNextToken = true;
206     Parser.CurrentLines = OriginalLines;
207   }
208 
209 private:
210   UnwrappedLineParser &Parser;
211 
212   std::unique_ptr<UnwrappedLine> PreBlockLine;
213   SmallVectorImpl<UnwrappedLine> *OriginalLines;
214 };
215 
216 class CompoundStatementIndenter {
217 public:
218   CompoundStatementIndenter(UnwrappedLineParser *Parser,
219                             const FormatStyle &Style, unsigned &LineLevel)
220       : CompoundStatementIndenter(Parser, LineLevel,
221                                   Style.BraceWrapping.AfterControlStatement,
222                                   Style.BraceWrapping.IndentBraces) {}
223   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
224                             bool WrapBrace, bool IndentBrace)
225       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
226     if (WrapBrace)
227       Parser->addUnwrappedLine();
228     if (IndentBrace)
229       ++LineLevel;
230   }
231   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
232 
233 private:
234   unsigned &LineLevel;
235   unsigned OldLineLevel;
236 };
237 
238 namespace {
239 
240 class IndexedTokenSource : public FormatTokenSource {
241 public:
242   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
243       : Tokens(Tokens), Position(-1) {}
244 
245   FormatToken *getNextToken() override {
246     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
247       LLVM_DEBUG({
248         llvm::dbgs() << "Next ";
249         dbgToken(Position);
250       });
251       return Tokens[Position];
252     }
253     ++Position;
254     LLVM_DEBUG({
255       llvm::dbgs() << "Next ";
256       dbgToken(Position);
257     });
258     return Tokens[Position];
259   }
260 
261   FormatToken *getPreviousToken() override {
262     return Position > 0 ? Tokens[Position - 1] : nullptr;
263   }
264 
265   FormatToken *peekNextToken() override {
266     int Next = Position + 1;
267     LLVM_DEBUG({
268       llvm::dbgs() << "Peeking ";
269       dbgToken(Next);
270     });
271     return Tokens[Next];
272   }
273 
274   FormatToken *peekNextToken(int N) override {
275     assert(N > 0);
276     int Next = Position + N;
277     LLVM_DEBUG({
278       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
279       dbgToken(Next);
280     });
281     return Tokens[Next];
282   }
283 
284   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
285 
286   unsigned getPosition() override {
287     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
288     assert(Position >= 0);
289     return Position;
290   }
291 
292   FormatToken *setPosition(unsigned P) override {
293     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
294     Position = P;
295     return Tokens[Position];
296   }
297 
298   void reset() { Position = -1; }
299 
300 private:
301   void dbgToken(int Position, llvm::StringRef Indent = "") {
302     FormatToken *Tok = Tokens[Position];
303     llvm::dbgs() << Indent << "[" << Position
304                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
305                  << ", Macro: " << !!Tok->MacroCtx << "\n";
306   }
307 
308   ArrayRef<FormatToken *> Tokens;
309   int Position;
310 };
311 
312 } // end anonymous namespace
313 
314 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
315                                          const AdditionalKeywords &Keywords,
316                                          unsigned FirstStartColumn,
317                                          ArrayRef<FormatToken *> Tokens,
318                                          UnwrappedLineConsumer &Callback)
319     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
320       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
321       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
322       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
323       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
324                        ? IG_Rejected
325                        : IG_Inited),
326       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
327 
328 void UnwrappedLineParser::reset() {
329   PPBranchLevel = -1;
330   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
331                      ? IG_Rejected
332                      : IG_Inited;
333   IncludeGuardToken = nullptr;
334   Line.reset(new UnwrappedLine);
335   CommentsBeforeNextToken.clear();
336   FormatTok = nullptr;
337   MustBreakBeforeNextToken = false;
338   PreprocessorDirectives.clear();
339   CurrentLines = &Lines;
340   DeclarationScopeStack.clear();
341   NestedTooDeep.clear();
342   PPStack.clear();
343   Line->FirstStartColumn = FirstStartColumn;
344 }
345 
346 void UnwrappedLineParser::parse() {
347   IndexedTokenSource TokenSource(AllTokens);
348   Line->FirstStartColumn = FirstStartColumn;
349   do {
350     LLVM_DEBUG(llvm::dbgs() << "----\n");
351     reset();
352     Tokens = &TokenSource;
353     TokenSource.reset();
354 
355     readToken();
356     parseFile();
357 
358     // If we found an include guard then all preprocessor directives (other than
359     // the guard) are over-indented by one.
360     if (IncludeGuard == IG_Found)
361       for (auto &Line : Lines)
362         if (Line.InPPDirective && Line.Level > 0)
363           --Line.Level;
364 
365     // Create line with eof token.
366     pushToken(FormatTok);
367     addUnwrappedLine();
368 
369     for (const UnwrappedLine &Line : Lines)
370       Callback.consumeUnwrappedLine(Line);
371 
372     Callback.finishRun();
373     Lines.clear();
374     while (!PPLevelBranchIndex.empty() &&
375            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
376       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
377       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
378     }
379     if (!PPLevelBranchIndex.empty()) {
380       ++PPLevelBranchIndex.back();
381       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
382       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
383     }
384   } while (!PPLevelBranchIndex.empty());
385 }
386 
387 void UnwrappedLineParser::parseFile() {
388   // The top-level context in a file always has declarations, except for pre-
389   // processor directives and JavaScript files.
390   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
391   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
392                                           MustBeDeclaration);
393   if (Style.Language == FormatStyle::LK_TextProto)
394     parseBracedList();
395   else
396     parseLevel(/*HasOpeningBrace=*/false, /*CanContainBracedList=*/true);
397   // Make sure to format the remaining tokens.
398   //
399   // LK_TextProto is special since its top-level is parsed as the body of a
400   // braced list, which does not necessarily have natural line separators such
401   // as a semicolon. Comments after the last entry that have been determined to
402   // not belong to that line, as in:
403   //   key: value
404   //   // endfile comment
405   // do not have a chance to be put on a line of their own until this point.
406   // Here we add this newline before end-of-file comments.
407   if (Style.Language == FormatStyle::LK_TextProto &&
408       !CommentsBeforeNextToken.empty())
409     addUnwrappedLine();
410   flushComments(true);
411   addUnwrappedLine();
412 }
413 
414 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
415   do {
416     switch (FormatTok->Tok.getKind()) {
417     case tok::l_brace:
418       return;
419     default:
420       if (FormatTok->is(Keywords.kw_where)) {
421         addUnwrappedLine();
422         nextToken();
423         parseCSharpGenericTypeConstraint();
424         break;
425       }
426       nextToken();
427       break;
428     }
429   } while (!eof());
430 }
431 
432 void UnwrappedLineParser::parseCSharpAttribute() {
433   int UnpairedSquareBrackets = 1;
434   do {
435     switch (FormatTok->Tok.getKind()) {
436     case tok::r_square:
437       nextToken();
438       --UnpairedSquareBrackets;
439       if (UnpairedSquareBrackets == 0) {
440         addUnwrappedLine();
441         return;
442       }
443       break;
444     case tok::l_square:
445       ++UnpairedSquareBrackets;
446       nextToken();
447       break;
448     default:
449       nextToken();
450       break;
451     }
452   } while (!eof());
453 }
454 
455 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
456   if (!Lines.empty() && Lines.back().InPPDirective)
457     return true;
458 
459   const FormatToken *Previous = Tokens->getPreviousToken();
460   return Previous && Previous->is(tok::comment) &&
461          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
462 }
/// \brief Parses the structural elements of one nesting level. Loops until eof
/// or, when \p HasOpeningBrace is set, until the closing brace (or
/// TT_MacroBlockEnd token) of the level is reached; that closing token is left
/// unconsumed for the caller.
/// \param HasOpeningBrace If that level is started by an opening brace.
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param IfKind Forwarded to parseStructuralElement() for elements handled by
/// the default case.
/// \param NextLBracesType The type for left brace found in this level.
/// \returns true if a simple block, or false otherwise. (A simple block has a
/// single statement.) true is only ever returned when Style.RemoveBracesLLVM
/// is set and the level was opened by a brace.
bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace,
                                     bool CanContainBracedList,
                                     IfStmtKind *IfKind,
                                     TokenType NextLBracesType) {
  // Brace type handed down to nested structural elements: braces inside a
  // compound requirement are braced lists, otherwise nothing is prescribed.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Evaluated once on entry, i.e. for the token that starts the level. Also
  // true whenever brace removal is disabled, so that the level can never be
  // reported as a simple block in that case.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it as a statement.
    auto ParseDefault = [this, HasOpeningBrace, IfKind, NextLevelLBracesType,
                         &StatementCount] {
      parseStructuralElement(IfKind, /*IsTopLevel=*/!HasOpeningBrace,
                             /*NextLBracesType=*/NextLevelLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment at this position forms an unwrapped line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setType(NextLBracesType);
      else if (FormatTok->Previous &&
               FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      // Try a braced list first; anything else is parsed as a nested block.
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*UnindentWhitesmithBraces=*/false,
                 CanContainBracedList,
                 /*NextLBracesType=*/NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace) {
        // End of this level. The closing token is not consumed here; only the
        // "simple block" verdict is computed.
        if (!Style.RemoveBracesLLVM)
          return false;
        // Not simple if the closer is a macro block end rather than a real
        // '}', if the level does not hold exactly one statement, or if a
        // comment or PP directive precedes the level's first token or the
        // closing brace.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective())
          return false;
        // A comment that follows the closing brace without a newline in
        // between also disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // A closing brace on a level that was not opened by a brace is consumed
      // and put on a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead to the first non-comment token after 'default'. This
      // advances the token source, so the position is restored afterwards.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Only the first label of the level increases the indentation.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        // In C# a '[' at the start of an element is parsed as an attribute.
        nextToken();
        parseCSharpAttribute();
        break;
      }
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());
  return false;
}
579 
580 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
581   // We'll parse forward through the tokens until we hit
582   // a closing brace or eof - note that getNextToken() will
583   // parse macros, so this will magically work inside macro
584   // definitions, too.
585   unsigned StoredPosition = Tokens->getPosition();
586   FormatToken *Tok = FormatTok;
587   const FormatToken *PrevTok = Tok->Previous;
588   // Keep a stack of positions of lbrace tokens. We will
589   // update information about whether an lbrace starts a
590   // braced init list or a different block during the loop.
591   SmallVector<FormatToken *, 8> LBraceStack;
592   assert(Tok->Tok.is(tok::l_brace));
593   do {
594     // Get next non-comment token.
595     FormatToken *NextTok;
596     unsigned ReadTokens = 0;
597     do {
598       NextTok = Tokens->getNextToken();
599       ++ReadTokens;
600     } while (NextTok->is(tok::comment));
601 
602     switch (Tok->Tok.getKind()) {
603     case tok::l_brace:
604       if (Style.isJavaScript() && PrevTok) {
605         if (PrevTok->isOneOf(tok::colon, tok::less))
606           // A ':' indicates this code is in a type, or a braced list
607           // following a label in an object literal ({a: {b: 1}}).
608           // A '<' could be an object used in a comparison, but that is nonsense
609           // code (can never return true), so more likely it is a generic type
610           // argument (`X<{a: string; b: number}>`).
611           // The code below could be confused by semicolons between the
612           // individual members in a type member list, which would normally
613           // trigger BK_Block. In both cases, this must be parsed as an inline
614           // braced init.
615           Tok->setBlockKind(BK_BracedInit);
616         else if (PrevTok->is(tok::r_paren))
617           // `) { }` can only occur in function or method declarations in JS.
618           Tok->setBlockKind(BK_Block);
619       } else {
620         Tok->setBlockKind(BK_Unknown);
621       }
622       LBraceStack.push_back(Tok);
623       break;
624     case tok::r_brace:
625       if (LBraceStack.empty())
626         break;
627       if (LBraceStack.back()->is(BK_Unknown)) {
628         bool ProbablyBracedList = false;
629         if (Style.Language == FormatStyle::LK_Proto) {
630           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
631         } else {
632           // Skip NextTok over preprocessor lines, otherwise we may not
633           // properly diagnose the block as a braced intializer
634           // if the comma separator appears after the pp directive.
635           while (NextTok->is(tok::hash)) {
636             ScopedMacroState MacroState(*Line, Tokens, NextTok);
637             do {
638               NextTok = Tokens->getNextToken();
639               ++ReadTokens;
640             } while (NextTok->isNot(tok::eof));
641           }
642 
643           // Using OriginalColumn to distinguish between ObjC methods and
644           // binary operators is a bit hacky.
645           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
646                                   NextTok->OriginalColumn == 0;
647 
648           // Try to detect a braced list. Note that regardless how we mark inner
649           // braces here, we will overwrite the BlockKind later if we parse a
650           // braced list (where all blocks inside are by default braced lists),
651           // or when we explicitly detect blocks (for example while parsing
652           // lambdas).
653 
654           // If we already marked the opening brace as braced list, the closing
655           // must also be part of it.
656           ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);
657 
658           ProbablyBracedList = ProbablyBracedList ||
659                                (Style.isJavaScript() &&
660                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
661                                                  Keywords.kw_as));
662           ProbablyBracedList = ProbablyBracedList ||
663                                (Style.isCpp() && NextTok->is(tok::l_paren));
664 
665           // If there is a comma, semicolon or right paren after the closing
666           // brace, we assume this is a braced initializer list.
667           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
668           // braced list in JS.
669           ProbablyBracedList =
670               ProbablyBracedList ||
671               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
672                                tok::r_paren, tok::r_square, tok::l_brace,
673                                tok::ellipsis);
674 
675           ProbablyBracedList =
676               ProbablyBracedList ||
677               (NextTok->is(tok::identifier) &&
678                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
679 
680           ProbablyBracedList = ProbablyBracedList ||
681                                (NextTok->is(tok::semi) &&
682                                 (!ExpectClassBody || LBraceStack.size() != 1));
683 
684           ProbablyBracedList =
685               ProbablyBracedList ||
686               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
687 
688           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
689             // We can have an array subscript after a braced init
690             // list, but C++11 attributes are expected after blocks.
691             NextTok = Tokens->getNextToken();
692             ++ReadTokens;
693             ProbablyBracedList = NextTok->isNot(tok::l_square);
694           }
695         }
696         if (ProbablyBracedList) {
697           Tok->setBlockKind(BK_BracedInit);
698           LBraceStack.back()->setBlockKind(BK_BracedInit);
699         } else {
700           Tok->setBlockKind(BK_Block);
701           LBraceStack.back()->setBlockKind(BK_Block);
702         }
703       }
704       LBraceStack.pop_back();
705       break;
706     case tok::identifier:
707       if (!Tok->is(TT_StatementMacro))
708         break;
709       LLVM_FALLTHROUGH;
710     case tok::at:
711     case tok::semi:
712     case tok::kw_if:
713     case tok::kw_while:
714     case tok::kw_for:
715     case tok::kw_switch:
716     case tok::kw_try:
717     case tok::kw___try:
718       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
719         LBraceStack.back()->setBlockKind(BK_Block);
720       break;
721     default:
722       break;
723     }
724     PrevTok = Tok;
725     Tok = NextTok;
726   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
727 
728   // Assume other blocks for all unclosed opening braces.
729   for (FormatToken *LBrace : LBraceStack)
730     if (LBrace->is(BK_Unknown))
731       LBrace->setBlockKind(BK_Block);
732 
733   FormatTok = Tokens->setPosition(StoredPosition);
734 }
735 
// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
741 
742 size_t UnwrappedLineParser::computePPHash() const {
743   size_t h = 0;
744   for (const auto &i : PPStack) {
745     hash_combine(h, size_t(i.Kind));
746     hash_combine(h, i.Line);
747   }
748   return h;
749 }
750 
751 UnwrappedLineParser::IfStmtKind
752 UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
753                                 bool MunchSemi, bool UnindentWhitesmithsBraces,
754                                 bool CanContainBracedList,
755                                 TokenType NextLBracesType) {
756   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
757          "'{' or macro block token expected");
758   FormatToken *Tok = FormatTok;
759   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
760   FormatTok->setBlockKind(BK_Block);
761 
762   // For Whitesmiths mode, jump to the next level prior to skipping over the
763   // braces.
764   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
765     ++Line->Level;
766 
767   size_t PPStartHash = computePPHash();
768 
769   unsigned InitialLevel = Line->Level;
770   nextToken(/*LevelDifference=*/AddLevels);
771 
772   if (MacroBlock && FormatTok->is(tok::l_paren))
773     parseParens();
774 
775   size_t NbPreprocessorDirectives =
776       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
777   addUnwrappedLine();
778   size_t OpeningLineIndex =
779       CurrentLines->empty()
780           ? (UnwrappedLine::kInvalidIndex)
781           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
782 
783   // Whitesmiths is weird here. The brace needs to be indented for the namespace
784   // block, but the block itself may not be indented depending on the style
785   // settings. This allows the format to back up one level in those cases.
786   if (UnindentWhitesmithsBraces)
787     --Line->Level;
788 
789   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
790                                           MustBeDeclaration);
791   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
792     Line->Level += AddLevels;
793 
794   IfStmtKind IfKind = IfStmtKind::NotIf;
795   const bool SimpleBlock = parseLevel(
796       /*HasOpeningBrace=*/true, CanContainBracedList, &IfKind, NextLBracesType);
797 
798   if (eof())
799     return IfKind;
800 
801   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
802                  : !FormatTok->is(tok::r_brace)) {
803     Line->Level = InitialLevel;
804     FormatTok->setBlockKind(BK_Block);
805     return IfKind;
806   }
807 
808   if (SimpleBlock && Tok->is(tok::l_brace)) {
809     assert(FormatTok->is(tok::r_brace));
810     const FormatToken *Previous = Tokens->getPreviousToken();
811     assert(Previous);
812     if (Previous->isNot(tok::r_brace) || Previous->Optional) {
813       Tok->MatchingParen = FormatTok;
814       FormatTok->MatchingParen = Tok;
815     }
816   }
817 
818   size_t PPEndHash = computePPHash();
819 
820   // Munch the closing brace.
821   nextToken(/*LevelDifference=*/-AddLevels);
822 
823   if (MacroBlock && FormatTok->is(tok::l_paren))
824     parseParens();
825 
826   if (FormatTok->is(tok::kw_noexcept)) {
827     // A noexcept in a requires expression.
828     nextToken();
829   }
830 
831   if (FormatTok->is(tok::arrow)) {
832     // Following the } or noexcept we can find a trailing return type arrow
833     // as part of an implicit conversion constraint.
834     nextToken();
835     parseStructuralElement();
836   }
837 
838   if (MunchSemi && FormatTok->Tok.is(tok::semi))
839     nextToken();
840 
841   Line->Level = InitialLevel;
842 
843   if (PPStartHash == PPEndHash) {
844     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
845     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
846       // Update the opening line to add the forward reference as well
847       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
848           CurrentLines->size() - 1;
849     }
850   }
851 
852   return IfKind;
853 }
854 
855 static bool isGoogScope(const UnwrappedLine &Line) {
856   // FIXME: Closure-library specific stuff should not be hard-coded but be
857   // configurable.
858   if (Line.Tokens.size() < 4)
859     return false;
860   auto I = Line.Tokens.begin();
861   if (I->Tok->TokenText != "goog")
862     return false;
863   ++I;
864   if (I->Tok->isNot(tok::period))
865     return false;
866   ++I;
867   if (I->Tok->TokenText != "scope")
868     return false;
869   ++I;
870   return I->Tok->is(tok::l_paren);
871 }
872 
873 static bool isIIFE(const UnwrappedLine &Line,
874                    const AdditionalKeywords &Keywords) {
875   // Look for the start of an immediately invoked anonymous function.
876   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
877   // This is commonly done in JavaScript to create a new, anonymous scope.
878   // Example: (function() { ... })()
879   if (Line.Tokens.size() < 3)
880     return false;
881   auto I = Line.Tokens.begin();
882   if (I->Tok->isNot(tok::l_paren))
883     return false;
884   ++I;
885   if (I->Tok->isNot(Keywords.kw_function))
886     return false;
887   ++I;
888   return I->Tok->is(tok::l_paren);
889 }
890 
891 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
892                                    const FormatToken &InitialToken) {
893   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
894     return Style.BraceWrapping.AfterNamespace;
895   if (InitialToken.is(tok::kw_class))
896     return Style.BraceWrapping.AfterClass;
897   if (InitialToken.is(tok::kw_union))
898     return Style.BraceWrapping.AfterUnion;
899   if (InitialToken.is(tok::kw_struct))
900     return Style.BraceWrapping.AfterStruct;
901   if (InitialToken.is(tok::kw_enum))
902     return Style.BraceWrapping.AfterEnum;
903   return false;
904 }
905 
906 void UnwrappedLineParser::parseChildBlock(
907     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
908   FormatTok->setBlockKind(BK_Block);
909   nextToken();
910   {
911     bool SkipIndent = (Style.isJavaScript() &&
912                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
913     ScopedLineState LineState(*this);
914     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
915                                             /*MustBeDeclaration=*/false);
916     Line->Level += SkipIndent ? 0 : 1;
917     parseLevel(/*HasOpeningBrace=*/true, CanContainBracedList,
918                /*IfKind=*/nullptr, NextLBracesType);
919     flushComments(isOnNewLine(*FormatTok));
920     Line->Level -= SkipIndent ? 0 : 1;
921   }
922   nextToken();
923 }
924 
925 void UnwrappedLineParser::parsePPDirective() {
926   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
927   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
928 
929   nextToken();
930 
931   if (!FormatTok->Tok.getIdentifierInfo()) {
932     parsePPUnknown();
933     return;
934   }
935 
936   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
937   case tok::pp_define:
938     parsePPDefine();
939     return;
940   case tok::pp_if:
941     parsePPIf(/*IfDef=*/false);
942     break;
943   case tok::pp_ifdef:
944   case tok::pp_ifndef:
945     parsePPIf(/*IfDef=*/true);
946     break;
947   case tok::pp_else:
948     parsePPElse();
949     break;
950   case tok::pp_elifdef:
951   case tok::pp_elifndef:
952   case tok::pp_elif:
953     parsePPElIf();
954     break;
955   case tok::pp_endif:
956     parsePPEndIf();
957     break;
958   default:
959     parsePPUnknown();
960     break;
961   }
962 }
963 
964 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
965   size_t Line = CurrentLines->size();
966   if (CurrentLines == &PreprocessorDirectives)
967     Line += Lines.size();
968 
969   if (Unreachable ||
970       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
971     PPStack.push_back({PP_Unreachable, Line});
972   else
973     PPStack.push_back({PP_Conditional, Line});
974 }
975 
976 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
977   ++PPBranchLevel;
978   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
979   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
980     PPLevelBranchIndex.push_back(0);
981     PPLevelBranchCount.push_back(0);
982   }
983   PPChainBranchIndex.push(0);
984   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
985   conditionalCompilationCondition(Unreachable || Skip);
986 }
987 
988 void UnwrappedLineParser::conditionalCompilationAlternative() {
989   if (!PPStack.empty())
990     PPStack.pop_back();
991   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
992   if (!PPChainBranchIndex.empty())
993     ++PPChainBranchIndex.top();
994   conditionalCompilationCondition(
995       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
996       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
997 }
998 
999 void UnwrappedLineParser::conditionalCompilationEnd() {
1000   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1001   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1002     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1003       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1004   }
1005   // Guard against #endif's without #if.
1006   if (PPBranchLevel > -1)
1007     --PPBranchLevel;
1008   if (!PPChainBranchIndex.empty())
1009     PPChainBranchIndex.pop();
1010   if (!PPStack.empty())
1011     PPStack.pop_back();
1012 }
1013 
1014 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1015   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1016   nextToken();
1017   bool Unreachable = false;
1018   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1019     Unreachable = true;
1020   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1021     Unreachable = true;
1022   conditionalCompilationStart(Unreachable);
1023   FormatToken *IfCondition = FormatTok;
1024   // If there's a #ifndef on the first line, and the only lines before it are
1025   // comments, it could be an include guard.
1026   bool MaybeIncludeGuard = IfNDef;
1027   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
1028     for (auto &Line : Lines) {
1029       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1030         MaybeIncludeGuard = false;
1031         IncludeGuard = IG_Rejected;
1032         break;
1033       }
1034     }
1035   --PPBranchLevel;
1036   parsePPUnknown();
1037   ++PPBranchLevel;
1038   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1039     IncludeGuard = IG_IfNdefed;
1040     IncludeGuardToken = IfCondition;
1041   }
1042 }
1043 
1044 void UnwrappedLineParser::parsePPElse() {
1045   // If a potential include guard has an #else, it's not an include guard.
1046   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1047     IncludeGuard = IG_Rejected;
1048   conditionalCompilationAlternative();
1049   if (PPBranchLevel > -1)
1050     --PPBranchLevel;
1051   parsePPUnknown();
1052   ++PPBranchLevel;
1053 }
1054 
1055 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1056 
1057 void UnwrappedLineParser::parsePPEndIf() {
1058   conditionalCompilationEnd();
1059   parsePPUnknown();
1060   // If the #endif of a potential include guard is the last thing in the file,
1061   // then we found an include guard.
1062   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1063       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1064     IncludeGuard = IG_Found;
1065 }
1066 
1067 void UnwrappedLineParser::parsePPDefine() {
1068   nextToken();
1069 
1070   if (!FormatTok->Tok.getIdentifierInfo()) {
1071     IncludeGuard = IG_Rejected;
1072     IncludeGuardToken = nullptr;
1073     parsePPUnknown();
1074     return;
1075   }
1076 
1077   if (IncludeGuard == IG_IfNdefed &&
1078       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1079     IncludeGuard = IG_Defined;
1080     IncludeGuardToken = nullptr;
1081     for (auto &Line : Lines) {
1082       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1083         IncludeGuard = IG_Rejected;
1084         break;
1085       }
1086     }
1087   }
1088 
1089   // In the context of a define, even keywords should be treated as normal
1090   // identifiers. Setting the kind to identifier is not enough, because we need
1091   // to treat additional keywords like __except as well, which are already
1092   // identifiers.
1093   FormatTok->Tok.setKind(tok::identifier);
1094   FormatTok->Tok.setIdentifierInfo(nullptr);
1095   nextToken();
1096   if (FormatTok->Tok.getKind() == tok::l_paren &&
1097       !FormatTok->hasWhitespaceBefore())
1098     parseParens();
1099   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1100     Line->Level += PPBranchLevel + 1;
1101   addUnwrappedLine();
1102   ++Line->Level;
1103 
1104   // Errors during a preprocessor directive can only affect the layout of the
1105   // preprocessor directive, and thus we ignore them. An alternative approach
1106   // would be to use the same approach we use on the file level (no
1107   // re-indentation if there was a structural error) within the macro
1108   // definition.
1109   parseFile();
1110 }
1111 
1112 void UnwrappedLineParser::parsePPUnknown() {
1113   do {
1114     nextToken();
1115   } while (!eof());
1116   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1117     Line->Level += PPBranchLevel + 1;
1118   addUnwrappedLine();
1119 }
1120 
1121 // Here we exclude certain tokens that are not usually the first token in an
1122 // unwrapped line. This is used in attempt to distinguish macro calls without
1123 // trailing semicolons from other constructs split to several lines.
1124 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1125   // Semicolon can be a null-statement, l_square can be a start of a macro or
1126   // a C++11 attribute, but this doesn't seem to be common.
1127   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1128          Tok.isNot(TT_AttributeSquare) &&
1129          // Tokens that can only be used as binary operators and a part of
1130          // overloaded operator names.
1131          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1132          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1133          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1134          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1135          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1136          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1137          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1138          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1139          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1140          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1141          Tok.isNot(tok::lesslessequal) &&
1142          // Colon is used in labels, base class lists, initializer lists,
1143          // range-based for loops, ternary operator, but should never be the
1144          // first token in an unwrapped line.
1145          Tok.isNot(tok::colon) &&
1146          // 'noexcept' is a trailing annotation.
1147          Tok.isNot(tok::kw_noexcept);
1148 }
1149 
1150 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1151                           const FormatToken *FormatTok) {
1152   // FIXME: This returns true for C/C++ keywords like 'struct'.
1153   return FormatTok->is(tok::identifier) &&
1154          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1155           !FormatTok->isOneOf(
1156               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1157               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1158               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1159               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1160               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1161               Keywords.kw_instanceof, Keywords.kw_interface,
1162               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1163 }
1164 
1165 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1166                                  const FormatToken *FormatTok) {
1167   return FormatTok->Tok.isLiteral() ||
1168          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1169          mustBeJSIdent(Keywords, FormatTok);
1170 }
1171 
1172 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1173 // when encountered after a value (see mustBeJSIdentOrValue).
1174 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1175                            const FormatToken *FormatTok) {
1176   return FormatTok->isOneOf(
1177       tok::kw_return, Keywords.kw_yield,
1178       // conditionals
1179       tok::kw_if, tok::kw_else,
1180       // loops
1181       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1182       // switch/case
1183       tok::kw_switch, tok::kw_case,
1184       // exceptions
1185       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1186       // declaration
1187       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1188       Keywords.kw_async, Keywords.kw_function,
1189       // import/export
1190       Keywords.kw_import, tok::kw_export);
1191 }
1192 
1193 // Checks whether a token is a type in K&R C (aka C78).
1194 static bool isC78Type(const FormatToken &Tok) {
1195   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1196                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1197                      tok::identifier);
1198 }
1199 
1200 // This function checks whether a token starts the first parameter declaration
1201 // in a K&R C (aka C78) function definition, e.g.:
1202 //   int f(a, b)
1203 //   short a, b;
1204 //   {
1205 //      return a + b;
1206 //   }
1207 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1208                                const FormatToken *FuncName) {
1209   assert(Tok);
1210   assert(Next);
1211   assert(FuncName);
1212 
1213   if (FuncName->isNot(tok::identifier))
1214     return false;
1215 
1216   const FormatToken *Prev = FuncName->Previous;
1217   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1218     return false;
1219 
1220   if (!isC78Type(*Tok) &&
1221       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1222     return false;
1223 
1224   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1225     return false;
1226 
1227   Tok = Tok->Previous;
1228   if (!Tok || Tok->isNot(tok::r_paren))
1229     return false;
1230 
1231   Tok = Tok->Previous;
1232   if (!Tok || Tok->isNot(tok::identifier))
1233     return false;
1234 
1235   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1236 }
1237 
1238 void UnwrappedLineParser::parseModuleImport() {
1239   nextToken();
1240   while (!eof()) {
1241     if (FormatTok->is(tok::colon)) {
1242       FormatTok->setType(TT_ModulePartitionColon);
1243     }
1244     // Handle import <foo/bar.h> as we would an include statement.
1245     else if (FormatTok->is(tok::less)) {
1246       nextToken();
1247       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1248         // Mark tokens up to the trailing line comments as implicit string
1249         // literals.
1250         if (FormatTok->isNot(tok::comment) &&
1251             !FormatTok->TokenText.startswith("//"))
1252           FormatTok->setType(TT_ImplicitStringLiteral);
1253         nextToken();
1254       }
1255     }
1256     if (FormatTok->is(tok::semi)) {
1257       nextToken();
1258       break;
1259     }
1260     nextToken();
1261   }
1262 
1263   addUnwrappedLine();
1264 }
1265 
1266 // readTokenWithJavaScriptASI reads the next token and terminates the current
1267 // line if JavaScript Automatic Semicolon Insertion must
1268 // happen between the current token and the next token.
1269 //
1270 // This method is conservative - it cannot cover all edge cases of JavaScript,
1271 // but only aims to correctly handle certain well known cases. It *must not*
1272 // return true in speculative cases.
1273 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1274   FormatToken *Previous = FormatTok;
1275   readToken();
1276   FormatToken *Next = FormatTok;
1277 
1278   bool IsOnSameLine =
1279       CommentsBeforeNextToken.empty()
1280           ? Next->NewlinesBefore == 0
1281           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1282   if (IsOnSameLine)
1283     return;
1284 
1285   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1286   bool PreviousStartsTemplateExpr =
1287       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1288   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1289     // If the line contains an '@' sign, the previous token might be an
1290     // annotation, which can precede another identifier/value.
1291     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1292       return LineNode.Tok->is(tok::at);
1293     });
1294     if (HasAt)
1295       return;
1296   }
1297   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1298     return addUnwrappedLine();
1299   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1300   bool NextEndsTemplateExpr =
1301       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1302   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1303       (PreviousMustBeValue ||
1304        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1305                          tok::minusminus)))
1306     return addUnwrappedLine();
1307   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1308       isJSDeclOrStmt(Keywords, Next))
1309     return addUnwrappedLine();
1310 }
1311 
1312 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
1313                                                  bool IsTopLevel,
1314                                                  TokenType NextLBracesType) {
1315   if (Style.Language == FormatStyle::LK_TableGen &&
1316       FormatTok->is(tok::pp_include)) {
1317     nextToken();
1318     if (FormatTok->is(tok::string_literal))
1319       nextToken();
1320     addUnwrappedLine();
1321     return;
1322   }
1323   switch (FormatTok->Tok.getKind()) {
1324   case tok::kw_asm:
1325     nextToken();
1326     if (FormatTok->is(tok::l_brace)) {
1327       FormatTok->setType(TT_InlineASMBrace);
1328       nextToken();
1329       while (FormatTok && FormatTok->isNot(tok::eof)) {
1330         if (FormatTok->is(tok::r_brace)) {
1331           FormatTok->setType(TT_InlineASMBrace);
1332           nextToken();
1333           addUnwrappedLine();
1334           break;
1335         }
1336         FormatTok->Finalized = true;
1337         nextToken();
1338       }
1339     }
1340     break;
1341   case tok::kw_namespace:
1342     parseNamespace();
1343     return;
1344   case tok::kw_public:
1345   case tok::kw_protected:
1346   case tok::kw_private:
1347     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1348         Style.isCSharp())
1349       nextToken();
1350     else
1351       parseAccessSpecifier();
1352     return;
1353   case tok::kw_if:
1354     if (Style.isJavaScript() && Line->MustBeDeclaration)
1355       // field/method declaration.
1356       break;
1357     parseIfThenElse(IfKind);
1358     return;
1359   case tok::kw_for:
1360   case tok::kw_while:
1361     if (Style.isJavaScript() && Line->MustBeDeclaration)
1362       // field/method declaration.
1363       break;
1364     parseForOrWhileLoop();
1365     return;
1366   case tok::kw_do:
1367     if (Style.isJavaScript() && Line->MustBeDeclaration)
1368       // field/method declaration.
1369       break;
1370     parseDoWhile();
1371     return;
1372   case tok::kw_switch:
1373     if (Style.isJavaScript() && Line->MustBeDeclaration)
1374       // 'switch: string' field declaration.
1375       break;
1376     parseSwitch();
1377     return;
1378   case tok::kw_default:
1379     if (Style.isJavaScript() && Line->MustBeDeclaration)
1380       // 'default: string' field declaration.
1381       break;
1382     nextToken();
1383     if (FormatTok->is(tok::colon)) {
1384       parseLabel();
1385       return;
1386     }
1387     // e.g. "default void f() {}" in a Java interface.
1388     break;
1389   case tok::kw_case:
1390     if (Style.isJavaScript() && Line->MustBeDeclaration)
1391       // 'case: string' field declaration.
1392       break;
1393     parseCaseLabel();
1394     return;
1395   case tok::kw_try:
1396   case tok::kw___try:
1397     if (Style.isJavaScript() && Line->MustBeDeclaration)
1398       // field/method declaration.
1399       break;
1400     parseTryCatch();
1401     return;
1402   case tok::kw_extern:
1403     nextToken();
1404     if (FormatTok->Tok.is(tok::string_literal)) {
1405       nextToken();
1406       if (FormatTok->Tok.is(tok::l_brace)) {
1407         if (Style.BraceWrapping.AfterExternBlock)
1408           addUnwrappedLine();
1409         // Either we indent or for backwards compatibility we follow the
1410         // AfterExternBlock style.
1411         unsigned AddLevels =
1412             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1413                     (Style.BraceWrapping.AfterExternBlock &&
1414                      Style.IndentExternBlock ==
1415                          FormatStyle::IEBS_AfterExternBlock)
1416                 ? 1u
1417                 : 0u;
1418         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1419         addUnwrappedLine();
1420         return;
1421       }
1422     }
1423     break;
1424   case tok::kw_export:
1425     if (Style.isJavaScript()) {
1426       parseJavaScriptEs6ImportExport();
1427       return;
1428     }
1429     if (!Style.isCpp())
1430       break;
1431     // Handle C++ "(inline|export) namespace".
1432     LLVM_FALLTHROUGH;
1433   case tok::kw_inline:
1434     nextToken();
1435     if (FormatTok->Tok.is(tok::kw_namespace)) {
1436       parseNamespace();
1437       return;
1438     }
1439     break;
1440   case tok::identifier:
1441     if (FormatTok->is(TT_ForEachMacro)) {
1442       parseForOrWhileLoop();
1443       return;
1444     }
1445     if (FormatTok->is(TT_MacroBlockBegin)) {
1446       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1447                  /*MunchSemi=*/false);
1448       return;
1449     }
1450     if (FormatTok->is(Keywords.kw_import)) {
1451       if (Style.isJavaScript()) {
1452         parseJavaScriptEs6ImportExport();
1453         return;
1454       }
1455       if (Style.Language == FormatStyle::LK_Proto) {
1456         nextToken();
1457         if (FormatTok->is(tok::kw_public))
1458           nextToken();
1459         if (!FormatTok->is(tok::string_literal))
1460           return;
1461         nextToken();
1462         if (FormatTok->is(tok::semi))
1463           nextToken();
1464         addUnwrappedLine();
1465         return;
1466       }
1467       if (Style.isCpp()) {
1468         parseModuleImport();
1469         return;
1470       }
1471     }
1472     if (Style.isCpp() &&
1473         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1474                            Keywords.kw_slots, Keywords.kw_qslots)) {
1475       nextToken();
1476       if (FormatTok->is(tok::colon)) {
1477         nextToken();
1478         addUnwrappedLine();
1479         return;
1480       }
1481     }
1482     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1483       parseStatementMacro();
1484       return;
1485     }
1486     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1487       parseNamespace();
1488       return;
1489     }
1490     // In all other cases, parse the declaration.
1491     break;
1492   default:
1493     break;
1494   }
1495   do {
1496     const FormatToken *Previous = FormatTok->Previous;
1497     switch (FormatTok->Tok.getKind()) {
1498     case tok::at:
1499       nextToken();
1500       if (FormatTok->Tok.is(tok::l_brace)) {
1501         nextToken();
1502         parseBracedList();
1503         break;
1504       } else if (Style.Language == FormatStyle::LK_Java &&
1505                  FormatTok->is(Keywords.kw_interface)) {
1506         nextToken();
1507         break;
1508       }
1509       switch (FormatTok->Tok.getObjCKeywordID()) {
1510       case tok::objc_public:
1511       case tok::objc_protected:
1512       case tok::objc_package:
1513       case tok::objc_private:
1514         return parseAccessSpecifier();
1515       case tok::objc_interface:
1516       case tok::objc_implementation:
1517         return parseObjCInterfaceOrImplementation();
1518       case tok::objc_protocol:
1519         if (parseObjCProtocol())
1520           return;
1521         break;
1522       case tok::objc_end:
1523         return; // Handled by the caller.
1524       case tok::objc_optional:
1525       case tok::objc_required:
1526         nextToken();
1527         addUnwrappedLine();
1528         return;
1529       case tok::objc_autoreleasepool:
1530         nextToken();
1531         if (FormatTok->Tok.is(tok::l_brace)) {
1532           if (Style.BraceWrapping.AfterControlStatement ==
1533               FormatStyle::BWACS_Always)
1534             addUnwrappedLine();
1535           parseBlock();
1536         }
1537         addUnwrappedLine();
1538         return;
1539       case tok::objc_synchronized:
1540         nextToken();
1541         if (FormatTok->Tok.is(tok::l_paren))
1542           // Skip synchronization object
1543           parseParens();
1544         if (FormatTok->Tok.is(tok::l_brace)) {
1545           if (Style.BraceWrapping.AfterControlStatement ==
1546               FormatStyle::BWACS_Always)
1547             addUnwrappedLine();
1548           parseBlock();
1549         }
1550         addUnwrappedLine();
1551         return;
1552       case tok::objc_try:
1553         // This branch isn't strictly necessary (the kw_try case below would
1554         // do this too after the tok::at is parsed above).  But be explicit.
1555         parseTryCatch();
1556         return;
1557       default:
1558         break;
1559       }
1560       break;
1561     case tok::kw_concept:
1562       parseConcept();
1563       return;
1564     case tok::kw_requires: {
1565       bool ParsedClause = parseRequires();
1566       if (ParsedClause)
1567         return;
1568       break;
1569     }
1570     case tok::kw_enum:
1571       // Ignore if this is part of "template <enum ...".
1572       if (Previous && Previous->is(tok::less)) {
1573         nextToken();
1574         break;
1575       }
1576 
1577       // parseEnum falls through and does not yet add an unwrapped line as an
1578       // enum definition can start a structural element.
1579       if (!parseEnum())
1580         break;
1581       // This only applies for C++.
1582       if (!Style.isCpp()) {
1583         addUnwrappedLine();
1584         return;
1585       }
1586       break;
1587     case tok::kw_typedef:
1588       nextToken();
1589       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1590                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1591                              Keywords.kw_CF_CLOSED_ENUM,
1592                              Keywords.kw_NS_CLOSED_ENUM))
1593         parseEnum();
1594       break;
1595     case tok::kw_struct:
1596     case tok::kw_union:
1597     case tok::kw_class:
1598       if (parseStructLike())
1599         return;
1600       break;
1601     case tok::period:
1602       nextToken();
1603       // In Java, classes have an implicit static member "class".
1604       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1605           FormatTok->is(tok::kw_class))
1606         nextToken();
1607       if (Style.isJavaScript() && FormatTok &&
1608           FormatTok->Tok.getIdentifierInfo())
1609         // JavaScript only has pseudo keywords, all keywords are allowed to
1610         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1611         nextToken();
1612       break;
1613     case tok::semi:
1614       nextToken();
1615       addUnwrappedLine();
1616       return;
1617     case tok::r_brace:
1618       addUnwrappedLine();
1619       return;
1620     case tok::l_paren: {
1621       parseParens();
1622       // Break the unwrapped line if a K&R C function definition has a parameter
1623       // declaration.
1624       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1625         break;
1626       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1627         addUnwrappedLine();
1628         return;
1629       }
1630       break;
1631     }
1632     case tok::kw_operator:
1633       nextToken();
1634       if (FormatTok->isBinaryOperator())
1635         nextToken();
1636       break;
1637     case tok::caret:
1638       nextToken();
1639       if (FormatTok->Tok.isAnyIdentifier() ||
1640           FormatTok->isSimpleTypeSpecifier())
1641         nextToken();
1642       if (FormatTok->is(tok::l_paren))
1643         parseParens();
1644       if (FormatTok->is(tok::l_brace))
1645         parseChildBlock();
1646       break;
1647     case tok::l_brace:
1648       if (NextLBracesType != TT_Unknown)
1649         FormatTok->setType(NextLBracesType);
1650       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1651         // A block outside of parentheses must be the last part of a
1652         // structural element.
1653         // FIXME: Figure out cases where this is not true, and add projections
1654         // for them (the one we know is missing are lambdas).
1655         if (Style.Language == FormatStyle::LK_Java &&
1656             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1657           // If necessary, we could set the type to something different than
1658           // TT_FunctionLBrace.
1659           if (Style.BraceWrapping.AfterControlStatement ==
1660               FormatStyle::BWACS_Always)
1661             addUnwrappedLine();
1662         } else if (Style.BraceWrapping.AfterFunction) {
1663           addUnwrappedLine();
1664         }
1665         if (!Line->InPPDirective)
1666           FormatTok->setType(TT_FunctionLBrace);
1667         parseBlock();
1668         addUnwrappedLine();
1669         return;
1670       }
1671       // Otherwise this was a braced init list, and the structural
1672       // element continues.
1673       break;
1674     case tok::kw_try:
1675       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1676         // field/method declaration.
1677         nextToken();
1678         break;
1679       }
1680       // We arrive here when parsing function-try blocks.
1681       if (Style.BraceWrapping.AfterFunction)
1682         addUnwrappedLine();
1683       parseTryCatch();
1684       return;
1685     case tok::identifier: {
1686       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1687           Line->MustBeDeclaration) {
1688         addUnwrappedLine();
1689         parseCSharpGenericTypeConstraint();
1690         break;
1691       }
1692       if (FormatTok->is(TT_MacroBlockEnd)) {
1693         addUnwrappedLine();
1694         return;
1695       }
1696 
1697       // Function declarations (as opposed to function expressions) are parsed
1698       // on their own unwrapped line by continuing this loop. Function
1699       // expressions (functions that are not on their own line) must not create
1700       // a new unwrapped line, so they are special cased below.
1701       size_t TokenCount = Line->Tokens.size();
1702       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1703           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1704                                                      Keywords.kw_async)))) {
1705         tryToParseJSFunction();
1706         break;
1707       }
1708       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1709           FormatTok->is(Keywords.kw_interface)) {
1710         if (Style.isJavaScript()) {
1711           // In JavaScript/TypeScript, "interface" can be used as a standalone
1712           // identifier, e.g. in `var interface = 1;`. If "interface" is
1713           // followed by another identifier, it is very like to be an actual
1714           // interface declaration.
1715           unsigned StoredPosition = Tokens->getPosition();
1716           FormatToken *Next = Tokens->getNextToken();
1717           FormatTok = Tokens->setPosition(StoredPosition);
1718           if (!mustBeJSIdent(Keywords, Next)) {
1719             nextToken();
1720             break;
1721           }
1722         }
1723         parseRecord();
1724         addUnwrappedLine();
1725         return;
1726       }
1727 
1728       if (FormatTok->is(Keywords.kw_interface)) {
1729         if (parseStructLike())
1730           return;
1731         break;
1732       }
1733 
1734       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1735         parseStatementMacro();
1736         return;
1737       }
1738 
1739       // See if the following token should start a new unwrapped line.
1740       StringRef Text = FormatTok->TokenText;
1741 
1742       FormatToken *PreviousToken = FormatTok;
1743       nextToken();
1744 
1745       // JS doesn't have macros, and within classes colons indicate fields, not
1746       // labels.
1747       if (Style.isJavaScript())
1748         break;
1749 
1750       TokenCount = Line->Tokens.size();
1751       if (TokenCount == 1 ||
1752           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1753         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1754           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1755           parseLabel(!Style.IndentGotoLabels);
1756           return;
1757         }
1758         // Recognize function-like macro usages without trailing semicolon as
1759         // well as free-standing macros like Q_OBJECT.
1760         bool FunctionLike = FormatTok->is(tok::l_paren);
1761         if (FunctionLike)
1762           parseParens();
1763 
1764         bool FollowedByNewline =
1765             CommentsBeforeNextToken.empty()
1766                 ? FormatTok->NewlinesBefore > 0
1767                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1768 
1769         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1770             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1771           PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
1772           addUnwrappedLine();
1773           return;
1774         }
1775       }
1776       break;
1777     }
1778     case tok::equal:
1779       if ((Style.isJavaScript() || Style.isCSharp()) &&
1780           FormatTok->is(TT_FatArrow)) {
1781         tryToParseChildBlock();
1782         break;
1783       }
1784 
1785       nextToken();
1786       if (FormatTok->Tok.is(tok::l_brace)) {
1787         // Block kind should probably be set to BK_BracedInit for any language.
1788         // C# needs this change to ensure that array initialisers and object
1789         // initialisers are indented the same way.
1790         if (Style.isCSharp())
1791           FormatTok->setBlockKind(BK_BracedInit);
1792         nextToken();
1793         parseBracedList();
1794       } else if (Style.Language == FormatStyle::LK_Proto &&
1795                  FormatTok->Tok.is(tok::less)) {
1796         nextToken();
1797         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1798                         /*ClosingBraceKind=*/tok::greater);
1799       }
1800       break;
1801     case tok::l_square:
1802       parseSquare();
1803       break;
1804     case tok::kw_new:
1805       parseNew();
1806       break;
1807     default:
1808       nextToken();
1809       break;
1810     }
1811   } while (!eof());
1812 }
1813 
1814 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1815   assert(FormatTok->is(tok::l_brace));
1816   if (!Style.isCSharp())
1817     return false;
1818   // See if it's a property accessor.
1819   if (FormatTok->Previous->isNot(tok::identifier))
1820     return false;
1821 
1822   // See if we are inside a property accessor.
1823   //
1824   // Record the current tokenPosition so that we can advance and
1825   // reset the current token. `Next` is not set yet so we need
1826   // another way to advance along the token stream.
1827   unsigned int StoredPosition = Tokens->getPosition();
1828   FormatToken *Tok = Tokens->getNextToken();
1829 
1830   // A trivial property accessor is of the form:
1831   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1832   // Track these as they do not require line breaks to be introduced.
1833   bool HasGetOrSet = false;
1834   bool IsTrivialPropertyAccessor = true;
1835   while (!eof()) {
1836     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1837                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1838                      Keywords.kw_set)) {
1839       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1840         HasGetOrSet = true;
1841       Tok = Tokens->getNextToken();
1842       continue;
1843     }
1844     if (Tok->isNot(tok::r_brace))
1845       IsTrivialPropertyAccessor = false;
1846     break;
1847   }
1848 
1849   if (!HasGetOrSet) {
1850     Tokens->setPosition(StoredPosition);
1851     return false;
1852   }
1853 
1854   // Try to parse the property accessor:
1855   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1856   Tokens->setPosition(StoredPosition);
1857   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1858     addUnwrappedLine();
1859   nextToken();
1860   do {
1861     switch (FormatTok->Tok.getKind()) {
1862     case tok::r_brace:
1863       nextToken();
1864       if (FormatTok->is(tok::equal)) {
1865         while (!eof() && FormatTok->isNot(tok::semi))
1866           nextToken();
1867         nextToken();
1868       }
1869       addUnwrappedLine();
1870       return true;
1871     case tok::l_brace:
1872       ++Line->Level;
1873       parseBlock(/*MustBeDeclaration=*/true);
1874       addUnwrappedLine();
1875       --Line->Level;
1876       break;
1877     case tok::equal:
1878       if (FormatTok->is(TT_FatArrow)) {
1879         ++Line->Level;
1880         do {
1881           nextToken();
1882         } while (!eof() && FormatTok->isNot(tok::semi));
1883         nextToken();
1884         addUnwrappedLine();
1885         --Line->Level;
1886         break;
1887       }
1888       nextToken();
1889       break;
1890     default:
1891       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1892           !IsTrivialPropertyAccessor) {
1893         // Non-trivial get/set needs to be on its own line.
1894         addUnwrappedLine();
1895       }
1896       nextToken();
1897     }
1898   } while (!eof());
1899 
1900   // Unreachable for well-formed code (paired '{' and '}').
1901   return true;
1902 }
1903 
1904 bool UnwrappedLineParser::tryToParseLambda() {
1905   if (!Style.isCpp()) {
1906     nextToken();
1907     return false;
1908   }
1909   assert(FormatTok->is(tok::l_square));
1910   FormatToken &LSquare = *FormatTok;
1911   if (!tryToParseLambdaIntroducer())
1912     return false;
1913 
1914   bool SeenArrow = false;
1915   bool InTemplateParameterList = false;
1916 
1917   while (FormatTok->isNot(tok::l_brace)) {
1918     if (FormatTok->isSimpleTypeSpecifier()) {
1919       nextToken();
1920       continue;
1921     }
1922     switch (FormatTok->Tok.getKind()) {
1923     case tok::l_brace:
1924       break;
1925     case tok::l_paren:
1926       parseParens();
1927       break;
1928     case tok::l_square:
1929       parseSquare();
1930       break;
1931     case tok::kw_class:
1932     case tok::kw_template:
1933     case tok::kw_typename:
1934       assert(FormatTok->Previous);
1935       if (FormatTok->Previous->is(tok::less))
1936         InTemplateParameterList = true;
1937       nextToken();
1938       break;
1939     case tok::amp:
1940     case tok::star:
1941     case tok::kw_const:
1942     case tok::comma:
1943     case tok::less:
1944     case tok::greater:
1945     case tok::identifier:
1946     case tok::numeric_constant:
1947     case tok::coloncolon:
1948     case tok::kw_mutable:
1949     case tok::kw_noexcept:
1950       nextToken();
1951       break;
1952     // Specialization of a template with an integer parameter can contain
1953     // arithmetic, logical, comparison and ternary operators.
1954     //
1955     // FIXME: This also accepts sequences of operators that are not in the scope
1956     // of a template argument list.
1957     //
1958     // In a C++ lambda a template type can only occur after an arrow. We use
1959     // this as an heuristic to distinguish between Objective-C expressions
1960     // followed by an `a->b` expression, such as:
1961     // ([obj func:arg] + a->b)
1962     // Otherwise the code below would parse as a lambda.
1963     //
1964     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1965     // explicit template lists: []<bool b = true && false>(U &&u){}
1966     case tok::plus:
1967     case tok::minus:
1968     case tok::exclaim:
1969     case tok::tilde:
1970     case tok::slash:
1971     case tok::percent:
1972     case tok::lessless:
1973     case tok::pipe:
1974     case tok::pipepipe:
1975     case tok::ampamp:
1976     case tok::caret:
1977     case tok::equalequal:
1978     case tok::exclaimequal:
1979     case tok::greaterequal:
1980     case tok::lessequal:
1981     case tok::question:
1982     case tok::colon:
1983     case tok::ellipsis:
1984     case tok::kw_true:
1985     case tok::kw_false:
1986       if (SeenArrow || InTemplateParameterList) {
1987         nextToken();
1988         break;
1989       }
1990       return true;
1991     case tok::arrow:
1992       // This might or might not actually be a lambda arrow (this could be an
1993       // ObjC method invocation followed by a dereferencing arrow). We might
1994       // reset this back to TT_Unknown in TokenAnnotator.
1995       FormatTok->setType(TT_LambdaArrow);
1996       SeenArrow = true;
1997       nextToken();
1998       break;
1999     default:
2000       return true;
2001     }
2002   }
2003   FormatTok->setType(TT_LambdaLBrace);
2004   LSquare.setType(TT_LambdaLSquare);
2005   parseChildBlock();
2006   return true;
2007 }
2008 
2009 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2010   const FormatToken *Previous = FormatTok->Previous;
2011   if (Previous &&
2012       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2013                          tok::kw_delete, tok::l_square) ||
2014        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
2015        Previous->isSimpleTypeSpecifier())) {
2016     nextToken();
2017     return false;
2018   }
2019   nextToken();
2020   if (FormatTok->is(tok::l_square))
2021     return false;
2022   parseSquare(/*LambdaIntroducer=*/true);
2023   return true;
2024 }
2025 
2026 void UnwrappedLineParser::tryToParseJSFunction() {
2027   assert(FormatTok->is(Keywords.kw_function) ||
2028          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2029   if (FormatTok->is(Keywords.kw_async))
2030     nextToken();
2031   // Consume "function".
2032   nextToken();
2033 
2034   // Consume * (generator function). Treat it like C++'s overloaded operators.
2035   if (FormatTok->is(tok::star)) {
2036     FormatTok->setType(TT_OverloadedOperator);
2037     nextToken();
2038   }
2039 
2040   // Consume function name.
2041   if (FormatTok->is(tok::identifier))
2042     nextToken();
2043 
2044   if (FormatTok->isNot(tok::l_paren))
2045     return;
2046 
2047   // Parse formal parameter list.
2048   parseParens();
2049 
2050   if (FormatTok->is(tok::colon)) {
2051     // Parse a type definition.
2052     nextToken();
2053 
2054     // Eat the type declaration. For braced inline object types, balance braces,
2055     // otherwise just parse until finding an l_brace for the function body.
2056     if (FormatTok->is(tok::l_brace))
2057       tryToParseBracedList();
2058     else
2059       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2060         nextToken();
2061   }
2062 
2063   if (FormatTok->is(tok::semi))
2064     return;
2065 
2066   parseChildBlock();
2067 }
2068 
2069 bool UnwrappedLineParser::tryToParseBracedList() {
2070   if (FormatTok->is(BK_Unknown))
2071     calculateBraceTypes();
2072   assert(FormatTok->isNot(BK_Unknown));
2073   if (FormatTok->is(BK_Block))
2074     return false;
2075   nextToken();
2076   parseBracedList();
2077   return true;
2078 }
2079 
2080 bool UnwrappedLineParser::tryToParseChildBlock() {
2081   assert(Style.isJavaScript() || Style.isCSharp());
2082   assert(FormatTok->is(TT_FatArrow));
2083   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2084   // They always start an expression or a child block if followed by a curly
2085   // brace.
2086   nextToken();
2087   if (FormatTok->isNot(tok::l_brace))
2088     return false;
2089   parseChildBlock();
2090   return true;
2091 }
2092 
2093 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2094                                           bool IsEnum,
2095                                           tok::TokenKind ClosingBraceKind) {
2096   bool HasError = false;
2097 
2098   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2099   // replace this by using parseAssignmentExpression() inside.
2100   do {
2101     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2102         tryToParseChildBlock())
2103       continue;
2104     if (Style.isJavaScript()) {
2105       if (FormatTok->is(Keywords.kw_function) ||
2106           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2107         tryToParseJSFunction();
2108         continue;
2109       }
2110       if (FormatTok->is(tok::l_brace)) {
2111         // Could be a method inside of a braced list `{a() { return 1; }}`.
2112         if (tryToParseBracedList())
2113           continue;
2114         parseChildBlock();
2115       }
2116     }
2117     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2118       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2119         addUnwrappedLine();
2120       nextToken();
2121       return !HasError;
2122     }
2123     switch (FormatTok->Tok.getKind()) {
2124     case tok::l_square:
2125       if (Style.isCSharp())
2126         parseSquare();
2127       else
2128         tryToParseLambda();
2129       break;
2130     case tok::l_paren:
2131       parseParens();
2132       // JavaScript can just have free standing methods and getters/setters in
2133       // object literals. Detect them by a "{" following ")".
2134       if (Style.isJavaScript()) {
2135         if (FormatTok->is(tok::l_brace))
2136           parseChildBlock();
2137         break;
2138       }
2139       break;
2140     case tok::l_brace:
2141       // Assume there are no blocks inside a braced init list apart
2142       // from the ones we explicitly parse out (like lambdas).
2143       FormatTok->setBlockKind(BK_BracedInit);
2144       nextToken();
2145       parseBracedList();
2146       break;
2147     case tok::less:
2148       if (Style.Language == FormatStyle::LK_Proto) {
2149         nextToken();
2150         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2151                         /*ClosingBraceKind=*/tok::greater);
2152       } else {
2153         nextToken();
2154       }
2155       break;
2156     case tok::semi:
2157       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2158       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2159       // used for error recovery if we have otherwise determined that this is
2160       // a braced list.
2161       if (Style.isJavaScript()) {
2162         nextToken();
2163         break;
2164       }
2165       HasError = true;
2166       if (!ContinueOnSemicolons)
2167         return !HasError;
2168       nextToken();
2169       break;
2170     case tok::comma:
2171       nextToken();
2172       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2173         addUnwrappedLine();
2174       break;
2175     default:
2176       nextToken();
2177       break;
2178     }
2179   } while (!eof());
2180   return false;
2181 }
2182 
2183 /// \brief Parses a pair of parentheses (and everything between them).
2184 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2185 /// double ampersands. This only counts for the current parens scope.
2186 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2187   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2188   nextToken();
2189   do {
2190     switch (FormatTok->Tok.getKind()) {
2191     case tok::l_paren:
2192       parseParens();
2193       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2194         parseChildBlock();
2195       break;
2196     case tok::r_paren:
2197       nextToken();
2198       return;
2199     case tok::r_brace:
2200       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2201       return;
2202     case tok::l_square:
2203       tryToParseLambda();
2204       break;
2205     case tok::l_brace:
2206       if (!tryToParseBracedList())
2207         parseChildBlock();
2208       break;
2209     case tok::at:
2210       nextToken();
2211       if (FormatTok->Tok.is(tok::l_brace)) {
2212         nextToken();
2213         parseBracedList();
2214       }
2215       break;
2216     case tok::equal:
2217       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2218         tryToParseChildBlock();
2219       else
2220         nextToken();
2221       break;
2222     case tok::kw_class:
2223       if (Style.isJavaScript())
2224         parseRecord(/*ParseAsExpr=*/true);
2225       else
2226         nextToken();
2227       break;
2228     case tok::identifier:
2229       if (Style.isJavaScript() &&
2230           (FormatTok->is(Keywords.kw_function) ||
2231            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2232         tryToParseJSFunction();
2233       else
2234         nextToken();
2235       break;
2236     case tok::kw_requires: {
2237       auto RequiresToken = FormatTok;
2238       nextToken();
2239       parseRequiresExpression(RequiresToken);
2240       break;
2241     }
2242     case tok::ampamp:
2243       if (AmpAmpTokenType != TT_Unknown)
2244         FormatTok->setType(AmpAmpTokenType);
2245       LLVM_FALLTHROUGH;
2246     default:
2247       nextToken();
2248       break;
2249     }
2250   } while (!eof());
2251 }
2252 
2253 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2254   if (!LambdaIntroducer) {
2255     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2256     if (tryToParseLambda())
2257       return;
2258   }
2259   do {
2260     switch (FormatTok->Tok.getKind()) {
2261     case tok::l_paren:
2262       parseParens();
2263       break;
2264     case tok::r_square:
2265       nextToken();
2266       return;
2267     case tok::r_brace:
2268       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2269       return;
2270     case tok::l_square:
2271       parseSquare();
2272       break;
2273     case tok::l_brace: {
2274       if (!tryToParseBracedList())
2275         parseChildBlock();
2276       break;
2277     }
2278     case tok::at:
2279       nextToken();
2280       if (FormatTok->Tok.is(tok::l_brace)) {
2281         nextToken();
2282         parseBracedList();
2283       }
2284       break;
2285     default:
2286       nextToken();
2287       break;
2288     }
2289   } while (!eof());
2290 }
2291 
2292 void UnwrappedLineParser::keepAncestorBraces() {
2293   if (!Style.RemoveBracesLLVM)
2294     return;
2295 
2296   const int MaxNestingLevels = 2;
2297   const int Size = NestedTooDeep.size();
2298   if (Size >= MaxNestingLevels)
2299     NestedTooDeep[Size - MaxNestingLevels] = true;
2300   NestedTooDeep.push_back(false);
2301 }
2302 
2303 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2304   for (const auto &Token : llvm::reverse(Line.Tokens))
2305     if (Token.Tok->isNot(tok::comment))
2306       return Token.Tok;
2307 
2308   return nullptr;
2309 }
2310 
2311 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2312   FormatToken *Tok = nullptr;
2313 
2314   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2315       PreprocessorDirectives.empty()) {
2316     Tok = getLastNonComment(*Line);
2317     assert(Tok);
2318     if (Tok->BraceCount < 0) {
2319       assert(Tok->BraceCount == -1);
2320       Tok = nullptr;
2321     } else {
2322       Tok->BraceCount = -1;
2323     }
2324   }
2325 
2326   addUnwrappedLine();
2327   ++Line->Level;
2328   parseStructuralElement();
2329 
2330   if (Tok) {
2331     assert(!Line->InPPDirective);
2332     Tok = nullptr;
2333     for (const auto &L : llvm::reverse(*CurrentLines)) {
2334       if (!L.InPPDirective) {
2335         Tok = getLastNonComment(L);
2336         if (Tok)
2337           break;
2338       }
2339     }
2340     assert(Tok);
2341     ++Tok->BraceCount;
2342   }
2343 
2344   if (CheckEOF && FormatTok->is(tok::eof))
2345     addUnwrappedLine();
2346 
2347   --Line->Level;
2348 }
2349 
2350 static void markOptionalBraces(FormatToken *LeftBrace) {
2351   if (!LeftBrace)
2352     return;
2353 
2354   assert(LeftBrace->is(tok::l_brace));
2355 
2356   FormatToken *RightBrace = LeftBrace->MatchingParen;
2357   if (!RightBrace) {
2358     assert(!LeftBrace->Optional);
2359     return;
2360   }
2361 
2362   assert(RightBrace->is(tok::r_brace));
2363   assert(RightBrace->MatchingParen == LeftBrace);
2364   assert(LeftBrace->Optional == RightBrace->Optional);
2365 
2366   LeftBrace->Optional = true;
2367   RightBrace->Optional = true;
2368 }
2369 
2370 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2371                                                   bool KeepBraces) {
2372   auto HandleAttributes = [this]() {
2373     // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2374     if (FormatTok->is(TT_AttributeMacro))
2375       nextToken();
2376     // Handle [[likely]] / [[unlikely]] attributes.
2377     if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2378       parseSquare();
2379   };
2380 
2381   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2382   nextToken();
2383   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2384     nextToken();
2385   if (FormatTok->Tok.is(tok::l_paren))
2386     parseParens();
2387   HandleAttributes();
2388 
2389   bool NeedsUnwrappedLine = false;
2390   keepAncestorBraces();
2391 
2392   FormatToken *IfLeftBrace = nullptr;
2393   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2394 
2395   if (FormatTok->Tok.is(tok::l_brace)) {
2396     IfLeftBrace = FormatTok;
2397     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2398     IfBlockKind = parseBlock();
2399     if (Style.BraceWrapping.BeforeElse)
2400       addUnwrappedLine();
2401     else
2402       NeedsUnwrappedLine = true;
2403   } else {
2404     parseUnbracedBody();
2405   }
2406 
2407   bool KeepIfBraces = false;
2408   if (Style.RemoveBracesLLVM) {
2409     assert(!NestedTooDeep.empty());
2410     KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2411                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2412                    IfBlockKind == IfStmtKind::IfElseIf;
2413   }
2414 
2415   FormatToken *ElseLeftBrace = nullptr;
2416   IfStmtKind Kind = IfStmtKind::IfOnly;
2417 
2418   if (FormatTok->Tok.is(tok::kw_else)) {
2419     if (Style.RemoveBracesLLVM) {
2420       NestedTooDeep.back() = false;
2421       Kind = IfStmtKind::IfElse;
2422     }
2423     nextToken();
2424     HandleAttributes();
2425     if (FormatTok->Tok.is(tok::l_brace)) {
2426       ElseLeftBrace = FormatTok;
2427       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2428       if (parseBlock() == IfStmtKind::IfOnly)
2429         Kind = IfStmtKind::IfElseIf;
2430       addUnwrappedLine();
2431     } else if (FormatTok->Tok.is(tok::kw_if)) {
2432       FormatToken *Previous = Tokens->getPreviousToken();
2433       const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
2434       if (IsPrecededByComment) {
2435         addUnwrappedLine();
2436         ++Line->Level;
2437       }
2438       bool TooDeep = true;
2439       if (Style.RemoveBracesLLVM) {
2440         Kind = IfStmtKind::IfElseIf;
2441         TooDeep = NestedTooDeep.pop_back_val();
2442       }
2443       ElseLeftBrace =
2444           parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
2445       if (Style.RemoveBracesLLVM)
2446         NestedTooDeep.push_back(TooDeep);
2447       if (IsPrecededByComment)
2448         --Line->Level;
2449     } else {
2450       parseUnbracedBody(/*CheckEOF=*/true);
2451     }
2452   } else {
2453     if (Style.RemoveBracesLLVM)
2454       KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2455     if (NeedsUnwrappedLine)
2456       addUnwrappedLine();
2457   }
2458 
2459   if (!Style.RemoveBracesLLVM)
2460     return nullptr;
2461 
2462   assert(!NestedTooDeep.empty());
2463   const bool KeepElseBraces =
2464       (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();
2465 
2466   NestedTooDeep.pop_back();
2467 
2468   if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
2469     markOptionalBraces(IfLeftBrace);
2470     markOptionalBraces(ElseLeftBrace);
2471   } else if (IfLeftBrace) {
2472     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2473     if (IfRightBrace) {
2474       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2475       assert(!IfLeftBrace->Optional);
2476       assert(!IfRightBrace->Optional);
2477       IfLeftBrace->MatchingParen = nullptr;
2478       IfRightBrace->MatchingParen = nullptr;
2479     }
2480   }
2481 
2482   if (IfKind)
2483     *IfKind = Kind;
2484 
2485   return IfLeftBrace;
2486 }
2487 
2488 void UnwrappedLineParser::parseTryCatch() {
2489   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2490   nextToken();
2491   bool NeedsUnwrappedLine = false;
2492   if (FormatTok->is(tok::colon)) {
2493     // We are in a function try block, what comes is an initializer list.
2494     nextToken();
2495 
2496     // In case identifiers were removed by clang-tidy, what might follow is
2497     // multiple commas in sequence - before the first identifier.
2498     while (FormatTok->is(tok::comma))
2499       nextToken();
2500 
2501     while (FormatTok->is(tok::identifier)) {
2502       nextToken();
2503       if (FormatTok->is(tok::l_paren))
2504         parseParens();
2505       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2506           FormatTok->is(tok::l_brace)) {
2507         do {
2508           nextToken();
2509         } while (!FormatTok->is(tok::r_brace));
2510         nextToken();
2511       }
2512 
2513       // In case identifiers were removed by clang-tidy, what might follow is
2514       // multiple commas in sequence - after the first identifier.
2515       while (FormatTok->is(tok::comma))
2516         nextToken();
2517     }
2518   }
2519   // Parse try with resource.
2520   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2521     parseParens();
2522 
2523   keepAncestorBraces();
2524 
2525   if (FormatTok->is(tok::l_brace)) {
2526     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2527     parseBlock();
2528     if (Style.BraceWrapping.BeforeCatch)
2529       addUnwrappedLine();
2530     else
2531       NeedsUnwrappedLine = true;
2532   } else if (!FormatTok->is(tok::kw_catch)) {
2533     // The C++ standard requires a compound-statement after a try.
2534     // If there's none, we try to assume there's a structuralElement
2535     // and try to continue.
2536     addUnwrappedLine();
2537     ++Line->Level;
2538     parseStructuralElement();
2539     --Line->Level;
2540   }
2541   while (true) {
2542     if (FormatTok->is(tok::at))
2543       nextToken();
2544     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2545                              tok::kw___finally) ||
2546           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2547            FormatTok->is(Keywords.kw_finally)) ||
2548           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2549            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2550       break;
2551     nextToken();
2552     while (FormatTok->isNot(tok::l_brace)) {
2553       if (FormatTok->is(tok::l_paren)) {
2554         parseParens();
2555         continue;
2556       }
2557       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2558         if (Style.RemoveBracesLLVM)
2559           NestedTooDeep.pop_back();
2560         return;
2561       }
2562       nextToken();
2563     }
2564     NeedsUnwrappedLine = false;
2565     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2566     parseBlock();
2567     if (Style.BraceWrapping.BeforeCatch)
2568       addUnwrappedLine();
2569     else
2570       NeedsUnwrappedLine = true;
2571   }
2572 
2573   if (Style.RemoveBracesLLVM)
2574     NestedTooDeep.pop_back();
2575 
2576   if (NeedsUnwrappedLine)
2577     addUnwrappedLine();
2578 }
2579 
2580 void UnwrappedLineParser::parseNamespace() {
2581   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2582          "'namespace' expected");
2583 
2584   const FormatToken &InitialToken = *FormatTok;
2585   nextToken();
2586   if (InitialToken.is(TT_NamespaceMacro)) {
2587     parseParens();
2588   } else {
2589     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2590                               tok::l_square, tok::period) ||
2591            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2592       if (FormatTok->is(tok::l_square))
2593         parseSquare();
2594       else
2595         nextToken();
2596   }
2597   if (FormatTok->Tok.is(tok::l_brace)) {
2598     if (ShouldBreakBeforeBrace(Style, InitialToken))
2599       addUnwrappedLine();
2600 
2601     unsigned AddLevels =
2602         Style.NamespaceIndentation == FormatStyle::NI_All ||
2603                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2604                  DeclarationScopeStack.size() > 1)
2605             ? 1u
2606             : 0u;
2607     bool ManageWhitesmithsBraces =
2608         AddLevels == 0u &&
2609         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2610 
2611     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2612     // the whole block.
2613     if (ManageWhitesmithsBraces)
2614       ++Line->Level;
2615 
2616     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2617                /*MunchSemi=*/true,
2618                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2619 
2620     // Munch the semicolon after a namespace. This is more common than one would
2621     // think. Putting the semicolon into its own line is very ugly.
2622     if (FormatTok->Tok.is(tok::semi))
2623       nextToken();
2624 
2625     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2626 
2627     if (ManageWhitesmithsBraces)
2628       --Line->Level;
2629   }
2630   // FIXME: Add error handling.
2631 }
2632 
2633 void UnwrappedLineParser::parseNew() {
2634   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2635   nextToken();
2636 
2637   if (Style.isCSharp()) {
2638     do {
2639       if (FormatTok->is(tok::l_brace))
2640         parseBracedList();
2641 
2642       if (FormatTok->isOneOf(tok::semi, tok::comma))
2643         return;
2644 
2645       nextToken();
2646     } while (!eof());
2647   }
2648 
2649   if (Style.Language != FormatStyle::LK_Java)
2650     return;
2651 
2652   // In Java, we can parse everything up to the parens, which aren't optional.
2653   do {
2654     // There should not be a ;, { or } before the new's open paren.
2655     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2656       return;
2657 
2658     // Consume the parens.
2659     if (FormatTok->is(tok::l_paren)) {
2660       parseParens();
2661 
2662       // If there is a class body of an anonymous class, consume that as child.
2663       if (FormatTok->is(tok::l_brace))
2664         parseChildBlock();
2665       return;
2666     }
2667     nextToken();
2668   } while (!eof());
2669 }
2670 
2671 void UnwrappedLineParser::parseForOrWhileLoop() {
2672   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2673          "'for', 'while' or foreach macro expected");
2674   nextToken();
2675   // JS' for await ( ...
2676   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2677     nextToken();
2678   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2679     nextToken();
2680   if (FormatTok->Tok.is(tok::l_paren))
2681     parseParens();
2682 
2683   keepAncestorBraces();
2684 
2685   if (FormatTok->Tok.is(tok::l_brace)) {
2686     FormatToken *LeftBrace = FormatTok;
2687     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2688     parseBlock();
2689     if (Style.RemoveBracesLLVM) {
2690       assert(!NestedTooDeep.empty());
2691       if (!NestedTooDeep.back())
2692         markOptionalBraces(LeftBrace);
2693     }
2694     addUnwrappedLine();
2695   } else {
2696     parseUnbracedBody();
2697   }
2698 
2699   if (Style.RemoveBracesLLVM)
2700     NestedTooDeep.pop_back();
2701 }
2702 
2703 void UnwrappedLineParser::parseDoWhile() {
2704   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2705   nextToken();
2706 
2707   keepAncestorBraces();
2708 
2709   if (FormatTok->Tok.is(tok::l_brace)) {
2710     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2711     parseBlock();
2712     if (Style.BraceWrapping.BeforeWhile)
2713       addUnwrappedLine();
2714   } else {
2715     parseUnbracedBody();
2716   }
2717 
2718   if (Style.RemoveBracesLLVM)
2719     NestedTooDeep.pop_back();
2720 
2721   // FIXME: Add error handling.
2722   if (!FormatTok->Tok.is(tok::kw_while)) {
2723     addUnwrappedLine();
2724     return;
2725   }
2726 
2727   // If in Whitesmiths mode, the line with the while() needs to be indented
2728   // to the same level as the block.
2729   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2730     ++Line->Level;
2731 
2732   nextToken();
2733   parseStructuralElement();
2734 }
2735 
2736 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2737   nextToken();
2738   unsigned OldLineLevel = Line->Level;
2739   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2740     --Line->Level;
2741   if (LeftAlignLabel)
2742     Line->Level = 0;
2743 
2744   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2745       FormatTok->Tok.is(tok::l_brace)) {
2746 
2747     CompoundStatementIndenter Indenter(this, Line->Level,
2748                                        Style.BraceWrapping.AfterCaseLabel,
2749                                        Style.BraceWrapping.IndentBraces);
2750     parseBlock();
2751     if (FormatTok->Tok.is(tok::kw_break)) {
2752       if (Style.BraceWrapping.AfterControlStatement ==
2753           FormatStyle::BWACS_Always) {
2754         addUnwrappedLine();
2755         if (!Style.IndentCaseBlocks &&
2756             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2757           ++Line->Level;
2758       }
2759       parseStructuralElement();
2760     }
2761     addUnwrappedLine();
2762   } else {
2763     if (FormatTok->is(tok::semi))
2764       nextToken();
2765     addUnwrappedLine();
2766   }
2767   Line->Level = OldLineLevel;
2768   if (FormatTok->isNot(tok::l_brace)) {
2769     parseStructuralElement();
2770     addUnwrappedLine();
2771   }
2772 }
2773 
2774 void UnwrappedLineParser::parseCaseLabel() {
2775   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2776 
2777   // FIXME: fix handling of complex expressions here.
2778   do {
2779     nextToken();
2780   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2781   parseLabel();
2782 }
2783 
2784 void UnwrappedLineParser::parseSwitch() {
2785   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2786   nextToken();
2787   if (FormatTok->Tok.is(tok::l_paren))
2788     parseParens();
2789 
2790   keepAncestorBraces();
2791 
2792   if (FormatTok->Tok.is(tok::l_brace)) {
2793     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2794     parseBlock();
2795     addUnwrappedLine();
2796   } else {
2797     addUnwrappedLine();
2798     ++Line->Level;
2799     parseStructuralElement();
2800     --Line->Level;
2801   }
2802 
2803   if (Style.RemoveBracesLLVM)
2804     NestedTooDeep.pop_back();
2805 }
2806 
2807 void UnwrappedLineParser::parseAccessSpecifier() {
2808   FormatToken *AccessSpecifierCandidate = FormatTok;
2809   nextToken();
2810   // Understand Qt's slots.
2811   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2812     nextToken();
2813   // Otherwise, we don't know what it is, and we'd better keep the next token.
2814   if (FormatTok->Tok.is(tok::colon)) {
2815     nextToken();
2816     addUnwrappedLine();
2817   } else if (!FormatTok->Tok.is(tok::coloncolon) &&
2818              !std::binary_search(COperatorsFollowingVar.begin(),
2819                                  COperatorsFollowingVar.end(),
2820                                  FormatTok->Tok.getKind())) {
2821     // Not a variable name nor namespace name.
2822     addUnwrappedLine();
2823   } else if (AccessSpecifierCandidate) {
2824     // Consider the access specifier to be a C identifier.
2825     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2826   }
2827 }
2828 
2829 /// \brief Parses a concept definition.
2830 /// \pre The current token has to be the concept keyword.
2831 ///
2832 /// Returns if either the concept has been completely parsed, or if it detects
2833 /// that the concept definition is incorrect.
2834 void UnwrappedLineParser::parseConcept() {
2835   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2836   nextToken();
2837   if (!FormatTok->Tok.is(tok::identifier))
2838     return;
2839   nextToken();
2840   if (!FormatTok->Tok.is(tok::equal))
2841     return;
2842   nextToken();
2843   parseConstraintExpression();
2844   if (FormatTok->Tok.is(tok::semi))
2845     nextToken();
2846   addUnwrappedLine();
2847 }
2848 
2849 /// \brief Parses a requires, decides if it is a clause or an expression.
2850 /// \pre The current token has to be the requires keyword.
2851 /// \returns true if it parsed a clause.
2852 bool clang::format::UnwrappedLineParser::parseRequires() {
2853   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2854   auto RequiresToken = FormatTok;
2855 
2856   // We try to guess if it is a requires clause, or a requires expression. For
2857   // that we first consume the keyword and check the next token.
2858   nextToken();
2859 
2860   switch (FormatTok->Tok.getKind()) {
2861   case tok::l_brace:
2862     // This can only be an expression, never a clause.
2863     parseRequiresExpression(RequiresToken);
2864     return false;
2865   case tok::l_paren:
2866     // Clauses and expression can start with a paren, it's unclear what we have.
2867     break;
2868   default:
2869     // All other tokens can only be a clause.
2870     parseRequiresClause(RequiresToken);
2871     return true;
2872   }
2873 
2874   // Looking forward we would have to decide if there are function declaration
2875   // like arguments to the requires expression:
2876   // requires (T t) {
2877   // Or there is a constraint expression for the requires clause:
2878   // requires (C<T> && ...
2879 
2880   // But first let's look behind.
2881   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
2882 
2883   if (!PreviousNonComment ||
2884       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
2885     // If there is no token, or an expression left brace, we are a requires
2886     // clause within a requires expression.
2887     parseRequiresClause(RequiresToken);
2888     return true;
2889   }
2890 
2891   switch (PreviousNonComment->Tok.getKind()) {
2892   case tok::greater:
2893   case tok::r_paren:
2894   case tok::kw_noexcept:
2895   case tok::kw_const:
2896     // This is a requires clause.
2897     parseRequiresClause(RequiresToken);
2898     return true;
2899   case tok::amp:
2900   case tok::ampamp: {
2901     // This can be either:
2902     // if (... && requires (T t) ...)
2903     // Or
2904     // void member(...) && requires (C<T> ...
2905     // We check the one token before that for a const:
2906     // void member(...) const && requires (C<T> ...
2907     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
2908     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
2909       parseRequiresClause(RequiresToken);
2910       return true;
2911     }
2912     break;
2913   }
2914   default:
2915     // It's an expression.
2916     parseRequiresExpression(RequiresToken);
2917     return false;
2918   }
2919 
2920   // Now we look forward and try to check if the paren content is a parameter
2921   // list. The parameters can be cv-qualified and contain references or
2922   // pointers.
2923   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
2924   // of stuff: typename, const, *, &, &&, ::, identifiers.
2925 
2926   int NextTokenOffset = 1;
2927   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
2928   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
2929     ++NextTokenOffset;
2930     NextToken = Tokens->peekNextToken(NextTokenOffset);
2931   };
2932 
2933   bool FoundType = false;
2934   bool LastWasColonColon = false;
2935   int OpenAngles = 0;
2936 
2937   for (; NextTokenOffset < 50; PeekNext()) {
2938     switch (NextToken->Tok.getKind()) {
2939     case tok::kw_volatile:
2940     case tok::kw_const:
2941     case tok::comma:
2942       parseRequiresExpression(RequiresToken);
2943       return false;
2944     case tok::r_paren:
2945     case tok::pipepipe:
2946       parseRequiresClause(RequiresToken);
2947       return true;
2948     case tok::eof:
2949       // Break out of the loop.
2950       NextTokenOffset = 50;
2951       break;
2952     case tok::coloncolon:
2953       LastWasColonColon = true;
2954       break;
2955     case tok::identifier:
2956       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
2957         parseRequiresExpression(RequiresToken);
2958         return false;
2959       }
2960       FoundType = true;
2961       LastWasColonColon = false;
2962       break;
2963     case tok::less:
2964       ++OpenAngles;
2965       break;
2966     case tok::greater:
2967       --OpenAngles;
2968       break;
2969     default:
2970       if (NextToken->isSimpleTypeSpecifier()) {
2971         parseRequiresExpression(RequiresToken);
2972         return false;
2973       }
2974       break;
2975     }
2976   }
2977 
2978   // This seems to be a complicated expression, just assume it's a clause.
2979   parseRequiresClause(RequiresToken);
2980   return true;
2981 }
2982 
2983 /// \brief Parses a requires clause.
2984 /// \param RequiresToken The requires keyword token, which starts this clause.
2985 /// \pre We need to be on the next token after the requires keyword.
2986 /// \sa parseRequiresExpression
2987 ///
2988 /// Returns if it either has finished parsing the clause, or it detects, that
2989 /// the clause is incorrect.
2990 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
2991   assert(FormatTok->getPreviousNonComment() == RequiresToken);
2992   assert(RequiresToken->Tok.is(tok::kw_requires) && "'requires' expected");
2993   assert(RequiresToken->getType() == TT_Unknown);
2994 
2995   // If there is no previous token, we are within a requires expression,
2996   // otherwise we will always have the template or function declaration in front
2997   // of it.
2998   bool InRequiresExpression =
2999       !RequiresToken->Previous ||
3000       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3001 
3002   RequiresToken->setType(InRequiresExpression
3003                              ? TT_RequiresClauseInARequiresExpression
3004                              : TT_RequiresClause);
3005 
3006   parseConstraintExpression();
3007 
3008   if (!InRequiresExpression)
3009     FormatTok->Previous->ClosesRequiresClause = true;
3010 }
3011 
3012 /// \brief Parses a requires expression.
3013 /// \param RequiresToken The requires keyword token, which starts this clause.
3014 /// \pre We need to be on the next token after the requires keyword.
3015 /// \sa parseRequiresClause
3016 ///
3017 /// Returns if it either has finished parsing the expression, or it detects,
3018 /// that the expression is incorrect.
3019 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3020   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3021   assert(RequiresToken->Tok.is(tok::kw_requires) && "'requires' expected");
3022   assert(RequiresToken->getType() == TT_Unknown);
3023 
3024   RequiresToken->setType(TT_RequiresExpression);
3025 
3026   if (FormatTok->is(tok::l_paren)) {
3027     FormatTok->setType(TT_RequiresExpressionLParen);
3028     parseParens();
3029   }
3030 
3031   if (FormatTok->is(tok::l_brace)) {
3032     FormatTok->setType(TT_RequiresExpressionLBrace);
3033     parseChildBlock(/*CanContainBracedList=*/false,
3034                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3035   }
3036 }
3037 
3038 /// \brief Parses a constraint expression.
3039 ///
3040 /// This is either the definition of a concept, or the body of a requires
3041 /// clause. It returns, when the parsing is complete, or the expression is
3042 /// incorrect.
3043 void UnwrappedLineParser::parseConstraintExpression() {
3044   // The special handling for lambdas is needed since tryToParseLambda() eats a
3045   // token and if a requires expression is the last part of a requires clause
3046   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3047   // not set on the correct token. Thus we need to be aware if we even expect a
3048   // lambda to be possible.
3049   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3050   bool LambdaNextTimeAllowed = true;
3051   do {
3052     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3053 
3054     switch (FormatTok->Tok.getKind()) {
3055     case tok::kw_requires: {
3056       auto RequiresToken = FormatTok;
3057       nextToken();
3058       parseRequiresExpression(RequiresToken);
3059       break;
3060     }
3061 
3062     case tok::l_paren:
3063       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3064       break;
3065 
3066     case tok::l_square:
3067       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3068         return;
3069       break;
3070 
3071     case tok::identifier:
3072       // We need to differentiate identifiers for a template deduction guide,
3073       // variables, or function return types (the constraint expression has
3074       // ended before that), and basically all other cases. But it's easier to
3075       // check the other way around.
3076       assert(FormatTok->Previous);
3077       switch (FormatTok->Previous->Tok.getKind()) {
3078       case tok::coloncolon:  // Nested identifier.
3079       case tok::ampamp:      // Start of a function or variable for the
3080       case tok::pipepipe:    // constraint expression.
3081       case tok::kw_requires: // Initial identifier of a requires clause.
3082       case tok::equal:       // Initial identifier of a concept declaration.
3083         break;
3084       default:
3085         return;
3086       }
3087 
3088       // Read identifier with optional template declaration.
3089       nextToken();
3090       if (FormatTok->Tok.is(tok::less))
3091         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3092                         /*ClosingBraceKind=*/tok::greater);
3093       break;
3094 
3095     case tok::kw_const:
3096     case tok::semi:
3097     case tok::kw_class:
3098     case tok::kw_struct:
3099     case tok::kw_union:
3100       return;
3101 
3102     case tok::l_brace:
3103       // Potential function body.
3104       return;
3105 
3106     case tok::ampamp:
3107     case tok::pipepipe:
3108       FormatTok->setType(TT_BinaryOperator);
3109       nextToken();
3110       LambdaNextTimeAllowed = true;
3111       break;
3112 
3113     case tok::comma:
3114     case tok::comment:
3115       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3116       nextToken();
3117       break;
3118 
3119     case tok::kw_sizeof:
3120     case tok::greater:
3121     case tok::greaterequal:
3122     case tok::greatergreater:
3123     case tok::less:
3124     case tok::lessequal:
3125     case tok::lessless:
3126     case tok::equalequal:
3127     case tok::exclaim:
3128     case tok::exclaimequal:
3129     case tok::plus:
3130     case tok::minus:
3131     case tok::star:
3132     case tok::slash:
3133     case tok::kw_decltype:
3134       LambdaNextTimeAllowed = true;
3135       // Just eat them.
3136       nextToken();
3137       break;
3138 
3139     case tok::numeric_constant:
3140     case tok::coloncolon:
3141     case tok::kw_true:
3142     case tok::kw_false:
3143       // Just eat them.
3144       nextToken();
3145       break;
3146 
3147     case tok::kw_static_cast:
3148     case tok::kw_const_cast:
3149     case tok::kw_reinterpret_cast:
3150     case tok::kw_dynamic_cast:
3151       nextToken();
3152       if (!FormatTok->is(tok::less))
3153         return;
3154 
3155       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3156                       /*ClosingBraceKind=*/tok::greater);
3157       break;
3158 
3159     case tok::kw_bool:
3160       // bool is only allowed if it is directly followed by a paren for a cast:
3161       // concept C = bool(...);
3162       // and bool is the only type, all other types as cast must be inside a
3163       // cast to bool an thus are handled by the other cases.
3164       nextToken();
3165       if (FormatTok->isNot(tok::l_paren))
3166         return;
3167       parseParens();
3168       break;
3169 
3170     default:
3171       return;
3172     }
3173   } while (!eof());
3174 }
3175 
3176 bool UnwrappedLineParser::parseEnum() {
3177   const FormatToken &InitialToken = *FormatTok;
3178 
3179   // Won't be 'enum' for NS_ENUMs.
3180   if (FormatTok->Tok.is(tok::kw_enum))
3181     nextToken();
3182 
3183   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3184   // declarations. An "enum" keyword followed by a colon would be a syntax
3185   // error and thus assume it is just an identifier.
3186   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3187     return false;
3188 
3189   // In protobuf, "enum" can be used as a field name.
3190   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3191     return false;
3192 
3193   // Eat up enum class ...
3194   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
3195     nextToken();
3196 
3197   while (FormatTok->Tok.getIdentifierInfo() ||
3198          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3199                             tok::greater, tok::comma, tok::question)) {
3200     nextToken();
3201     // We can have macros or attributes in between 'enum' and the enum name.
3202     if (FormatTok->is(tok::l_paren))
3203       parseParens();
3204     if (FormatTok->is(tok::identifier)) {
3205       nextToken();
3206       // If there are two identifiers in a row, this is likely an elaborate
3207       // return type. In Java, this can be "implements", etc.
3208       if (Style.isCpp() && FormatTok->is(tok::identifier))
3209         return false;
3210     }
3211   }
3212 
3213   // Just a declaration or something is wrong.
3214   if (FormatTok->isNot(tok::l_brace))
3215     return true;
3216   FormatTok->setType(TT_EnumLBrace);
3217   FormatTok->setBlockKind(BK_Block);
3218 
3219   if (Style.Language == FormatStyle::LK_Java) {
3220     // Java enums are different.
3221     parseJavaEnumBody();
3222     return true;
3223   }
3224   if (Style.Language == FormatStyle::LK_Proto) {
3225     parseBlock(/*MustBeDeclaration=*/true);
3226     return true;
3227   }
3228 
3229   if (!Style.AllowShortEnumsOnASingleLine &&
3230       ShouldBreakBeforeBrace(Style, InitialToken))
3231     addUnwrappedLine();
3232   // Parse enum body.
3233   nextToken();
3234   if (!Style.AllowShortEnumsOnASingleLine) {
3235     addUnwrappedLine();
3236     Line->Level += 1;
3237   }
3238   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3239                                    /*IsEnum=*/true);
3240   if (!Style.AllowShortEnumsOnASingleLine)
3241     Line->Level -= 1;
3242   if (HasError) {
3243     if (FormatTok->is(tok::semi))
3244       nextToken();
3245     addUnwrappedLine();
3246   }
3247   return true;
3248 
3249   // There is no addUnwrappedLine() here so that we fall through to parsing a
3250   // structural element afterwards. Thus, in "enum A {} n, m;",
3251   // "} n, m;" will end up in one unwrapped line.
3252 }
3253 
3254 bool UnwrappedLineParser::parseStructLike() {
3255   // parseRecord falls through and does not yet add an unwrapped line as a
3256   // record declaration or definition can start a structural element.
3257   parseRecord();
3258   // This does not apply to Java, JavaScript and C#.
3259   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3260       Style.isCSharp()) {
3261     if (FormatTok->is(tok::semi))
3262       nextToken();
3263     addUnwrappedLine();
3264     return true;
3265   }
3266   return false;
3267 }
3268 
3269 namespace {
3270 // A class used to set and restore the Token position when peeking
3271 // ahead in the token source.
3272 class ScopedTokenPosition {
3273   unsigned StoredPosition;
3274   FormatTokenSource *Tokens;
3275 
3276 public:
3277   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3278     assert(Tokens && "Tokens expected to not be null");
3279     StoredPosition = Tokens->getPosition();
3280   }
3281 
3282   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3283 };
3284 } // namespace
3285 
3286 // Look to see if we have [[ by looking ahead, if
3287 // its not then rewind to the original position.
3288 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3289   ScopedTokenPosition AutoPosition(Tokens);
3290   FormatToken *Tok = Tokens->getNextToken();
3291   // We already read the first [ check for the second.
3292   if (!Tok->is(tok::l_square))
3293     return false;
3294   // Double check that the attribute is just something
3295   // fairly simple.
3296   while (Tok->isNot(tok::eof)) {
3297     if (Tok->is(tok::r_square))
3298       break;
3299     Tok = Tokens->getNextToken();
3300   }
3301   if (Tok->is(tok::eof))
3302     return false;
3303   Tok = Tokens->getNextToken();
3304   if (!Tok->is(tok::r_square))
3305     return false;
3306   Tok = Tokens->getNextToken();
3307   if (Tok->is(tok::semi))
3308     return false;
3309   return true;
3310 }
3311 
3312 void UnwrappedLineParser::parseJavaEnumBody() {
3313   // Determine whether the enum is simple, i.e. does not have a semicolon or
3314   // constants with class bodies. Simple enums can be formatted like braced
3315   // lists, contracted to a single line, etc.
3316   unsigned StoredPosition = Tokens->getPosition();
3317   bool IsSimple = true;
3318   FormatToken *Tok = Tokens->getNextToken();
3319   while (!Tok->is(tok::eof)) {
3320     if (Tok->is(tok::r_brace))
3321       break;
3322     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3323       IsSimple = false;
3324       break;
3325     }
3326     // FIXME: This will also mark enums with braces in the arguments to enum
3327     // constants as "not simple". This is probably fine in practice, though.
3328     Tok = Tokens->getNextToken();
3329   }
3330   FormatTok = Tokens->setPosition(StoredPosition);
3331 
3332   if (IsSimple) {
3333     nextToken();
3334     parseBracedList();
3335     addUnwrappedLine();
3336     return;
3337   }
3338 
3339   // Parse the body of a more complex enum.
3340   // First add a line for everything up to the "{".
3341   nextToken();
3342   addUnwrappedLine();
3343   ++Line->Level;
3344 
3345   // Parse the enum constants.
3346   while (FormatTok) {
3347     if (FormatTok->is(tok::l_brace)) {
3348       // Parse the constant's class body.
3349       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3350                  /*MunchSemi=*/false);
3351     } else if (FormatTok->is(tok::l_paren)) {
3352       parseParens();
3353     } else if (FormatTok->is(tok::comma)) {
3354       nextToken();
3355       addUnwrappedLine();
3356     } else if (FormatTok->is(tok::semi)) {
3357       nextToken();
3358       addUnwrappedLine();
3359       break;
3360     } else if (FormatTok->is(tok::r_brace)) {
3361       addUnwrappedLine();
3362       break;
3363     } else {
3364       nextToken();
3365     }
3366   }
3367 
3368   // Parse the class body after the enum's ";" if any.
3369   parseLevel(/*HasOpeningBrace=*/true, /*CanContainBracedList=*/true);
3370   nextToken();
3371   --Line->Level;
3372   addUnwrappedLine();
3373 }
3374 
3375 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3376   const FormatToken &InitialToken = *FormatTok;
3377   nextToken();
3378 
3379   // The actual identifier can be a nested name specifier, and in macros
3380   // it is often token-pasted.
3381   // An [[attribute]] can be before the identifier.
3382   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3383                             tok::kw___attribute, tok::kw___declspec,
3384                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3385          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3386           FormatTok->isOneOf(tok::period, tok::comma))) {
3387     if (Style.isJavaScript() &&
3388         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3389       // JavaScript/TypeScript supports inline object types in
3390       // extends/implements positions:
3391       //     class Foo implements {bar: number} { }
3392       nextToken();
3393       if (FormatTok->is(tok::l_brace)) {
3394         tryToParseBracedList();
3395         continue;
3396       }
3397     }
3398     bool IsNonMacroIdentifier =
3399         FormatTok->is(tok::identifier) &&
3400         FormatTok->TokenText != FormatTok->TokenText.upper();
3401     nextToken();
3402     // We can have macros or attributes in between 'class' and the class name.
3403     if (!IsNonMacroIdentifier) {
3404       if (FormatTok->Tok.is(tok::l_paren)) {
3405         parseParens();
3406       } else if (FormatTok->is(TT_AttributeSquare)) {
3407         parseSquare();
3408         // Consume the closing TT_AttributeSquare.
3409         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3410           nextToken();
3411       }
3412     }
3413   }
3414 
3415   // Note that parsing away template declarations here leads to incorrectly
3416   // accepting function declarations as record declarations.
3417   // In general, we cannot solve this problem. Consider:
3418   // class A<int> B() {}
3419   // which can be a function definition or a class definition when B() is a
3420   // macro. If we find enough real-world cases where this is a problem, we
3421   // can parse for the 'template' keyword in the beginning of the statement,
3422   // and thus rule out the record production in case there is no template
3423   // (this would still leave us with an ambiguity between template function
3424   // and class declarations).
3425   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3426     while (!eof()) {
3427       if (FormatTok->is(tok::l_brace)) {
3428         calculateBraceTypes(/*ExpectClassBody=*/true);
3429         if (!tryToParseBracedList())
3430           break;
3431       }
3432       if (FormatTok->is(tok::l_square)) {
3433         FormatToken *Previous = FormatTok->Previous;
3434         if (!Previous ||
3435             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3436           // Don't try parsing a lambda if we had a closing parenthesis before,
3437           // it was probably a pointer to an array: int (*)[].
3438           if (!tryToParseLambda())
3439             break;
3440         }
3441       }
3442       if (FormatTok->Tok.is(tok::semi))
3443         return;
3444       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3445         addUnwrappedLine();
3446         nextToken();
3447         parseCSharpGenericTypeConstraint();
3448         break;
3449       }
3450       nextToken();
3451     }
3452   }
3453 
3454   auto GetBraceType = [](const FormatToken &RecordTok) {
3455     switch (RecordTok.Tok.getKind()) {
3456     case tok::kw_class:
3457       return TT_ClassLBrace;
3458     case tok::kw_struct:
3459       return TT_StructLBrace;
3460     case tok::kw_union:
3461       return TT_UnionLBrace;
3462     default:
3463       // Useful for e.g. interface.
3464       return TT_RecordLBrace;
3465     }
3466   };
3467   if (FormatTok->Tok.is(tok::l_brace)) {
3468     FormatTok->setType(GetBraceType(InitialToken));
3469     if (ParseAsExpr) {
3470       parseChildBlock();
3471     } else {
3472       if (ShouldBreakBeforeBrace(Style, InitialToken))
3473         addUnwrappedLine();
3474 
3475       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3476       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3477     }
3478   }
3479   // There is no addUnwrappedLine() here so that we fall through to parsing a
3480   // structural element afterwards. Thus, in "class A {} n, m;",
3481   // "} n, m;" will end up in one unwrapped line.
3482 }
3483 
3484 void UnwrappedLineParser::parseObjCMethod() {
3485   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
3486          "'(' or identifier expected.");
3487   do {
3488     if (FormatTok->Tok.is(tok::semi)) {
3489       nextToken();
3490       addUnwrappedLine();
3491       return;
3492     } else if (FormatTok->Tok.is(tok::l_brace)) {
3493       if (Style.BraceWrapping.AfterFunction)
3494         addUnwrappedLine();
3495       parseBlock();
3496       addUnwrappedLine();
3497       return;
3498     } else {
3499       nextToken();
3500     }
3501   } while (!eof());
3502 }
3503 
3504 void UnwrappedLineParser::parseObjCProtocolList() {
3505   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
3506   do {
3507     nextToken();
3508     // Early exit in case someone forgot a close angle.
3509     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3510         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3511       return;
3512   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
3513   nextToken(); // Skip '>'.
3514 }
3515 
3516 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3517   do {
3518     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
3519       nextToken();
3520       addUnwrappedLine();
3521       break;
3522     }
3523     if (FormatTok->is(tok::l_brace)) {
3524       parseBlock();
3525       // In ObjC interfaces, nothing should be following the "}".
3526       addUnwrappedLine();
3527     } else if (FormatTok->is(tok::r_brace)) {
3528       // Ignore stray "}". parseStructuralElement doesn't consume them.
3529       nextToken();
3530       addUnwrappedLine();
3531     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3532       nextToken();
3533       parseObjCMethod();
3534     } else {
3535       parseStructuralElement();
3536     }
3537   } while (!eof());
3538 }
3539 
3540 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3541   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3542          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3543   nextToken();
3544   nextToken(); // interface name
3545 
3546   // @interface can be followed by a lightweight generic
3547   // specialization list, then either a base class or a category.
3548   if (FormatTok->Tok.is(tok::less))
3549     parseObjCLightweightGenerics();
3550   if (FormatTok->Tok.is(tok::colon)) {
3551     nextToken();
3552     nextToken(); // base class name
3553     // The base class can also have lightweight generics applied to it.
3554     if (FormatTok->Tok.is(tok::less))
3555       parseObjCLightweightGenerics();
3556   } else if (FormatTok->Tok.is(tok::l_paren))
3557     // Skip category, if present.
3558     parseParens();
3559 
3560   if (FormatTok->Tok.is(tok::less))
3561     parseObjCProtocolList();
3562 
3563   if (FormatTok->Tok.is(tok::l_brace)) {
3564     if (Style.BraceWrapping.AfterObjCDeclaration)
3565       addUnwrappedLine();
3566     parseBlock(/*MustBeDeclaration=*/true);
3567   }
3568 
3569   // With instance variables, this puts '}' on its own line.  Without instance
3570   // variables, this ends the @interface line.
3571   addUnwrappedLine();
3572 
3573   parseObjCUntilAtEnd();
3574 }
3575 
3576 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3577   assert(FormatTok->Tok.is(tok::less));
3578   // Unlike protocol lists, generic parameterizations support
3579   // nested angles:
3580   //
3581   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3582   //     NSObject <NSCopying, NSSecureCoding>
3583   //
3584   // so we need to count how many open angles we have left.
3585   unsigned NumOpenAngles = 1;
3586   do {
3587     nextToken();
3588     // Early exit in case someone forgot a close angle.
3589     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3590         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3591       break;
3592     if (FormatTok->Tok.is(tok::less))
3593       ++NumOpenAngles;
3594     else if (FormatTok->Tok.is(tok::greater)) {
3595       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3596       --NumOpenAngles;
3597     }
3598   } while (!eof() && NumOpenAngles != 0);
3599   nextToken(); // Skip '>'.
3600 }
3601 
3602 // Returns true for the declaration/definition form of @protocol,
3603 // false for the expression form.
3604 bool UnwrappedLineParser::parseObjCProtocol() {
3605   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3606   nextToken();
3607 
3608   if (FormatTok->is(tok::l_paren))
3609     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3610     return false;
3611 
3612   // The definition/declaration form,
3613   // @protocol Foo
3614   // - (int)someMethod;
3615   // @end
3616 
3617   nextToken(); // protocol name
3618 
3619   if (FormatTok->Tok.is(tok::less))
3620     parseObjCProtocolList();
3621 
3622   // Check for protocol declaration.
3623   if (FormatTok->Tok.is(tok::semi)) {
3624     nextToken();
3625     addUnwrappedLine();
3626     return true;
3627   }
3628 
3629   addUnwrappedLine();
3630   parseObjCUntilAtEnd();
3631   return true;
3632 }
3633 
3634 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3635   bool IsImport = FormatTok->is(Keywords.kw_import);
3636   assert(IsImport || FormatTok->is(tok::kw_export));
3637   nextToken();
3638 
3639   // Consume the "default" in "export default class/function".
3640   if (FormatTok->is(tok::kw_default))
3641     nextToken();
3642 
3643   // Consume "async function", "function" and "default function", so that these
3644   // get parsed as free-standing JS functions, i.e. do not require a trailing
3645   // semicolon.
3646   if (FormatTok->is(Keywords.kw_async))
3647     nextToken();
3648   if (FormatTok->is(Keywords.kw_function)) {
3649     nextToken();
3650     return;
3651   }
3652 
3653   // For imports, `export *`, `export {...}`, consume the rest of the line up
3654   // to the terminating `;`. For everything else, just return and continue
3655   // parsing the structural element, i.e. the declaration or expression for
3656   // `export default`.
3657   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3658       !FormatTok->isStringLiteral())
3659     return;
3660 
3661   while (!eof()) {
3662     if (FormatTok->is(tok::semi))
3663       return;
3664     if (Line->Tokens.empty()) {
3665       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3666       // import statement should terminate.
3667       return;
3668     }
3669     if (FormatTok->is(tok::l_brace)) {
3670       FormatTok->setBlockKind(BK_Block);
3671       nextToken();
3672       parseBracedList();
3673     } else {
3674       nextToken();
3675     }
3676   }
3677 }
3678 
3679 void UnwrappedLineParser::parseStatementMacro() {
3680   nextToken();
3681   if (FormatTok->is(tok::l_paren))
3682     parseParens();
3683   if (FormatTok->is(tok::semi))
3684     nextToken();
3685   addUnwrappedLine();
3686 }
3687 
3688 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3689                                                  StringRef Prefix = "") {
3690   llvm::dbgs() << Prefix << "Line(" << Line.Level
3691                << ", FSC=" << Line.FirstStartColumn << ")"
3692                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3693   for (const auto &Node : Line.Tokens) {
3694     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3695                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3696                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3697   }
3698   for (const auto &Node : Line.Tokens)
3699     for (const auto &ChildNode : Node.Children)
3700       printDebugInfo(ChildNode, "\nChild: ");
3701 
3702   llvm::dbgs() << "\n";
3703 }
3704 
3705 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3706   if (Line->Tokens.empty())
3707     return;
3708   LLVM_DEBUG({
3709     if (CurrentLines == &Lines)
3710       printDebugInfo(*Line);
3711   });
3712 
3713   // If this line closes a block when in Whitesmiths mode, remember that
3714   // information so that the level can be decreased after the line is added.
3715   // This has to happen after the addition of the line since the line itself
3716   // needs to be indented.
3717   bool ClosesWhitesmithsBlock =
3718       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3719       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3720 
3721   CurrentLines->push_back(std::move(*Line));
3722   Line->Tokens.clear();
3723   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3724   Line->FirstStartColumn = 0;
3725 
3726   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3727     --Line->Level;
3728   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3729     CurrentLines->append(
3730         std::make_move_iterator(PreprocessorDirectives.begin()),
3731         std::make_move_iterator(PreprocessorDirectives.end()));
3732     PreprocessorDirectives.clear();
3733   }
3734   // Disconnect the current token from the last token on the previous line.
3735   FormatTok->Previous = nullptr;
3736 }
3737 
3738 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3739 
3740 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3741   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3742          FormatTok.NewlinesBefore > 0;
3743 }
3744 
3745 // Checks if \p FormatTok is a line comment that continues the line comment
3746 // section on \p Line.
3747 static bool
3748 continuesLineCommentSection(const FormatToken &FormatTok,
3749                             const UnwrappedLine &Line,
3750                             const llvm::Regex &CommentPragmasRegex) {
3751   if (Line.Tokens.empty())
3752     return false;
3753 
3754   StringRef IndentContent = FormatTok.TokenText;
3755   if (FormatTok.TokenText.startswith("//") ||
3756       FormatTok.TokenText.startswith("/*"))
3757     IndentContent = FormatTok.TokenText.substr(2);
3758   if (CommentPragmasRegex.match(IndentContent))
3759     return false;
3760 
3761   // If Line starts with a line comment, then FormatTok continues the comment
3762   // section if its original column is greater or equal to the original start
3763   // column of the line.
3764   //
3765   // Define the min column token of a line as follows: if a line ends in '{' or
3766   // contains a '{' followed by a line comment, then the min column token is
3767   // that '{'. Otherwise, the min column token of the line is the first token of
3768   // the line.
3769   //
3770   // If Line starts with a token other than a line comment, then FormatTok
3771   // continues the comment section if its original column is greater than the
3772   // original start column of the min column token of the line.
3773   //
3774   // For example, the second line comment continues the first in these cases:
3775   //
3776   // // first line
3777   // // second line
3778   //
3779   // and:
3780   //
3781   // // first line
3782   //  // second line
3783   //
3784   // and:
3785   //
3786   // int i; // first line
3787   //  // second line
3788   //
3789   // and:
3790   //
3791   // do { // first line
3792   //      // second line
3793   //   int i;
3794   // } while (true);
3795   //
3796   // and:
3797   //
3798   // enum {
3799   //   a, // first line
3800   //    // second line
3801   //   b
3802   // };
3803   //
3804   // The second line comment doesn't continue the first in these cases:
3805   //
3806   //   // first line
3807   //  // second line
3808   //
3809   // and:
3810   //
3811   // int i; // first line
3812   // // second line
3813   //
3814   // and:
3815   //
3816   // do { // first line
3817   //   // second line
3818   //   int i;
3819   // } while (true);
3820   //
3821   // and:
3822   //
3823   // enum {
3824   //   a, // first line
3825   //   // second line
3826   // };
3827   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3828 
3829   // Scan for '{//'. If found, use the column of '{' as a min column for line
3830   // comment section continuation.
3831   const FormatToken *PreviousToken = nullptr;
3832   for (const UnwrappedLineNode &Node : Line.Tokens) {
3833     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3834         isLineComment(*Node.Tok)) {
3835       MinColumnToken = PreviousToken;
3836       break;
3837     }
3838     PreviousToken = Node.Tok;
3839 
3840     // Grab the last newline preceding a token in this unwrapped line.
3841     if (Node.Tok->NewlinesBefore > 0)
3842       MinColumnToken = Node.Tok;
3843   }
3844   if (PreviousToken && PreviousToken->is(tok::l_brace))
3845     MinColumnToken = PreviousToken;
3846 
3847   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3848                               MinColumnToken);
3849 }
3850 
3851 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3852   bool JustComments = Line->Tokens.empty();
3853   for (FormatToken *Tok : CommentsBeforeNextToken) {
3854     // Line comments that belong to the same line comment section are put on the
3855     // same line since later we might want to reflow content between them.
3856     // Additional fine-grained breaking of line comment sections is controlled
3857     // by the class BreakableLineCommentSection in case it is desirable to keep
3858     // several line comment sections in the same unwrapped line.
3859     //
3860     // FIXME: Consider putting separate line comment sections as children to the
3861     // unwrapped line instead.
3862     Tok->ContinuesLineCommentSection =
3863         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3864     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3865       addUnwrappedLine();
3866     pushToken(Tok);
3867   }
3868   if (NewlineBeforeNext && JustComments)
3869     addUnwrappedLine();
3870   CommentsBeforeNextToken.clear();
3871 }
3872 
3873 void UnwrappedLineParser::nextToken(int LevelDifference) {
3874   if (eof())
3875     return;
3876   flushComments(isOnNewLine(*FormatTok));
3877   pushToken(FormatTok);
3878   FormatToken *Previous = FormatTok;
3879   if (!Style.isJavaScript())
3880     readToken(LevelDifference);
3881   else
3882     readTokenWithJavaScriptASI();
3883   FormatTok->Previous = Previous;
3884 }
3885 
3886 void UnwrappedLineParser::distributeComments(
3887     const SmallVectorImpl<FormatToken *> &Comments,
3888     const FormatToken *NextTok) {
3889   // Whether or not a line comment token continues a line is controlled by
3890   // the method continuesLineCommentSection, with the following caveat:
3891   //
3892   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3893   // that each comment line from the trail is aligned with the next token, if
3894   // the next token exists. If a trail exists, the beginning of the maximal
3895   // trail is marked as a start of a new comment section.
3896   //
3897   // For example in this code:
3898   //
3899   // int a; // line about a
3900   //   // line 1 about b
3901   //   // line 2 about b
3902   //   int b;
3903   //
3904   // the two lines about b form a maximal trail, so there are two sections, the
3905   // first one consisting of the single comment "// line about a" and the
3906   // second one consisting of the next two comments.
3907   if (Comments.empty())
3908     return;
3909   bool ShouldPushCommentsInCurrentLine = true;
3910   bool HasTrailAlignedWithNextToken = false;
3911   unsigned StartOfTrailAlignedWithNextToken = 0;
3912   if (NextTok) {
3913     // We are skipping the first element intentionally.
3914     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3915       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3916         HasTrailAlignedWithNextToken = true;
3917         StartOfTrailAlignedWithNextToken = i;
3918       }
3919     }
3920   }
3921   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3922     FormatToken *FormatTok = Comments[i];
3923     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3924       FormatTok->ContinuesLineCommentSection = false;
3925     } else {
3926       FormatTok->ContinuesLineCommentSection =
3927           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3928     }
3929     if (!FormatTok->ContinuesLineCommentSection &&
3930         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
3931       ShouldPushCommentsInCurrentLine = false;
3932     if (ShouldPushCommentsInCurrentLine)
3933       pushToken(FormatTok);
3934     else
3935       CommentsBeforeNextToken.push_back(FormatTok);
3936   }
3937 }
3938 
3939 void UnwrappedLineParser::readToken(int LevelDifference) {
3940   SmallVector<FormatToken *, 1> Comments;
3941   bool PreviousWasComment = false;
3942   bool FirstNonCommentOnLine = false;
3943   do {
3944     FormatTok = Tokens->getNextToken();
3945     assert(FormatTok);
3946     while (FormatTok->getType() == TT_ConflictStart ||
3947            FormatTok->getType() == TT_ConflictEnd ||
3948            FormatTok->getType() == TT_ConflictAlternative) {
3949       if (FormatTok->getType() == TT_ConflictStart)
3950         conditionalCompilationStart(/*Unreachable=*/false);
3951       else if (FormatTok->getType() == TT_ConflictAlternative)
3952         conditionalCompilationAlternative();
3953       else if (FormatTok->getType() == TT_ConflictEnd)
3954         conditionalCompilationEnd();
3955       FormatTok = Tokens->getNextToken();
3956       FormatTok->MustBreakBefore = true;
3957     }
3958 
3959     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
3960                                       const FormatToken &Tok,
3961                                       bool PreviousWasComment) {
3962       auto IsFirstOnLine = [](const FormatToken &Tok) {
3963         return Tok.HasUnescapedNewline || Tok.IsFirst;
3964       };
3965 
3966       // Consider preprocessor directives preceded by block comments as first
3967       // on line.
3968       if (PreviousWasComment)
3969         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
3970       return IsFirstOnLine(Tok);
3971     };
3972 
3973     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
3974         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
3975     PreviousWasComment = FormatTok->Tok.is(tok::comment);
3976 
3977     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3978            FirstNonCommentOnLine) {
3979       distributeComments(Comments, FormatTok);
3980       Comments.clear();
3981       // If there is an unfinished unwrapped line, we flush the preprocessor
3982       // directives only after that unwrapped line was finished later.
3983       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3984       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3985       assert((LevelDifference >= 0 ||
3986               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3987              "LevelDifference makes Line->Level negative");
3988       Line->Level += LevelDifference;
3989       // Comments stored before the preprocessor directive need to be output
3990       // before the preprocessor directive, at the same level as the
3991       // preprocessor directive, as we consider them to apply to the directive.
3992       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3993           PPBranchLevel > 0)
3994         Line->Level += PPBranchLevel;
3995       flushComments(isOnNewLine(*FormatTok));
3996       parsePPDirective();
3997       PreviousWasComment = FormatTok->Tok.is(tok::comment);
3998       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
3999           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4000     }
4001 
4002     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4003         !Line->InPPDirective)
4004       continue;
4005 
4006     if (!FormatTok->Tok.is(tok::comment)) {
4007       distributeComments(Comments, FormatTok);
4008       Comments.clear();
4009       return;
4010     }
4011 
4012     Comments.push_back(FormatTok);
4013   } while (!eof());
4014 
4015   distributeComments(Comments, nullptr);
4016   Comments.clear();
4017 }
4018 
4019 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4020   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4021   if (MustBreakBeforeNextToken) {
4022     Line->Tokens.back().Tok->MustBreakBefore = true;
4023     MustBreakBeforeNextToken = false;
4024   }
4025 }
4026 
4027 } // end namespace format
4028 } // end namespace clang
4029