1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 #include <utility>
24 
25 #define DEBUG_TYPE "format-parser"
26 
27 namespace clang {
28 namespace format {
29 
30 class FormatTokenSource {
31 public:
32   virtual ~FormatTokenSource() {}
33 
34   // Returns the next token in the token stream.
35   virtual FormatToken *getNextToken() = 0;
36 
37   // Returns the token preceding the token returned by the last call to
38   // getNextToken() in the token stream, or nullptr if no such token exists.
39   virtual FormatToken *getPreviousToken() = 0;
40 
41   // Returns the token that would be returned by the next call to
42   // getNextToken().
43   virtual FormatToken *peekNextToken() = 0;
44 
45   // Returns the token that would be returned after the next N calls to
46   // getNextToken(). N needs to be greater than zero, and small enough that
47   // there are still tokens. Check for tok::eof with N-1 before calling it with
48   // N.
49   virtual FormatToken *peekNextToken(int N) = 0;
50 
51   // Returns whether we are at the end of the file.
52   // This can be different from whether getNextToken() returned an eof token
53   // when the FormatTokenSource is a view on a part of the token stream.
54   virtual bool isEOF() = 0;
55 
56   // Gets the current position in the token stream, to be used by setPosition().
57   virtual unsigned getPosition() = 0;
58 
59   // Resets the token stream to the state it was in when getPosition() returned
60   // Position, and return the token at that position in the stream.
61   virtual FormatToken *setPosition(unsigned Position) = 0;
62 };
63 
64 namespace {
65 
66 class ScopedDeclarationState {
67 public:
68   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
69                          bool MustBeDeclaration)
70       : Line(Line), Stack(Stack) {
71     Line.MustBeDeclaration = MustBeDeclaration;
72     Stack.push_back(MustBeDeclaration);
73   }
74   ~ScopedDeclarationState() {
75     Stack.pop_back();
76     if (!Stack.empty())
77       Line.MustBeDeclaration = Stack.back();
78     else
79       Line.MustBeDeclaration = true;
80   }
81 
82 private:
83   UnwrappedLine &Line;
84   llvm::BitVector &Stack;
85 };
86 
87 static bool isLineComment(const FormatToken &FormatTok) {
88   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
89 }
90 
91 // Checks if \p FormatTok is a line comment that continues the line comment
92 // \p Previous. The original column of \p MinColumnToken is used to determine
93 // whether \p FormatTok is indented enough to the right to continue \p Previous.
94 static bool continuesLineComment(const FormatToken &FormatTok,
95                                  const FormatToken *Previous,
96                                  const FormatToken *MinColumnToken) {
97   if (!Previous || !MinColumnToken)
98     return false;
99   unsigned MinContinueColumn =
100       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
101   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
102          isLineComment(*Previous) &&
103          FormatTok.OriginalColumn >= MinContinueColumn;
104 }
105 
106 class ScopedMacroState : public FormatTokenSource {
107 public:
108   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
109                    FormatToken *&ResetToken)
110       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
111         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
112         Token(nullptr), PreviousToken(nullptr) {
113     FakeEOF.Tok.startToken();
114     FakeEOF.Tok.setKind(tok::eof);
115     TokenSource = this;
116     Line.Level = 0;
117     Line.InPPDirective = true;
118   }
119 
120   ~ScopedMacroState() override {
121     TokenSource = PreviousTokenSource;
122     ResetToken = Token;
123     Line.InPPDirective = false;
124     Line.Level = PreviousLineLevel;
125   }
126 
127   FormatToken *getNextToken() override {
128     // The \c UnwrappedLineParser guards against this by never calling
129     // \c getNextToken() after it has encountered the first eof token.
130     assert(!eof());
131     PreviousToken = Token;
132     Token = PreviousTokenSource->getNextToken();
133     if (eof())
134       return &FakeEOF;
135     return Token;
136   }
137 
138   FormatToken *getPreviousToken() override {
139     return PreviousTokenSource->getPreviousToken();
140   }
141 
142   FormatToken *peekNextToken() override {
143     if (eof())
144       return &FakeEOF;
145     return PreviousTokenSource->peekNextToken();
146   }
147 
148   FormatToken *peekNextToken(int N) override {
149     assert(N > 0);
150     if (eof())
151       return &FakeEOF;
152     return PreviousTokenSource->peekNextToken(N);
153   }
154 
155   bool isEOF() override { return PreviousTokenSource->isEOF(); }
156 
157   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
158 
159   FormatToken *setPosition(unsigned Position) override {
160     PreviousToken = nullptr;
161     Token = PreviousTokenSource->setPosition(Position);
162     return Token;
163   }
164 
165 private:
166   bool eof() {
167     return Token && Token->HasUnescapedNewline &&
168            !continuesLineComment(*Token, PreviousToken,
169                                  /*MinColumnToken=*/PreviousToken);
170   }
171 
172   FormatToken FakeEOF;
173   UnwrappedLine &Line;
174   FormatTokenSource *&TokenSource;
175   FormatToken *&ResetToken;
176   unsigned PreviousLineLevel;
177   FormatTokenSource *PreviousTokenSource;
178 
179   FormatToken *Token;
180   FormatToken *PreviousToken;
181 };
182 
183 } // end anonymous namespace
184 
185 class ScopedLineState {
186 public:
187   ScopedLineState(UnwrappedLineParser &Parser,
188                   bool SwitchToPreprocessorLines = false)
189       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
190     if (SwitchToPreprocessorLines)
191       Parser.CurrentLines = &Parser.PreprocessorDirectives;
192     else if (!Parser.Line->Tokens.empty())
193       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
194     PreBlockLine = std::move(Parser.Line);
195     Parser.Line = std::make_unique<UnwrappedLine>();
196     Parser.Line->Level = PreBlockLine->Level;
197     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
198   }
199 
200   ~ScopedLineState() {
201     if (!Parser.Line->Tokens.empty())
202       Parser.addUnwrappedLine();
203     assert(Parser.Line->Tokens.empty());
204     Parser.Line = std::move(PreBlockLine);
205     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
206       Parser.MustBreakBeforeNextToken = true;
207     Parser.CurrentLines = OriginalLines;
208   }
209 
210 private:
211   UnwrappedLineParser &Parser;
212 
213   std::unique_ptr<UnwrappedLine> PreBlockLine;
214   SmallVectorImpl<UnwrappedLine> *OriginalLines;
215 };
216 
217 class CompoundStatementIndenter {
218 public:
219   CompoundStatementIndenter(UnwrappedLineParser *Parser,
220                             const FormatStyle &Style, unsigned &LineLevel)
221       : CompoundStatementIndenter(Parser, LineLevel,
222                                   Style.BraceWrapping.AfterControlStatement,
223                                   Style.BraceWrapping.IndentBraces) {}
224   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
225                             bool WrapBrace, bool IndentBrace)
226       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
227     if (WrapBrace)
228       Parser->addUnwrappedLine();
229     if (IndentBrace)
230       ++LineLevel;
231   }
232   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
233 
234 private:
235   unsigned &LineLevel;
236   unsigned OldLineLevel;
237 };
238 
239 namespace {
240 
241 class IndexedTokenSource : public FormatTokenSource {
242 public:
243   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
244       : Tokens(Tokens), Position(-1) {}
245 
246   FormatToken *getNextToken() override {
247     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
248       LLVM_DEBUG({
249         llvm::dbgs() << "Next ";
250         dbgToken(Position);
251       });
252       return Tokens[Position];
253     }
254     ++Position;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Next ";
257       dbgToken(Position);
258     });
259     return Tokens[Position];
260   }
261 
262   FormatToken *getPreviousToken() override {
263     return Position > 0 ? Tokens[Position - 1] : nullptr;
264   }
265 
266   FormatToken *peekNextToken() override {
267     int Next = Position + 1;
268     LLVM_DEBUG({
269       llvm::dbgs() << "Peeking ";
270       dbgToken(Next);
271     });
272     return Tokens[Next];
273   }
274 
275   FormatToken *peekNextToken(int N) override {
276     assert(N > 0);
277     int Next = Position + N;
278     LLVM_DEBUG({
279       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
280       dbgToken(Next);
281     });
282     return Tokens[Next];
283   }
284 
285   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
286 
287   unsigned getPosition() override {
288     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
289     assert(Position >= 0);
290     return Position;
291   }
292 
293   FormatToken *setPosition(unsigned P) override {
294     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
295     Position = P;
296     return Tokens[Position];
297   }
298 
299   void reset() { Position = -1; }
300 
301 private:
302   void dbgToken(int Position, llvm::StringRef Indent = "") {
303     FormatToken *Tok = Tokens[Position];
304     llvm::dbgs() << Indent << "[" << Position
305                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
306                  << ", Macro: " << !!Tok->MacroCtx << "\n";
307   }
308 
309   ArrayRef<FormatToken *> Tokens;
310   int Position;
311 };
312 
313 } // end anonymous namespace
314 
/// Sets up a parser over \p Tokens that reports finished lines to \p Callback.
///
/// The parser starts with a fresh, empty line, collects lines into \c Lines,
/// and has no active token source yet (\c Tokens member is null until parse()
/// installs one). Include-guard detection is rejected up front when the style
/// does not indent preprocessor directives, otherwise it starts in the
/// initial state.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      // Include guards only matter when preprocessor directives are indented.
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
328 
329 void UnwrappedLineParser::reset() {
330   PPBranchLevel = -1;
331   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
332                      ? IG_Rejected
333                      : IG_Inited;
334   IncludeGuardToken = nullptr;
335   Line.reset(new UnwrappedLine);
336   CommentsBeforeNextToken.clear();
337   FormatTok = nullptr;
338   MustBreakBeforeNextToken = false;
339   PreprocessorDirectives.clear();
340   CurrentLines = &Lines;
341   DeclarationScopeStack.clear();
342   NestedTooDeep.clear();
343   PPStack.clear();
344   Line->FirstStartColumn = FirstStartColumn;
345 }
346 
347 void UnwrappedLineParser::parse() {
348   IndexedTokenSource TokenSource(AllTokens);
349   Line->FirstStartColumn = FirstStartColumn;
350   do {
351     LLVM_DEBUG(llvm::dbgs() << "----\n");
352     reset();
353     Tokens = &TokenSource;
354     TokenSource.reset();
355 
356     readToken();
357     parseFile();
358 
359     // If we found an include guard then all preprocessor directives (other than
360     // the guard) are over-indented by one.
361     if (IncludeGuard == IG_Found) {
362       for (auto &Line : Lines)
363         if (Line.InPPDirective && Line.Level > 0)
364           --Line.Level;
365     }
366 
367     // Create line with eof token.
368     pushToken(FormatTok);
369     addUnwrappedLine();
370 
371     for (const UnwrappedLine &Line : Lines)
372       Callback.consumeUnwrappedLine(Line);
373 
374     Callback.finishRun();
375     Lines.clear();
376     while (!PPLevelBranchIndex.empty() &&
377            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
378       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
379       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
380     }
381     if (!PPLevelBranchIndex.empty()) {
382       ++PPLevelBranchIndex.back();
383       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
384       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
385     }
386   } while (!PPLevelBranchIndex.empty());
387 }
388 
389 void UnwrappedLineParser::parseFile() {
390   // The top-level context in a file always has declarations, except for pre-
391   // processor directives and JavaScript files.
392   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
393   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
394                                           MustBeDeclaration);
395   if (Style.Language == FormatStyle::LK_TextProto)
396     parseBracedList();
397   else
398     parseLevel();
399   // Make sure to format the remaining tokens.
400   //
401   // LK_TextProto is special since its top-level is parsed as the body of a
402   // braced list, which does not necessarily have natural line separators such
403   // as a semicolon. Comments after the last entry that have been determined to
404   // not belong to that line, as in:
405   //   key: value
406   //   // endfile comment
407   // do not have a chance to be put on a line of their own until this point.
408   // Here we add this newline before end-of-file comments.
409   if (Style.Language == FormatStyle::LK_TextProto &&
410       !CommentsBeforeNextToken.empty()) {
411     addUnwrappedLine();
412   }
413   flushComments(true);
414   addUnwrappedLine();
415 }
416 
417 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
418   do {
419     switch (FormatTok->Tok.getKind()) {
420     case tok::l_brace:
421       return;
422     default:
423       if (FormatTok->is(Keywords.kw_where)) {
424         addUnwrappedLine();
425         nextToken();
426         parseCSharpGenericTypeConstraint();
427         break;
428       }
429       nextToken();
430       break;
431     }
432   } while (!eof());
433 }
434 
435 void UnwrappedLineParser::parseCSharpAttribute() {
436   int UnpairedSquareBrackets = 1;
437   do {
438     switch (FormatTok->Tok.getKind()) {
439     case tok::r_square:
440       nextToken();
441       --UnpairedSquareBrackets;
442       if (UnpairedSquareBrackets == 0) {
443         addUnwrappedLine();
444         return;
445       }
446       break;
447     case tok::l_square:
448       ++UnpairedSquareBrackets;
449       nextToken();
450       break;
451     default:
452       nextToken();
453       break;
454     }
455   } while (!eof());
456 }
457 
458 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
459   if (!Lines.empty() && Lines.back().InPPDirective)
460     return true;
461 
462   const FormatToken *Previous = Tokens->getPreviousToken();
463   return Previous && Previous->is(tok::comment) &&
464          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
465 }
466 
/// \brief Parses one nesting level: the sequence of comments, blocks, case
/// labels, attributes and other structural elements found either at the top
/// of the input (no \p OpeningBrace) or inside the block opened by
/// \p OpeningBrace.
///
/// Parsing runs until eof. When \p OpeningBrace is given it also stops at the
/// first closing brace (or macro-block-end token) seen at this level, leaving
/// that token unconsumed for the caller. Without \p OpeningBrace a closing
/// brace is consumed and emitted as a line of its own.
///
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param NextLBracesType The type for left brace found in this level.
/// \param IfKind The if statement kind in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.) True is only ever returned when
/// the style has RemoveBracesLLVM set and \p OpeningBrace is a control
/// statement or else brace.
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     bool CanContainBracedList,
                                     TokenType NextLBracesType,
                                     IfStmtKind *IfKind) {
  // Only a compound-requirement brace changes the brace type handed down to
  // nested structural elements.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Evaluated once on entry; always true when brace removal is disabled, which
  // makes the "simple block" answer below false in that case.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  bool HasDoWhile = false;
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    // Attribute macros are skipped without starting a statement.
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched as if they were braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. Once a
    // do-while or a label has been recorded, nullptr is passed instead of the
    // flag's address so the flag is not touched again.
    auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
                         &HasDoWhile, &HasLabel, &StatementCount] {
      parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
                             HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment at this level becomes a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown) {
        FormatTok->setFinalizedType(NextLBracesType);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      // Real braces (not macro block begins) may turn out to be braced lists.
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList()) {
        continue;
      }
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
                 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
                 NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // End of the block opened by OpeningBrace; the closing token is left
        // for the caller. The return value says whether the block is "simple".
        if (!Style.RemoveBracesLLVM ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // Not simple if the closer is a macro block end rather than '}', if
        // the block holds anything but exactly one statement, a label or a
        // do-while, or if a comment/preprocessor directive precedes either
        // the block's content or its closing brace.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment directly after the closing brace on the same line also
        // disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // Closing brace without an opening brace at this level: emit it alone.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look past any comments for the token after 'default', then rewind.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // The first label of this level raises the indent once, either because
      // the style indents case labels or because the line is a preprocessor
      // directive at level 1.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}
601 
/// Looks ahead from the current '{' and decides, for it and every brace nested
/// inside it, whether the brace pair is a block (BK_Block) or a braced
/// initializer list (BK_BracedInit).
///
/// The decision for a pair is mostly made at its closing brace, based on the
/// token that follows it. Braces still unclassified when the scan ends are
/// treated as blocks. The token stream position is restored before returning.
///
/// \param ExpectClassBody When set, a ';' after the closing brace of the
/// outermost pair does not count as evidence for a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      // A closing brace with no open brace on the stack is ignored.
      if (LBraceStack.empty())
        break;
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            // On scope exit MacroState stores the last token it read into
            // NextTok.
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If one of the tokens listed below (comma, period, colon, right
          // paren, right square bracket, left brace or ellipsis) follows the
          // closing brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis);

          // An identifier after the closing brace counts, unless the token
          // before the closing brace is ';', '}' or '{'.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          // A semicolon counts, except after the outermost brace pair when a
          // class body is expected.
          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            // Note that this replaces, rather than extends, the verdict
            // computed so far.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the verdict on both the closing and the matching opening
        // brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only statement macros are treated like the statement tokens below.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these inside a still-unclassified brace marks it as a block.
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (FormatToken *LBrace : LBraceStack)
    if (LBrace->is(BK_Unknown))
      LBrace->setBlockKind(BK_Block);

  // Rewind so regular parsing continues from where the look-ahead started.
  FormatTok = Tokens->setPosition(StoredPosition);
}
754 
// Folds the std::hash of \p v into \p seed: the hash is offset by the constant
// 0x9e3779b9 and by two shifted copies of the old seed, and the sum is XORed
// into the seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
760 
761 size_t UnwrappedLineParser::computePPHash() const {
762   size_t h = 0;
763   for (const auto &i : PPStack) {
764     hash_combine(h, size_t(i.Kind));
765     hash_combine(h, i.Line);
766   }
767   return h;
768 }
769 
770 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
771 // is not null, subtracts its length (plus the preceding space) when computing
772 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
773 // running the token annotator on it so that we can restore them afterward.
774 bool UnwrappedLineParser::mightFitOnOneLine(
775     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
776   const auto ColumnLimit = Style.ColumnLimit;
777   if (ColumnLimit == 0)
778     return true;
779 
780   auto &Tokens = ParsedLine.Tokens;
781   assert(!Tokens.empty());
782 
783   const auto *LastToken = Tokens.back().Tok;
784   assert(LastToken);
785 
786   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
787 
788   int Index = 0;
789   for (const auto &Token : Tokens) {
790     assert(Token.Tok);
791     auto &SavedToken = SavedTokens[Index++];
792     SavedToken.Tok = new FormatToken;
793     SavedToken.Tok->copyFrom(*Token.Tok);
794     SavedToken.Children = std::move(Token.Children);
795   }
796 
797   AnnotatedLine Line(ParsedLine);
798   assert(Line.Last == LastToken);
799 
800   TokenAnnotator Annotator(Style, Keywords);
801   Annotator.annotate(Line);
802   Annotator.calculateFormattingInformation(Line);
803 
804   auto Length = LastToken->TotalLength;
805   if (OpeningBrace) {
806     assert(OpeningBrace != Tokens.front().Tok);
807     Length -= OpeningBrace->TokenText.size() + 1;
808   }
809 
810   Index = 0;
811   for (auto &Token : Tokens) {
812     const auto &SavedToken = SavedTokens[Index++];
813     Token.Tok->copyFrom(*SavedToken.Tok);
814     Token.Children = std::move(SavedToken.Children);
815     delete SavedToken.Tok;
816   }
817 
818   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
819 }
820 
// Parses a block that starts at the current token, which must be a '{' or a
// TT_MacroBlockBegin token, up to and including the matching '}' or
// TT_MacroBlockEnd token.
//
// \param MustBeDeclaration forwarded to the ScopedDeclarationState that is
//        active while the block's contents are parsed.
// \param AddLevels number of levels the block's contents are indented by. In
//        Whitesmiths mode the line level is instead bumped by one before the
//        opening token is consumed (when AddLevels > 0).
// \param MunchSemi if true, a ';' directly following the block is consumed.
// \param KeepBraces if true, the braces are never linked via MatchingParen
//        (see RemoveBraces below).
// \param IfKind forwarded to parseLevel().
// \param UnindentWhitesmithsBraces if true, the line level is decremented by
//        one after the opening line has been added.
// \param CanContainBracedList forwarded to parseLevel().
// \param NextLBracesType forwarded to parseLevel().
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                     bool MunchSemi, bool KeepBraces,
                                     IfStmtKind *IfKind,
                                     bool UnindentWhitesmithsBraces,
                                     bool CanContainBracedList,
                                     TokenType NextLBracesType) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Remember the opening token; FormatTok advances as the block is parsed.
  FormatToken *Tok = FormatTok;
  // Recorded up front because it is only consulted after the block has been
  // parsed (in RemoveBraces).
  const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  // Number of lines already in CurrentLines when the block starts; used by
  // RemoveBraces to look up a line by index.
  auto Index = CurrentLines->size();
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Compared against PPEndHash at the end of the function.
  size_t PPStartHash = computePPHash();

  // The level restored on exit. Note that it is captured after the Whitesmiths
  // increment above.
  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  // Bail out if there are too many levels. Otherwise, the stack might overflow.
  if (Line->Level > 300)
    return;

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Preprocessor directives are only subtracted from the opening line index
  // when the current lines are the top-level Lines.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  const bool SimpleBlock =
      parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind);

  if (eof())
    return;

  // The block was not terminated by the expected closing token: restore the
  // level, mark the current token as a block token and give up.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return;
  }

  // Decides whether the opening and closing brace get linked through
  // MatchingParen below. Everything is captured by copy and the lambda is
  // mutable, so the modifications of Tok and Index inside only affect the
  // lambda's own copies; the outer Tok stays valid for the code that follows.
  auto RemoveBraces = [=]() mutable {
    if (KeepBraces || !SimpleBlock)
      return false;
    assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
    assert(FormatTok->is(tok::r_brace));
    // An opening brace without a previous token is treated as wrapped.
    const bool WrappedOpeningBrace = !Tok->Previous;
    if (WrappedOpeningBrace && FollowedByComment)
      return false;
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->is(tok::r_brace) && !Previous->Optional)
      return false;
    assert(!CurrentLines->empty());
    if (!mightFitOnOneLine(CurrentLines->back()))
      return false;
    if (Tok->is(TT_ElseLBrace))
      return true;
    if (WrappedOpeningBrace) {
      assert(Index > 0);
      --Index; // The line above the wrapped l_brace.
      Tok = nullptr;
    }
    return mightFitOnOneLine((*CurrentLines)[Index], Tok);
  };
  // NOTE(review): presumably a later stage uses this MatchingParen link to
  // drop the braces - confirm against the consumer of MatchingParen.
  if (RemoveBraces()) {
    Tok->MatchingParen = FormatTok;
    FormatTok->MatchingParen = Tok;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  // NOTE(review): AddLevels is unsigned, so -AddLevels wraps around; this
  // relies on the conversion to nextToken()'s parameter type - confirm.
  nextToken(/*LevelDifference=*/-AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Only cross-link the opening and closing lines when the preprocessor hash
  // is the same before and after the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }
}
947 
948 static bool isGoogScope(const UnwrappedLine &Line) {
949   // FIXME: Closure-library specific stuff should not be hard-coded but be
950   // configurable.
951   if (Line.Tokens.size() < 4)
952     return false;
953   auto I = Line.Tokens.begin();
954   if (I->Tok->TokenText != "goog")
955     return false;
956   ++I;
957   if (I->Tok->isNot(tok::period))
958     return false;
959   ++I;
960   if (I->Tok->TokenText != "scope")
961     return false;
962   ++I;
963   return I->Tok->is(tok::l_paren);
964 }
965 
966 static bool isIIFE(const UnwrappedLine &Line,
967                    const AdditionalKeywords &Keywords) {
968   // Look for the start of an immediately invoked anonymous function.
969   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
970   // This is commonly done in JavaScript to create a new, anonymous scope.
971   // Example: (function() { ... })()
972   if (Line.Tokens.size() < 3)
973     return false;
974   auto I = Line.Tokens.begin();
975   if (I->Tok->isNot(tok::l_paren))
976     return false;
977   ++I;
978   if (I->Tok->isNot(Keywords.kw_function))
979     return false;
980   ++I;
981   return I->Tok->is(tok::l_paren);
982 }
983 
984 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
985                                    const FormatToken &InitialToken) {
986   tok::TokenKind Kind = InitialToken.Tok.getKind();
987   if (InitialToken.is(TT_NamespaceMacro))
988     Kind = tok::kw_namespace;
989 
990   switch (Kind) {
991   case tok::kw_namespace:
992     return Style.BraceWrapping.AfterNamespace;
993   case tok::kw_class:
994     return Style.BraceWrapping.AfterClass;
995   case tok::kw_union:
996     return Style.BraceWrapping.AfterUnion;
997   case tok::kw_struct:
998     return Style.BraceWrapping.AfterStruct;
999   case tok::kw_enum:
1000     return Style.BraceWrapping.AfterEnum;
1001   default:
1002     return false;
1003   }
1004 }
1005 
1006 void UnwrappedLineParser::parseChildBlock(
1007     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
1008   assert(FormatTok->is(tok::l_brace));
1009   FormatTok->setBlockKind(BK_Block);
1010   const FormatToken *OpeningBrace = FormatTok;
1011   nextToken();
1012   {
1013     bool SkipIndent = (Style.isJavaScript() &&
1014                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
1015     ScopedLineState LineState(*this);
1016     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
1017                                             /*MustBeDeclaration=*/false);
1018     Line->Level += SkipIndent ? 0 : 1;
1019     parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
1020     flushComments(isOnNewLine(*FormatTok));
1021     Line->Level -= SkipIndent ? 0 : 1;
1022   }
1023   nextToken();
1024 }
1025 
1026 void UnwrappedLineParser::parsePPDirective() {
1027   assert(FormatTok->is(tok::hash) && "'#' expected");
1028   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1029 
1030   nextToken();
1031 
1032   if (!FormatTok->Tok.getIdentifierInfo()) {
1033     parsePPUnknown();
1034     return;
1035   }
1036 
1037   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1038   case tok::pp_define:
1039     parsePPDefine();
1040     return;
1041   case tok::pp_if:
1042     parsePPIf(/*IfDef=*/false);
1043     break;
1044   case tok::pp_ifdef:
1045   case tok::pp_ifndef:
1046     parsePPIf(/*IfDef=*/true);
1047     break;
1048   case tok::pp_else:
1049     parsePPElse();
1050     break;
1051   case tok::pp_elifdef:
1052   case tok::pp_elifndef:
1053   case tok::pp_elif:
1054     parsePPElIf();
1055     break;
1056   case tok::pp_endif:
1057     parsePPEndIf();
1058     break;
1059   default:
1060     parsePPUnknown();
1061     break;
1062   }
1063 }
1064 
1065 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1066   size_t Line = CurrentLines->size();
1067   if (CurrentLines == &PreprocessorDirectives)
1068     Line += Lines.size();
1069 
1070   if (Unreachable ||
1071       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1072     PPStack.push_back({PP_Unreachable, Line});
1073   } else {
1074     PPStack.push_back({PP_Conditional, Line});
1075   }
1076 }
1077 
1078 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1079   ++PPBranchLevel;
1080   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1081   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1082     PPLevelBranchIndex.push_back(0);
1083     PPLevelBranchCount.push_back(0);
1084   }
1085   PPChainBranchIndex.push(0);
1086   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1087   conditionalCompilationCondition(Unreachable || Skip);
1088 }
1089 
1090 void UnwrappedLineParser::conditionalCompilationAlternative() {
1091   if (!PPStack.empty())
1092     PPStack.pop_back();
1093   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1094   if (!PPChainBranchIndex.empty())
1095     ++PPChainBranchIndex.top();
1096   conditionalCompilationCondition(
1097       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1098       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1099 }
1100 
1101 void UnwrappedLineParser::conditionalCompilationEnd() {
1102   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1103   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1104     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1105       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1106   }
1107   // Guard against #endif's without #if.
1108   if (PPBranchLevel > -1)
1109     --PPBranchLevel;
1110   if (!PPChainBranchIndex.empty())
1111     PPChainBranchIndex.pop();
1112   if (!PPStack.empty())
1113     PPStack.pop_back();
1114 }
1115 
1116 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1117   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1118   nextToken();
1119   bool Unreachable = false;
1120   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1121     Unreachable = true;
1122   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1123     Unreachable = true;
1124   conditionalCompilationStart(Unreachable);
1125   FormatToken *IfCondition = FormatTok;
1126   // If there's a #ifndef on the first line, and the only lines before it are
1127   // comments, it could be an include guard.
1128   bool MaybeIncludeGuard = IfNDef;
1129   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1130     for (auto &Line : Lines) {
1131       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1132         MaybeIncludeGuard = false;
1133         IncludeGuard = IG_Rejected;
1134         break;
1135       }
1136     }
1137   }
1138   --PPBranchLevel;
1139   parsePPUnknown();
1140   ++PPBranchLevel;
1141   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1142     IncludeGuard = IG_IfNdefed;
1143     IncludeGuardToken = IfCondition;
1144   }
1145 }
1146 
1147 void UnwrappedLineParser::parsePPElse() {
1148   // If a potential include guard has an #else, it's not an include guard.
1149   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1150     IncludeGuard = IG_Rejected;
1151   conditionalCompilationAlternative();
1152   if (PPBranchLevel > -1)
1153     --PPBranchLevel;
1154   parsePPUnknown();
1155   ++PPBranchLevel;
1156 }
1157 
1158 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1159 
1160 void UnwrappedLineParser::parsePPEndIf() {
1161   conditionalCompilationEnd();
1162   parsePPUnknown();
1163   // If the #endif of a potential include guard is the last thing in the file,
1164   // then we found an include guard.
1165   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1166       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1167     IncludeGuard = IG_Found;
1168   }
1169 }
1170 
1171 void UnwrappedLineParser::parsePPDefine() {
1172   nextToken();
1173 
1174   if (!FormatTok->Tok.getIdentifierInfo()) {
1175     IncludeGuard = IG_Rejected;
1176     IncludeGuardToken = nullptr;
1177     parsePPUnknown();
1178     return;
1179   }
1180 
1181   if (IncludeGuard == IG_IfNdefed &&
1182       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1183     IncludeGuard = IG_Defined;
1184     IncludeGuardToken = nullptr;
1185     for (auto &Line : Lines) {
1186       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1187         IncludeGuard = IG_Rejected;
1188         break;
1189       }
1190     }
1191   }
1192 
1193   // In the context of a define, even keywords should be treated as normal
1194   // identifiers. Setting the kind to identifier is not enough, because we need
1195   // to treat additional keywords like __except as well, which are already
1196   // identifiers. Setting the identifier info to null interferes with include
1197   // guard processing above, and changes preprocessing nesting.
1198   FormatTok->Tok.setKind(tok::identifier);
1199   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1200   nextToken();
1201   if (FormatTok->Tok.getKind() == tok::l_paren &&
1202       !FormatTok->hasWhitespaceBefore()) {
1203     parseParens();
1204   }
1205   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1206     Line->Level += PPBranchLevel + 1;
1207   addUnwrappedLine();
1208   ++Line->Level;
1209 
1210   // Errors during a preprocessor directive can only affect the layout of the
1211   // preprocessor directive, and thus we ignore them. An alternative approach
1212   // would be to use the same approach we use on the file level (no
1213   // re-indentation if there was a structural error) within the macro
1214   // definition.
1215   parseFile();
1216 }
1217 
1218 void UnwrappedLineParser::parsePPUnknown() {
1219   do {
1220     nextToken();
1221   } while (!eof());
1222   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1223     Line->Level += PPBranchLevel + 1;
1224   addUnwrappedLine();
1225 }
1226 
1227 // Here we exclude certain tokens that are not usually the first token in an
1228 // unwrapped line. This is used in attempt to distinguish macro calls without
1229 // trailing semicolons from other constructs split to several lines.
1230 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1231   // Semicolon can be a null-statement, l_square can be a start of a macro or
1232   // a C++11 attribute, but this doesn't seem to be common.
1233   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1234          Tok.isNot(TT_AttributeSquare) &&
1235          // Tokens that can only be used as binary operators and a part of
1236          // overloaded operator names.
1237          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1238          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1239          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1240          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1241          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1242          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1243          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1244          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1245          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1246          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1247          Tok.isNot(tok::lesslessequal) &&
1248          // Colon is used in labels, base class lists, initializer lists,
1249          // range-based for loops, ternary operator, but should never be the
1250          // first token in an unwrapped line.
1251          Tok.isNot(tok::colon) &&
1252          // 'noexcept' is a trailing annotation.
1253          Tok.isNot(tok::kw_noexcept);
1254 }
1255 
1256 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1257                           const FormatToken *FormatTok) {
1258   // FIXME: This returns true for C/C++ keywords like 'struct'.
1259   return FormatTok->is(tok::identifier) &&
1260          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1261           !FormatTok->isOneOf(
1262               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1263               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1264               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1265               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1266               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1267               Keywords.kw_instanceof, Keywords.kw_interface,
1268               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1269 }
1270 
1271 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1272                                  const FormatToken *FormatTok) {
1273   return FormatTok->Tok.isLiteral() ||
1274          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1275          mustBeJSIdent(Keywords, FormatTok);
1276 }
1277 
1278 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1279 // when encountered after a value (see mustBeJSIdentOrValue).
1280 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1281                            const FormatToken *FormatTok) {
1282   return FormatTok->isOneOf(
1283       tok::kw_return, Keywords.kw_yield,
1284       // conditionals
1285       tok::kw_if, tok::kw_else,
1286       // loops
1287       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1288       // switch/case
1289       tok::kw_switch, tok::kw_case,
1290       // exceptions
1291       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1292       // declaration
1293       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1294       Keywords.kw_async, Keywords.kw_function,
1295       // import/export
1296       Keywords.kw_import, tok::kw_export);
1297 }
1298 
1299 // Checks whether a token is a type in K&R C (aka C78).
1300 static bool isC78Type(const FormatToken &Tok) {
1301   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1302                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1303                      tok::identifier);
1304 }
1305 
1306 // This function checks whether a token starts the first parameter declaration
1307 // in a K&R C (aka C78) function definition, e.g.:
1308 //   int f(a, b)
1309 //   short a, b;
1310 //   {
1311 //      return a + b;
1312 //   }
1313 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1314                                const FormatToken *FuncName) {
1315   assert(Tok);
1316   assert(Next);
1317   assert(FuncName);
1318 
1319   if (FuncName->isNot(tok::identifier))
1320     return false;
1321 
1322   const FormatToken *Prev = FuncName->Previous;
1323   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1324     return false;
1325 
1326   if (!isC78Type(*Tok) &&
1327       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1328     return false;
1329   }
1330 
1331   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1332     return false;
1333 
1334   Tok = Tok->Previous;
1335   if (!Tok || Tok->isNot(tok::r_paren))
1336     return false;
1337 
1338   Tok = Tok->Previous;
1339   if (!Tok || Tok->isNot(tok::identifier))
1340     return false;
1341 
1342   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1343 }
1344 
1345 void UnwrappedLineParser::parseModuleImport() {
1346   nextToken();
1347   while (!eof()) {
1348     if (FormatTok->is(tok::colon)) {
1349       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1350     }
1351     // Handle import <foo/bar.h> as we would an include statement.
1352     else if (FormatTok->is(tok::less)) {
1353       nextToken();
1354       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1355         // Mark tokens up to the trailing line comments as implicit string
1356         // literals.
1357         if (FormatTok->isNot(tok::comment) &&
1358             !FormatTok->TokenText.startswith("//")) {
1359           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1360         }
1361         nextToken();
1362       }
1363     }
1364     if (FormatTok->is(tok::semi)) {
1365       nextToken();
1366       break;
1367     }
1368     nextToken();
1369   }
1370 
1371   addUnwrappedLine();
1372 }
1373 
1374 // readTokenWithJavaScriptASI reads the next token and terminates the current
1375 // line if JavaScript Automatic Semicolon Insertion must
1376 // happen between the current token and the next token.
1377 //
1378 // This method is conservative - it cannot cover all edge cases of JavaScript,
1379 // but only aims to correctly handle certain well known cases. It *must not*
1380 // return true in speculative cases.
1381 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1382   FormatToken *Previous = FormatTok;
1383   readToken();
1384   FormatToken *Next = FormatTok;
1385 
1386   bool IsOnSameLine =
1387       CommentsBeforeNextToken.empty()
1388           ? Next->NewlinesBefore == 0
1389           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1390   if (IsOnSameLine)
1391     return;
1392 
1393   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1394   bool PreviousStartsTemplateExpr =
1395       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1396   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1397     // If the line contains an '@' sign, the previous token might be an
1398     // annotation, which can precede another identifier/value.
1399     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1400       return LineNode.Tok->is(tok::at);
1401     });
1402     if (HasAt)
1403       return;
1404   }
1405   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1406     return addUnwrappedLine();
1407   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1408   bool NextEndsTemplateExpr =
1409       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1410   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1411       (PreviousMustBeValue ||
1412        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1413                          tok::minusminus))) {
1414     return addUnwrappedLine();
1415   }
1416   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1417       isJSDeclOrStmt(Keywords, Next)) {
1418     return addUnwrappedLine();
1419   }
1420 }
1421 
1422 void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel,
1423                                                  TokenType NextLBracesType,
1424                                                  IfStmtKind *IfKind,
1425                                                  bool *HasDoWhile,
1426                                                  bool *HasLabel) {
1427   if (Style.Language == FormatStyle::LK_TableGen &&
1428       FormatTok->is(tok::pp_include)) {
1429     nextToken();
1430     if (FormatTok->is(tok::string_literal))
1431       nextToken();
1432     addUnwrappedLine();
1433     return;
1434   }
1435   switch (FormatTok->Tok.getKind()) {
1436   case tok::kw_asm:
1437     nextToken();
1438     if (FormatTok->is(tok::l_brace)) {
1439       FormatTok->setFinalizedType(TT_InlineASMBrace);
1440       nextToken();
1441       while (FormatTok && FormatTok->isNot(tok::eof)) {
1442         if (FormatTok->is(tok::r_brace)) {
1443           FormatTok->setFinalizedType(TT_InlineASMBrace);
1444           nextToken();
1445           addUnwrappedLine();
1446           break;
1447         }
1448         FormatTok->Finalized = true;
1449         nextToken();
1450       }
1451     }
1452     break;
1453   case tok::kw_namespace:
1454     parseNamespace();
1455     return;
1456   case tok::kw_public:
1457   case tok::kw_protected:
1458   case tok::kw_private:
1459     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1460         Style.isCSharp()) {
1461       nextToken();
1462     } else {
1463       parseAccessSpecifier();
1464     }
1465     return;
1466   case tok::kw_if:
1467     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1468       // field/method declaration.
1469       break;
1470     }
1471     parseIfThenElse(IfKind);
1472     return;
1473   case tok::kw_for:
1474   case tok::kw_while:
1475     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1476       // field/method declaration.
1477       break;
1478     }
1479     parseForOrWhileLoop();
1480     return;
1481   case tok::kw_do:
1482     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1483       // field/method declaration.
1484       break;
1485     }
1486     parseDoWhile();
1487     if (HasDoWhile)
1488       *HasDoWhile = true;
1489     return;
1490   case tok::kw_switch:
1491     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1492       // 'switch: string' field declaration.
1493       break;
1494     }
1495     parseSwitch();
1496     return;
1497   case tok::kw_default:
1498     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1499       // 'default: string' field declaration.
1500       break;
1501     }
1502     nextToken();
1503     if (FormatTok->is(tok::colon)) {
1504       parseLabel();
1505       return;
1506     }
1507     // e.g. "default void f() {}" in a Java interface.
1508     break;
1509   case tok::kw_case:
1510     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1511       // 'case: string' field declaration.
1512       nextToken();
1513       break;
1514     }
1515     parseCaseLabel();
1516     return;
1517   case tok::kw_try:
1518   case tok::kw___try:
1519     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1520       // field/method declaration.
1521       break;
1522     }
1523     parseTryCatch();
1524     return;
1525   case tok::kw_extern:
1526     nextToken();
1527     if (FormatTok->is(tok::string_literal)) {
1528       nextToken();
1529       if (FormatTok->is(tok::l_brace)) {
1530         if (Style.BraceWrapping.AfterExternBlock)
1531           addUnwrappedLine();
1532         // Either we indent or for backwards compatibility we follow the
1533         // AfterExternBlock style.
1534         unsigned AddLevels =
1535             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1536                     (Style.BraceWrapping.AfterExternBlock &&
1537                      Style.IndentExternBlock ==
1538                          FormatStyle::IEBS_AfterExternBlock)
1539                 ? 1u
1540                 : 0u;
1541         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1542         addUnwrappedLine();
1543         return;
1544       }
1545     }
1546     break;
1547   case tok::kw_export:
1548     if (Style.isJavaScript()) {
1549       parseJavaScriptEs6ImportExport();
1550       return;
1551     }
1552     if (!Style.isCpp())
1553       break;
1554     // Handle C++ "(inline|export) namespace".
1555     LLVM_FALLTHROUGH;
1556   case tok::kw_inline:
1557     nextToken();
1558     if (FormatTok->is(tok::kw_namespace)) {
1559       parseNamespace();
1560       return;
1561     }
1562     break;
1563   case tok::identifier:
1564     if (FormatTok->is(TT_ForEachMacro)) {
1565       parseForOrWhileLoop();
1566       return;
1567     }
1568     if (FormatTok->is(TT_MacroBlockBegin)) {
1569       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1570                  /*MunchSemi=*/false);
1571       return;
1572     }
1573     if (FormatTok->is(Keywords.kw_import)) {
1574       if (Style.isJavaScript()) {
1575         parseJavaScriptEs6ImportExport();
1576         return;
1577       }
1578       if (Style.Language == FormatStyle::LK_Proto) {
1579         nextToken();
1580         if (FormatTok->is(tok::kw_public))
1581           nextToken();
1582         if (!FormatTok->is(tok::string_literal))
1583           return;
1584         nextToken();
1585         if (FormatTok->is(tok::semi))
1586           nextToken();
1587         addUnwrappedLine();
1588         return;
1589       }
1590       if (Style.isCpp()) {
1591         parseModuleImport();
1592         return;
1593       }
1594     }
1595     if (Style.isCpp() &&
1596         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1597                            Keywords.kw_slots, Keywords.kw_qslots)) {
1598       nextToken();
1599       if (FormatTok->is(tok::colon)) {
1600         nextToken();
1601         addUnwrappedLine();
1602         return;
1603       }
1604     }
1605     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1606       parseStatementMacro();
1607       return;
1608     }
1609     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1610       parseNamespace();
1611       return;
1612     }
1613     // In all other cases, parse the declaration.
1614     break;
1615   default:
1616     break;
1617   }
1618   do {
1619     const FormatToken *Previous = FormatTok->Previous;
1620     switch (FormatTok->Tok.getKind()) {
1621     case tok::at:
1622       nextToken();
1623       if (FormatTok->is(tok::l_brace)) {
1624         nextToken();
1625         parseBracedList();
1626         break;
1627       } else if (Style.Language == FormatStyle::LK_Java &&
1628                  FormatTok->is(Keywords.kw_interface)) {
1629         nextToken();
1630         break;
1631       }
1632       switch (FormatTok->Tok.getObjCKeywordID()) {
1633       case tok::objc_public:
1634       case tok::objc_protected:
1635       case tok::objc_package:
1636       case tok::objc_private:
1637         return parseAccessSpecifier();
1638       case tok::objc_interface:
1639       case tok::objc_implementation:
1640         return parseObjCInterfaceOrImplementation();
1641       case tok::objc_protocol:
1642         if (parseObjCProtocol())
1643           return;
1644         break;
1645       case tok::objc_end:
1646         return; // Handled by the caller.
1647       case tok::objc_optional:
1648       case tok::objc_required:
1649         nextToken();
1650         addUnwrappedLine();
1651         return;
1652       case tok::objc_autoreleasepool:
1653         nextToken();
1654         if (FormatTok->is(tok::l_brace)) {
1655           if (Style.BraceWrapping.AfterControlStatement ==
1656               FormatStyle::BWACS_Always) {
1657             addUnwrappedLine();
1658           }
1659           parseBlock();
1660         }
1661         addUnwrappedLine();
1662         return;
1663       case tok::objc_synchronized:
1664         nextToken();
1665         if (FormatTok->is(tok::l_paren)) {
1666           // Skip synchronization object
1667           parseParens();
1668         }
1669         if (FormatTok->is(tok::l_brace)) {
1670           if (Style.BraceWrapping.AfterControlStatement ==
1671               FormatStyle::BWACS_Always) {
1672             addUnwrappedLine();
1673           }
1674           parseBlock();
1675         }
1676         addUnwrappedLine();
1677         return;
1678       case tok::objc_try:
1679         // This branch isn't strictly necessary (the kw_try case below would
1680         // do this too after the tok::at is parsed above).  But be explicit.
1681         parseTryCatch();
1682         return;
1683       default:
1684         break;
1685       }
1686       break;
1687     case tok::kw_concept:
1688       parseConcept();
1689       return;
1690     case tok::kw_requires: {
1691       if (Style.isCpp()) {
1692         bool ParsedClause = parseRequires();
1693         if (ParsedClause)
1694           return;
1695       } else {
1696         nextToken();
1697       }
1698       break;
1699     }
1700     case tok::kw_enum:
1701       // Ignore if this is part of "template <enum ...".
1702       if (Previous && Previous->is(tok::less)) {
1703         nextToken();
1704         break;
1705       }
1706 
1707       // parseEnum falls through and does not yet add an unwrapped line as an
1708       // enum definition can start a structural element.
1709       if (!parseEnum())
1710         break;
1711       // This only applies for C++.
1712       if (!Style.isCpp()) {
1713         addUnwrappedLine();
1714         return;
1715       }
1716       break;
1717     case tok::kw_typedef:
1718       nextToken();
1719       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1720                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1721                              Keywords.kw_CF_CLOSED_ENUM,
1722                              Keywords.kw_NS_CLOSED_ENUM)) {
1723         parseEnum();
1724       }
1725       break;
1726     case tok::kw_struct:
1727     case tok::kw_union:
1728     case tok::kw_class:
1729       if (parseStructLike())
1730         return;
1731       break;
1732     case tok::period:
1733       nextToken();
1734       // In Java, classes have an implicit static member "class".
1735       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1736           FormatTok->is(tok::kw_class)) {
1737         nextToken();
1738       }
1739       if (Style.isJavaScript() && FormatTok &&
1740           FormatTok->Tok.getIdentifierInfo()) {
1741         // JavaScript only has pseudo keywords, all keywords are allowed to
1742         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1743         nextToken();
1744       }
1745       break;
1746     case tok::semi:
1747       nextToken();
1748       addUnwrappedLine();
1749       return;
1750     case tok::r_brace:
1751       addUnwrappedLine();
1752       return;
1753     case tok::l_paren: {
1754       parseParens();
1755       // Break the unwrapped line if a K&R C function definition has a parameter
1756       // declaration.
1757       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1758         break;
1759       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1760         addUnwrappedLine();
1761         return;
1762       }
1763       break;
1764     }
1765     case tok::kw_operator:
1766       nextToken();
1767       if (FormatTok->isBinaryOperator())
1768         nextToken();
1769       break;
1770     case tok::caret:
1771       nextToken();
1772       if (FormatTok->Tok.isAnyIdentifier() ||
1773           FormatTok->isSimpleTypeSpecifier()) {
1774         nextToken();
1775       }
1776       if (FormatTok->is(tok::l_paren))
1777         parseParens();
1778       if (FormatTok->is(tok::l_brace))
1779         parseChildBlock();
1780       break;
1781     case tok::l_brace:
1782       if (NextLBracesType != TT_Unknown)
1783         FormatTok->setFinalizedType(NextLBracesType);
1784       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1785         // A block outside of parentheses must be the last part of a
1786         // structural element.
1787         // FIXME: Figure out cases where this is not true, and add projections
1788         // for them (the one we know is missing are lambdas).
1789         if (Style.Language == FormatStyle::LK_Java &&
1790             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1791           // If necessary, we could set the type to something different than
1792           // TT_FunctionLBrace.
1793           if (Style.BraceWrapping.AfterControlStatement ==
1794               FormatStyle::BWACS_Always) {
1795             addUnwrappedLine();
1796           }
1797         } else if (Style.BraceWrapping.AfterFunction) {
1798           addUnwrappedLine();
1799         }
1800         if (!Line->InPPDirective)
1801           FormatTok->setFinalizedType(TT_FunctionLBrace);
1802         parseBlock();
1803         addUnwrappedLine();
1804         return;
1805       }
1806       // Otherwise this was a braced init list, and the structural
1807       // element continues.
1808       break;
1809     case tok::kw_try:
1810       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1811         // field/method declaration.
1812         nextToken();
1813         break;
1814       }
1815       // We arrive here when parsing function-try blocks.
1816       if (Style.BraceWrapping.AfterFunction)
1817         addUnwrappedLine();
1818       parseTryCatch();
1819       return;
1820     case tok::identifier: {
1821       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1822           Line->MustBeDeclaration) {
1823         addUnwrappedLine();
1824         parseCSharpGenericTypeConstraint();
1825         break;
1826       }
1827       if (FormatTok->is(TT_MacroBlockEnd)) {
1828         addUnwrappedLine();
1829         return;
1830       }
1831 
1832       // Function declarations (as opposed to function expressions) are parsed
1833       // on their own unwrapped line by continuing this loop. Function
1834       // expressions (functions that are not on their own line) must not create
1835       // a new unwrapped line, so they are special cased below.
1836       size_t TokenCount = Line->Tokens.size();
1837       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1838           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1839                                                      Keywords.kw_async)))) {
1840         tryToParseJSFunction();
1841         break;
1842       }
1843       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1844           FormatTok->is(Keywords.kw_interface)) {
1845         if (Style.isJavaScript()) {
1846           // In JavaScript/TypeScript, "interface" can be used as a standalone
1847           // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
1849           // interface declaration.
1850           unsigned StoredPosition = Tokens->getPosition();
1851           FormatToken *Next = Tokens->getNextToken();
1852           FormatTok = Tokens->setPosition(StoredPosition);
1853           if (!mustBeJSIdent(Keywords, Next)) {
1854             nextToken();
1855             break;
1856           }
1857         }
1858         parseRecord();
1859         addUnwrappedLine();
1860         return;
1861       }
1862 
1863       if (FormatTok->is(Keywords.kw_interface)) {
1864         if (parseStructLike())
1865           return;
1866         break;
1867       }
1868 
1869       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1870         parseStatementMacro();
1871         return;
1872       }
1873 
1874       // See if the following token should start a new unwrapped line.
1875       StringRef Text = FormatTok->TokenText;
1876 
1877       FormatToken *PreviousToken = FormatTok;
1878       nextToken();
1879 
1880       // JS doesn't have macros, and within classes colons indicate fields, not
1881       // labels.
1882       if (Style.isJavaScript())
1883         break;
1884 
1885       TokenCount = Line->Tokens.size();
1886       if (TokenCount == 1 ||
1887           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1888         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1889           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1890           parseLabel(!Style.IndentGotoLabels);
1891           if (HasLabel)
1892             *HasLabel = true;
1893           return;
1894         }
1895         // Recognize function-like macro usages without trailing semicolon as
1896         // well as free-standing macros like Q_OBJECT.
1897         bool FunctionLike = FormatTok->is(tok::l_paren);
1898         if (FunctionLike)
1899           parseParens();
1900 
1901         bool FollowedByNewline =
1902             CommentsBeforeNextToken.empty()
1903                 ? FormatTok->NewlinesBefore > 0
1904                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1905 
1906         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1907             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1908           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1909           addUnwrappedLine();
1910           return;
1911         }
1912       }
1913       break;
1914     }
1915     case tok::equal:
1916       if ((Style.isJavaScript() || Style.isCSharp()) &&
1917           FormatTok->is(TT_FatArrow)) {
1918         tryToParseChildBlock();
1919         break;
1920       }
1921 
1922       nextToken();
1923       if (FormatTok->is(tok::l_brace)) {
1924         // Block kind should probably be set to BK_BracedInit for any language.
1925         // C# needs this change to ensure that array initialisers and object
1926         // initialisers are indented the same way.
1927         if (Style.isCSharp())
1928           FormatTok->setBlockKind(BK_BracedInit);
1929         nextToken();
1930         parseBracedList();
1931       } else if (Style.Language == FormatStyle::LK_Proto &&
1932                  FormatTok->is(tok::less)) {
1933         nextToken();
1934         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1935                         /*ClosingBraceKind=*/tok::greater);
1936       }
1937       break;
1938     case tok::l_square:
1939       parseSquare();
1940       break;
1941     case tok::kw_new:
1942       parseNew();
1943       break;
1944     case tok::kw_case:
1945       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1946         // 'case: string' field declaration.
1947         nextToken();
1948         break;
1949       }
1950       parseCaseLabel();
1951       break;
1952     default:
1953       nextToken();
1954       break;
1955     }
1956   } while (!eof());
1957 }
1958 
1959 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1960   assert(FormatTok->is(tok::l_brace));
1961   if (!Style.isCSharp())
1962     return false;
1963   // See if it's a property accessor.
1964   if (FormatTok->Previous->isNot(tok::identifier))
1965     return false;
1966 
1967   // See if we are inside a property accessor.
1968   //
1969   // Record the current tokenPosition so that we can advance and
1970   // reset the current token. `Next` is not set yet so we need
1971   // another way to advance along the token stream.
1972   unsigned int StoredPosition = Tokens->getPosition();
1973   FormatToken *Tok = Tokens->getNextToken();
1974 
1975   // A trivial property accessor is of the form:
1976   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
1977   // Track these as they do not require line breaks to be introduced.
1978   bool HasSpecialAccessor = false;
1979   bool IsTrivialPropertyAccessor = true;
1980   while (!eof()) {
1981     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1982                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1983                      Keywords.kw_init, Keywords.kw_set)) {
1984       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
1985         HasSpecialAccessor = true;
1986       Tok = Tokens->getNextToken();
1987       continue;
1988     }
1989     if (Tok->isNot(tok::r_brace))
1990       IsTrivialPropertyAccessor = false;
1991     break;
1992   }
1993 
1994   if (!HasSpecialAccessor) {
1995     Tokens->setPosition(StoredPosition);
1996     return false;
1997   }
1998 
1999   // Try to parse the property accessor:
2000   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2001   Tokens->setPosition(StoredPosition);
2002   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2003     addUnwrappedLine();
2004   nextToken();
2005   do {
2006     switch (FormatTok->Tok.getKind()) {
2007     case tok::r_brace:
2008       nextToken();
2009       if (FormatTok->is(tok::equal)) {
2010         while (!eof() && FormatTok->isNot(tok::semi))
2011           nextToken();
2012         nextToken();
2013       }
2014       addUnwrappedLine();
2015       return true;
2016     case tok::l_brace:
2017       ++Line->Level;
2018       parseBlock(/*MustBeDeclaration=*/true);
2019       addUnwrappedLine();
2020       --Line->Level;
2021       break;
2022     case tok::equal:
2023       if (FormatTok->is(TT_FatArrow)) {
2024         ++Line->Level;
2025         do {
2026           nextToken();
2027         } while (!eof() && FormatTok->isNot(tok::semi));
2028         nextToken();
2029         addUnwrappedLine();
2030         --Line->Level;
2031         break;
2032       }
2033       nextToken();
2034       break;
2035     default:
2036       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2037                              Keywords.kw_set) &&
2038           !IsTrivialPropertyAccessor) {
2039         // Non-trivial get/set needs to be on its own line.
2040         addUnwrappedLine();
2041       }
2042       nextToken();
2043     }
2044   } while (!eof());
2045 
2046   // Unreachable for well-formed code (paired '{' and '}').
2047   return true;
2048 }
2049 
/// Tries to parse a C++ lambda expression starting at the current '['.
///
/// For non-C++ styles only the '[' is consumed. Otherwise the introducer is
/// handled by tryToParseLambdaIntroducer(), after which tokens are consumed
/// until a '{' is reached. That '{' is typed TT_LambdaLBrace, the initial '['
/// is typed TT_LambdaLSquare, and the body is parsed as a child block.
///
/// \returns false for non-C++ styles or when no lambda introducer was
/// recognized. Returns true both when a complete lambda was parsed and when
/// the scan after the introducer stopped at a token that cannot be part of a
/// lambda declarator (in which case no lambda token types are assigned to the
/// '[' or a '{').
bool UnwrappedLineParser::tryToParseLambda() {
  assert(FormatTok->is(tok::l_square));
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  // Remember the '[' so it can be typed once the lambda body has been found.
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // Both flags widen the set of tokens accepted below; see the comment on the
  // operator cases.
  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
      // Directly after a '<' these keywords indicate an explicit template
      // parameter list.
      assert(FormatTok->Previous);
      if (FormatTok->Previous->is(tok::less))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      // Outside of those two contexts the token ends the scan; the tokens
      // consumed so far stay consumed.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setFinalizedType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      // Any other token (including the end of input) cannot be part of a
      // lambda declarator: stop scanning.
      return true;
    }
  }
  // The loop only exits normally on '{': this is the lambda body.
  FormatTok->setFinalizedType(TT_LambdaLBrace);
  LSquare.setFinalizedType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}
2154 
2155 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2156   const FormatToken *Previous = FormatTok->Previous;
2157   const FormatToken *LeftSquare = FormatTok;
2158   nextToken();
2159   if (Previous &&
2160       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2161                          tok::kw_delete, tok::l_square) ||
2162        LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2163        Previous->isSimpleTypeSpecifier())) {
2164     return false;
2165   }
2166   if (FormatTok->is(tok::l_square))
2167     return false;
2168   if (FormatTok->is(tok::r_square)) {
2169     const FormatToken *Next = Tokens->peekNextToken();
2170     if (Next->is(tok::greater))
2171       return false;
2172   }
2173   parseSquare(/*LambdaIntroducer=*/true);
2174   return true;
2175 }
2176 
2177 void UnwrappedLineParser::tryToParseJSFunction() {
2178   assert(FormatTok->is(Keywords.kw_function) ||
2179          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2180   if (FormatTok->is(Keywords.kw_async))
2181     nextToken();
2182   // Consume "function".
2183   nextToken();
2184 
2185   // Consume * (generator function). Treat it like C++'s overloaded operators.
2186   if (FormatTok->is(tok::star)) {
2187     FormatTok->setFinalizedType(TT_OverloadedOperator);
2188     nextToken();
2189   }
2190 
2191   // Consume function name.
2192   if (FormatTok->is(tok::identifier))
2193     nextToken();
2194 
2195   if (FormatTok->isNot(tok::l_paren))
2196     return;
2197 
2198   // Parse formal parameter list.
2199   parseParens();
2200 
2201   if (FormatTok->is(tok::colon)) {
2202     // Parse a type definition.
2203     nextToken();
2204 
2205     // Eat the type declaration. For braced inline object types, balance braces,
2206     // otherwise just parse until finding an l_brace for the function body.
2207     if (FormatTok->is(tok::l_brace))
2208       tryToParseBracedList();
2209     else
2210       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2211         nextToken();
2212   }
2213 
2214   if (FormatTok->is(tok::semi))
2215     return;
2216 
2217   parseChildBlock();
2218 }
2219 
2220 bool UnwrappedLineParser::tryToParseBracedList() {
2221   if (FormatTok->is(BK_Unknown))
2222     calculateBraceTypes();
2223   assert(FormatTok->isNot(BK_Unknown));
2224   if (FormatTok->is(BK_Block))
2225     return false;
2226   nextToken();
2227   parseBracedList();
2228   return true;
2229 }
2230 
2231 bool UnwrappedLineParser::tryToParseChildBlock() {
2232   assert(Style.isJavaScript() || Style.isCSharp());
2233   assert(FormatTok->is(TT_FatArrow));
2234   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2235   // They always start an expression or a child block if followed by a curly
2236   // brace.
2237   nextToken();
2238   if (FormatTok->isNot(tok::l_brace))
2239     return false;
2240   parseChildBlock();
2241   return true;
2242 }
2243 
2244 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2245                                           bool IsEnum,
2246                                           tok::TokenKind ClosingBraceKind) {
2247   bool HasError = false;
2248 
2249   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2250   // replace this by using parseAssignmentExpression() inside.
2251   do {
2252     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2253         tryToParseChildBlock()) {
2254       continue;
2255     }
2256     if (Style.isJavaScript()) {
2257       if (FormatTok->is(Keywords.kw_function) ||
2258           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2259         tryToParseJSFunction();
2260         continue;
2261       }
2262       if (FormatTok->is(tok::l_brace)) {
2263         // Could be a method inside of a braced list `{a() { return 1; }}`.
2264         if (tryToParseBracedList())
2265           continue;
2266         parseChildBlock();
2267       }
2268     }
2269     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2270       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2271         addUnwrappedLine();
2272       nextToken();
2273       return !HasError;
2274     }
2275     switch (FormatTok->Tok.getKind()) {
2276     case tok::l_square:
2277       if (Style.isCSharp())
2278         parseSquare();
2279       else
2280         tryToParseLambda();
2281       break;
2282     case tok::l_paren:
2283       parseParens();
2284       // JavaScript can just have free standing methods and getters/setters in
2285       // object literals. Detect them by a "{" following ")".
2286       if (Style.isJavaScript()) {
2287         if (FormatTok->is(tok::l_brace))
2288           parseChildBlock();
2289         break;
2290       }
2291       break;
2292     case tok::l_brace:
2293       // Assume there are no blocks inside a braced init list apart
2294       // from the ones we explicitly parse out (like lambdas).
2295       FormatTok->setBlockKind(BK_BracedInit);
2296       nextToken();
2297       parseBracedList();
2298       break;
2299     case tok::less:
2300       if (Style.Language == FormatStyle::LK_Proto ||
2301           ClosingBraceKind == tok::greater) {
2302         nextToken();
2303         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2304                         /*ClosingBraceKind=*/tok::greater);
2305       } else {
2306         nextToken();
2307       }
2308       break;
2309     case tok::semi:
2310       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2311       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2312       // used for error recovery if we have otherwise determined that this is
2313       // a braced list.
2314       if (Style.isJavaScript()) {
2315         nextToken();
2316         break;
2317       }
2318       HasError = true;
2319       if (!ContinueOnSemicolons)
2320         return !HasError;
2321       nextToken();
2322       break;
2323     case tok::comma:
2324       nextToken();
2325       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2326         addUnwrappedLine();
2327       break;
2328     default:
2329       nextToken();
2330       break;
2331     }
2332   } while (!eof());
2333   return false;
2334 }
2335 
2336 /// \brief Parses a pair of parentheses (and everything between them).
2337 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2338 /// double ampersands. This only counts for the current parens scope.
2339 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2340   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2341   nextToken();
2342   do {
2343     switch (FormatTok->Tok.getKind()) {
2344     case tok::l_paren:
2345       parseParens();
2346       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2347         parseChildBlock();
2348       break;
2349     case tok::r_paren:
2350       nextToken();
2351       return;
2352     case tok::r_brace:
2353       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2354       return;
2355     case tok::l_square:
2356       tryToParseLambda();
2357       break;
2358     case tok::l_brace:
2359       if (!tryToParseBracedList())
2360         parseChildBlock();
2361       break;
2362     case tok::at:
2363       nextToken();
2364       if (FormatTok->is(tok::l_brace)) {
2365         nextToken();
2366         parseBracedList();
2367       }
2368       break;
2369     case tok::equal:
2370       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2371         tryToParseChildBlock();
2372       else
2373         nextToken();
2374       break;
2375     case tok::kw_class:
2376       if (Style.isJavaScript())
2377         parseRecord(/*ParseAsExpr=*/true);
2378       else
2379         nextToken();
2380       break;
2381     case tok::identifier:
2382       if (Style.isJavaScript() &&
2383           (FormatTok->is(Keywords.kw_function) ||
2384            FormatTok->startsSequence(Keywords.kw_async,
2385                                      Keywords.kw_function))) {
2386         tryToParseJSFunction();
2387       } else {
2388         nextToken();
2389       }
2390       break;
2391     case tok::kw_requires: {
2392       auto RequiresToken = FormatTok;
2393       nextToken();
2394       parseRequiresExpression(RequiresToken);
2395       break;
2396     }
2397     case tok::ampamp:
2398       if (AmpAmpTokenType != TT_Unknown)
2399         FormatTok->setFinalizedType(AmpAmpTokenType);
2400       LLVM_FALLTHROUGH;
2401     default:
2402       nextToken();
2403       break;
2404     }
2405   } while (!eof());
2406 }
2407 
2408 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2409   if (!LambdaIntroducer) {
2410     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2411     if (tryToParseLambda())
2412       return;
2413   }
2414   do {
2415     switch (FormatTok->Tok.getKind()) {
2416     case tok::l_paren:
2417       parseParens();
2418       break;
2419     case tok::r_square:
2420       nextToken();
2421       return;
2422     case tok::r_brace:
2423       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2424       return;
2425     case tok::l_square:
2426       parseSquare();
2427       break;
2428     case tok::l_brace: {
2429       if (!tryToParseBracedList())
2430         parseChildBlock();
2431       break;
2432     }
2433     case tok::at:
2434       nextToken();
2435       if (FormatTok->is(tok::l_brace)) {
2436         nextToken();
2437         parseBracedList();
2438       }
2439       break;
2440     default:
2441       nextToken();
2442       break;
2443     }
2444   } while (!eof());
2445 }
2446 
2447 void UnwrappedLineParser::keepAncestorBraces() {
2448   if (!Style.RemoveBracesLLVM)
2449     return;
2450 
2451   const int MaxNestingLevels = 2;
2452   const int Size = NestedTooDeep.size();
2453   if (Size >= MaxNestingLevels)
2454     NestedTooDeep[Size - MaxNestingLevels] = true;
2455   NestedTooDeep.push_back(false);
2456 }
2457 
2458 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2459   for (const auto &Token : llvm::reverse(Line.Tokens))
2460     if (Token.Tok->isNot(tok::comment))
2461       return Token.Tok;
2462 
2463   return nullptr;
2464 }
2465 
2466 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2467   FormatToken *Tok = nullptr;
2468 
2469   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2470       PreprocessorDirectives.empty()) {
2471     Tok = getLastNonComment(*Line);
2472     assert(Tok);
2473     if (Tok->BraceCount < 0) {
2474       assert(Tok->BraceCount == -1);
2475       Tok = nullptr;
2476     } else {
2477       Tok->BraceCount = -1;
2478     }
2479   }
2480 
2481   addUnwrappedLine();
2482   ++Line->Level;
2483   parseStructuralElement();
2484 
2485   if (Tok) {
2486     assert(!Line->InPPDirective);
2487     Tok = nullptr;
2488     for (const auto &L : llvm::reverse(*CurrentLines)) {
2489       if (!L.InPPDirective && getLastNonComment(L)) {
2490         Tok = L.Tokens.back().Tok;
2491         break;
2492       }
2493     }
2494     assert(Tok);
2495     ++Tok->BraceCount;
2496   }
2497 
2498   if (CheckEOF && FormatTok->is(tok::eof))
2499     addUnwrappedLine();
2500 
2501   --Line->Level;
2502 }
2503 
2504 static void markOptionalBraces(FormatToken *LeftBrace) {
2505   if (!LeftBrace)
2506     return;
2507 
2508   assert(LeftBrace->is(tok::l_brace));
2509 
2510   FormatToken *RightBrace = LeftBrace->MatchingParen;
2511   if (!RightBrace) {
2512     assert(!LeftBrace->Optional);
2513     return;
2514   }
2515 
2516   assert(RightBrace->is(tok::r_brace));
2517   assert(RightBrace->MatchingParen == LeftBrace);
2518   assert(LeftBrace->Optional == RightBrace->Optional);
2519 
2520   LeftBrace->Optional = true;
2521   RightBrace->Optional = true;
2522 }
2523 
2524 void UnwrappedLineParser::handleAttributes() {
2525   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2526   if (FormatTok->is(TT_AttributeMacro))
2527     nextToken();
2528   handleCppAttributes();
2529 }
2530 
2531 bool UnwrappedLineParser::handleCppAttributes() {
2532   // Handle [[likely]] / [[unlikely]] attributes.
2533   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2534     parseSquare();
2535     return true;
2536   }
2537   return false;
2538 }
2539 
/// Parses an `if` statement, starting at the `if` keyword, together with an
/// `else` branch or `else if` chain that follows it.
///
/// When Style.RemoveBracesLLVM is set, the function additionally decides
/// whether the braces of the `if` and `else` bodies may be removed and marks
/// them as optional if so.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is set, receives the
/// shape of the parsed statement (IfOnly, IfElse or IfElseIf). It is not
/// written when Style.RemoveBracesLLVM is off.
/// \param KeepBraces If true, the braces of this statement are not marked as
/// optional.
/// \returns nullptr when Style.RemoveBracesLLVM is off. Otherwise the '{' that
/// opened the `if` body, or nullptr if that body had no braces.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces) {
  assert(FormatTok->is(tok::kw_if) && "'if' expected");
  nextToken();
  // Step over a '!' directly after `if` (as in `if !consteval`).
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // Braces of an `if consteval` are always kept.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Step over `constexpr` or an identifier in front of the condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren))
      parseParens();
  }
  handleAttributes();

  // Set when the unwrapped line ending the `if` block is deferred until it is
  // known whether an `else` follows on the same line.
  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if they are unmatched, if the nesting below this
    // statement was flagged as too deep, or if the body reported an `if`
    // without `else` or an `if`/`else if` chain.
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // The `else` branch gets a fresh too-deep flag.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      // If the else block reported an `if` without `else` (or an `else if`
      // chain) and another `else` follows it, the braces are kept.
      if ((ElseBlockKind == IfStmtKind::IfOnly ||
           ElseBlockKind == IfStmtKind::IfElseIf) &&
          FormatTok->is(tok::kw_else)) {
        KeepElseBraces = true;
      }
      addUnwrappedLine();
    } else if (FormatTok->is(tok::kw_if)) {
      // `else if`: parse the nested `if` recursively. If a comment sits
      // between `else` and `if`, the `if` starts a new line one level deeper.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take this statement's entry off the stack for the duration of the
        // recursive call (which pushes and pops its own entry) and restore
        // it afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No `else`: keep the braces if the body reported an `if`/`else`.
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is bookkeeping for brace removal only.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  // Pop the entry pushed by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    // Braces are only made optional for both branches together.
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay: unlink the `if` brace pair from each other.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2667 
2668 void UnwrappedLineParser::parseTryCatch() {
2669   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2670   nextToken();
2671   bool NeedsUnwrappedLine = false;
2672   if (FormatTok->is(tok::colon)) {
2673     // We are in a function try block, what comes is an initializer list.
2674     nextToken();
2675 
2676     // In case identifiers were removed by clang-tidy, what might follow is
2677     // multiple commas in sequence - before the first identifier.
2678     while (FormatTok->is(tok::comma))
2679       nextToken();
2680 
2681     while (FormatTok->is(tok::identifier)) {
2682       nextToken();
2683       if (FormatTok->is(tok::l_paren))
2684         parseParens();
2685       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2686           FormatTok->is(tok::l_brace)) {
2687         do {
2688           nextToken();
2689         } while (!FormatTok->is(tok::r_brace));
2690         nextToken();
2691       }
2692 
2693       // In case identifiers were removed by clang-tidy, what might follow is
2694       // multiple commas in sequence - after the first identifier.
2695       while (FormatTok->is(tok::comma))
2696         nextToken();
2697     }
2698   }
2699   // Parse try with resource.
2700   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2701     parseParens();
2702 
2703   keepAncestorBraces();
2704 
2705   if (FormatTok->is(tok::l_brace)) {
2706     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2707     parseBlock();
2708     if (Style.BraceWrapping.BeforeCatch)
2709       addUnwrappedLine();
2710     else
2711       NeedsUnwrappedLine = true;
2712   } else if (!FormatTok->is(tok::kw_catch)) {
2713     // The C++ standard requires a compound-statement after a try.
2714     // If there's none, we try to assume there's a structuralElement
2715     // and try to continue.
2716     addUnwrappedLine();
2717     ++Line->Level;
2718     parseStructuralElement();
2719     --Line->Level;
2720   }
2721   while (true) {
2722     if (FormatTok->is(tok::at))
2723       nextToken();
2724     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2725                              tok::kw___finally) ||
2726           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2727            FormatTok->is(Keywords.kw_finally)) ||
2728           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2729            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2730       break;
2731     }
2732     nextToken();
2733     while (FormatTok->isNot(tok::l_brace)) {
2734       if (FormatTok->is(tok::l_paren)) {
2735         parseParens();
2736         continue;
2737       }
2738       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2739         if (Style.RemoveBracesLLVM)
2740           NestedTooDeep.pop_back();
2741         return;
2742       }
2743       nextToken();
2744     }
2745     NeedsUnwrappedLine = false;
2746     Line->MustBeDeclaration = false;
2747     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2748     parseBlock();
2749     if (Style.BraceWrapping.BeforeCatch)
2750       addUnwrappedLine();
2751     else
2752       NeedsUnwrappedLine = true;
2753   }
2754 
2755   if (Style.RemoveBracesLLVM)
2756     NestedTooDeep.pop_back();
2757 
2758   if (NeedsUnwrappedLine)
2759     addUnwrappedLine();
2760 }
2761 
2762 void UnwrappedLineParser::parseNamespace() {
2763   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2764          "'namespace' expected");
2765 
2766   const FormatToken &InitialToken = *FormatTok;
2767   nextToken();
2768   if (InitialToken.is(TT_NamespaceMacro)) {
2769     parseParens();
2770   } else {
2771     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2772                               tok::l_square, tok::period, tok::l_paren) ||
2773            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2774       if (FormatTok->is(tok::l_square))
2775         parseSquare();
2776       else if (FormatTok->is(tok::l_paren))
2777         parseParens();
2778       else
2779         nextToken();
2780     }
2781   }
2782   if (FormatTok->is(tok::l_brace)) {
2783     if (ShouldBreakBeforeBrace(Style, InitialToken))
2784       addUnwrappedLine();
2785 
2786     unsigned AddLevels =
2787         Style.NamespaceIndentation == FormatStyle::NI_All ||
2788                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2789                  DeclarationScopeStack.size() > 1)
2790             ? 1u
2791             : 0u;
2792     bool ManageWhitesmithsBraces =
2793         AddLevels == 0u &&
2794         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2795 
2796     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2797     // the whole block.
2798     if (ManageWhitesmithsBraces)
2799       ++Line->Level;
2800 
2801     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2802                /*KeepBraces=*/true, /*IfKind=*/nullptr,
2803                ManageWhitesmithsBraces);
2804 
2805     // Munch the semicolon after a namespace. This is more common than one would
2806     // think. Putting the semicolon into its own line is very ugly.
2807     if (FormatTok->is(tok::semi))
2808       nextToken();
2809 
2810     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2811 
2812     if (ManageWhitesmithsBraces)
2813       --Line->Level;
2814   }
2815   // FIXME: Add error handling.
2816 }
2817 
2818 void UnwrappedLineParser::parseNew() {
2819   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2820   nextToken();
2821 
2822   if (Style.isCSharp()) {
2823     do {
2824       if (FormatTok->is(tok::l_brace))
2825         parseBracedList();
2826 
2827       if (FormatTok->isOneOf(tok::semi, tok::comma))
2828         return;
2829 
2830       nextToken();
2831     } while (!eof());
2832   }
2833 
2834   if (Style.Language != FormatStyle::LK_Java)
2835     return;
2836 
2837   // In Java, we can parse everything up to the parens, which aren't optional.
2838   do {
2839     // There should not be a ;, { or } before the new's open paren.
2840     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2841       return;
2842 
2843     // Consume the parens.
2844     if (FormatTok->is(tok::l_paren)) {
2845       parseParens();
2846 
2847       // If there is a class body of an anonymous class, consume that as child.
2848       if (FormatTok->is(tok::l_brace))
2849         parseChildBlock();
2850       return;
2851     }
2852     nextToken();
2853   } while (!eof());
2854 }
2855 
2856 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
2857   keepAncestorBraces();
2858 
2859   if (FormatTok->is(tok::l_brace)) {
2860     if (!KeepBraces)
2861       FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2862     FormatToken *LeftBrace = FormatTok;
2863     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2864     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2865                /*MunchSemi=*/true, KeepBraces);
2866     if (!KeepBraces) {
2867       assert(!NestedTooDeep.empty());
2868       if (!NestedTooDeep.back())
2869         markOptionalBraces(LeftBrace);
2870     }
2871     if (WrapRightBrace)
2872       addUnwrappedLine();
2873   } else {
2874     parseUnbracedBody();
2875   }
2876 
2877   if (!KeepBraces)
2878     NestedTooDeep.pop_back();
2879 }
2880 
2881 void UnwrappedLineParser::parseForOrWhileLoop() {
2882   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2883          "'for', 'while' or foreach macro expected");
2884   const bool KeepBraces = !Style.RemoveBracesLLVM ||
2885                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
2886 
2887   nextToken();
2888   // JS' for await ( ...
2889   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2890     nextToken();
2891   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2892     nextToken();
2893   if (FormatTok->is(tok::l_paren))
2894     parseParens();
2895 
2896   handleAttributes();
2897   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
2898 }
2899 
2900 void UnwrappedLineParser::parseDoWhile() {
2901   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2902   nextToken();
2903 
2904   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
2905 
2906   // FIXME: Add error handling.
2907   if (!FormatTok->is(tok::kw_while)) {
2908     addUnwrappedLine();
2909     return;
2910   }
2911 
2912   // If in Whitesmiths mode, the line with the while() needs to be indented
2913   // to the same level as the block.
2914   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2915     ++Line->Level;
2916 
2917   nextToken();
2918   parseStructuralElement();
2919 }
2920 
2921 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2922   nextToken();
2923   unsigned OldLineLevel = Line->Level;
2924   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2925     --Line->Level;
2926   if (LeftAlignLabel)
2927     Line->Level = 0;
2928 
2929   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2930       FormatTok->is(tok::l_brace)) {
2931 
2932     CompoundStatementIndenter Indenter(this, Line->Level,
2933                                        Style.BraceWrapping.AfterCaseLabel,
2934                                        Style.BraceWrapping.IndentBraces);
2935     parseBlock();
2936     if (FormatTok->is(tok::kw_break)) {
2937       if (Style.BraceWrapping.AfterControlStatement ==
2938           FormatStyle::BWACS_Always) {
2939         addUnwrappedLine();
2940         if (!Style.IndentCaseBlocks &&
2941             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2942           ++Line->Level;
2943         }
2944       }
2945       parseStructuralElement();
2946     }
2947     addUnwrappedLine();
2948   } else {
2949     if (FormatTok->is(tok::semi))
2950       nextToken();
2951     addUnwrappedLine();
2952   }
2953   Line->Level = OldLineLevel;
2954   if (FormatTok->isNot(tok::l_brace)) {
2955     parseStructuralElement();
2956     addUnwrappedLine();
2957   }
2958 }
2959 
2960 void UnwrappedLineParser::parseCaseLabel() {
2961   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2962 
2963   // FIXME: fix handling of complex expressions here.
2964   do {
2965     nextToken();
2966   } while (!eof() && !FormatTok->is(tok::colon));
2967   parseLabel();
2968 }
2969 
2970 void UnwrappedLineParser::parseSwitch() {
2971   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
2972   nextToken();
2973   if (FormatTok->is(tok::l_paren))
2974     parseParens();
2975 
2976   keepAncestorBraces();
2977 
2978   if (FormatTok->is(tok::l_brace)) {
2979     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2980     parseBlock();
2981     addUnwrappedLine();
2982   } else {
2983     addUnwrappedLine();
2984     ++Line->Level;
2985     parseStructuralElement();
2986     --Line->Level;
2987   }
2988 
2989   if (Style.RemoveBracesLLVM)
2990     NestedTooDeep.pop_back();
2991 }
2992 
2993 // Operators that can follow a C variable.
2994 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
2995   switch (kind) {
2996   case tok::ampamp:
2997   case tok::ampequal:
2998   case tok::arrow:
2999   case tok::caret:
3000   case tok::caretequal:
3001   case tok::comma:
3002   case tok::ellipsis:
3003   case tok::equal:
3004   case tok::equalequal:
3005   case tok::exclaim:
3006   case tok::exclaimequal:
3007   case tok::greater:
3008   case tok::greaterequal:
3009   case tok::greatergreater:
3010   case tok::greatergreaterequal:
3011   case tok::l_paren:
3012   case tok::l_square:
3013   case tok::less:
3014   case tok::lessequal:
3015   case tok::lessless:
3016   case tok::lesslessequal:
3017   case tok::minus:
3018   case tok::minusequal:
3019   case tok::minusminus:
3020   case tok::percent:
3021   case tok::percentequal:
3022   case tok::period:
3023   case tok::pipe:
3024   case tok::pipeequal:
3025   case tok::pipepipe:
3026   case tok::plus:
3027   case tok::plusequal:
3028   case tok::plusplus:
3029   case tok::question:
3030   case tok::r_brace:
3031   case tok::r_paren:
3032   case tok::r_square:
3033   case tok::semi:
3034   case tok::slash:
3035   case tok::slashequal:
3036   case tok::star:
3037   case tok::starequal:
3038     return true;
3039   default:
3040     return false;
3041   }
3042 }
3043 
3044 void UnwrappedLineParser::parseAccessSpecifier() {
3045   FormatToken *AccessSpecifierCandidate = FormatTok;
3046   nextToken();
3047   // Understand Qt's slots.
3048   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3049     nextToken();
3050   // Otherwise, we don't know what it is, and we'd better keep the next token.
3051   if (FormatTok->is(tok::colon)) {
3052     nextToken();
3053     addUnwrappedLine();
3054   } else if (!FormatTok->is(tok::coloncolon) &&
3055              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3056     // Not a variable name nor namespace name.
3057     addUnwrappedLine();
3058   } else if (AccessSpecifierCandidate) {
3059     // Consider the access specifier to be a C identifier.
3060     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3061   }
3062 }
3063 
3064 /// \brief Parses a concept definition.
3065 /// \pre The current token has to be the concept keyword.
3066 ///
3067 /// Returns if either the concept has been completely parsed, or if it detects
3068 /// that the concept definition is incorrect.
3069 void UnwrappedLineParser::parseConcept() {
3070   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
3071   nextToken();
3072   if (!FormatTok->is(tok::identifier))
3073     return;
3074   nextToken();
3075   if (!FormatTok->is(tok::equal))
3076     return;
3077   nextToken();
3078   parseConstraintExpression();
3079   if (FormatTok->is(tok::semi))
3080     nextToken();
3081   addUnwrappedLine();
3082 }
3083 
3084 /// \brief Parses a requires, decides if it is a clause or an expression.
3085 /// \pre The current token has to be the requires keyword.
3086 /// \returns true if it parsed a clause.
3087 bool clang::format::UnwrappedLineParser::parseRequires() {
3088   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3089   auto RequiresToken = FormatTok;
3090 
3091   // We try to guess if it is a requires clause, or a requires expression. For
3092   // that we first consume the keyword and check the next token.
3093   nextToken();
3094 
3095   switch (FormatTok->Tok.getKind()) {
3096   case tok::l_brace:
3097     // This can only be an expression, never a clause.
3098     parseRequiresExpression(RequiresToken);
3099     return false;
3100   case tok::l_paren:
3101     // Clauses and expression can start with a paren, it's unclear what we have.
3102     break;
3103   default:
3104     // All other tokens can only be a clause.
3105     parseRequiresClause(RequiresToken);
3106     return true;
3107   }
3108 
3109   // Looking forward we would have to decide if there are function declaration
3110   // like arguments to the requires expression:
3111   // requires (T t) {
3112   // Or there is a constraint expression for the requires clause:
3113   // requires (C<T> && ...
3114 
3115   // But first let's look behind.
3116   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3117 
3118   if (!PreviousNonComment ||
3119       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3120     // If there is no token, or an expression left brace, we are a requires
3121     // clause within a requires expression.
3122     parseRequiresClause(RequiresToken);
3123     return true;
3124   }
3125 
3126   switch (PreviousNonComment->Tok.getKind()) {
3127   case tok::greater:
3128   case tok::r_paren:
3129   case tok::kw_noexcept:
3130   case tok::kw_const:
3131     // This is a requires clause.
3132     parseRequiresClause(RequiresToken);
3133     return true;
3134   case tok::amp:
3135   case tok::ampamp: {
3136     // This can be either:
3137     // if (... && requires (T t) ...)
3138     // Or
3139     // void member(...) && requires (C<T> ...
3140     // We check the one token before that for a const:
3141     // void member(...) const && requires (C<T> ...
3142     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3143     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3144       parseRequiresClause(RequiresToken);
3145       return true;
3146     }
3147     break;
3148   }
3149   default:
3150     // It's an expression.
3151     parseRequiresExpression(RequiresToken);
3152     return false;
3153   }
3154 
3155   // Now we look forward and try to check if the paren content is a parameter
3156   // list. The parameters can be cv-qualified and contain references or
3157   // pointers.
3158   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3159   // of stuff: typename, const, *, &, &&, ::, identifiers.
3160 
3161   int NextTokenOffset = 1;
3162   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3163   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3164     ++NextTokenOffset;
3165     NextToken = Tokens->peekNextToken(NextTokenOffset);
3166   };
3167 
3168   bool FoundType = false;
3169   bool LastWasColonColon = false;
3170   int OpenAngles = 0;
3171 
3172   for (; NextTokenOffset < 50; PeekNext()) {
3173     switch (NextToken->Tok.getKind()) {
3174     case tok::kw_volatile:
3175     case tok::kw_const:
3176     case tok::comma:
3177       parseRequiresExpression(RequiresToken);
3178       return false;
3179     case tok::r_paren:
3180     case tok::pipepipe:
3181       parseRequiresClause(RequiresToken);
3182       return true;
3183     case tok::eof:
3184       // Break out of the loop.
3185       NextTokenOffset = 50;
3186       break;
3187     case tok::coloncolon:
3188       LastWasColonColon = true;
3189       break;
3190     case tok::identifier:
3191       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3192         parseRequiresExpression(RequiresToken);
3193         return false;
3194       }
3195       FoundType = true;
3196       LastWasColonColon = false;
3197       break;
3198     case tok::less:
3199       ++OpenAngles;
3200       break;
3201     case tok::greater:
3202       --OpenAngles;
3203       break;
3204     default:
3205       if (NextToken->isSimpleTypeSpecifier()) {
3206         parseRequiresExpression(RequiresToken);
3207         return false;
3208       }
3209       break;
3210     }
3211   }
3212 
3213   // This seems to be a complicated expression, just assume it's a clause.
3214   parseRequiresClause(RequiresToken);
3215   return true;
3216 }
3217 
3218 /// \brief Parses a requires clause.
3219 /// \param RequiresToken The requires keyword token, which starts this clause.
3220 /// \pre We need to be on the next token after the requires keyword.
3221 /// \sa parseRequiresExpression
3222 ///
3223 /// Returns if it either has finished parsing the clause, or it detects, that
3224 /// the clause is incorrect.
3225 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3226   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3227   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3228 
3229   // If there is no previous token, we are within a requires expression,
3230   // otherwise we will always have the template or function declaration in front
3231   // of it.
3232   bool InRequiresExpression =
3233       !RequiresToken->Previous ||
3234       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3235 
3236   RequiresToken->setFinalizedType(InRequiresExpression
3237                                       ? TT_RequiresClauseInARequiresExpression
3238                                       : TT_RequiresClause);
3239 
3240   parseConstraintExpression();
3241 
3242   if (!InRequiresExpression)
3243     FormatTok->Previous->ClosesRequiresClause = true;
3244 }
3245 
3246 /// \brief Parses a requires expression.
3247 /// \param RequiresToken The requires keyword token, which starts this clause.
3248 /// \pre We need to be on the next token after the requires keyword.
3249 /// \sa parseRequiresClause
3250 ///
3251 /// Returns if it either has finished parsing the expression, or it detects,
3252 /// that the expression is incorrect.
3253 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3254   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3255   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3256 
3257   RequiresToken->setFinalizedType(TT_RequiresExpression);
3258 
3259   if (FormatTok->is(tok::l_paren)) {
3260     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3261     parseParens();
3262   }
3263 
3264   if (FormatTok->is(tok::l_brace)) {
3265     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3266     parseChildBlock(/*CanContainBracedList=*/false,
3267                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3268   }
3269 }
3270 
/// \brief Parses a constraint expression.
///
/// This is either the definition of a concept, or the body of a requires
/// clause. It returns, when the parsing is complete, or the expression is
/// incorrect.
///
/// The function consumes tokens one construct at a time and returns at the
/// first token that cannot continue a constraint expression; that token is
/// left as the current token.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;
  do {
    // Read the flag for this iteration and reset it for the next one; the
    // cases below set it again where a lambda may follow.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested requires inside a constraint expression is always treated as
      // a requires expression.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      break;

    case tok::l_square:
      // Either a lambda (if one is allowed here and parses), or the end of
      // the constraint expression.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    // Tokens that end the constraint expression.
    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      // Inside a constraint expression these are always binary operators.
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // These do not change whether a lambda may follow.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
    case tok::kw_decltype:
      LambdaNextTimeAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // A named cast has to be followed by its <type> part.
      nextToken();
      if (!FormatTok->is(tok::less))
        return;

      nextToken();
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    case tok::kw_bool:
      // bool is only allowed if it is directly followed by a paren for a cast:
      // concept C = bool(...);
      // and bool is the only type, all other types as cast must be inside a
      // cast to bool and thus are handled by the other cases.
      nextToken();
      if (FormatTok->isNot(tok::l_paren))
        return;
      parseParens();
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    }
  } while (!eof());
}
3414 
/// Parses an enum declaration or definition.
///
/// \returns false if the tokens turn out not to be an enum after all (for
/// example 'enum' used as a property or field name in JavaScript or protobuf,
/// or two identifiers in a row in C++); true otherwise, including the case
/// where no '{' follows.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
    nextToken();

  // Consume everything up to the opening brace (or to whatever token shows
  // that there is no enum body).
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square, tok::r_square)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(TT_AttributeSquare)) {
      parseSquare();
      // Consume the closing TT_AttributeSquare.
      if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
        nextToken();
    }
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    // Protobuf enum bodies are parsed as ordinary declaration blocks.
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    // The enumerators go on lines of their own, one level deeper.
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                   /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    // The braced list did not parse cleanly; end the line here.
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3500 
3501 bool UnwrappedLineParser::parseStructLike() {
3502   // parseRecord falls through and does not yet add an unwrapped line as a
3503   // record declaration or definition can start a structural element.
3504   parseRecord();
3505   // This does not apply to Java, JavaScript and C#.
3506   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3507       Style.isCSharp()) {
3508     if (FormatTok->is(tok::semi))
3509       nextToken();
3510     addUnwrappedLine();
3511     return true;
3512   }
3513   return false;
3514 }
3515 
3516 namespace {
3517 // A class used to set and restore the Token position when peeking
3518 // ahead in the token source.
3519 class ScopedTokenPosition {
3520   unsigned StoredPosition;
3521   FormatTokenSource *Tokens;
3522 
3523 public:
3524   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3525     assert(Tokens && "Tokens expected to not be null");
3526     StoredPosition = Tokens->getPosition();
3527   }
3528 
3529   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3530 };
3531 } // namespace
3532 
3533 // Look to see if we have [[ by looking ahead, if
3534 // its not then rewind to the original position.
3535 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3536   ScopedTokenPosition AutoPosition(Tokens);
3537   FormatToken *Tok = Tokens->getNextToken();
3538   // We already read the first [ check for the second.
3539   if (!Tok->is(tok::l_square))
3540     return false;
3541   // Double check that the attribute is just something
3542   // fairly simple.
3543   while (Tok->isNot(tok::eof)) {
3544     if (Tok->is(tok::r_square))
3545       break;
3546     Tok = Tokens->getNextToken();
3547   }
3548   if (Tok->is(tok::eof))
3549     return false;
3550   Tok = Tokens->getNextToken();
3551   if (!Tok->is(tok::r_square))
3552     return false;
3553   Tok = Tokens->getNextToken();
3554   if (Tok->is(tok::semi))
3555     return false;
3556   return true;
3557 }
3558 
3559 void UnwrappedLineParser::parseJavaEnumBody() {
3560   assert(FormatTok->is(tok::l_brace));
3561   const FormatToken *OpeningBrace = FormatTok;
3562 
3563   // Determine whether the enum is simple, i.e. does not have a semicolon or
3564   // constants with class bodies. Simple enums can be formatted like braced
3565   // lists, contracted to a single line, etc.
3566   unsigned StoredPosition = Tokens->getPosition();
3567   bool IsSimple = true;
3568   FormatToken *Tok = Tokens->getNextToken();
3569   while (!Tok->is(tok::eof)) {
3570     if (Tok->is(tok::r_brace))
3571       break;
3572     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3573       IsSimple = false;
3574       break;
3575     }
3576     // FIXME: This will also mark enums with braces in the arguments to enum
3577     // constants as "not simple". This is probably fine in practice, though.
3578     Tok = Tokens->getNextToken();
3579   }
3580   FormatTok = Tokens->setPosition(StoredPosition);
3581 
3582   if (IsSimple) {
3583     nextToken();
3584     parseBracedList();
3585     addUnwrappedLine();
3586     return;
3587   }
3588 
3589   // Parse the body of a more complex enum.
3590   // First add a line for everything up to the "{".
3591   nextToken();
3592   addUnwrappedLine();
3593   ++Line->Level;
3594 
3595   // Parse the enum constants.
3596   while (FormatTok->isNot(tok::eof)) {
3597     if (FormatTok->is(tok::l_brace)) {
3598       // Parse the constant's class body.
3599       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3600                  /*MunchSemi=*/false);
3601     } else if (FormatTok->is(tok::l_paren)) {
3602       parseParens();
3603     } else if (FormatTok->is(tok::comma)) {
3604       nextToken();
3605       addUnwrappedLine();
3606     } else if (FormatTok->is(tok::semi)) {
3607       nextToken();
3608       addUnwrappedLine();
3609       break;
3610     } else if (FormatTok->is(tok::r_brace)) {
3611       addUnwrappedLine();
3612       break;
3613     } else {
3614       nextToken();
3615     }
3616   }
3617 
3618   // Parse the class body after the enum's ";" if any.
3619   parseLevel(OpeningBrace);
3620   nextToken();
3621   --Line->Level;
3622   addUnwrappedLine();
3623 }
3624 
3625 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3626   const FormatToken &InitialToken = *FormatTok;
3627   nextToken();
3628 
3629   // The actual identifier can be a nested name specifier, and in macros
3630   // it is often token-pasted.
3631   // An [[attribute]] can be before the identifier.
3632   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3633                             tok::kw___attribute, tok::kw___declspec,
3634                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3635          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3636           FormatTok->isOneOf(tok::period, tok::comma))) {
3637     if (Style.isJavaScript() &&
3638         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3639       // JavaScript/TypeScript supports inline object types in
3640       // extends/implements positions:
3641       //     class Foo implements {bar: number} { }
3642       nextToken();
3643       if (FormatTok->is(tok::l_brace)) {
3644         tryToParseBracedList();
3645         continue;
3646       }
3647     }
3648     bool IsNonMacroIdentifier =
3649         FormatTok->is(tok::identifier) &&
3650         FormatTok->TokenText != FormatTok->TokenText.upper();
3651     nextToken();
3652     // We can have macros or attributes in between 'class' and the class name.
3653     if (!IsNonMacroIdentifier) {
3654       if (FormatTok->is(tok::l_paren)) {
3655         parseParens();
3656       } else if (FormatTok->is(TT_AttributeSquare)) {
3657         parseSquare();
3658         // Consume the closing TT_AttributeSquare.
3659         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3660           nextToken();
3661       }
3662     }
3663   }
3664 
3665   // Note that parsing away template declarations here leads to incorrectly
3666   // accepting function declarations as record declarations.
3667   // In general, we cannot solve this problem. Consider:
3668   // class A<int> B() {}
3669   // which can be a function definition or a class definition when B() is a
3670   // macro. If we find enough real-world cases where this is a problem, we
3671   // can parse for the 'template' keyword in the beginning of the statement,
3672   // and thus rule out the record production in case there is no template
3673   // (this would still leave us with an ambiguity between template function
3674   // and class declarations).
3675   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3676     do {
3677       if (FormatTok->is(tok::l_brace)) {
3678         calculateBraceTypes(/*ExpectClassBody=*/true);
3679         if (!tryToParseBracedList())
3680           break;
3681       }
3682       if (FormatTok->is(tok::l_square)) {
3683         FormatToken *Previous = FormatTok->Previous;
3684         if (!Previous ||
3685             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3686           // Don't try parsing a lambda if we had a closing parenthesis before,
3687           // it was probably a pointer to an array: int (*)[].
3688           if (!tryToParseLambda())
3689             break;
3690         } else {
3691           parseSquare();
3692           continue;
3693         }
3694       }
3695       if (FormatTok->is(tok::semi))
3696         return;
3697       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3698         addUnwrappedLine();
3699         nextToken();
3700         parseCSharpGenericTypeConstraint();
3701         break;
3702       }
3703       nextToken();
3704     } while (!eof());
3705   }
3706 
3707   auto GetBraceType = [](const FormatToken &RecordTok) {
3708     switch (RecordTok.Tok.getKind()) {
3709     case tok::kw_class:
3710       return TT_ClassLBrace;
3711     case tok::kw_struct:
3712       return TT_StructLBrace;
3713     case tok::kw_union:
3714       return TT_UnionLBrace;
3715     default:
3716       // Useful for e.g. interface.
3717       return TT_RecordLBrace;
3718     }
3719   };
3720   if (FormatTok->is(tok::l_brace)) {
3721     FormatTok->setFinalizedType(GetBraceType(InitialToken));
3722     if (ParseAsExpr) {
3723       parseChildBlock();
3724     } else {
3725       if (ShouldBreakBeforeBrace(Style, InitialToken))
3726         addUnwrappedLine();
3727 
3728       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3729       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3730     }
3731   }
3732   // There is no addUnwrappedLine() here so that we fall through to parsing a
3733   // structural element afterwards. Thus, in "class A {} n, m;",
3734   // "} n, m;" will end up in one unwrapped line.
3735 }
3736 
3737 void UnwrappedLineParser::parseObjCMethod() {
3738   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3739          "'(' or identifier expected.");
3740   do {
3741     if (FormatTok->is(tok::semi)) {
3742       nextToken();
3743       addUnwrappedLine();
3744       return;
3745     } else if (FormatTok->is(tok::l_brace)) {
3746       if (Style.BraceWrapping.AfterFunction)
3747         addUnwrappedLine();
3748       parseBlock();
3749       addUnwrappedLine();
3750       return;
3751     } else {
3752       nextToken();
3753     }
3754   } while (!eof());
3755 }
3756 
3757 void UnwrappedLineParser::parseObjCProtocolList() {
3758   assert(FormatTok->is(tok::less) && "'<' expected.");
3759   do {
3760     nextToken();
3761     // Early exit in case someone forgot a close angle.
3762     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3763         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3764       return;
3765     }
3766   } while (!eof() && FormatTok->isNot(tok::greater));
3767   nextToken(); // Skip '>'.
3768 }
3769 
3770 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3771   do {
3772     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3773       nextToken();
3774       addUnwrappedLine();
3775       break;
3776     }
3777     if (FormatTok->is(tok::l_brace)) {
3778       parseBlock();
3779       // In ObjC interfaces, nothing should be following the "}".
3780       addUnwrappedLine();
3781     } else if (FormatTok->is(tok::r_brace)) {
3782       // Ignore stray "}". parseStructuralElement doesn't consume them.
3783       nextToken();
3784       addUnwrappedLine();
3785     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3786       nextToken();
3787       parseObjCMethod();
3788     } else {
3789       parseStructuralElement();
3790     }
3791   } while (!eof());
3792 }
3793 
3794 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3795   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3796          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3797   nextToken();
3798   nextToken(); // interface name
3799 
3800   // @interface can be followed by a lightweight generic
3801   // specialization list, then either a base class or a category.
3802   if (FormatTok->is(tok::less))
3803     parseObjCLightweightGenerics();
3804   if (FormatTok->is(tok::colon)) {
3805     nextToken();
3806     nextToken(); // base class name
3807     // The base class can also have lightweight generics applied to it.
3808     if (FormatTok->is(tok::less))
3809       parseObjCLightweightGenerics();
3810   } else if (FormatTok->is(tok::l_paren)) {
3811     // Skip category, if present.
3812     parseParens();
3813   }
3814 
3815   if (FormatTok->is(tok::less))
3816     parseObjCProtocolList();
3817 
3818   if (FormatTok->is(tok::l_brace)) {
3819     if (Style.BraceWrapping.AfterObjCDeclaration)
3820       addUnwrappedLine();
3821     parseBlock(/*MustBeDeclaration=*/true);
3822   }
3823 
3824   // With instance variables, this puts '}' on its own line.  Without instance
3825   // variables, this ends the @interface line.
3826   addUnwrappedLine();
3827 
3828   parseObjCUntilAtEnd();
3829 }
3830 
3831 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3832   assert(FormatTok->is(tok::less));
3833   // Unlike protocol lists, generic parameterizations support
3834   // nested angles:
3835   //
3836   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3837   //     NSObject <NSCopying, NSSecureCoding>
3838   //
3839   // so we need to count how many open angles we have left.
3840   unsigned NumOpenAngles = 1;
3841   do {
3842     nextToken();
3843     // Early exit in case someone forgot a close angle.
3844     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3845         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3846       break;
3847     }
3848     if (FormatTok->is(tok::less)) {
3849       ++NumOpenAngles;
3850     } else if (FormatTok->is(tok::greater)) {
3851       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3852       --NumOpenAngles;
3853     }
3854   } while (!eof() && NumOpenAngles != 0);
3855   nextToken(); // Skip '>'.
3856 }
3857 
3858 // Returns true for the declaration/definition form of @protocol,
3859 // false for the expression form.
3860 bool UnwrappedLineParser::parseObjCProtocol() {
3861   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3862   nextToken();
3863 
3864   if (FormatTok->is(tok::l_paren)) {
3865     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3866     return false;
3867   }
3868 
3869   // The definition/declaration form,
3870   // @protocol Foo
3871   // - (int)someMethod;
3872   // @end
3873 
3874   nextToken(); // protocol name
3875 
3876   if (FormatTok->is(tok::less))
3877     parseObjCProtocolList();
3878 
3879   // Check for protocol declaration.
3880   if (FormatTok->is(tok::semi)) {
3881     nextToken();
3882     addUnwrappedLine();
3883     return true;
3884   }
3885 
3886   addUnwrappedLine();
3887   parseObjCUntilAtEnd();
3888   return true;
3889 }
3890 
3891 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3892   bool IsImport = FormatTok->is(Keywords.kw_import);
3893   assert(IsImport || FormatTok->is(tok::kw_export));
3894   nextToken();
3895 
3896   // Consume the "default" in "export default class/function".
3897   if (FormatTok->is(tok::kw_default))
3898     nextToken();
3899 
3900   // Consume "async function", "function" and "default function", so that these
3901   // get parsed as free-standing JS functions, i.e. do not require a trailing
3902   // semicolon.
3903   if (FormatTok->is(Keywords.kw_async))
3904     nextToken();
3905   if (FormatTok->is(Keywords.kw_function)) {
3906     nextToken();
3907     return;
3908   }
3909 
3910   // For imports, `export *`, `export {...}`, consume the rest of the line up
3911   // to the terminating `;`. For everything else, just return and continue
3912   // parsing the structural element, i.e. the declaration or expression for
3913   // `export default`.
3914   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3915       !FormatTok->isStringLiteral()) {
3916     return;
3917   }
3918 
3919   while (!eof()) {
3920     if (FormatTok->is(tok::semi))
3921       return;
3922     if (Line->Tokens.empty()) {
3923       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3924       // import statement should terminate.
3925       return;
3926     }
3927     if (FormatTok->is(tok::l_brace)) {
3928       FormatTok->setBlockKind(BK_Block);
3929       nextToken();
3930       parseBracedList();
3931     } else {
3932       nextToken();
3933     }
3934   }
3935 }
3936 
3937 void UnwrappedLineParser::parseStatementMacro() {
3938   nextToken();
3939   if (FormatTok->is(tok::l_paren))
3940     parseParens();
3941   if (FormatTok->is(tok::semi))
3942     nextToken();
3943   addUnwrappedLine();
3944 }
3945 
3946 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3947                                                  StringRef Prefix = "") {
3948   llvm::dbgs() << Prefix << "Line(" << Line.Level
3949                << ", FSC=" << Line.FirstStartColumn << ")"
3950                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3951   for (const auto &Node : Line.Tokens) {
3952     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3953                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3954                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3955   }
3956   for (const auto &Node : Line.Tokens)
3957     for (const auto &ChildNode : Node.Children)
3958       printDebugInfo(ChildNode, "\nChild: ");
3959 
3960   llvm::dbgs() << "\n";
3961 }
3962 
3963 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3964   if (Line->Tokens.empty())
3965     return;
3966   LLVM_DEBUG({
3967     if (CurrentLines == &Lines)
3968       printDebugInfo(*Line);
3969   });
3970 
3971   // If this line closes a block when in Whitesmiths mode, remember that
3972   // information so that the level can be decreased after the line is added.
3973   // This has to happen after the addition of the line since the line itself
3974   // needs to be indented.
3975   bool ClosesWhitesmithsBlock =
3976       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3977       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3978 
3979   CurrentLines->push_back(std::move(*Line));
3980   Line->Tokens.clear();
3981   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3982   Line->FirstStartColumn = 0;
3983 
3984   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3985     --Line->Level;
3986   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3987     CurrentLines->append(
3988         std::make_move_iterator(PreprocessorDirectives.begin()),
3989         std::make_move_iterator(PreprocessorDirectives.end()));
3990     PreprocessorDirectives.clear();
3991   }
3992   // Disconnect the current token from the last token on the previous line.
3993   FormatTok->Previous = nullptr;
3994 }
3995 
3996 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
3997 
3998 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3999   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4000          FormatTok.NewlinesBefore > 0;
4001 }
4002 
4003 // Checks if \p FormatTok is a line comment that continues the line comment
4004 // section on \p Line.
4005 static bool
4006 continuesLineCommentSection(const FormatToken &FormatTok,
4007                             const UnwrappedLine &Line,
4008                             const llvm::Regex &CommentPragmasRegex) {
4009   if (Line.Tokens.empty())
4010     return false;
4011 
4012   StringRef IndentContent = FormatTok.TokenText;
4013   if (FormatTok.TokenText.startswith("//") ||
4014       FormatTok.TokenText.startswith("/*")) {
4015     IndentContent = FormatTok.TokenText.substr(2);
4016   }
4017   if (CommentPragmasRegex.match(IndentContent))
4018     return false;
4019 
4020   // If Line starts with a line comment, then FormatTok continues the comment
4021   // section if its original column is greater or equal to the original start
4022   // column of the line.
4023   //
4024   // Define the min column token of a line as follows: if a line ends in '{' or
4025   // contains a '{' followed by a line comment, then the min column token is
4026   // that '{'. Otherwise, the min column token of the line is the first token of
4027   // the line.
4028   //
4029   // If Line starts with a token other than a line comment, then FormatTok
4030   // continues the comment section if its original column is greater than the
4031   // original start column of the min column token of the line.
4032   //
4033   // For example, the second line comment continues the first in these cases:
4034   //
4035   // // first line
4036   // // second line
4037   //
4038   // and:
4039   //
4040   // // first line
4041   //  // second line
4042   //
4043   // and:
4044   //
4045   // int i; // first line
4046   //  // second line
4047   //
4048   // and:
4049   //
4050   // do { // first line
4051   //      // second line
4052   //   int i;
4053   // } while (true);
4054   //
4055   // and:
4056   //
4057   // enum {
4058   //   a, // first line
4059   //    // second line
4060   //   b
4061   // };
4062   //
4063   // The second line comment doesn't continue the first in these cases:
4064   //
4065   //   // first line
4066   //  // second line
4067   //
4068   // and:
4069   //
4070   // int i; // first line
4071   // // second line
4072   //
4073   // and:
4074   //
4075   // do { // first line
4076   //   // second line
4077   //   int i;
4078   // } while (true);
4079   //
4080   // and:
4081   //
4082   // enum {
4083   //   a, // first line
4084   //   // second line
4085   // };
4086   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4087 
4088   // Scan for '{//'. If found, use the column of '{' as a min column for line
4089   // comment section continuation.
4090   const FormatToken *PreviousToken = nullptr;
4091   for (const UnwrappedLineNode &Node : Line.Tokens) {
4092     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4093         isLineComment(*Node.Tok)) {
4094       MinColumnToken = PreviousToken;
4095       break;
4096     }
4097     PreviousToken = Node.Tok;
4098 
4099     // Grab the last newline preceding a token in this unwrapped line.
4100     if (Node.Tok->NewlinesBefore > 0)
4101       MinColumnToken = Node.Tok;
4102   }
4103   if (PreviousToken && PreviousToken->is(tok::l_brace))
4104     MinColumnToken = PreviousToken;
4105 
4106   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4107                               MinColumnToken);
4108 }
4109 
4110 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4111   bool JustComments = Line->Tokens.empty();
4112   for (FormatToken *Tok : CommentsBeforeNextToken) {
4113     // Line comments that belong to the same line comment section are put on the
4114     // same line since later we might want to reflow content between them.
4115     // Additional fine-grained breaking of line comment sections is controlled
4116     // by the class BreakableLineCommentSection in case it is desirable to keep
4117     // several line comment sections in the same unwrapped line.
4118     //
4119     // FIXME: Consider putting separate line comment sections as children to the
4120     // unwrapped line instead.
4121     Tok->ContinuesLineCommentSection =
4122         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4123     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4124       addUnwrappedLine();
4125     pushToken(Tok);
4126   }
4127   if (NewlineBeforeNext && JustComments)
4128     addUnwrappedLine();
4129   CommentsBeforeNextToken.clear();
4130 }
4131 
4132 void UnwrappedLineParser::nextToken(int LevelDifference) {
4133   if (eof())
4134     return;
4135   flushComments(isOnNewLine(*FormatTok));
4136   pushToken(FormatTok);
4137   FormatToken *Previous = FormatTok;
4138   if (!Style.isJavaScript())
4139     readToken(LevelDifference);
4140   else
4141     readTokenWithJavaScriptASI();
4142   FormatTok->Previous = Previous;
4143 }
4144 
4145 void UnwrappedLineParser::distributeComments(
4146     const SmallVectorImpl<FormatToken *> &Comments,
4147     const FormatToken *NextTok) {
4148   // Whether or not a line comment token continues a line is controlled by
4149   // the method continuesLineCommentSection, with the following caveat:
4150   //
4151   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4152   // that each comment line from the trail is aligned with the next token, if
4153   // the next token exists. If a trail exists, the beginning of the maximal
4154   // trail is marked as a start of a new comment section.
4155   //
4156   // For example in this code:
4157   //
4158   // int a; // line about a
4159   //   // line 1 about b
4160   //   // line 2 about b
4161   //   int b;
4162   //
4163   // the two lines about b form a maximal trail, so there are two sections, the
4164   // first one consisting of the single comment "// line about a" and the
4165   // second one consisting of the next two comments.
4166   if (Comments.empty())
4167     return;
4168   bool ShouldPushCommentsInCurrentLine = true;
4169   bool HasTrailAlignedWithNextToken = false;
4170   unsigned StartOfTrailAlignedWithNextToken = 0;
4171   if (NextTok) {
4172     // We are skipping the first element intentionally.
4173     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4174       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4175         HasTrailAlignedWithNextToken = true;
4176         StartOfTrailAlignedWithNextToken = i;
4177       }
4178     }
4179   }
4180   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4181     FormatToken *FormatTok = Comments[i];
4182     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4183       FormatTok->ContinuesLineCommentSection = false;
4184     } else {
4185       FormatTok->ContinuesLineCommentSection =
4186           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4187     }
4188     if (!FormatTok->ContinuesLineCommentSection &&
4189         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4190       ShouldPushCommentsInCurrentLine = false;
4191     }
4192     if (ShouldPushCommentsInCurrentLine)
4193       pushToken(FormatTok);
4194     else
4195       CommentsBeforeNextToken.push_back(FormatTok);
4196   }
4197 }
4198 
4199 void UnwrappedLineParser::readToken(int LevelDifference) {
4200   SmallVector<FormatToken *, 1> Comments;
4201   bool PreviousWasComment = false;
4202   bool FirstNonCommentOnLine = false;
4203   do {
4204     FormatTok = Tokens->getNextToken();
4205     assert(FormatTok);
4206     while (FormatTok->getType() == TT_ConflictStart ||
4207            FormatTok->getType() == TT_ConflictEnd ||
4208            FormatTok->getType() == TT_ConflictAlternative) {
4209       if (FormatTok->getType() == TT_ConflictStart)
4210         conditionalCompilationStart(/*Unreachable=*/false);
4211       else if (FormatTok->getType() == TT_ConflictAlternative)
4212         conditionalCompilationAlternative();
4213       else if (FormatTok->getType() == TT_ConflictEnd)
4214         conditionalCompilationEnd();
4215       FormatTok = Tokens->getNextToken();
4216       FormatTok->MustBreakBefore = true;
4217     }
4218 
4219     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4220                                       const FormatToken &Tok,
4221                                       bool PreviousWasComment) {
4222       auto IsFirstOnLine = [](const FormatToken &Tok) {
4223         return Tok.HasUnescapedNewline || Tok.IsFirst;
4224       };
4225 
4226       // Consider preprocessor directives preceded by block comments as first
4227       // on line.
4228       if (PreviousWasComment)
4229         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4230       return IsFirstOnLine(Tok);
4231     };
4232 
4233     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4234         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4235     PreviousWasComment = FormatTok->is(tok::comment);
4236 
4237     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4238            FirstNonCommentOnLine) {
4239       distributeComments(Comments, FormatTok);
4240       Comments.clear();
4241       // If there is an unfinished unwrapped line, we flush the preprocessor
4242       // directives only after that unwrapped line was finished later.
4243       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4244       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4245       assert((LevelDifference >= 0 ||
4246               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4247              "LevelDifference makes Line->Level negative");
4248       Line->Level += LevelDifference;
4249       // Comments stored before the preprocessor directive need to be output
4250       // before the preprocessor directive, at the same level as the
4251       // preprocessor directive, as we consider them to apply to the directive.
4252       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4253           PPBranchLevel > 0) {
4254         Line->Level += PPBranchLevel;
4255       }
4256       flushComments(isOnNewLine(*FormatTok));
4257       parsePPDirective();
4258       PreviousWasComment = FormatTok->is(tok::comment);
4259       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4260           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4261     }
4262 
4263     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4264         !Line->InPPDirective) {
4265       continue;
4266     }
4267 
4268     if (!FormatTok->is(tok::comment)) {
4269       distributeComments(Comments, FormatTok);
4270       Comments.clear();
4271       return;
4272     }
4273 
4274     Comments.push_back(FormatTok);
4275   } while (!eof());
4276 
4277   distributeComments(Comments, nullptr);
4278   Comments.clear();
4279 }
4280 
4281 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4282   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4283   if (MustBreakBeforeNextToken) {
4284     Line->Tokens.back().Tok->MustBreakBefore = true;
4285     MustBreakBeforeNextToken = false;
4286   }
4287 }
4288 
4289 } // end namespace format
4290 } // end namespace clang
4291