1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 #include <utility>
24 
25 #define DEBUG_TYPE "format-parser"
26 
27 namespace clang {
28 namespace format {
29 
30 class FormatTokenSource {
31 public:
32   virtual ~FormatTokenSource() {}
33 
34   // Returns the next token in the token stream.
35   virtual FormatToken *getNextToken() = 0;
36 
37   // Returns the token preceding the token returned by the last call to
38   // getNextToken() in the token stream, or nullptr if no such token exists.
39   virtual FormatToken *getPreviousToken() = 0;
40 
41   // Returns the token that would be returned by the next call to
42   // getNextToken().
43   virtual FormatToken *peekNextToken() = 0;
44 
45   // Returns the token that would be returned after the next N calls to
46   // getNextToken(). N needs to be greater than zero, and small enough that
47   // there are still tokens. Check for tok::eof with N-1 before calling it with
48   // N.
49   virtual FormatToken *peekNextToken(int N) = 0;
50 
51   // Returns whether we are at the end of the file.
52   // This can be different from whether getNextToken() returned an eof token
53   // when the FormatTokenSource is a view on a part of the token stream.
54   virtual bool isEOF() = 0;
55 
56   // Gets the current position in the token stream, to be used by setPosition().
57   virtual unsigned getPosition() = 0;
58 
59   // Resets the token stream to the state it was in when getPosition() returned
60   // Position, and return the token at that position in the stream.
61   virtual FormatToken *setPosition(unsigned Position) = 0;
62 };
63 
64 namespace {
65 
66 class ScopedDeclarationState {
67 public:
68   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
69                          bool MustBeDeclaration)
70       : Line(Line), Stack(Stack) {
71     Line.MustBeDeclaration = MustBeDeclaration;
72     Stack.push_back(MustBeDeclaration);
73   }
74   ~ScopedDeclarationState() {
75     Stack.pop_back();
76     if (!Stack.empty())
77       Line.MustBeDeclaration = Stack.back();
78     else
79       Line.MustBeDeclaration = true;
80   }
81 
82 private:
83   UnwrappedLine &Line;
84   llvm::BitVector &Stack;
85 };
86 
87 static bool isLineComment(const FormatToken &FormatTok) {
88   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
89 }
90 
91 // Checks if \p FormatTok is a line comment that continues the line comment
92 // \p Previous. The original column of \p MinColumnToken is used to determine
93 // whether \p FormatTok is indented enough to the right to continue \p Previous.
94 static bool continuesLineComment(const FormatToken &FormatTok,
95                                  const FormatToken *Previous,
96                                  const FormatToken *MinColumnToken) {
97   if (!Previous || !MinColumnToken)
98     return false;
99   unsigned MinContinueColumn =
100       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
101   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
102          isLineComment(*Previous) &&
103          FormatTok.OriginalColumn >= MinContinueColumn;
104 }
105 
106 class ScopedMacroState : public FormatTokenSource {
107 public:
108   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
109                    FormatToken *&ResetToken)
110       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
111         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
112         Token(nullptr), PreviousToken(nullptr) {
113     FakeEOF.Tok.startToken();
114     FakeEOF.Tok.setKind(tok::eof);
115     TokenSource = this;
116     Line.Level = 0;
117     Line.InPPDirective = true;
118   }
119 
120   ~ScopedMacroState() override {
121     TokenSource = PreviousTokenSource;
122     ResetToken = Token;
123     Line.InPPDirective = false;
124     Line.Level = PreviousLineLevel;
125   }
126 
127   FormatToken *getNextToken() override {
128     // The \c UnwrappedLineParser guards against this by never calling
129     // \c getNextToken() after it has encountered the first eof token.
130     assert(!eof());
131     PreviousToken = Token;
132     Token = PreviousTokenSource->getNextToken();
133     if (eof())
134       return &FakeEOF;
135     return Token;
136   }
137 
138   FormatToken *getPreviousToken() override {
139     return PreviousTokenSource->getPreviousToken();
140   }
141 
142   FormatToken *peekNextToken() override {
143     if (eof())
144       return &FakeEOF;
145     return PreviousTokenSource->peekNextToken();
146   }
147 
148   FormatToken *peekNextToken(int N) override {
149     assert(N > 0);
150     if (eof())
151       return &FakeEOF;
152     return PreviousTokenSource->peekNextToken(N);
153   }
154 
155   bool isEOF() override { return PreviousTokenSource->isEOF(); }
156 
157   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
158 
159   FormatToken *setPosition(unsigned Position) override {
160     PreviousToken = nullptr;
161     Token = PreviousTokenSource->setPosition(Position);
162     return Token;
163   }
164 
165 private:
166   bool eof() {
167     return Token && Token->HasUnescapedNewline &&
168            !continuesLineComment(*Token, PreviousToken,
169                                  /*MinColumnToken=*/PreviousToken);
170   }
171 
172   FormatToken FakeEOF;
173   UnwrappedLine &Line;
174   FormatTokenSource *&TokenSource;
175   FormatToken *&ResetToken;
176   unsigned PreviousLineLevel;
177   FormatTokenSource *PreviousTokenSource;
178 
179   FormatToken *Token;
180   FormatToken *PreviousToken;
181 };
182 
183 } // end anonymous namespace
184 
185 class ScopedLineState {
186 public:
187   ScopedLineState(UnwrappedLineParser &Parser,
188                   bool SwitchToPreprocessorLines = false)
189       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
190     if (SwitchToPreprocessorLines)
191       Parser.CurrentLines = &Parser.PreprocessorDirectives;
192     else if (!Parser.Line->Tokens.empty())
193       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
194     PreBlockLine = std::move(Parser.Line);
195     Parser.Line = std::make_unique<UnwrappedLine>();
196     Parser.Line->Level = PreBlockLine->Level;
197     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
198   }
199 
200   ~ScopedLineState() {
201     if (!Parser.Line->Tokens.empty())
202       Parser.addUnwrappedLine();
203     assert(Parser.Line->Tokens.empty());
204     Parser.Line = std::move(PreBlockLine);
205     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
206       Parser.MustBreakBeforeNextToken = true;
207     Parser.CurrentLines = OriginalLines;
208   }
209 
210 private:
211   UnwrappedLineParser &Parser;
212 
213   std::unique_ptr<UnwrappedLine> PreBlockLine;
214   SmallVectorImpl<UnwrappedLine> *OriginalLines;
215 };
216 
217 class CompoundStatementIndenter {
218 public:
219   CompoundStatementIndenter(UnwrappedLineParser *Parser,
220                             const FormatStyle &Style, unsigned &LineLevel)
221       : CompoundStatementIndenter(Parser, LineLevel,
222                                   Style.BraceWrapping.AfterControlStatement,
223                                   Style.BraceWrapping.IndentBraces) {}
224   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
225                             bool WrapBrace, bool IndentBrace)
226       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
227     if (WrapBrace)
228       Parser->addUnwrappedLine();
229     if (IndentBrace)
230       ++LineLevel;
231   }
232   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
233 
234 private:
235   unsigned &LineLevel;
236   unsigned OldLineLevel;
237 };
238 
239 namespace {
240 
241 class IndexedTokenSource : public FormatTokenSource {
242 public:
243   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
244       : Tokens(Tokens), Position(-1) {}
245 
246   FormatToken *getNextToken() override {
247     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
248       LLVM_DEBUG({
249         llvm::dbgs() << "Next ";
250         dbgToken(Position);
251       });
252       return Tokens[Position];
253     }
254     ++Position;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Next ";
257       dbgToken(Position);
258     });
259     return Tokens[Position];
260   }
261 
262   FormatToken *getPreviousToken() override {
263     return Position > 0 ? Tokens[Position - 1] : nullptr;
264   }
265 
266   FormatToken *peekNextToken() override {
267     int Next = Position + 1;
268     LLVM_DEBUG({
269       llvm::dbgs() << "Peeking ";
270       dbgToken(Next);
271     });
272     return Tokens[Next];
273   }
274 
275   FormatToken *peekNextToken(int N) override {
276     assert(N > 0);
277     int Next = Position + N;
278     LLVM_DEBUG({
279       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
280       dbgToken(Next);
281     });
282     return Tokens[Next];
283   }
284 
285   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
286 
287   unsigned getPosition() override {
288     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
289     assert(Position >= 0);
290     return Position;
291   }
292 
293   FormatToken *setPosition(unsigned P) override {
294     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
295     Position = P;
296     return Tokens[Position];
297   }
298 
299   void reset() { Position = -1; }
300 
301 private:
302   void dbgToken(int Position, llvm::StringRef Indent = "") {
303     FormatToken *Tok = Tokens[Position];
304     llvm::dbgs() << Indent << "[" << Position
305                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
306                  << ", Macro: " << !!Tok->MacroCtx << "\n";
307   }
308 
309   ArrayRef<FormatToken *> Tokens;
310   int Position;
311 };
312 
313 } // end anonymous namespace
314 
315 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
316                                          const AdditionalKeywords &Keywords,
317                                          unsigned FirstStartColumn,
318                                          ArrayRef<FormatToken *> Tokens,
319                                          UnwrappedLineConsumer &Callback)
320     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
321       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
322       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
323       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
324       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
325                        ? IG_Rejected
326                        : IG_Inited),
327       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
328 
329 void UnwrappedLineParser::reset() {
330   PPBranchLevel = -1;
331   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
332                      ? IG_Rejected
333                      : IG_Inited;
334   IncludeGuardToken = nullptr;
335   Line.reset(new UnwrappedLine);
336   CommentsBeforeNextToken.clear();
337   FormatTok = nullptr;
338   MustBreakBeforeNextToken = false;
339   PreprocessorDirectives.clear();
340   CurrentLines = &Lines;
341   DeclarationScopeStack.clear();
342   NestedTooDeep.clear();
343   PPStack.clear();
344   Line->FirstStartColumn = FirstStartColumn;
345 }
346 
347 void UnwrappedLineParser::parse() {
348   IndexedTokenSource TokenSource(AllTokens);
349   Line->FirstStartColumn = FirstStartColumn;
350   do {
351     LLVM_DEBUG(llvm::dbgs() << "----\n");
352     reset();
353     Tokens = &TokenSource;
354     TokenSource.reset();
355 
356     readToken();
357     parseFile();
358 
359     // If we found an include guard then all preprocessor directives (other than
360     // the guard) are over-indented by one.
361     if (IncludeGuard == IG_Found)
362       for (auto &Line : Lines)
363         if (Line.InPPDirective && Line.Level > 0)
364           --Line.Level;
365 
366     // Create line with eof token.
367     pushToken(FormatTok);
368     addUnwrappedLine();
369 
370     for (const UnwrappedLine &Line : Lines)
371       Callback.consumeUnwrappedLine(Line);
372 
373     Callback.finishRun();
374     Lines.clear();
375     while (!PPLevelBranchIndex.empty() &&
376            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
377       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
378       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
379     }
380     if (!PPLevelBranchIndex.empty()) {
381       ++PPLevelBranchIndex.back();
382       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
383       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
384     }
385   } while (!PPLevelBranchIndex.empty());
386 }
387 
388 void UnwrappedLineParser::parseFile() {
389   // The top-level context in a file always has declarations, except for pre-
390   // processor directives and JavaScript files.
391   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
392   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
393                                           MustBeDeclaration);
394   if (Style.Language == FormatStyle::LK_TextProto)
395     parseBracedList();
396   else
397     parseLevel(/*HasOpeningBrace=*/false, /*CanContainBracedList=*/true);
398   // Make sure to format the remaining tokens.
399   //
400   // LK_TextProto is special since its top-level is parsed as the body of a
401   // braced list, which does not necessarily have natural line separators such
402   // as a semicolon. Comments after the last entry that have been determined to
403   // not belong to that line, as in:
404   //   key: value
405   //   // endfile comment
406   // do not have a chance to be put on a line of their own until this point.
407   // Here we add this newline before end-of-file comments.
408   if (Style.Language == FormatStyle::LK_TextProto &&
409       !CommentsBeforeNextToken.empty())
410     addUnwrappedLine();
411   flushComments(true);
412   addUnwrappedLine();
413 }
414 
415 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
416   do {
417     switch (FormatTok->Tok.getKind()) {
418     case tok::l_brace:
419       return;
420     default:
421       if (FormatTok->is(Keywords.kw_where)) {
422         addUnwrappedLine();
423         nextToken();
424         parseCSharpGenericTypeConstraint();
425         break;
426       }
427       nextToken();
428       break;
429     }
430   } while (!eof());
431 }
432 
433 void UnwrappedLineParser::parseCSharpAttribute() {
434   int UnpairedSquareBrackets = 1;
435   do {
436     switch (FormatTok->Tok.getKind()) {
437     case tok::r_square:
438       nextToken();
439       --UnpairedSquareBrackets;
440       if (UnpairedSquareBrackets == 0) {
441         addUnwrappedLine();
442         return;
443       }
444       break;
445     case tok::l_square:
446       ++UnpairedSquareBrackets;
447       nextToken();
448       break;
449     default:
450       nextToken();
451       break;
452     }
453   } while (!eof());
454 }
455 
456 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
457   if (!Lines.empty() && Lines.back().InPPDirective)
458     return true;
459 
460   const FormatToken *Previous = Tokens->getPreviousToken();
461   return Previous && Previous->is(tok::comment) &&
462          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
463 }
464 
465 /// \brief Parses a level, that is ???.
466 /// \param HasOpeningBrace If that level is started by an opening brace.
467 /// \param CanContainBracedList If the content can contain (at any level) a
468 /// braced list.
469 /// \param NextLBracesType The type for left brace found in this level.
470 /// \returns true if a simple block, or false otherwise. (A simple block has a
471 /// single statement.)
472 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace,
473                                      bool CanContainBracedList,
474                                      IfStmtKind *IfKind,
475                                      TokenType NextLBracesType) {
476   auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
477                                   ? TT_BracedListLBrace
478                                   : TT_Unknown;
479   const bool IsPrecededByCommentOrPPDirective =
480       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
481   bool HasLabel = false;
482   unsigned StatementCount = 0;
483   bool SwitchLabelEncountered = false;
484   do {
485     if (FormatTok->getType() == TT_AttributeMacro) {
486       nextToken();
487       continue;
488     }
489     tok::TokenKind kind = FormatTok->Tok.getKind();
490     if (FormatTok->getType() == TT_MacroBlockBegin)
491       kind = tok::l_brace;
492     else if (FormatTok->getType() == TT_MacroBlockEnd)
493       kind = tok::r_brace;
494 
495     auto ParseDefault = [this, HasOpeningBrace, IfKind, NextLevelLBracesType,
496                          &HasLabel, &StatementCount] {
497       parseStructuralElement(IfKind, !HasOpeningBrace, NextLevelLBracesType,
498                              HasLabel ? nullptr : &HasLabel);
499       ++StatementCount;
500       assert(StatementCount > 0 && "StatementCount overflow!");
501     };
502 
503     switch (kind) {
504     case tok::comment:
505       nextToken();
506       addUnwrappedLine();
507       break;
508     case tok::l_brace:
509       if (NextLBracesType != TT_Unknown)
510         FormatTok->setFinalizedType(NextLBracesType);
511       else if (FormatTok->Previous &&
512                FormatTok->Previous->ClosesRequiresClause) {
513         // We need the 'default' case here to correctly parse a function
514         // l_brace.
515         ParseDefault();
516         continue;
517       }
518       if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
519           tryToParseBracedList())
520         continue;
521       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
522                  /*MunchSemi=*/true, /*UnindentWhitesmithBraces=*/false,
523                  CanContainBracedList,
524                  /*NextLBracesType=*/NextLBracesType);
525       ++StatementCount;
526       assert(StatementCount > 0 && "StatementCount overflow!");
527       addUnwrappedLine();
528       break;
529     case tok::r_brace:
530       if (HasOpeningBrace) {
531         if (!Style.RemoveBracesLLVM)
532           return false;
533         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
534             IsPrecededByCommentOrPPDirective ||
535             precededByCommentOrPPDirective())
536           return false;
537         const FormatToken *Next = Tokens->peekNextToken();
538         return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
539       }
540       nextToken();
541       addUnwrappedLine();
542       break;
543     case tok::kw_default: {
544       unsigned StoredPosition = Tokens->getPosition();
545       FormatToken *Next;
546       do {
547         Next = Tokens->getNextToken();
548         assert(Next);
549       } while (Next->is(tok::comment));
550       FormatTok = Tokens->setPosition(StoredPosition);
551       if (Next->isNot(tok::colon)) {
552         // default not followed by ':' is not a case label; treat it like
553         // an identifier.
554         parseStructuralElement();
555         break;
556       }
557       // Else, if it is 'default:', fall through to the case handling.
558       LLVM_FALLTHROUGH;
559     }
560     case tok::kw_case:
561       if (Style.isJavaScript() && Line->MustBeDeclaration) {
562         // A 'case: string' style field declaration.
563         parseStructuralElement();
564         break;
565       }
566       if (!SwitchLabelEncountered &&
567           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
568         ++Line->Level;
569       SwitchLabelEncountered = true;
570       parseStructuralElement();
571       break;
572     case tok::l_square:
573       if (Style.isCSharp()) {
574         nextToken();
575         parseCSharpAttribute();
576         break;
577       }
578       if (handleCppAttributes())
579         break;
580       LLVM_FALLTHROUGH;
581     default:
582       ParseDefault();
583       break;
584     }
585   } while (!eof());
586   return false;
587 }
588 
/// Scans ahead from the current token, which must be an opening brace, and
/// assigns a block kind (BK_Block or BK_BracedInit) to the braces it meets.
/// The scan runs until the brace stack becomes empty or eof is reached; the
/// token stream is then rewound to where it was on entry.
/// \param ExpectClassBody When set, a ';' directly after the closing brace of
/// the outermost brace pair does not count as evidence for a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        // For any other preceding token the block kind is left untouched here.
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only a still undecided opening brace is classified here.
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis);

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          // A ';' after the closing brace counts unless a class body is
          // expected and this is the outermost brace pair.
          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            // Note that this overrides whatever was concluded above.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Both braces of the pair get the same kind.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only identifiers typed as statement macros share the handling below.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens turns the innermost undecided brace into a block.
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (FormatToken *LBrace : LBraceStack)
    if (LBrace->is(BK_Unknown))
      LBrace->setBlockKind(BK_Block);

  // Rewind the token stream to where the scan started.
  FormatTok = Tokens->setPosition(StoredPosition);
}
740 
/// Folds the std::hash of \p v into \p seed. The new seed is the old seed
/// XOR-ed with the sum of that hash, the constant 0x9e3779b9, and the old seed
/// shifted left by 6 and right by 2.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
746 
747 size_t UnwrappedLineParser::computePPHash() const {
748   size_t h = 0;
749   for (const auto &i : PPStack) {
750     hash_combine(h, size_t(i.Kind));
751     hash_combine(h, i.Line);
752   }
753   return h;
754 }
755 
756 // Checks whether \p ParsedLine might fit on a single line. We must clone the
757 // tokens of \p ParsedLine before running the token annotator on it so that we
758 // can restore them afterward.
759 bool UnwrappedLineParser::mightFitOnOneLine(UnwrappedLine &ParsedLine) const {
760   const auto ColumnLimit = Style.ColumnLimit;
761   if (ColumnLimit == 0)
762     return true;
763 
764   auto &Tokens = ParsedLine.Tokens;
765   assert(!Tokens.empty());
766   const auto *LastToken = Tokens.back().Tok;
767   assert(LastToken);
768 
769   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
770 
771   int Index = 0;
772   for (const auto &Token : Tokens) {
773     assert(Token.Tok);
774     auto &SavedToken = SavedTokens[Index++];
775     SavedToken.Tok = new FormatToken;
776     SavedToken.Tok->copyFrom(*Token.Tok);
777     SavedToken.Children = std::move(Token.Children);
778   }
779 
780   AnnotatedLine Line(ParsedLine);
781   assert(Line.Last == LastToken);
782 
783   TokenAnnotator Annotator(Style, Keywords);
784   Annotator.annotate(Line);
785   Annotator.calculateFormattingInformation(Line);
786 
787   const int Length = LastToken->TotalLength;
788 
789   Index = 0;
790   for (auto &Token : Tokens) {
791     const auto &SavedToken = SavedTokens[Index++];
792     Token.Tok->copyFrom(*SavedToken.Tok);
793     Token.Children = std::move(SavedToken.Children);
794     delete SavedToken.Tok;
795   }
796 
797   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
798 }
799 
// Parses a brace-delimited block (or a TT_MacroBlockBegin/TT_MacroBlockEnd
// pair) that starts at the current token and returns the IfStmtKind that
// parseLevel() reported for the block's contents.
//
// \p MustBeDeclaration is forwarded to the ScopedDeclarationState created for
// the block. \p AddLevels is the number of levels added to the line level for
// the block's contents. \p MunchSemi makes a ';' that directly follows the
// block be consumed together with it. \p UnindentWhitesmithsBraces backs the
// level up by one after the opening line has been emitted.
// \p CanContainBracedList and \p NextLBracesType are passed through to
// parseLevel().
UnwrappedLineParser::IfStmtKind
UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                bool MunchSemi, bool UnindentWhitesmithsBraces,
                                bool CanContainBracedList,
                                TokenType NextLBracesType) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Keep the opening token around; it is linked to the closing brace below.
  FormatToken *Tok = FormatTok;
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // computePPHash() value at the opening token; compared with the value at the
  // closing token before the opening/closing lines are cross-linked.
  size_t PPStartHash = computePPHash();

  // The level to restore once the block is finished (or abandoned).
  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  // A macro block opener may be followed by an argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Only when lines are appended to the top-level `Lines` is the number of
  // collected preprocessor directives subtracted from the opening line index.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  // In Whitesmiths mode the level was already bumped before the brace above.
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  // Parse the contents; parseLevel() fills in IfKind and reports whether the
  // block counts as a "simple" block.
  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock = parseLevel(
      /*HasOpeningBrace=*/true, CanContainBracedList, &IfKind, NextLBracesType);

  // Ran out of tokens before the block was closed.
  if (eof())
    return IfKind;

  // The contents did not end at the expected closing token: restore the level,
  // mark the current token as a block and give up on this block.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // For a simple block in real braces, link the two braces through
  // MatchingParen, provided the token before the closing brace is not a
  // non-optional '}' and the last emitted line might fit on one line.
  if (SimpleBlock && Tok->is(tok::l_brace)) {
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      assert(!CurrentLines->empty());
      if (mightFitOnOneLine(CurrentLines->back())) {
        Tok->MatchingParen = FormatTok;
        FormatTok->MatchingParen = Tok;
      }
    }
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  // A macro block terminator may be followed by an argument list as well.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Cross-link the opening and closing lines only when computePPHash() gave
  // the same value at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}
906 
907 static bool isGoogScope(const UnwrappedLine &Line) {
908   // FIXME: Closure-library specific stuff should not be hard-coded but be
909   // configurable.
910   if (Line.Tokens.size() < 4)
911     return false;
912   auto I = Line.Tokens.begin();
913   if (I->Tok->TokenText != "goog")
914     return false;
915   ++I;
916   if (I->Tok->isNot(tok::period))
917     return false;
918   ++I;
919   if (I->Tok->TokenText != "scope")
920     return false;
921   ++I;
922   return I->Tok->is(tok::l_paren);
923 }
924 
925 static bool isIIFE(const UnwrappedLine &Line,
926                    const AdditionalKeywords &Keywords) {
927   // Look for the start of an immediately invoked anonymous function.
928   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
929   // This is commonly done in JavaScript to create a new, anonymous scope.
930   // Example: (function() { ... })()
931   if (Line.Tokens.size() < 3)
932     return false;
933   auto I = Line.Tokens.begin();
934   if (I->Tok->isNot(tok::l_paren))
935     return false;
936   ++I;
937   if (I->Tok->isNot(Keywords.kw_function))
938     return false;
939   ++I;
940   return I->Tok->is(tok::l_paren);
941 }
942 
943 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
944                                    const FormatToken &InitialToken) {
945   tok::TokenKind Kind = InitialToken.Tok.getKind();
946   if (InitialToken.is(TT_NamespaceMacro))
947     Kind = tok::kw_namespace;
948 
949   switch (Kind) {
950   case tok::kw_namespace:
951     return Style.BraceWrapping.AfterNamespace;
952   case tok::kw_class:
953     return Style.BraceWrapping.AfterClass;
954   case tok::kw_union:
955     return Style.BraceWrapping.AfterUnion;
956   case tok::kw_struct:
957     return Style.BraceWrapping.AfterStruct;
958   case tok::kw_enum:
959     return Style.BraceWrapping.AfterEnum;
960   default:
961     return false;
962   }
963 }
964 
965 void UnwrappedLineParser::parseChildBlock(
966     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
967   FormatTok->setBlockKind(BK_Block);
968   nextToken();
969   {
970     bool SkipIndent = (Style.isJavaScript() &&
971                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
972     ScopedLineState LineState(*this);
973     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
974                                             /*MustBeDeclaration=*/false);
975     Line->Level += SkipIndent ? 0 : 1;
976     parseLevel(/*HasOpeningBrace=*/true, CanContainBracedList,
977                /*IfKind=*/nullptr, NextLBracesType);
978     flushComments(isOnNewLine(*FormatTok));
979     Line->Level -= SkipIndent ? 0 : 1;
980   }
981   nextToken();
982 }
983 
984 void UnwrappedLineParser::parsePPDirective() {
985   assert(FormatTok->is(tok::hash) && "'#' expected");
986   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
987 
988   nextToken();
989 
990   if (!FormatTok->Tok.getIdentifierInfo()) {
991     parsePPUnknown();
992     return;
993   }
994 
995   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
996   case tok::pp_define:
997     parsePPDefine();
998     return;
999   case tok::pp_if:
1000     parsePPIf(/*IfDef=*/false);
1001     break;
1002   case tok::pp_ifdef:
1003   case tok::pp_ifndef:
1004     parsePPIf(/*IfDef=*/true);
1005     break;
1006   case tok::pp_else:
1007     parsePPElse();
1008     break;
1009   case tok::pp_elifdef:
1010   case tok::pp_elifndef:
1011   case tok::pp_elif:
1012     parsePPElIf();
1013     break;
1014   case tok::pp_endif:
1015     parsePPEndIf();
1016     break;
1017   default:
1018     parsePPUnknown();
1019     break;
1020   }
1021 }
1022 
1023 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1024   size_t Line = CurrentLines->size();
1025   if (CurrentLines == &PreprocessorDirectives)
1026     Line += Lines.size();
1027 
1028   if (Unreachable ||
1029       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
1030     PPStack.push_back({PP_Unreachable, Line});
1031   else
1032     PPStack.push_back({PP_Conditional, Line});
1033 }
1034 
1035 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1036   ++PPBranchLevel;
1037   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1038   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1039     PPLevelBranchIndex.push_back(0);
1040     PPLevelBranchCount.push_back(0);
1041   }
1042   PPChainBranchIndex.push(0);
1043   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1044   conditionalCompilationCondition(Unreachable || Skip);
1045 }
1046 
1047 void UnwrappedLineParser::conditionalCompilationAlternative() {
1048   if (!PPStack.empty())
1049     PPStack.pop_back();
1050   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1051   if (!PPChainBranchIndex.empty())
1052     ++PPChainBranchIndex.top();
1053   conditionalCompilationCondition(
1054       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1055       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1056 }
1057 
1058 void UnwrappedLineParser::conditionalCompilationEnd() {
1059   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1060   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1061     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1062       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1063   }
1064   // Guard against #endif's without #if.
1065   if (PPBranchLevel > -1)
1066     --PPBranchLevel;
1067   if (!PPChainBranchIndex.empty())
1068     PPChainBranchIndex.pop();
1069   if (!PPStack.empty())
1070     PPStack.pop_back();
1071 }
1072 
1073 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1074   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1075   nextToken();
1076   bool Unreachable = false;
1077   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1078     Unreachable = true;
1079   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1080     Unreachable = true;
1081   conditionalCompilationStart(Unreachable);
1082   FormatToken *IfCondition = FormatTok;
1083   // If there's a #ifndef on the first line, and the only lines before it are
1084   // comments, it could be an include guard.
1085   bool MaybeIncludeGuard = IfNDef;
1086   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
1087     for (auto &Line : Lines) {
1088       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1089         MaybeIncludeGuard = false;
1090         IncludeGuard = IG_Rejected;
1091         break;
1092       }
1093     }
1094   --PPBranchLevel;
1095   parsePPUnknown();
1096   ++PPBranchLevel;
1097   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1098     IncludeGuard = IG_IfNdefed;
1099     IncludeGuardToken = IfCondition;
1100   }
1101 }
1102 
1103 void UnwrappedLineParser::parsePPElse() {
1104   // If a potential include guard has an #else, it's not an include guard.
1105   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1106     IncludeGuard = IG_Rejected;
1107   conditionalCompilationAlternative();
1108   if (PPBranchLevel > -1)
1109     --PPBranchLevel;
1110   parsePPUnknown();
1111   ++PPBranchLevel;
1112 }
1113 
1114 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1115 
1116 void UnwrappedLineParser::parsePPEndIf() {
1117   conditionalCompilationEnd();
1118   parsePPUnknown();
1119   // If the #endif of a potential include guard is the last thing in the file,
1120   // then we found an include guard.
1121   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1122       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1123     IncludeGuard = IG_Found;
1124 }
1125 
1126 void UnwrappedLineParser::parsePPDefine() {
1127   nextToken();
1128 
1129   if (!FormatTok->Tok.getIdentifierInfo()) {
1130     IncludeGuard = IG_Rejected;
1131     IncludeGuardToken = nullptr;
1132     parsePPUnknown();
1133     return;
1134   }
1135 
1136   if (IncludeGuard == IG_IfNdefed &&
1137       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1138     IncludeGuard = IG_Defined;
1139     IncludeGuardToken = nullptr;
1140     for (auto &Line : Lines) {
1141       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1142         IncludeGuard = IG_Rejected;
1143         break;
1144       }
1145     }
1146   }
1147 
1148   // In the context of a define, even keywords should be treated as normal
1149   // identifiers. Setting the kind to identifier is not enough, because we need
1150   // to treat additional keywords like __except as well, which are already
1151   // identifiers. Setting the identifier info to null interferes with include
1152   // guard processing above, and changes preprocessing nesting.
1153   FormatTok->Tok.setKind(tok::identifier);
1154   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1155   nextToken();
1156   if (FormatTok->Tok.getKind() == tok::l_paren &&
1157       !FormatTok->hasWhitespaceBefore())
1158     parseParens();
1159   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1160     Line->Level += PPBranchLevel + 1;
1161   addUnwrappedLine();
1162   ++Line->Level;
1163 
1164   // Errors during a preprocessor directive can only affect the layout of the
1165   // preprocessor directive, and thus we ignore them. An alternative approach
1166   // would be to use the same approach we use on the file level (no
1167   // re-indentation if there was a structural error) within the macro
1168   // definition.
1169   parseFile();
1170 }
1171 
1172 void UnwrappedLineParser::parsePPUnknown() {
1173   do {
1174     nextToken();
1175   } while (!eof());
1176   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1177     Line->Level += PPBranchLevel + 1;
1178   addUnwrappedLine();
1179 }
1180 
1181 // Here we exclude certain tokens that are not usually the first token in an
1182 // unwrapped line. This is used in attempt to distinguish macro calls without
1183 // trailing semicolons from other constructs split to several lines.
1184 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1185   // Semicolon can be a null-statement, l_square can be a start of a macro or
1186   // a C++11 attribute, but this doesn't seem to be common.
1187   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1188          Tok.isNot(TT_AttributeSquare) &&
1189          // Tokens that can only be used as binary operators and a part of
1190          // overloaded operator names.
1191          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1192          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1193          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1194          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1195          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1196          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1197          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1198          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1199          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1200          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1201          Tok.isNot(tok::lesslessequal) &&
1202          // Colon is used in labels, base class lists, initializer lists,
1203          // range-based for loops, ternary operator, but should never be the
1204          // first token in an unwrapped line.
1205          Tok.isNot(tok::colon) &&
1206          // 'noexcept' is a trailing annotation.
1207          Tok.isNot(tok::kw_noexcept);
1208 }
1209 
1210 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1211                           const FormatToken *FormatTok) {
1212   // FIXME: This returns true for C/C++ keywords like 'struct'.
1213   return FormatTok->is(tok::identifier) &&
1214          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1215           !FormatTok->isOneOf(
1216               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1217               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1218               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1219               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1220               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1221               Keywords.kw_instanceof, Keywords.kw_interface,
1222               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1223 }
1224 
1225 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1226                                  const FormatToken *FormatTok) {
1227   return FormatTok->Tok.isLiteral() ||
1228          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1229          mustBeJSIdent(Keywords, FormatTok);
1230 }
1231 
1232 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1233 // when encountered after a value (see mustBeJSIdentOrValue).
1234 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1235                            const FormatToken *FormatTok) {
1236   return FormatTok->isOneOf(
1237       tok::kw_return, Keywords.kw_yield,
1238       // conditionals
1239       tok::kw_if, tok::kw_else,
1240       // loops
1241       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1242       // switch/case
1243       tok::kw_switch, tok::kw_case,
1244       // exceptions
1245       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1246       // declaration
1247       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1248       Keywords.kw_async, Keywords.kw_function,
1249       // import/export
1250       Keywords.kw_import, tok::kw_export);
1251 }
1252 
1253 // Checks whether a token is a type in K&R C (aka C78).
1254 static bool isC78Type(const FormatToken &Tok) {
1255   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1256                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1257                      tok::identifier);
1258 }
1259 
1260 // This function checks whether a token starts the first parameter declaration
1261 // in a K&R C (aka C78) function definition, e.g.:
1262 //   int f(a, b)
1263 //   short a, b;
1264 //   {
1265 //      return a + b;
1266 //   }
1267 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1268                                const FormatToken *FuncName) {
1269   assert(Tok);
1270   assert(Next);
1271   assert(FuncName);
1272 
1273   if (FuncName->isNot(tok::identifier))
1274     return false;
1275 
1276   const FormatToken *Prev = FuncName->Previous;
1277   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1278     return false;
1279 
1280   if (!isC78Type(*Tok) &&
1281       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1282     return false;
1283 
1284   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1285     return false;
1286 
1287   Tok = Tok->Previous;
1288   if (!Tok || Tok->isNot(tok::r_paren))
1289     return false;
1290 
1291   Tok = Tok->Previous;
1292   if (!Tok || Tok->isNot(tok::identifier))
1293     return false;
1294 
1295   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1296 }
1297 
1298 void UnwrappedLineParser::parseModuleImport() {
1299   nextToken();
1300   while (!eof()) {
1301     if (FormatTok->is(tok::colon)) {
1302       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1303     }
1304     // Handle import <foo/bar.h> as we would an include statement.
1305     else if (FormatTok->is(tok::less)) {
1306       nextToken();
1307       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1308         // Mark tokens up to the trailing line comments as implicit string
1309         // literals.
1310         if (FormatTok->isNot(tok::comment) &&
1311             !FormatTok->TokenText.startswith("//"))
1312           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1313         nextToken();
1314       }
1315     }
1316     if (FormatTok->is(tok::semi)) {
1317       nextToken();
1318       break;
1319     }
1320     nextToken();
1321   }
1322 
1323   addUnwrappedLine();
1324 }
1325 
// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // Whether a newline separates the two tokens is taken from the first pending
  // comment if there is one, and from the next token otherwise.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  // Without a newline in between, no line is terminated here.
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: terminate the line.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on the next line, directly after a value, ']', ')', '++' or '--',
  // terminates the line unless either token is the boundary of a template
  // string expression ("${" before or "}" after).
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A declaration/statement keyword on the next line, after a value or ')',
  // terminates the line as well.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}
1371 
1372 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
1373                                                  bool IsTopLevel,
1374                                                  TokenType NextLBracesType,
1375                                                  bool *HasLabel) {
1376   if (Style.Language == FormatStyle::LK_TableGen &&
1377       FormatTok->is(tok::pp_include)) {
1378     nextToken();
1379     if (FormatTok->is(tok::string_literal))
1380       nextToken();
1381     addUnwrappedLine();
1382     return;
1383   }
1384   switch (FormatTok->Tok.getKind()) {
1385   case tok::kw_asm:
1386     nextToken();
1387     if (FormatTok->is(tok::l_brace)) {
1388       FormatTok->setFinalizedType(TT_InlineASMBrace);
1389       nextToken();
1390       while (FormatTok && FormatTok->isNot(tok::eof)) {
1391         if (FormatTok->is(tok::r_brace)) {
1392           FormatTok->setFinalizedType(TT_InlineASMBrace);
1393           nextToken();
1394           addUnwrappedLine();
1395           break;
1396         }
1397         FormatTok->Finalized = true;
1398         nextToken();
1399       }
1400     }
1401     break;
1402   case tok::kw_namespace:
1403     parseNamespace();
1404     return;
1405   case tok::kw_public:
1406   case tok::kw_protected:
1407   case tok::kw_private:
1408     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1409         Style.isCSharp())
1410       nextToken();
1411     else
1412       parseAccessSpecifier();
1413     return;
1414   case tok::kw_if:
1415     if (Style.isJavaScript() && Line->MustBeDeclaration)
1416       // field/method declaration.
1417       break;
1418     parseIfThenElse(IfKind);
1419     return;
1420   case tok::kw_for:
1421   case tok::kw_while:
1422     if (Style.isJavaScript() && Line->MustBeDeclaration)
1423       // field/method declaration.
1424       break;
1425     parseForOrWhileLoop();
1426     return;
1427   case tok::kw_do:
1428     if (Style.isJavaScript() && Line->MustBeDeclaration)
1429       // field/method declaration.
1430       break;
1431     parseDoWhile();
1432     return;
1433   case tok::kw_switch:
1434     if (Style.isJavaScript() && Line->MustBeDeclaration)
1435       // 'switch: string' field declaration.
1436       break;
1437     parseSwitch();
1438     return;
1439   case tok::kw_default:
1440     if (Style.isJavaScript() && Line->MustBeDeclaration)
1441       // 'default: string' field declaration.
1442       break;
1443     nextToken();
1444     if (FormatTok->is(tok::colon)) {
1445       parseLabel();
1446       return;
1447     }
1448     // e.g. "default void f() {}" in a Java interface.
1449     break;
1450   case tok::kw_case:
1451     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1452       // 'case: string' field declaration.
1453       nextToken();
1454       break;
1455     }
1456     parseCaseLabel();
1457     return;
1458   case tok::kw_try:
1459   case tok::kw___try:
1460     if (Style.isJavaScript() && Line->MustBeDeclaration)
1461       // field/method declaration.
1462       break;
1463     parseTryCatch();
1464     return;
1465   case tok::kw_extern:
1466     nextToken();
1467     if (FormatTok->is(tok::string_literal)) {
1468       nextToken();
1469       if (FormatTok->is(tok::l_brace)) {
1470         if (Style.BraceWrapping.AfterExternBlock)
1471           addUnwrappedLine();
1472         // Either we indent or for backwards compatibility we follow the
1473         // AfterExternBlock style.
1474         unsigned AddLevels =
1475             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1476                     (Style.BraceWrapping.AfterExternBlock &&
1477                      Style.IndentExternBlock ==
1478                          FormatStyle::IEBS_AfterExternBlock)
1479                 ? 1u
1480                 : 0u;
1481         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1482         addUnwrappedLine();
1483         return;
1484       }
1485     }
1486     break;
1487   case tok::kw_export:
1488     if (Style.isJavaScript()) {
1489       parseJavaScriptEs6ImportExport();
1490       return;
1491     }
1492     if (!Style.isCpp())
1493       break;
1494     // Handle C++ "(inline|export) namespace".
1495     LLVM_FALLTHROUGH;
1496   case tok::kw_inline:
1497     nextToken();
1498     if (FormatTok->is(tok::kw_namespace)) {
1499       parseNamespace();
1500       return;
1501     }
1502     break;
1503   case tok::identifier:
1504     if (FormatTok->is(TT_ForEachMacro)) {
1505       parseForOrWhileLoop();
1506       return;
1507     }
1508     if (FormatTok->is(TT_MacroBlockBegin)) {
1509       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1510                  /*MunchSemi=*/false);
1511       return;
1512     }
1513     if (FormatTok->is(Keywords.kw_import)) {
1514       if (Style.isJavaScript()) {
1515         parseJavaScriptEs6ImportExport();
1516         return;
1517       }
1518       if (Style.Language == FormatStyle::LK_Proto) {
1519         nextToken();
1520         if (FormatTok->is(tok::kw_public))
1521           nextToken();
1522         if (!FormatTok->is(tok::string_literal))
1523           return;
1524         nextToken();
1525         if (FormatTok->is(tok::semi))
1526           nextToken();
1527         addUnwrappedLine();
1528         return;
1529       }
1530       if (Style.isCpp()) {
1531         parseModuleImport();
1532         return;
1533       }
1534     }
1535     if (Style.isCpp() &&
1536         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1537                            Keywords.kw_slots, Keywords.kw_qslots)) {
1538       nextToken();
1539       if (FormatTok->is(tok::colon)) {
1540         nextToken();
1541         addUnwrappedLine();
1542         return;
1543       }
1544     }
1545     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1546       parseStatementMacro();
1547       return;
1548     }
1549     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1550       parseNamespace();
1551       return;
1552     }
1553     // In all other cases, parse the declaration.
1554     break;
1555   default:
1556     break;
1557   }
1558   do {
1559     const FormatToken *Previous = FormatTok->Previous;
1560     switch (FormatTok->Tok.getKind()) {
1561     case tok::at:
1562       nextToken();
1563       if (FormatTok->is(tok::l_brace)) {
1564         nextToken();
1565         parseBracedList();
1566         break;
1567       } else if (Style.Language == FormatStyle::LK_Java &&
1568                  FormatTok->is(Keywords.kw_interface)) {
1569         nextToken();
1570         break;
1571       }
1572       switch (FormatTok->Tok.getObjCKeywordID()) {
1573       case tok::objc_public:
1574       case tok::objc_protected:
1575       case tok::objc_package:
1576       case tok::objc_private:
1577         return parseAccessSpecifier();
1578       case tok::objc_interface:
1579       case tok::objc_implementation:
1580         return parseObjCInterfaceOrImplementation();
1581       case tok::objc_protocol:
1582         if (parseObjCProtocol())
1583           return;
1584         break;
1585       case tok::objc_end:
1586         return; // Handled by the caller.
1587       case tok::objc_optional:
1588       case tok::objc_required:
1589         nextToken();
1590         addUnwrappedLine();
1591         return;
1592       case tok::objc_autoreleasepool:
1593         nextToken();
1594         if (FormatTok->is(tok::l_brace)) {
1595           if (Style.BraceWrapping.AfterControlStatement ==
1596               FormatStyle::BWACS_Always)
1597             addUnwrappedLine();
1598           parseBlock();
1599         }
1600         addUnwrappedLine();
1601         return;
1602       case tok::objc_synchronized:
1603         nextToken();
1604         if (FormatTok->is(tok::l_paren))
1605           // Skip synchronization object
1606           parseParens();
1607         if (FormatTok->is(tok::l_brace)) {
1608           if (Style.BraceWrapping.AfterControlStatement ==
1609               FormatStyle::BWACS_Always)
1610             addUnwrappedLine();
1611           parseBlock();
1612         }
1613         addUnwrappedLine();
1614         return;
1615       case tok::objc_try:
1616         // This branch isn't strictly necessary (the kw_try case below would
1617         // do this too after the tok::at is parsed above).  But be explicit.
1618         parseTryCatch();
1619         return;
1620       default:
1621         break;
1622       }
1623       break;
1624     case tok::kw_concept:
1625       parseConcept();
1626       return;
1627     case tok::kw_requires: {
1628       if (Style.isCpp()) {
1629         bool ParsedClause = parseRequires();
1630         if (ParsedClause)
1631           return;
1632       } else {
1633         nextToken();
1634       }
1635       break;
1636     }
1637     case tok::kw_enum:
1638       // Ignore if this is part of "template <enum ...".
1639       if (Previous && Previous->is(tok::less)) {
1640         nextToken();
1641         break;
1642       }
1643 
1644       // parseEnum falls through and does not yet add an unwrapped line as an
1645       // enum definition can start a structural element.
1646       if (!parseEnum())
1647         break;
1648       // This only applies for C++.
1649       if (!Style.isCpp()) {
1650         addUnwrappedLine();
1651         return;
1652       }
1653       break;
1654     case tok::kw_typedef:
1655       nextToken();
1656       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1657                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1658                              Keywords.kw_CF_CLOSED_ENUM,
1659                              Keywords.kw_NS_CLOSED_ENUM))
1660         parseEnum();
1661       break;
1662     case tok::kw_struct:
1663     case tok::kw_union:
1664     case tok::kw_class:
1665       if (parseStructLike())
1666         return;
1667       break;
1668     case tok::period:
1669       nextToken();
1670       // In Java, classes have an implicit static member "class".
1671       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1672           FormatTok->is(tok::kw_class))
1673         nextToken();
1674       if (Style.isJavaScript() && FormatTok &&
1675           FormatTok->Tok.getIdentifierInfo())
1676         // JavaScript only has pseudo keywords, all keywords are allowed to
1677         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1678         nextToken();
1679       break;
1680     case tok::semi:
1681       nextToken();
1682       addUnwrappedLine();
1683       return;
1684     case tok::r_brace:
1685       addUnwrappedLine();
1686       return;
1687     case tok::l_paren: {
1688       parseParens();
1689       // Break the unwrapped line if a K&R C function definition has a parameter
1690       // declaration.
1691       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1692         break;
1693       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1694         addUnwrappedLine();
1695         return;
1696       }
1697       break;
1698     }
1699     case tok::kw_operator:
1700       nextToken();
1701       if (FormatTok->isBinaryOperator())
1702         nextToken();
1703       break;
1704     case tok::caret:
1705       nextToken();
1706       if (FormatTok->Tok.isAnyIdentifier() ||
1707           FormatTok->isSimpleTypeSpecifier())
1708         nextToken();
1709       if (FormatTok->is(tok::l_paren))
1710         parseParens();
1711       if (FormatTok->is(tok::l_brace))
1712         parseChildBlock();
1713       break;
1714     case tok::l_brace:
1715       if (NextLBracesType != TT_Unknown)
1716         FormatTok->setFinalizedType(NextLBracesType);
1717       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1718         // A block outside of parentheses must be the last part of a
1719         // structural element.
1720         // FIXME: Figure out cases where this is not true, and add projections
1721         // for them (the one we know is missing are lambdas).
1722         if (Style.Language == FormatStyle::LK_Java &&
1723             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1724           // If necessary, we could set the type to something different than
1725           // TT_FunctionLBrace.
1726           if (Style.BraceWrapping.AfterControlStatement ==
1727               FormatStyle::BWACS_Always)
1728             addUnwrappedLine();
1729         } else if (Style.BraceWrapping.AfterFunction) {
1730           addUnwrappedLine();
1731         }
1732         if (!Line->InPPDirective)
1733           FormatTok->setFinalizedType(TT_FunctionLBrace);
1734         parseBlock();
1735         addUnwrappedLine();
1736         return;
1737       }
1738       // Otherwise this was a braced init list, and the structural
1739       // element continues.
1740       break;
1741     case tok::kw_try:
1742       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1743         // field/method declaration.
1744         nextToken();
1745         break;
1746       }
1747       // We arrive here when parsing function-try blocks.
1748       if (Style.BraceWrapping.AfterFunction)
1749         addUnwrappedLine();
1750       parseTryCatch();
1751       return;
1752     case tok::identifier: {
1753       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1754           Line->MustBeDeclaration) {
1755         addUnwrappedLine();
1756         parseCSharpGenericTypeConstraint();
1757         break;
1758       }
1759       if (FormatTok->is(TT_MacroBlockEnd)) {
1760         addUnwrappedLine();
1761         return;
1762       }
1763 
1764       // Function declarations (as opposed to function expressions) are parsed
1765       // on their own unwrapped line by continuing this loop. Function
1766       // expressions (functions that are not on their own line) must not create
1767       // a new unwrapped line, so they are special cased below.
1768       size_t TokenCount = Line->Tokens.size();
1769       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1770           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1771                                                      Keywords.kw_async)))) {
1772         tryToParseJSFunction();
1773         break;
1774       }
1775       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1776           FormatTok->is(Keywords.kw_interface)) {
1777         if (Style.isJavaScript()) {
1778           // In JavaScript/TypeScript, "interface" can be used as a standalone
1779           // identifier, e.g. in `var interface = 1;`. If "interface" is
1780           // followed by another identifier, it is very like to be an actual
1781           // interface declaration.
1782           unsigned StoredPosition = Tokens->getPosition();
1783           FormatToken *Next = Tokens->getNextToken();
1784           FormatTok = Tokens->setPosition(StoredPosition);
1785           if (!mustBeJSIdent(Keywords, Next)) {
1786             nextToken();
1787             break;
1788           }
1789         }
1790         parseRecord();
1791         addUnwrappedLine();
1792         return;
1793       }
1794 
1795       if (FormatTok->is(Keywords.kw_interface)) {
1796         if (parseStructLike())
1797           return;
1798         break;
1799       }
1800 
1801       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1802         parseStatementMacro();
1803         return;
1804       }
1805 
1806       // See if the following token should start a new unwrapped line.
1807       StringRef Text = FormatTok->TokenText;
1808 
1809       FormatToken *PreviousToken = FormatTok;
1810       nextToken();
1811 
1812       // JS doesn't have macros, and within classes colons indicate fields, not
1813       // labels.
1814       if (Style.isJavaScript())
1815         break;
1816 
1817       TokenCount = Line->Tokens.size();
1818       if (TokenCount == 1 ||
1819           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1820         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1821           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1822           parseLabel(!Style.IndentGotoLabels);
1823           if (HasLabel)
1824             *HasLabel = true;
1825           return;
1826         }
1827         // Recognize function-like macro usages without trailing semicolon as
1828         // well as free-standing macros like Q_OBJECT.
1829         bool FunctionLike = FormatTok->is(tok::l_paren);
1830         if (FunctionLike)
1831           parseParens();
1832 
1833         bool FollowedByNewline =
1834             CommentsBeforeNextToken.empty()
1835                 ? FormatTok->NewlinesBefore > 0
1836                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1837 
1838         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1839             tokenCanStartNewLine(*FormatTok) && Text == Text.upper() &&
1840             !PreviousToken->isTypeFinalized()) {
1841           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1842           addUnwrappedLine();
1843           return;
1844         }
1845       }
1846       break;
1847     }
1848     case tok::equal:
1849       if ((Style.isJavaScript() || Style.isCSharp()) &&
1850           FormatTok->is(TT_FatArrow)) {
1851         tryToParseChildBlock();
1852         break;
1853       }
1854 
1855       nextToken();
1856       if (FormatTok->is(tok::l_brace)) {
1857         // Block kind should probably be set to BK_BracedInit for any language.
1858         // C# needs this change to ensure that array initialisers and object
1859         // initialisers are indented the same way.
1860         if (Style.isCSharp())
1861           FormatTok->setBlockKind(BK_BracedInit);
1862         nextToken();
1863         parseBracedList();
1864       } else if (Style.Language == FormatStyle::LK_Proto &&
1865                  FormatTok->is(tok::less)) {
1866         nextToken();
1867         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1868                         /*ClosingBraceKind=*/tok::greater);
1869       }
1870       break;
1871     case tok::l_square:
1872       parseSquare();
1873       break;
1874     case tok::kw_new:
1875       parseNew();
1876       break;
1877     case tok::kw_case:
1878       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1879         // 'case: string' field declaration.
1880         nextToken();
1881         break;
1882       }
1883       parseCaseLabel();
1884       break;
1885     default:
1886       nextToken();
1887       break;
1888     }
1889   } while (!eof());
1890 }
1891 
1892 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1893   assert(FormatTok->is(tok::l_brace));
1894   if (!Style.isCSharp())
1895     return false;
1896   // See if it's a property accessor.
1897   if (FormatTok->Previous->isNot(tok::identifier))
1898     return false;
1899 
1900   // See if we are inside a property accessor.
1901   //
1902   // Record the current tokenPosition so that we can advance and
1903   // reset the current token. `Next` is not set yet so we need
1904   // another way to advance along the token stream.
1905   unsigned int StoredPosition = Tokens->getPosition();
1906   FormatToken *Tok = Tokens->getNextToken();
1907 
1908   // A trivial property accessor is of the form:
1909   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
1910   // Track these as they do not require line breaks to be introduced.
1911   bool HasSpecialAccessor = false;
1912   bool IsTrivialPropertyAccessor = true;
1913   while (!eof()) {
1914     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1915                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1916                      Keywords.kw_init, Keywords.kw_set)) {
1917       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
1918         HasSpecialAccessor = true;
1919       Tok = Tokens->getNextToken();
1920       continue;
1921     }
1922     if (Tok->isNot(tok::r_brace))
1923       IsTrivialPropertyAccessor = false;
1924     break;
1925   }
1926 
1927   if (!HasSpecialAccessor) {
1928     Tokens->setPosition(StoredPosition);
1929     return false;
1930   }
1931 
1932   // Try to parse the property accessor:
1933   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1934   Tokens->setPosition(StoredPosition);
1935   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1936     addUnwrappedLine();
1937   nextToken();
1938   do {
1939     switch (FormatTok->Tok.getKind()) {
1940     case tok::r_brace:
1941       nextToken();
1942       if (FormatTok->is(tok::equal)) {
1943         while (!eof() && FormatTok->isNot(tok::semi))
1944           nextToken();
1945         nextToken();
1946       }
1947       addUnwrappedLine();
1948       return true;
1949     case tok::l_brace:
1950       ++Line->Level;
1951       parseBlock(/*MustBeDeclaration=*/true);
1952       addUnwrappedLine();
1953       --Line->Level;
1954       break;
1955     case tok::equal:
1956       if (FormatTok->is(TT_FatArrow)) {
1957         ++Line->Level;
1958         do {
1959           nextToken();
1960         } while (!eof() && FormatTok->isNot(tok::semi));
1961         nextToken();
1962         addUnwrappedLine();
1963         --Line->Level;
1964         break;
1965       }
1966       nextToken();
1967       break;
1968     default:
1969       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
1970                              Keywords.kw_set) &&
1971           !IsTrivialPropertyAccessor) {
1972         // Non-trivial get/set needs to be on its own line.
1973         addUnwrappedLine();
1974       }
1975       nextToken();
1976     }
1977   } while (!eof());
1978 
1979   // Unreachable for well-formed code (paired '{' and '}').
1980   return true;
1981 }
1982 
1983 bool UnwrappedLineParser::tryToParseLambda() {
1984   assert(FormatTok->is(tok::l_square));
1985   if (!Style.isCpp()) {
1986     nextToken();
1987     return false;
1988   }
1989   FormatToken &LSquare = *FormatTok;
1990   if (!tryToParseLambdaIntroducer())
1991     return false;
1992 
1993   bool SeenArrow = false;
1994   bool InTemplateParameterList = false;
1995 
1996   while (FormatTok->isNot(tok::l_brace)) {
1997     if (FormatTok->isSimpleTypeSpecifier()) {
1998       nextToken();
1999       continue;
2000     }
2001     switch (FormatTok->Tok.getKind()) {
2002     case tok::l_brace:
2003       break;
2004     case tok::l_paren:
2005       parseParens();
2006       break;
2007     case tok::l_square:
2008       parseSquare();
2009       break;
2010     case tok::kw_class:
2011     case tok::kw_template:
2012     case tok::kw_typename:
2013       assert(FormatTok->Previous);
2014       if (FormatTok->Previous->is(tok::less))
2015         InTemplateParameterList = true;
2016       nextToken();
2017       break;
2018     case tok::amp:
2019     case tok::star:
2020     case tok::kw_const:
2021     case tok::comma:
2022     case tok::less:
2023     case tok::greater:
2024     case tok::identifier:
2025     case tok::numeric_constant:
2026     case tok::coloncolon:
2027     case tok::kw_mutable:
2028     case tok::kw_noexcept:
2029       nextToken();
2030       break;
2031     // Specialization of a template with an integer parameter can contain
2032     // arithmetic, logical, comparison and ternary operators.
2033     //
2034     // FIXME: This also accepts sequences of operators that are not in the scope
2035     // of a template argument list.
2036     //
2037     // In a C++ lambda a template type can only occur after an arrow. We use
2038     // this as an heuristic to distinguish between Objective-C expressions
2039     // followed by an `a->b` expression, such as:
2040     // ([obj func:arg] + a->b)
2041     // Otherwise the code below would parse as a lambda.
2042     //
2043     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2044     // explicit template lists: []<bool b = true && false>(U &&u){}
2045     case tok::plus:
2046     case tok::minus:
2047     case tok::exclaim:
2048     case tok::tilde:
2049     case tok::slash:
2050     case tok::percent:
2051     case tok::lessless:
2052     case tok::pipe:
2053     case tok::pipepipe:
2054     case tok::ampamp:
2055     case tok::caret:
2056     case tok::equalequal:
2057     case tok::exclaimequal:
2058     case tok::greaterequal:
2059     case tok::lessequal:
2060     case tok::question:
2061     case tok::colon:
2062     case tok::ellipsis:
2063     case tok::kw_true:
2064     case tok::kw_false:
2065       if (SeenArrow || InTemplateParameterList) {
2066         nextToken();
2067         break;
2068       }
2069       return true;
2070     case tok::arrow:
2071       // This might or might not actually be a lambda arrow (this could be an
2072       // ObjC method invocation followed by a dereferencing arrow). We might
2073       // reset this back to TT_Unknown in TokenAnnotator.
2074       FormatTok->setFinalizedType(TT_LambdaArrow);
2075       SeenArrow = true;
2076       nextToken();
2077       break;
2078     default:
2079       return true;
2080     }
2081   }
2082   FormatTok->setFinalizedType(TT_LambdaLBrace);
2083   LSquare.setFinalizedType(TT_LambdaLSquare);
2084   parseChildBlock();
2085   return true;
2086 }
2087 
2088 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2089   const FormatToken *Previous = FormatTok->Previous;
2090   const FormatToken *LeftSquare = FormatTok;
2091   nextToken();
2092   if (Previous &&
2093       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2094                          tok::kw_delete, tok::l_square) ||
2095        LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2096        Previous->isSimpleTypeSpecifier())) {
2097     return false;
2098   }
2099   if (FormatTok->is(tok::l_square))
2100     return false;
2101   if (FormatTok->is(tok::r_square)) {
2102     const FormatToken *Next = Tokens->peekNextToken();
2103     if (Next->is(tok::greater))
2104       return false;
2105   }
2106   parseSquare(/*LambdaIntroducer=*/true);
2107   return true;
2108 }
2109 
2110 void UnwrappedLineParser::tryToParseJSFunction() {
2111   assert(FormatTok->is(Keywords.kw_function) ||
2112          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2113   if (FormatTok->is(Keywords.kw_async))
2114     nextToken();
2115   // Consume "function".
2116   nextToken();
2117 
2118   // Consume * (generator function). Treat it like C++'s overloaded operators.
2119   if (FormatTok->is(tok::star)) {
2120     FormatTok->setFinalizedType(TT_OverloadedOperator);
2121     nextToken();
2122   }
2123 
2124   // Consume function name.
2125   if (FormatTok->is(tok::identifier))
2126     nextToken();
2127 
2128   if (FormatTok->isNot(tok::l_paren))
2129     return;
2130 
2131   // Parse formal parameter list.
2132   parseParens();
2133 
2134   if (FormatTok->is(tok::colon)) {
2135     // Parse a type definition.
2136     nextToken();
2137 
2138     // Eat the type declaration. For braced inline object types, balance braces,
2139     // otherwise just parse until finding an l_brace for the function body.
2140     if (FormatTok->is(tok::l_brace))
2141       tryToParseBracedList();
2142     else
2143       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2144         nextToken();
2145   }
2146 
2147   if (FormatTok->is(tok::semi))
2148     return;
2149 
2150   parseChildBlock();
2151 }
2152 
2153 bool UnwrappedLineParser::tryToParseBracedList() {
2154   if (FormatTok->is(BK_Unknown))
2155     calculateBraceTypes();
2156   assert(FormatTok->isNot(BK_Unknown));
2157   if (FormatTok->is(BK_Block))
2158     return false;
2159   nextToken();
2160   parseBracedList();
2161   return true;
2162 }
2163 
2164 bool UnwrappedLineParser::tryToParseChildBlock() {
2165   assert(Style.isJavaScript() || Style.isCSharp());
2166   assert(FormatTok->is(TT_FatArrow));
2167   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2168   // They always start an expression or a child block if followed by a curly
2169   // brace.
2170   nextToken();
2171   if (FormatTok->isNot(tok::l_brace))
2172     return false;
2173   parseChildBlock();
2174   return true;
2175 }
2176 
2177 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2178                                           bool IsEnum,
2179                                           tok::TokenKind ClosingBraceKind) {
2180   bool HasError = false;
2181 
2182   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2183   // replace this by using parseAssignmentExpression() inside.
2184   do {
2185     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2186         tryToParseChildBlock())
2187       continue;
2188     if (Style.isJavaScript()) {
2189       if (FormatTok->is(Keywords.kw_function) ||
2190           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2191         tryToParseJSFunction();
2192         continue;
2193       }
2194       if (FormatTok->is(tok::l_brace)) {
2195         // Could be a method inside of a braced list `{a() { return 1; }}`.
2196         if (tryToParseBracedList())
2197           continue;
2198         parseChildBlock();
2199       }
2200     }
2201     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2202       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2203         addUnwrappedLine();
2204       nextToken();
2205       return !HasError;
2206     }
2207     switch (FormatTok->Tok.getKind()) {
2208     case tok::l_square:
2209       if (Style.isCSharp())
2210         parseSquare();
2211       else
2212         tryToParseLambda();
2213       break;
2214     case tok::l_paren:
2215       parseParens();
2216       // JavaScript can just have free standing methods and getters/setters in
2217       // object literals. Detect them by a "{" following ")".
2218       if (Style.isJavaScript()) {
2219         if (FormatTok->is(tok::l_brace))
2220           parseChildBlock();
2221         break;
2222       }
2223       break;
2224     case tok::l_brace:
2225       // Assume there are no blocks inside a braced init list apart
2226       // from the ones we explicitly parse out (like lambdas).
2227       FormatTok->setBlockKind(BK_BracedInit);
2228       nextToken();
2229       parseBracedList();
2230       break;
2231     case tok::less:
2232       if (Style.Language == FormatStyle::LK_Proto ||
2233           ClosingBraceKind == tok::greater) {
2234         nextToken();
2235         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2236                         /*ClosingBraceKind=*/tok::greater);
2237       } else {
2238         nextToken();
2239       }
2240       break;
2241     case tok::semi:
2242       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2243       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2244       // used for error recovery if we have otherwise determined that this is
2245       // a braced list.
2246       if (Style.isJavaScript()) {
2247         nextToken();
2248         break;
2249       }
2250       HasError = true;
2251       if (!ContinueOnSemicolons)
2252         return !HasError;
2253       nextToken();
2254       break;
2255     case tok::comma:
2256       nextToken();
2257       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2258         addUnwrappedLine();
2259       break;
2260     default:
2261       nextToken();
2262       break;
2263     }
2264   } while (!eof());
2265   return false;
2266 }
2267 
2268 /// \brief Parses a pair of parentheses (and everything between them).
2269 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2270 /// double ampersands. This only counts for the current parens scope.
2271 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2272   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2273   nextToken();
2274   do {
2275     switch (FormatTok->Tok.getKind()) {
2276     case tok::l_paren:
2277       parseParens();
2278       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2279         parseChildBlock();
2280       break;
2281     case tok::r_paren:
2282       nextToken();
2283       return;
2284     case tok::r_brace:
2285       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2286       return;
2287     case tok::l_square:
2288       tryToParseLambda();
2289       break;
2290     case tok::l_brace:
2291       if (!tryToParseBracedList())
2292         parseChildBlock();
2293       break;
2294     case tok::at:
2295       nextToken();
2296       if (FormatTok->is(tok::l_brace)) {
2297         nextToken();
2298         parseBracedList();
2299       }
2300       break;
2301     case tok::equal:
2302       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2303         tryToParseChildBlock();
2304       else
2305         nextToken();
2306       break;
2307     case tok::kw_class:
2308       if (Style.isJavaScript())
2309         parseRecord(/*ParseAsExpr=*/true);
2310       else
2311         nextToken();
2312       break;
2313     case tok::identifier:
2314       if (Style.isJavaScript() &&
2315           (FormatTok->is(Keywords.kw_function) ||
2316            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2317         tryToParseJSFunction();
2318       else
2319         nextToken();
2320       break;
2321     case tok::kw_requires: {
2322       auto RequiresToken = FormatTok;
2323       nextToken();
2324       parseRequiresExpression(RequiresToken);
2325       break;
2326     }
2327     case tok::ampamp:
2328       if (AmpAmpTokenType != TT_Unknown)
2329         FormatTok->setFinalizedType(AmpAmpTokenType);
2330       LLVM_FALLTHROUGH;
2331     default:
2332       nextToken();
2333       break;
2334     }
2335   } while (!eof());
2336 }
2337 
2338 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2339   if (!LambdaIntroducer) {
2340     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2341     if (tryToParseLambda())
2342       return;
2343   }
2344   do {
2345     switch (FormatTok->Tok.getKind()) {
2346     case tok::l_paren:
2347       parseParens();
2348       break;
2349     case tok::r_square:
2350       nextToken();
2351       return;
2352     case tok::r_brace:
2353       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2354       return;
2355     case tok::l_square:
2356       parseSquare();
2357       break;
2358     case tok::l_brace: {
2359       if (!tryToParseBracedList())
2360         parseChildBlock();
2361       break;
2362     }
2363     case tok::at:
2364       nextToken();
2365       if (FormatTok->is(tok::l_brace)) {
2366         nextToken();
2367         parseBracedList();
2368       }
2369       break;
2370     default:
2371       nextToken();
2372       break;
2373     }
2374   } while (!eof());
2375 }
2376 
2377 void UnwrappedLineParser::keepAncestorBraces() {
2378   if (!Style.RemoveBracesLLVM)
2379     return;
2380 
2381   const int MaxNestingLevels = 2;
2382   const int Size = NestedTooDeep.size();
2383   if (Size >= MaxNestingLevels)
2384     NestedTooDeep[Size - MaxNestingLevels] = true;
2385   NestedTooDeep.push_back(false);
2386 }
2387 
2388 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2389   for (const auto &Token : llvm::reverse(Line.Tokens))
2390     if (Token.Tok->isNot(tok::comment))
2391       return Token.Tok;
2392 
2393   return nullptr;
2394 }
2395 
2396 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2397   FormatToken *Tok = nullptr;
2398 
2399   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2400       PreprocessorDirectives.empty()) {
2401     Tok = getLastNonComment(*Line);
2402     assert(Tok);
2403     if (Tok->BraceCount < 0) {
2404       assert(Tok->BraceCount == -1);
2405       Tok = nullptr;
2406     } else {
2407       Tok->BraceCount = -1;
2408     }
2409   }
2410 
2411   addUnwrappedLine();
2412   ++Line->Level;
2413   parseStructuralElement();
2414 
2415   if (Tok) {
2416     assert(!Line->InPPDirective);
2417     Tok = nullptr;
2418     for (const auto &L : llvm::reverse(*CurrentLines)) {
2419       if (!L.InPPDirective && getLastNonComment(L)) {
2420         Tok = L.Tokens.back().Tok;
2421         break;
2422       }
2423     }
2424     assert(Tok);
2425     ++Tok->BraceCount;
2426   }
2427 
2428   if (CheckEOF && FormatTok->is(tok::eof))
2429     addUnwrappedLine();
2430 
2431   --Line->Level;
2432 }
2433 
2434 static void markOptionalBraces(FormatToken *LeftBrace) {
2435   if (!LeftBrace)
2436     return;
2437 
2438   assert(LeftBrace->is(tok::l_brace));
2439 
2440   FormatToken *RightBrace = LeftBrace->MatchingParen;
2441   if (!RightBrace) {
2442     assert(!LeftBrace->Optional);
2443     return;
2444   }
2445 
2446   assert(RightBrace->is(tok::r_brace));
2447   assert(RightBrace->MatchingParen == LeftBrace);
2448   assert(LeftBrace->Optional == RightBrace->Optional);
2449 
2450   LeftBrace->Optional = true;
2451   RightBrace->Optional = true;
2452 }
2453 
2454 void UnwrappedLineParser::handleAttributes() {
2455   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2456   if (FormatTok->is(TT_AttributeMacro))
2457     nextToken();
2458   handleCppAttributes();
2459 }
2460 
2461 bool UnwrappedLineParser::handleCppAttributes() {
2462   // Handle [[likely]] / [[unlikely]] attributes.
2463   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2464     parseSquare();
2465     return true;
2466   }
2467   return false;
2468 }
2469 
2470 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2471                                                   bool KeepBraces) {
2472   assert(FormatTok->is(tok::kw_if) && "'if' expected");
2473   nextToken();
2474   if (FormatTok->is(tok::exclaim))
2475     nextToken();
2476 
2477   bool KeepIfBraces = false;
2478   bool KeepElseBraces = false;
2479   if (FormatTok->is(tok::kw_consteval)) {
2480     KeepIfBraces = true;
2481     KeepElseBraces = true;
2482     nextToken();
2483   } else {
2484     if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2485       nextToken();
2486     if (FormatTok->is(tok::l_paren))
2487       parseParens();
2488   }
2489   handleAttributes();
2490 
2491   bool NeedsUnwrappedLine = false;
2492   keepAncestorBraces();
2493 
2494   FormatToken *IfLeftBrace = nullptr;
2495   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2496 
2497   if (FormatTok->is(tok::l_brace)) {
2498     IfLeftBrace = FormatTok;
2499     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2500     IfBlockKind = parseBlock();
2501     if (Style.BraceWrapping.BeforeElse)
2502       addUnwrappedLine();
2503     else
2504       NeedsUnwrappedLine = true;
2505   } else {
2506     parseUnbracedBody();
2507   }
2508 
2509   if (Style.RemoveBracesLLVM) {
2510     assert(!NestedTooDeep.empty());
2511     KeepIfBraces = KeepIfBraces ||
2512                    (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2513                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2514                    IfBlockKind == IfStmtKind::IfElseIf;
2515   }
2516 
2517   FormatToken *ElseLeftBrace = nullptr;
2518   IfStmtKind Kind = IfStmtKind::IfOnly;
2519 
2520   if (FormatTok->is(tok::kw_else)) {
2521     if (Style.RemoveBracesLLVM) {
2522       NestedTooDeep.back() = false;
2523       Kind = IfStmtKind::IfElse;
2524     }
2525     nextToken();
2526     handleAttributes();
2527     if (FormatTok->is(tok::l_brace)) {
2528       ElseLeftBrace = FormatTok;
2529       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2530       if (parseBlock() == IfStmtKind::IfOnly)
2531         Kind = IfStmtKind::IfElseIf;
2532       addUnwrappedLine();
2533     } else if (FormatTok->is(tok::kw_if)) {
2534       const FormatToken *Previous = Tokens->getPreviousToken();
2535       assert(Previous);
2536       const bool IsPrecededByComment = Previous->is(tok::comment);
2537       if (IsPrecededByComment) {
2538         addUnwrappedLine();
2539         ++Line->Level;
2540       }
2541       bool TooDeep = true;
2542       if (Style.RemoveBracesLLVM) {
2543         Kind = IfStmtKind::IfElseIf;
2544         TooDeep = NestedTooDeep.pop_back_val();
2545       }
2546       ElseLeftBrace =
2547           parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
2548       if (Style.RemoveBracesLLVM)
2549         NestedTooDeep.push_back(TooDeep);
2550       if (IsPrecededByComment)
2551         --Line->Level;
2552     } else {
2553       parseUnbracedBody(/*CheckEOF=*/true);
2554     }
2555   } else {
2556     if (Style.RemoveBracesLLVM)
2557       KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2558     if (NeedsUnwrappedLine)
2559       addUnwrappedLine();
2560   }
2561 
2562   if (!Style.RemoveBracesLLVM)
2563     return nullptr;
2564 
2565   assert(!NestedTooDeep.empty());
2566   KeepElseBraces = KeepElseBraces ||
2567                    (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2568                    NestedTooDeep.back();
2569 
2570   NestedTooDeep.pop_back();
2571 
2572   if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
2573     markOptionalBraces(IfLeftBrace);
2574     markOptionalBraces(ElseLeftBrace);
2575   } else if (IfLeftBrace) {
2576     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2577     if (IfRightBrace) {
2578       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2579       assert(!IfLeftBrace->Optional);
2580       assert(!IfRightBrace->Optional);
2581       IfLeftBrace->MatchingParen = nullptr;
2582       IfRightBrace->MatchingParen = nullptr;
2583     }
2584   }
2585 
2586   if (IfKind)
2587     *IfKind = Kind;
2588 
2589   return IfLeftBrace;
2590 }
2591 
2592 void UnwrappedLineParser::parseTryCatch() {
2593   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2594   nextToken();
2595   bool NeedsUnwrappedLine = false;
2596   if (FormatTok->is(tok::colon)) {
2597     // We are in a function try block, what comes is an initializer list.
2598     nextToken();
2599 
2600     // In case identifiers were removed by clang-tidy, what might follow is
2601     // multiple commas in sequence - before the first identifier.
2602     while (FormatTok->is(tok::comma))
2603       nextToken();
2604 
2605     while (FormatTok->is(tok::identifier)) {
2606       nextToken();
2607       if (FormatTok->is(tok::l_paren))
2608         parseParens();
2609       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2610           FormatTok->is(tok::l_brace)) {
2611         do {
2612           nextToken();
2613         } while (!FormatTok->is(tok::r_brace));
2614         nextToken();
2615       }
2616 
2617       // In case identifiers were removed by clang-tidy, what might follow is
2618       // multiple commas in sequence - after the first identifier.
2619       while (FormatTok->is(tok::comma))
2620         nextToken();
2621     }
2622   }
2623   // Parse try with resource.
2624   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2625     parseParens();
2626 
2627   keepAncestorBraces();
2628 
2629   if (FormatTok->is(tok::l_brace)) {
2630     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2631     parseBlock();
2632     if (Style.BraceWrapping.BeforeCatch)
2633       addUnwrappedLine();
2634     else
2635       NeedsUnwrappedLine = true;
2636   } else if (!FormatTok->is(tok::kw_catch)) {
2637     // The C++ standard requires a compound-statement after a try.
2638     // If there's none, we try to assume there's a structuralElement
2639     // and try to continue.
2640     addUnwrappedLine();
2641     ++Line->Level;
2642     parseStructuralElement();
2643     --Line->Level;
2644   }
2645   while (true) {
2646     if (FormatTok->is(tok::at))
2647       nextToken();
2648     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2649                              tok::kw___finally) ||
2650           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2651            FormatTok->is(Keywords.kw_finally)) ||
2652           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2653            FormatTok->isObjCAtKeyword(tok::objc_finally))))
2654       break;
2655     nextToken();
2656     while (FormatTok->isNot(tok::l_brace)) {
2657       if (FormatTok->is(tok::l_paren)) {
2658         parseParens();
2659         continue;
2660       }
2661       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2662         if (Style.RemoveBracesLLVM)
2663           NestedTooDeep.pop_back();
2664         return;
2665       }
2666       nextToken();
2667     }
2668     NeedsUnwrappedLine = false;
2669     Line->MustBeDeclaration = false;
2670     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2671     parseBlock();
2672     if (Style.BraceWrapping.BeforeCatch)
2673       addUnwrappedLine();
2674     else
2675       NeedsUnwrappedLine = true;
2676   }
2677 
2678   if (Style.RemoveBracesLLVM)
2679     NestedTooDeep.pop_back();
2680 
2681   if (NeedsUnwrappedLine)
2682     addUnwrappedLine();
2683 }
2684 
2685 void UnwrappedLineParser::parseNamespace() {
2686   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2687          "'namespace' expected");
2688 
2689   const FormatToken &InitialToken = *FormatTok;
2690   nextToken();
2691   if (InitialToken.is(TT_NamespaceMacro)) {
2692     parseParens();
2693   } else {
2694     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2695                               tok::l_square, tok::period, tok::l_paren) ||
2696            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2697       if (FormatTok->is(tok::l_square))
2698         parseSquare();
2699       else if (FormatTok->is(tok::l_paren))
2700         parseParens();
2701       else
2702         nextToken();
2703   }
2704   if (FormatTok->is(tok::l_brace)) {
2705     if (ShouldBreakBeforeBrace(Style, InitialToken))
2706       addUnwrappedLine();
2707 
2708     unsigned AddLevels =
2709         Style.NamespaceIndentation == FormatStyle::NI_All ||
2710                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2711                  DeclarationScopeStack.size() > 1)
2712             ? 1u
2713             : 0u;
2714     bool ManageWhitesmithsBraces =
2715         AddLevels == 0u &&
2716         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2717 
2718     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2719     // the whole block.
2720     if (ManageWhitesmithsBraces)
2721       ++Line->Level;
2722 
2723     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2724                /*MunchSemi=*/true,
2725                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2726 
2727     // Munch the semicolon after a namespace. This is more common than one would
2728     // think. Putting the semicolon into its own line is very ugly.
2729     if (FormatTok->is(tok::semi))
2730       nextToken();
2731 
2732     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2733 
2734     if (ManageWhitesmithsBraces)
2735       --Line->Level;
2736   }
2737   // FIXME: Add error handling.
2738 }
2739 
2740 void UnwrappedLineParser::parseNew() {
2741   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2742   nextToken();
2743 
2744   if (Style.isCSharp()) {
2745     do {
2746       if (FormatTok->is(tok::l_brace))
2747         parseBracedList();
2748 
2749       if (FormatTok->isOneOf(tok::semi, tok::comma))
2750         return;
2751 
2752       nextToken();
2753     } while (!eof());
2754   }
2755 
2756   if (Style.Language != FormatStyle::LK_Java)
2757     return;
2758 
2759   // In Java, we can parse everything up to the parens, which aren't optional.
2760   do {
2761     // There should not be a ;, { or } before the new's open paren.
2762     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2763       return;
2764 
2765     // Consume the parens.
2766     if (FormatTok->is(tok::l_paren)) {
2767       parseParens();
2768 
2769       // If there is a class body of an anonymous class, consume that as child.
2770       if (FormatTok->is(tok::l_brace))
2771         parseChildBlock();
2772       return;
2773     }
2774     nextToken();
2775   } while (!eof());
2776 }
2777 
2778 void UnwrappedLineParser::parseLoopBody(bool TryRemoveBraces,
2779                                         bool WrapRightBrace) {
2780   keepAncestorBraces();
2781 
2782   if (FormatTok->is(tok::l_brace)) {
2783     FormatToken *LeftBrace = FormatTok;
2784     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2785     parseBlock();
2786     if (TryRemoveBraces) {
2787       assert(!NestedTooDeep.empty());
2788       if (!NestedTooDeep.back())
2789         markOptionalBraces(LeftBrace);
2790     }
2791     if (WrapRightBrace)
2792       addUnwrappedLine();
2793   } else {
2794     parseUnbracedBody();
2795   }
2796 
2797   if (TryRemoveBraces)
2798     NestedTooDeep.pop_back();
2799 }
2800 
2801 void UnwrappedLineParser::parseForOrWhileLoop() {
2802   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2803          "'for', 'while' or foreach macro expected");
2804   nextToken();
2805   // JS' for await ( ...
2806   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2807     nextToken();
2808   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2809     nextToken();
2810   if (FormatTok->is(tok::l_paren))
2811     parseParens();
2812 
2813   parseLoopBody(Style.RemoveBracesLLVM, true);
2814 }
2815 
2816 void UnwrappedLineParser::parseDoWhile() {
2817   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2818   nextToken();
2819 
2820   parseLoopBody(false, Style.BraceWrapping.BeforeWhile);
2821 
2822   // FIXME: Add error handling.
2823   if (!FormatTok->is(tok::kw_while)) {
2824     addUnwrappedLine();
2825     return;
2826   }
2827 
2828   // If in Whitesmiths mode, the line with the while() needs to be indented
2829   // to the same level as the block.
2830   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2831     ++Line->Level;
2832 
2833   nextToken();
2834   parseStructuralElement();
2835 }
2836 
2837 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2838   nextToken();
2839   unsigned OldLineLevel = Line->Level;
2840   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2841     --Line->Level;
2842   if (LeftAlignLabel)
2843     Line->Level = 0;
2844 
2845   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2846       FormatTok->is(tok::l_brace)) {
2847 
2848     CompoundStatementIndenter Indenter(this, Line->Level,
2849                                        Style.BraceWrapping.AfterCaseLabel,
2850                                        Style.BraceWrapping.IndentBraces);
2851     parseBlock();
2852     if (FormatTok->is(tok::kw_break)) {
2853       if (Style.BraceWrapping.AfterControlStatement ==
2854           FormatStyle::BWACS_Always) {
2855         addUnwrappedLine();
2856         if (!Style.IndentCaseBlocks &&
2857             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2858           ++Line->Level;
2859       }
2860       parseStructuralElement();
2861     }
2862     addUnwrappedLine();
2863   } else {
2864     if (FormatTok->is(tok::semi))
2865       nextToken();
2866     addUnwrappedLine();
2867   }
2868   Line->Level = OldLineLevel;
2869   if (FormatTok->isNot(tok::l_brace)) {
2870     parseStructuralElement();
2871     addUnwrappedLine();
2872   }
2873 }
2874 
2875 void UnwrappedLineParser::parseCaseLabel() {
2876   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2877 
2878   // FIXME: fix handling of complex expressions here.
2879   do {
2880     nextToken();
2881   } while (!eof() && !FormatTok->is(tok::colon));
2882   parseLabel();
2883 }
2884 
2885 void UnwrappedLineParser::parseSwitch() {
2886   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
2887   nextToken();
2888   if (FormatTok->is(tok::l_paren))
2889     parseParens();
2890 
2891   keepAncestorBraces();
2892 
2893   if (FormatTok->is(tok::l_brace)) {
2894     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2895     parseBlock();
2896     addUnwrappedLine();
2897   } else {
2898     addUnwrappedLine();
2899     ++Line->Level;
2900     parseStructuralElement();
2901     --Line->Level;
2902   }
2903 
2904   if (Style.RemoveBracesLLVM)
2905     NestedTooDeep.pop_back();
2906 }
2907 
2908 // Operators that can follow a C variable.
2909 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
2910   switch (kind) {
2911   case tok::ampamp:
2912   case tok::ampequal:
2913   case tok::arrow:
2914   case tok::caret:
2915   case tok::caretequal:
2916   case tok::comma:
2917   case tok::ellipsis:
2918   case tok::equal:
2919   case tok::equalequal:
2920   case tok::exclaim:
2921   case tok::exclaimequal:
2922   case tok::greater:
2923   case tok::greaterequal:
2924   case tok::greatergreater:
2925   case tok::greatergreaterequal:
2926   case tok::l_paren:
2927   case tok::l_square:
2928   case tok::less:
2929   case tok::lessequal:
2930   case tok::lessless:
2931   case tok::lesslessequal:
2932   case tok::minus:
2933   case tok::minusequal:
2934   case tok::minusminus:
2935   case tok::percent:
2936   case tok::percentequal:
2937   case tok::period:
2938   case tok::pipe:
2939   case tok::pipeequal:
2940   case tok::pipepipe:
2941   case tok::plus:
2942   case tok::plusequal:
2943   case tok::plusplus:
2944   case tok::question:
2945   case tok::r_brace:
2946   case tok::r_paren:
2947   case tok::r_square:
2948   case tok::semi:
2949   case tok::slash:
2950   case tok::slashequal:
2951   case tok::star:
2952   case tok::starequal:
2953     return true;
2954   default:
2955     return false;
2956   }
2957 }
2958 
2959 void UnwrappedLineParser::parseAccessSpecifier() {
2960   FormatToken *AccessSpecifierCandidate = FormatTok;
2961   nextToken();
2962   // Understand Qt's slots.
2963   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2964     nextToken();
2965   // Otherwise, we don't know what it is, and we'd better keep the next token.
2966   if (FormatTok->is(tok::colon)) {
2967     nextToken();
2968     addUnwrappedLine();
2969   } else if (!FormatTok->is(tok::coloncolon) &&
2970              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
2971     // Not a variable name nor namespace name.
2972     addUnwrappedLine();
2973   } else if (AccessSpecifierCandidate) {
2974     // Consider the access specifier to be a C identifier.
2975     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2976   }
2977 }
2978 
2979 /// \brief Parses a concept definition.
2980 /// \pre The current token has to be the concept keyword.
2981 ///
2982 /// Returns if either the concept has been completely parsed, or if it detects
2983 /// that the concept definition is incorrect.
2984 void UnwrappedLineParser::parseConcept() {
2985   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
2986   nextToken();
2987   if (!FormatTok->is(tok::identifier))
2988     return;
2989   nextToken();
2990   if (!FormatTok->is(tok::equal))
2991     return;
2992   nextToken();
2993   parseConstraintExpression();
2994   if (FormatTok->is(tok::semi))
2995     nextToken();
2996   addUnwrappedLine();
2997 }
2998 
2999 /// \brief Parses a requires, decides if it is a clause or an expression.
3000 /// \pre The current token has to be the requires keyword.
3001 /// \returns true if it parsed a clause.
3002 bool clang::format::UnwrappedLineParser::parseRequires() {
3003   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3004   auto RequiresToken = FormatTok;
3005 
3006   // We try to guess if it is a requires clause, or a requires expression. For
3007   // that we first consume the keyword and check the next token.
3008   nextToken();
3009 
3010   switch (FormatTok->Tok.getKind()) {
3011   case tok::l_brace:
3012     // This can only be an expression, never a clause.
3013     parseRequiresExpression(RequiresToken);
3014     return false;
3015   case tok::l_paren:
3016     // Clauses and expression can start with a paren, it's unclear what we have.
3017     break;
3018   default:
3019     // All other tokens can only be a clause.
3020     parseRequiresClause(RequiresToken);
3021     return true;
3022   }
3023 
3024   // Looking forward we would have to decide if there are function declaration
3025   // like arguments to the requires expression:
3026   // requires (T t) {
3027   // Or there is a constraint expression for the requires clause:
3028   // requires (C<T> && ...
3029 
3030   // But first let's look behind.
3031   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3032 
3033   if (!PreviousNonComment ||
3034       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3035     // If there is no token, or an expression left brace, we are a requires
3036     // clause within a requires expression.
3037     parseRequiresClause(RequiresToken);
3038     return true;
3039   }
3040 
3041   switch (PreviousNonComment->Tok.getKind()) {
3042   case tok::greater:
3043   case tok::r_paren:
3044   case tok::kw_noexcept:
3045   case tok::kw_const:
3046     // This is a requires clause.
3047     parseRequiresClause(RequiresToken);
3048     return true;
3049   case tok::amp:
3050   case tok::ampamp: {
3051     // This can be either:
3052     // if (... && requires (T t) ...)
3053     // Or
3054     // void member(...) && requires (C<T> ...
3055     // We check the one token before that for a const:
3056     // void member(...) const && requires (C<T> ...
3057     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3058     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3059       parseRequiresClause(RequiresToken);
3060       return true;
3061     }
3062     break;
3063   }
3064   default:
3065     // It's an expression.
3066     parseRequiresExpression(RequiresToken);
3067     return false;
3068   }
3069 
3070   // Now we look forward and try to check if the paren content is a parameter
3071   // list. The parameters can be cv-qualified and contain references or
3072   // pointers.
3073   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3074   // of stuff: typename, const, *, &, &&, ::, identifiers.
3075 
3076   int NextTokenOffset = 1;
3077   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3078   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3079     ++NextTokenOffset;
3080     NextToken = Tokens->peekNextToken(NextTokenOffset);
3081   };
3082 
3083   bool FoundType = false;
3084   bool LastWasColonColon = false;
3085   int OpenAngles = 0;
3086 
3087   for (; NextTokenOffset < 50; PeekNext()) {
3088     switch (NextToken->Tok.getKind()) {
3089     case tok::kw_volatile:
3090     case tok::kw_const:
3091     case tok::comma:
3092       parseRequiresExpression(RequiresToken);
3093       return false;
3094     case tok::r_paren:
3095     case tok::pipepipe:
3096       parseRequiresClause(RequiresToken);
3097       return true;
3098     case tok::eof:
3099       // Break out of the loop.
3100       NextTokenOffset = 50;
3101       break;
3102     case tok::coloncolon:
3103       LastWasColonColon = true;
3104       break;
3105     case tok::identifier:
3106       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3107         parseRequiresExpression(RequiresToken);
3108         return false;
3109       }
3110       FoundType = true;
3111       LastWasColonColon = false;
3112       break;
3113     case tok::less:
3114       ++OpenAngles;
3115       break;
3116     case tok::greater:
3117       --OpenAngles;
3118       break;
3119     default:
3120       if (NextToken->isSimpleTypeSpecifier()) {
3121         parseRequiresExpression(RequiresToken);
3122         return false;
3123       }
3124       break;
3125     }
3126   }
3127 
3128   // This seems to be a complicated expression, just assume it's a clause.
3129   parseRequiresClause(RequiresToken);
3130   return true;
3131 }
3132 
3133 /// \brief Parses a requires clause.
3134 /// \param RequiresToken The requires keyword token, which starts this clause.
3135 /// \pre We need to be on the next token after the requires keyword.
3136 /// \sa parseRequiresExpression
3137 ///
3138 /// Returns if it either has finished parsing the clause, or it detects, that
3139 /// the clause is incorrect.
3140 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3141   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3142   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3143 
3144   // If there is no previous token, we are within a requires expression,
3145   // otherwise we will always have the template or function declaration in front
3146   // of it.
3147   bool InRequiresExpression =
3148       !RequiresToken->Previous ||
3149       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3150 
3151   RequiresToken->setFinalizedType(InRequiresExpression
3152                                       ? TT_RequiresClauseInARequiresExpression
3153                                       : TT_RequiresClause);
3154 
3155   parseConstraintExpression();
3156 
3157   if (!InRequiresExpression)
3158     FormatTok->Previous->ClosesRequiresClause = true;
3159 }
3160 
3161 /// \brief Parses a requires expression.
3162 /// \param RequiresToken The requires keyword token, which starts this clause.
3163 /// \pre We need to be on the next token after the requires keyword.
3164 /// \sa parseRequiresClause
3165 ///
3166 /// Returns if it either has finished parsing the expression, or it detects,
3167 /// that the expression is incorrect.
3168 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3169   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3170   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3171 
3172   RequiresToken->setFinalizedType(TT_RequiresExpression);
3173 
3174   if (FormatTok->is(tok::l_paren)) {
3175     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3176     parseParens();
3177   }
3178 
3179   if (FormatTok->is(tok::l_brace)) {
3180     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3181     parseChildBlock(/*CanContainBracedList=*/false,
3182                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3183   }
3184 }
3185 
3186 /// \brief Parses a constraint expression.
3187 ///
3188 /// This is either the definition of a concept, or the body of a requires
3189 /// clause. It returns, when the parsing is complete, or the expression is
3190 /// incorrect.
3191 void UnwrappedLineParser::parseConstraintExpression() {
3192   // The special handling for lambdas is needed since tryToParseLambda() eats a
3193   // token and if a requires expression is the last part of a requires clause
3194   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3195   // not set on the correct token. Thus we need to be aware if we even expect a
3196   // lambda to be possible.
3197   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3198   bool LambdaNextTimeAllowed = true;
3199   do {
3200     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3201 
3202     switch (FormatTok->Tok.getKind()) {
3203     case tok::kw_requires: {
3204       auto RequiresToken = FormatTok;
3205       nextToken();
3206       parseRequiresExpression(RequiresToken);
3207       break;
3208     }
3209 
3210     case tok::l_paren:
3211       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3212       break;
3213 
3214     case tok::l_square:
3215       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3216         return;
3217       break;
3218 
3219     case tok::kw_const:
3220     case tok::semi:
3221     case tok::kw_class:
3222     case tok::kw_struct:
3223     case tok::kw_union:
3224       return;
3225 
3226     case tok::l_brace:
3227       // Potential function body.
3228       return;
3229 
3230     case tok::ampamp:
3231     case tok::pipepipe:
3232       FormatTok->setFinalizedType(TT_BinaryOperator);
3233       nextToken();
3234       LambdaNextTimeAllowed = true;
3235       break;
3236 
3237     case tok::comma:
3238     case tok::comment:
3239       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3240       nextToken();
3241       break;
3242 
3243     case tok::kw_sizeof:
3244     case tok::greater:
3245     case tok::greaterequal:
3246     case tok::greatergreater:
3247     case tok::less:
3248     case tok::lessequal:
3249     case tok::lessless:
3250     case tok::equalequal:
3251     case tok::exclaim:
3252     case tok::exclaimequal:
3253     case tok::plus:
3254     case tok::minus:
3255     case tok::star:
3256     case tok::slash:
3257     case tok::kw_decltype:
3258       LambdaNextTimeAllowed = true;
3259       // Just eat them.
3260       nextToken();
3261       break;
3262 
3263     case tok::numeric_constant:
3264     case tok::coloncolon:
3265     case tok::kw_true:
3266     case tok::kw_false:
3267       // Just eat them.
3268       nextToken();
3269       break;
3270 
3271     case tok::kw_static_cast:
3272     case tok::kw_const_cast:
3273     case tok::kw_reinterpret_cast:
3274     case tok::kw_dynamic_cast:
3275       nextToken();
3276       if (!FormatTok->is(tok::less))
3277         return;
3278 
3279       nextToken();
3280       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3281                       /*ClosingBraceKind=*/tok::greater);
3282       break;
3283 
3284     case tok::kw_bool:
3285       // bool is only allowed if it is directly followed by a paren for a cast:
3286       // concept C = bool(...);
3287       // and bool is the only type, all other types as cast must be inside a
3288       // cast to bool an thus are handled by the other cases.
3289       nextToken();
3290       if (FormatTok->isNot(tok::l_paren))
3291         return;
3292       parseParens();
3293       break;
3294 
3295     default:
3296       if (!FormatTok->Tok.getIdentifierInfo()) {
3297         // Identifiers are part of the default case, we check for more then
3298         // tok::identifier to handle builtin type traits.
3299         return;
3300       }
3301 
3302       // We need to differentiate identifiers for a template deduction guide,
3303       // variables, or function return types (the constraint expression has
3304       // ended before that), and basically all other cases. But it's easier to
3305       // check the other way around.
3306       assert(FormatTok->Previous);
3307       switch (FormatTok->Previous->Tok.getKind()) {
3308       case tok::coloncolon:  // Nested identifier.
3309       case tok::ampamp:      // Start of a function or variable for the
3310       case tok::pipepipe:    // constraint expression.
3311       case tok::kw_requires: // Initial identifier of a requires clause.
3312       case tok::equal:       // Initial identifier of a concept declaration.
3313         break;
3314       default:
3315         return;
3316       }
3317 
3318       // Read identifier with optional template declaration.
3319       nextToken();
3320       if (FormatTok->is(tok::less)) {
3321         nextToken();
3322         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3323                         /*ClosingBraceKind=*/tok::greater);
3324       }
3325       break;
3326     }
3327   } while (!eof());
3328 }
3329 
3330 bool UnwrappedLineParser::parseEnum() {
3331   const FormatToken &InitialToken = *FormatTok;
3332 
3333   // Won't be 'enum' for NS_ENUMs.
3334   if (FormatTok->is(tok::kw_enum))
3335     nextToken();
3336 
3337   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3338   // declarations. An "enum" keyword followed by a colon would be a syntax
3339   // error and thus assume it is just an identifier.
3340   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3341     return false;
3342 
3343   // In protobuf, "enum" can be used as a field name.
3344   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3345     return false;
3346 
3347   // Eat up enum class ...
3348   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3349     nextToken();
3350 
3351   while (FormatTok->Tok.getIdentifierInfo() ||
3352          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3353                             tok::greater, tok::comma, tok::question)) {
3354     nextToken();
3355     // We can have macros or attributes in between 'enum' and the enum name.
3356     if (FormatTok->is(tok::l_paren))
3357       parseParens();
3358     if (FormatTok->is(tok::identifier)) {
3359       nextToken();
3360       // If there are two identifiers in a row, this is likely an elaborate
3361       // return type. In Java, this can be "implements", etc.
3362       if (Style.isCpp() && FormatTok->is(tok::identifier))
3363         return false;
3364     }
3365   }
3366 
3367   // Just a declaration or something is wrong.
3368   if (FormatTok->isNot(tok::l_brace))
3369     return true;
3370   FormatTok->setFinalizedType(TT_EnumLBrace);
3371   FormatTok->setBlockKind(BK_Block);
3372 
3373   if (Style.Language == FormatStyle::LK_Java) {
3374     // Java enums are different.
3375     parseJavaEnumBody();
3376     return true;
3377   }
3378   if (Style.Language == FormatStyle::LK_Proto) {
3379     parseBlock(/*MustBeDeclaration=*/true);
3380     return true;
3381   }
3382 
3383   if (!Style.AllowShortEnumsOnASingleLine &&
3384       ShouldBreakBeforeBrace(Style, InitialToken))
3385     addUnwrappedLine();
3386   // Parse enum body.
3387   nextToken();
3388   if (!Style.AllowShortEnumsOnASingleLine) {
3389     addUnwrappedLine();
3390     Line->Level += 1;
3391   }
3392   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3393                                    /*IsEnum=*/true);
3394   if (!Style.AllowShortEnumsOnASingleLine)
3395     Line->Level -= 1;
3396   if (HasError) {
3397     if (FormatTok->is(tok::semi))
3398       nextToken();
3399     addUnwrappedLine();
3400   }
3401   return true;
3402 
3403   // There is no addUnwrappedLine() here so that we fall through to parsing a
3404   // structural element afterwards. Thus, in "enum A {} n, m;",
3405   // "} n, m;" will end up in one unwrapped line.
3406 }
3407 
3408 bool UnwrappedLineParser::parseStructLike() {
3409   // parseRecord falls through and does not yet add an unwrapped line as a
3410   // record declaration or definition can start a structural element.
3411   parseRecord();
3412   // This does not apply to Java, JavaScript and C#.
3413   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3414       Style.isCSharp()) {
3415     if (FormatTok->is(tok::semi))
3416       nextToken();
3417     addUnwrappedLine();
3418     return true;
3419   }
3420   return false;
3421 }
3422 
3423 namespace {
3424 // A class used to set and restore the Token position when peeking
3425 // ahead in the token source.
3426 class ScopedTokenPosition {
3427   unsigned StoredPosition;
3428   FormatTokenSource *Tokens;
3429 
3430 public:
3431   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3432     assert(Tokens && "Tokens expected to not be null");
3433     StoredPosition = Tokens->getPosition();
3434   }
3435 
3436   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3437 };
3438 } // namespace
3439 
3440 // Look to see if we have [[ by looking ahead, if
3441 // its not then rewind to the original position.
3442 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3443   ScopedTokenPosition AutoPosition(Tokens);
3444   FormatToken *Tok = Tokens->getNextToken();
3445   // We already read the first [ check for the second.
3446   if (!Tok->is(tok::l_square))
3447     return false;
3448   // Double check that the attribute is just something
3449   // fairly simple.
3450   while (Tok->isNot(tok::eof)) {
3451     if (Tok->is(tok::r_square))
3452       break;
3453     Tok = Tokens->getNextToken();
3454   }
3455   if (Tok->is(tok::eof))
3456     return false;
3457   Tok = Tokens->getNextToken();
3458   if (!Tok->is(tok::r_square))
3459     return false;
3460   Tok = Tokens->getNextToken();
3461   if (Tok->is(tok::semi))
3462     return false;
3463   return true;
3464 }
3465 
3466 void UnwrappedLineParser::parseJavaEnumBody() {
3467   // Determine whether the enum is simple, i.e. does not have a semicolon or
3468   // constants with class bodies. Simple enums can be formatted like braced
3469   // lists, contracted to a single line, etc.
3470   unsigned StoredPosition = Tokens->getPosition();
3471   bool IsSimple = true;
3472   FormatToken *Tok = Tokens->getNextToken();
3473   while (!Tok->is(tok::eof)) {
3474     if (Tok->is(tok::r_brace))
3475       break;
3476     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3477       IsSimple = false;
3478       break;
3479     }
3480     // FIXME: This will also mark enums with braces in the arguments to enum
3481     // constants as "not simple". This is probably fine in practice, though.
3482     Tok = Tokens->getNextToken();
3483   }
3484   FormatTok = Tokens->setPosition(StoredPosition);
3485 
3486   if (IsSimple) {
3487     nextToken();
3488     parseBracedList();
3489     addUnwrappedLine();
3490     return;
3491   }
3492 
3493   // Parse the body of a more complex enum.
3494   // First add a line for everything up to the "{".
3495   nextToken();
3496   addUnwrappedLine();
3497   ++Line->Level;
3498 
3499   // Parse the enum constants.
3500   while (FormatTok) {
3501     if (FormatTok->is(tok::l_brace)) {
3502       // Parse the constant's class body.
3503       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3504                  /*MunchSemi=*/false);
3505     } else if (FormatTok->is(tok::l_paren)) {
3506       parseParens();
3507     } else if (FormatTok->is(tok::comma)) {
3508       nextToken();
3509       addUnwrappedLine();
3510     } else if (FormatTok->is(tok::semi)) {
3511       nextToken();
3512       addUnwrappedLine();
3513       break;
3514     } else if (FormatTok->is(tok::r_brace)) {
3515       addUnwrappedLine();
3516       break;
3517     } else {
3518       nextToken();
3519     }
3520   }
3521 
3522   // Parse the class body after the enum's ";" if any.
3523   parseLevel(/*HasOpeningBrace=*/true, /*CanContainBracedList=*/true);
3524   nextToken();
3525   --Line->Level;
3526   addUnwrappedLine();
3527 }
3528 
// Parses a record-like declaration (class, struct, union, or e.g. interface)
// starting at the introducing keyword held in FormatTok.
//
// Consumes the (possibly qualified, token-pasted or attributed) name, any
// tokens following a ':' or '<' up to the body, and, if present, the braced
// body itself. When \p ParseAsExpr is true the body is parsed via
// parseChildBlock(); otherwise it is parsed via parseBlock(), preceded by an
// addUnwrappedLine() when ShouldBreakBeforeBrace() says so. No unwrapped line
// is added after the body (see the comment at the end of the function).
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Remember the introducing token; it selects the brace type further down.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier whose text differs from its upper-cased form is treated
    // as a real name rather than as a macro. The check has to happen before
    // nextToken() moves FormatTok past the token being classified.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    do {
      if (FormatTok->is(tok::l_brace)) {
        // Classify the brace first; if it does not parse as a braced list,
        // leave the loop so it is handled as the record body below.
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            break;
        } else {
          parseSquare();
          continue;
        }
      }
      // A ';' is reached before any body: nothing more to parse here.
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // End the current unwrapped line before the 'where' keyword, then
        // hand the constraint over to the dedicated C# parser.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the introducing keyword to the token type assigned to the '{' that
  // opens the record body.
  auto GetBraceType = [](const FormatToken &RecordTok) {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return TT_ClassLBrace;
    case tok::kw_struct:
      return TT_StructLBrace;
    case tok::kw_union:
      return TT_UnionLBrace;
    default:
      // Useful for e.g. interface.
      return TT_RecordLBrace;
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(GetBraceType(InitialToken));
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // Members get one extra level when access modifiers are indented too.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3640 
3641 void UnwrappedLineParser::parseObjCMethod() {
3642   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3643          "'(' or identifier expected.");
3644   do {
3645     if (FormatTok->is(tok::semi)) {
3646       nextToken();
3647       addUnwrappedLine();
3648       return;
3649     } else if (FormatTok->is(tok::l_brace)) {
3650       if (Style.BraceWrapping.AfterFunction)
3651         addUnwrappedLine();
3652       parseBlock();
3653       addUnwrappedLine();
3654       return;
3655     } else {
3656       nextToken();
3657     }
3658   } while (!eof());
3659 }
3660 
3661 void UnwrappedLineParser::parseObjCProtocolList() {
3662   assert(FormatTok->is(tok::less) && "'<' expected.");
3663   do {
3664     nextToken();
3665     // Early exit in case someone forgot a close angle.
3666     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3667         FormatTok->isObjCAtKeyword(tok::objc_end))
3668       return;
3669   } while (!eof() && FormatTok->isNot(tok::greater));
3670   nextToken(); // Skip '>'.
3671 }
3672 
3673 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3674   do {
3675     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3676       nextToken();
3677       addUnwrappedLine();
3678       break;
3679     }
3680     if (FormatTok->is(tok::l_brace)) {
3681       parseBlock();
3682       // In ObjC interfaces, nothing should be following the "}".
3683       addUnwrappedLine();
3684     } else if (FormatTok->is(tok::r_brace)) {
3685       // Ignore stray "}". parseStructuralElement doesn't consume them.
3686       nextToken();
3687       addUnwrappedLine();
3688     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3689       nextToken();
3690       parseObjCMethod();
3691     } else {
3692       parseStructuralElement();
3693     }
3694   } while (!eof());
3695 }
3696 
3697 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3698   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3699          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3700   nextToken();
3701   nextToken(); // interface name
3702 
3703   // @interface can be followed by a lightweight generic
3704   // specialization list, then either a base class or a category.
3705   if (FormatTok->is(tok::less))
3706     parseObjCLightweightGenerics();
3707   if (FormatTok->is(tok::colon)) {
3708     nextToken();
3709     nextToken(); // base class name
3710     // The base class can also have lightweight generics applied to it.
3711     if (FormatTok->is(tok::less))
3712       parseObjCLightweightGenerics();
3713   } else if (FormatTok->is(tok::l_paren))
3714     // Skip category, if present.
3715     parseParens();
3716 
3717   if (FormatTok->is(tok::less))
3718     parseObjCProtocolList();
3719 
3720   if (FormatTok->is(tok::l_brace)) {
3721     if (Style.BraceWrapping.AfterObjCDeclaration)
3722       addUnwrappedLine();
3723     parseBlock(/*MustBeDeclaration=*/true);
3724   }
3725 
3726   // With instance variables, this puts '}' on its own line.  Without instance
3727   // variables, this ends the @interface line.
3728   addUnwrappedLine();
3729 
3730   parseObjCUntilAtEnd();
3731 }
3732 
3733 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3734   assert(FormatTok->is(tok::less));
3735   // Unlike protocol lists, generic parameterizations support
3736   // nested angles:
3737   //
3738   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3739   //     NSObject <NSCopying, NSSecureCoding>
3740   //
3741   // so we need to count how many open angles we have left.
3742   unsigned NumOpenAngles = 1;
3743   do {
3744     nextToken();
3745     // Early exit in case someone forgot a close angle.
3746     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3747         FormatTok->isObjCAtKeyword(tok::objc_end))
3748       break;
3749     if (FormatTok->is(tok::less))
3750       ++NumOpenAngles;
3751     else if (FormatTok->is(tok::greater)) {
3752       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3753       --NumOpenAngles;
3754     }
3755   } while (!eof() && NumOpenAngles != 0);
3756   nextToken(); // Skip '>'.
3757 }
3758 
3759 // Returns true for the declaration/definition form of @protocol,
3760 // false for the expression form.
3761 bool UnwrappedLineParser::parseObjCProtocol() {
3762   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3763   nextToken();
3764 
3765   if (FormatTok->is(tok::l_paren))
3766     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3767     return false;
3768 
3769   // The definition/declaration form,
3770   // @protocol Foo
3771   // - (int)someMethod;
3772   // @end
3773 
3774   nextToken(); // protocol name
3775 
3776   if (FormatTok->is(tok::less))
3777     parseObjCProtocolList();
3778 
3779   // Check for protocol declaration.
3780   if (FormatTok->is(tok::semi)) {
3781     nextToken();
3782     addUnwrappedLine();
3783     return true;
3784   }
3785 
3786   addUnwrappedLine();
3787   parseObjCUntilAtEnd();
3788   return true;
3789 }
3790 
3791 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3792   bool IsImport = FormatTok->is(Keywords.kw_import);
3793   assert(IsImport || FormatTok->is(tok::kw_export));
3794   nextToken();
3795 
3796   // Consume the "default" in "export default class/function".
3797   if (FormatTok->is(tok::kw_default))
3798     nextToken();
3799 
3800   // Consume "async function", "function" and "default function", so that these
3801   // get parsed as free-standing JS functions, i.e. do not require a trailing
3802   // semicolon.
3803   if (FormatTok->is(Keywords.kw_async))
3804     nextToken();
3805   if (FormatTok->is(Keywords.kw_function)) {
3806     nextToken();
3807     return;
3808   }
3809 
3810   // For imports, `export *`, `export {...}`, consume the rest of the line up
3811   // to the terminating `;`. For everything else, just return and continue
3812   // parsing the structural element, i.e. the declaration or expression for
3813   // `export default`.
3814   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3815       !FormatTok->isStringLiteral())
3816     return;
3817 
3818   while (!eof()) {
3819     if (FormatTok->is(tok::semi))
3820       return;
3821     if (Line->Tokens.empty()) {
3822       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3823       // import statement should terminate.
3824       return;
3825     }
3826     if (FormatTok->is(tok::l_brace)) {
3827       FormatTok->setBlockKind(BK_Block);
3828       nextToken();
3829       parseBracedList();
3830     } else {
3831       nextToken();
3832     }
3833   }
3834 }
3835 
3836 void UnwrappedLineParser::parseStatementMacro() {
3837   nextToken();
3838   if (FormatTok->is(tok::l_paren))
3839     parseParens();
3840   if (FormatTok->is(tok::semi))
3841     nextToken();
3842   addUnwrappedLine();
3843 }
3844 
3845 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3846                                                  StringRef Prefix = "") {
3847   llvm::dbgs() << Prefix << "Line(" << Line.Level
3848                << ", FSC=" << Line.FirstStartColumn << ")"
3849                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3850   for (const auto &Node : Line.Tokens) {
3851     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3852                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3853                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3854   }
3855   for (const auto &Node : Line.Tokens)
3856     for (const auto &ChildNode : Node.Children)
3857       printDebugInfo(ChildNode, "\nChild: ");
3858 
3859   llvm::dbgs() << "\n";
3860 }
3861 
3862 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3863   if (Line->Tokens.empty())
3864     return;
3865   LLVM_DEBUG({
3866     if (CurrentLines == &Lines)
3867       printDebugInfo(*Line);
3868   });
3869 
3870   // If this line closes a block when in Whitesmiths mode, remember that
3871   // information so that the level can be decreased after the line is added.
3872   // This has to happen after the addition of the line since the line itself
3873   // needs to be indented.
3874   bool ClosesWhitesmithsBlock =
3875       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3876       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3877 
3878   CurrentLines->push_back(std::move(*Line));
3879   Line->Tokens.clear();
3880   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3881   Line->FirstStartColumn = 0;
3882 
3883   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3884     --Line->Level;
3885   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3886     CurrentLines->append(
3887         std::make_move_iterator(PreprocessorDirectives.begin()),
3888         std::make_move_iterator(PreprocessorDirectives.end()));
3889     PreprocessorDirectives.clear();
3890   }
3891   // Disconnect the current token from the last token on the previous line.
3892   FormatTok->Previous = nullptr;
3893 }
3894 
3895 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
3896 
3897 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3898   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3899          FormatTok.NewlinesBefore > 0;
3900 }
3901 
3902 // Checks if \p FormatTok is a line comment that continues the line comment
3903 // section on \p Line.
3904 static bool
3905 continuesLineCommentSection(const FormatToken &FormatTok,
3906                             const UnwrappedLine &Line,
3907                             const llvm::Regex &CommentPragmasRegex) {
3908   if (Line.Tokens.empty())
3909     return false;
3910 
3911   StringRef IndentContent = FormatTok.TokenText;
3912   if (FormatTok.TokenText.startswith("//") ||
3913       FormatTok.TokenText.startswith("/*"))
3914     IndentContent = FormatTok.TokenText.substr(2);
3915   if (CommentPragmasRegex.match(IndentContent))
3916     return false;
3917 
3918   // If Line starts with a line comment, then FormatTok continues the comment
3919   // section if its original column is greater or equal to the original start
3920   // column of the line.
3921   //
3922   // Define the min column token of a line as follows: if a line ends in '{' or
3923   // contains a '{' followed by a line comment, then the min column token is
3924   // that '{'. Otherwise, the min column token of the line is the first token of
3925   // the line.
3926   //
3927   // If Line starts with a token other than a line comment, then FormatTok
3928   // continues the comment section if its original column is greater than the
3929   // original start column of the min column token of the line.
3930   //
3931   // For example, the second line comment continues the first in these cases:
3932   //
3933   // // first line
3934   // // second line
3935   //
3936   // and:
3937   //
3938   // // first line
3939   //  // second line
3940   //
3941   // and:
3942   //
3943   // int i; // first line
3944   //  // second line
3945   //
3946   // and:
3947   //
3948   // do { // first line
3949   //      // second line
3950   //   int i;
3951   // } while (true);
3952   //
3953   // and:
3954   //
3955   // enum {
3956   //   a, // first line
3957   //    // second line
3958   //   b
3959   // };
3960   //
3961   // The second line comment doesn't continue the first in these cases:
3962   //
3963   //   // first line
3964   //  // second line
3965   //
3966   // and:
3967   //
3968   // int i; // first line
3969   // // second line
3970   //
3971   // and:
3972   //
3973   // do { // first line
3974   //   // second line
3975   //   int i;
3976   // } while (true);
3977   //
3978   // and:
3979   //
3980   // enum {
3981   //   a, // first line
3982   //   // second line
3983   // };
3984   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3985 
3986   // Scan for '{//'. If found, use the column of '{' as a min column for line
3987   // comment section continuation.
3988   const FormatToken *PreviousToken = nullptr;
3989   for (const UnwrappedLineNode &Node : Line.Tokens) {
3990     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3991         isLineComment(*Node.Tok)) {
3992       MinColumnToken = PreviousToken;
3993       break;
3994     }
3995     PreviousToken = Node.Tok;
3996 
3997     // Grab the last newline preceding a token in this unwrapped line.
3998     if (Node.Tok->NewlinesBefore > 0)
3999       MinColumnToken = Node.Tok;
4000   }
4001   if (PreviousToken && PreviousToken->is(tok::l_brace))
4002     MinColumnToken = PreviousToken;
4003 
4004   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4005                               MinColumnToken);
4006 }
4007 
4008 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4009   bool JustComments = Line->Tokens.empty();
4010   for (FormatToken *Tok : CommentsBeforeNextToken) {
4011     // Line comments that belong to the same line comment section are put on the
4012     // same line since later we might want to reflow content between them.
4013     // Additional fine-grained breaking of line comment sections is controlled
4014     // by the class BreakableLineCommentSection in case it is desirable to keep
4015     // several line comment sections in the same unwrapped line.
4016     //
4017     // FIXME: Consider putting separate line comment sections as children to the
4018     // unwrapped line instead.
4019     Tok->ContinuesLineCommentSection =
4020         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4021     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4022       addUnwrappedLine();
4023     pushToken(Tok);
4024   }
4025   if (NewlineBeforeNext && JustComments)
4026     addUnwrappedLine();
4027   CommentsBeforeNextToken.clear();
4028 }
4029 
4030 void UnwrappedLineParser::nextToken(int LevelDifference) {
4031   if (eof())
4032     return;
4033   flushComments(isOnNewLine(*FormatTok));
4034   pushToken(FormatTok);
4035   FormatToken *Previous = FormatTok;
4036   if (!Style.isJavaScript())
4037     readToken(LevelDifference);
4038   else
4039     readTokenWithJavaScriptASI();
4040   FormatTok->Previous = Previous;
4041 }
4042 
4043 void UnwrappedLineParser::distributeComments(
4044     const SmallVectorImpl<FormatToken *> &Comments,
4045     const FormatToken *NextTok) {
4046   // Whether or not a line comment token continues a line is controlled by
4047   // the method continuesLineCommentSection, with the following caveat:
4048   //
4049   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4050   // that each comment line from the trail is aligned with the next token, if
4051   // the next token exists. If a trail exists, the beginning of the maximal
4052   // trail is marked as a start of a new comment section.
4053   //
4054   // For example in this code:
4055   //
4056   // int a; // line about a
4057   //   // line 1 about b
4058   //   // line 2 about b
4059   //   int b;
4060   //
4061   // the two lines about b form a maximal trail, so there are two sections, the
4062   // first one consisting of the single comment "// line about a" and the
4063   // second one consisting of the next two comments.
4064   if (Comments.empty())
4065     return;
4066   bool ShouldPushCommentsInCurrentLine = true;
4067   bool HasTrailAlignedWithNextToken = false;
4068   unsigned StartOfTrailAlignedWithNextToken = 0;
4069   if (NextTok) {
4070     // We are skipping the first element intentionally.
4071     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4072       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4073         HasTrailAlignedWithNextToken = true;
4074         StartOfTrailAlignedWithNextToken = i;
4075       }
4076     }
4077   }
4078   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4079     FormatToken *FormatTok = Comments[i];
4080     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4081       FormatTok->ContinuesLineCommentSection = false;
4082     } else {
4083       FormatTok->ContinuesLineCommentSection =
4084           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4085     }
4086     if (!FormatTok->ContinuesLineCommentSection &&
4087         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
4088       ShouldPushCommentsInCurrentLine = false;
4089     if (ShouldPushCommentsInCurrentLine)
4090       pushToken(FormatTok);
4091     else
4092       CommentsBeforeNextToken.push_back(FormatTok);
4093   }
4094 }
4095 
// Advances FormatTok to the next token of the token source, buffering the
// comments met on the way.
//
// Merge-conflict marker tokens and preprocessor directives found while
// scanning are handled here as well. The function returns at the first
// non-comment token, after passing the buffered comments together with that
// token to distributeComments(). If eof is reached instead, the buffered
// comments are distributed with no next token.
//
// \p LevelDifference is added to Line->Level before a preprocessor directive
// found while reading is parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments seen since the last non-comment token.
  SmallVector<FormatToken *, 1> Comments;
  bool PreviousWasComment = false;
  bool FirstNonCommentOnLine = false;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // Conflict markers update the conditional-compilation state; the token
    // that follows a marker is forced onto a new line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart)
        conditionalCompilationStart(/*Unreachable=*/false);
      else if (FormatTok->getType() == TT_ConflictAlternative)
        conditionalCompilationAlternative();
      else if (FormatTok->getType() == TT_ConflictEnd)
        conditionalCompilationEnd();
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Decides whether Tok is the first non-comment token on its line, given
    // the previous answer and whether the preceding token was a comment.
    auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
                                      const FormatToken &Tok,
                                      bool PreviousWasComment) {
      auto IsFirstOnLine = [](const FormatToken &Tok) {
        return Tok.HasUnescapedNewline || Tok.IsFirst;
      };

      // Consider preprocessor directives preceded by block comments as first
      // on line.
      if (PreviousWasComment)
        return FirstNonCommentOnLine || IsFirstOnLine(Tok);
      return IsFirstOnLine(Tok);
    };

    FirstNonCommentOnLine = IsFirstNonCommentOnLine(
        FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    PreviousWasComment = FormatTok->is(tok::comment);

    // A '#' that is the first non-comment token on its line starts a
    // preprocessor directive (unless we are already inside one).
    while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
           FirstNonCommentOnLine) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
      // Re-evaluate the state for the token following the directive.
      PreviousWasComment = FormatTok->is(tok::comment);
      FirstNonCommentOnLine = IsFirstNonCommentOnLine(
          FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    }

    // Tokens inside an unreachable preprocessor branch are skipped, unless
    // they belong to a preprocessor directive.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective)
      continue;

    // The first non-comment token ends the scan.
    if (!FormatTok->is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // eof was reached: there is no next token the comments could align with.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
4175 
4176 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4177   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4178   if (MustBreakBeforeNextToken) {
4179     Line->Tokens.back().Tok->MustBreakBefore = true;
4180     MustBreakBeforeNextToken = false;
4181   }
4182 }
4183 
4184 } // end namespace format
4185 } // end namespace clang
4186