1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 #include <utility>
24 
25 #define DEBUG_TYPE "format-parser"
26 
27 namespace clang {
28 namespace format {
29 
30 class FormatTokenSource {
31 public:
32   virtual ~FormatTokenSource() {}
33 
34   // Returns the next token in the token stream.
35   virtual FormatToken *getNextToken() = 0;
36 
37   // Returns the token preceding the token returned by the last call to
38   // getNextToken() in the token stream, or nullptr if no such token exists.
39   virtual FormatToken *getPreviousToken() = 0;
40 
41   // Returns the token that would be returned by the next call to
42   // getNextToken().
43   virtual FormatToken *peekNextToken() = 0;
44 
45   // Returns the token that would be returned after the next N calls to
46   // getNextToken(). N needs to be greater than zero, and small enough that
47   // there are still tokens. Check for tok::eof with N-1 before calling it with
48   // N.
49   virtual FormatToken *peekNextToken(int N) = 0;
50 
51   // Returns whether we are at the end of the file.
52   // This can be different from whether getNextToken() returned an eof token
53   // when the FormatTokenSource is a view on a part of the token stream.
54   virtual bool isEOF() = 0;
55 
56   // Gets the current position in the token stream, to be used by setPosition().
57   virtual unsigned getPosition() = 0;
58 
59   // Resets the token stream to the state it was in when getPosition() returned
60   // Position, and return the token at that position in the stream.
61   virtual FormatToken *setPosition(unsigned Position) = 0;
62 };
63 
64 namespace {
65 
66 class ScopedDeclarationState {
67 public:
68   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
69                          bool MustBeDeclaration)
70       : Line(Line), Stack(Stack) {
71     Line.MustBeDeclaration = MustBeDeclaration;
72     Stack.push_back(MustBeDeclaration);
73   }
74   ~ScopedDeclarationState() {
75     Stack.pop_back();
76     if (!Stack.empty())
77       Line.MustBeDeclaration = Stack.back();
78     else
79       Line.MustBeDeclaration = true;
80   }
81 
82 private:
83   UnwrappedLine &Line;
84   llvm::BitVector &Stack;
85 };
86 
87 static bool isLineComment(const FormatToken &FormatTok) {
88   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
89 }
90 
91 // Checks if \p FormatTok is a line comment that continues the line comment
92 // \p Previous. The original column of \p MinColumnToken is used to determine
93 // whether \p FormatTok is indented enough to the right to continue \p Previous.
94 static bool continuesLineComment(const FormatToken &FormatTok,
95                                  const FormatToken *Previous,
96                                  const FormatToken *MinColumnToken) {
97   if (!Previous || !MinColumnToken)
98     return false;
99   unsigned MinContinueColumn =
100       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
101   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
102          isLineComment(*Previous) &&
103          FormatTok.OriginalColumn >= MinContinueColumn;
104 }
105 
106 class ScopedMacroState : public FormatTokenSource {
107 public:
108   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
109                    FormatToken *&ResetToken)
110       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
111         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
112         Token(nullptr), PreviousToken(nullptr) {
113     FakeEOF.Tok.startToken();
114     FakeEOF.Tok.setKind(tok::eof);
115     TokenSource = this;
116     Line.Level = 0;
117     Line.InPPDirective = true;
118   }
119 
120   ~ScopedMacroState() override {
121     TokenSource = PreviousTokenSource;
122     ResetToken = Token;
123     Line.InPPDirective = false;
124     Line.Level = PreviousLineLevel;
125   }
126 
127   FormatToken *getNextToken() override {
128     // The \c UnwrappedLineParser guards against this by never calling
129     // \c getNextToken() after it has encountered the first eof token.
130     assert(!eof());
131     PreviousToken = Token;
132     Token = PreviousTokenSource->getNextToken();
133     if (eof())
134       return &FakeEOF;
135     return Token;
136   }
137 
138   FormatToken *getPreviousToken() override {
139     return PreviousTokenSource->getPreviousToken();
140   }
141 
142   FormatToken *peekNextToken() override {
143     if (eof())
144       return &FakeEOF;
145     return PreviousTokenSource->peekNextToken();
146   }
147 
148   FormatToken *peekNextToken(int N) override {
149     assert(N > 0);
150     if (eof())
151       return &FakeEOF;
152     return PreviousTokenSource->peekNextToken(N);
153   }
154 
155   bool isEOF() override { return PreviousTokenSource->isEOF(); }
156 
157   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
158 
159   FormatToken *setPosition(unsigned Position) override {
160     PreviousToken = nullptr;
161     Token = PreviousTokenSource->setPosition(Position);
162     return Token;
163   }
164 
165 private:
166   bool eof() {
167     return Token && Token->HasUnescapedNewline &&
168            !continuesLineComment(*Token, PreviousToken,
169                                  /*MinColumnToken=*/PreviousToken);
170   }
171 
172   FormatToken FakeEOF;
173   UnwrappedLine &Line;
174   FormatTokenSource *&TokenSource;
175   FormatToken *&ResetToken;
176   unsigned PreviousLineLevel;
177   FormatTokenSource *PreviousTokenSource;
178 
179   FormatToken *Token;
180   FormatToken *PreviousToken;
181 };
182 
183 } // end anonymous namespace
184 
185 class ScopedLineState {
186 public:
187   ScopedLineState(UnwrappedLineParser &Parser,
188                   bool SwitchToPreprocessorLines = false)
189       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
190     if (SwitchToPreprocessorLines)
191       Parser.CurrentLines = &Parser.PreprocessorDirectives;
192     else if (!Parser.Line->Tokens.empty())
193       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
194     PreBlockLine = std::move(Parser.Line);
195     Parser.Line = std::make_unique<UnwrappedLine>();
196     Parser.Line->Level = PreBlockLine->Level;
197     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
198   }
199 
200   ~ScopedLineState() {
201     if (!Parser.Line->Tokens.empty())
202       Parser.addUnwrappedLine();
203     assert(Parser.Line->Tokens.empty());
204     Parser.Line = std::move(PreBlockLine);
205     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
206       Parser.MustBreakBeforeNextToken = true;
207     Parser.CurrentLines = OriginalLines;
208   }
209 
210 private:
211   UnwrappedLineParser &Parser;
212 
213   std::unique_ptr<UnwrappedLine> PreBlockLine;
214   SmallVectorImpl<UnwrappedLine> *OriginalLines;
215 };
216 
217 class CompoundStatementIndenter {
218 public:
219   CompoundStatementIndenter(UnwrappedLineParser *Parser,
220                             const FormatStyle &Style, unsigned &LineLevel)
221       : CompoundStatementIndenter(Parser, LineLevel,
222                                   Style.BraceWrapping.AfterControlStatement,
223                                   Style.BraceWrapping.IndentBraces) {}
224   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
225                             bool WrapBrace, bool IndentBrace)
226       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
227     if (WrapBrace)
228       Parser->addUnwrappedLine();
229     if (IndentBrace)
230       ++LineLevel;
231   }
232   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
233 
234 private:
235   unsigned &LineLevel;
236   unsigned OldLineLevel;
237 };
238 
239 namespace {
240 
241 class IndexedTokenSource : public FormatTokenSource {
242 public:
243   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
244       : Tokens(Tokens), Position(-1) {}
245 
246   FormatToken *getNextToken() override {
247     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
248       LLVM_DEBUG({
249         llvm::dbgs() << "Next ";
250         dbgToken(Position);
251       });
252       return Tokens[Position];
253     }
254     ++Position;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Next ";
257       dbgToken(Position);
258     });
259     return Tokens[Position];
260   }
261 
262   FormatToken *getPreviousToken() override {
263     return Position > 0 ? Tokens[Position - 1] : nullptr;
264   }
265 
266   FormatToken *peekNextToken() override {
267     int Next = Position + 1;
268     LLVM_DEBUG({
269       llvm::dbgs() << "Peeking ";
270       dbgToken(Next);
271     });
272     return Tokens[Next];
273   }
274 
275   FormatToken *peekNextToken(int N) override {
276     assert(N > 0);
277     int Next = Position + N;
278     LLVM_DEBUG({
279       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
280       dbgToken(Next);
281     });
282     return Tokens[Next];
283   }
284 
285   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
286 
287   unsigned getPosition() override {
288     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
289     assert(Position >= 0);
290     return Position;
291   }
292 
293   FormatToken *setPosition(unsigned P) override {
294     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
295     Position = P;
296     return Tokens[Position];
297   }
298 
299   void reset() { Position = -1; }
300 
301 private:
302   void dbgToken(int Position, llvm::StringRef Indent = "") {
303     FormatToken *Tok = Tokens[Position];
304     llvm::dbgs() << Indent << "[" << Position
305                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
306                  << ", Macro: " << !!Tok->MacroCtx << "\n";
307   }
308 
309   ArrayRef<FormatToken *> Tokens;
310   int Position;
311 };
312 
313 } // end anonymous namespace
314 
315 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
316                                          const AdditionalKeywords &Keywords,
317                                          unsigned FirstStartColumn,
318                                          ArrayRef<FormatToken *> Tokens,
319                                          UnwrappedLineConsumer &Callback)
320     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
321       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
322       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
323       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
324       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
325                        ? IG_Rejected
326                        : IG_Inited),
327       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
328 
329 void UnwrappedLineParser::reset() {
330   PPBranchLevel = -1;
331   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
332                      ? IG_Rejected
333                      : IG_Inited;
334   IncludeGuardToken = nullptr;
335   Line.reset(new UnwrappedLine);
336   CommentsBeforeNextToken.clear();
337   FormatTok = nullptr;
338   MustBreakBeforeNextToken = false;
339   PreprocessorDirectives.clear();
340   CurrentLines = &Lines;
341   DeclarationScopeStack.clear();
342   NestedTooDeep.clear();
343   PPStack.clear();
344   Line->FirstStartColumn = FirstStartColumn;
345 }
346 
347 void UnwrappedLineParser::parse() {
348   IndexedTokenSource TokenSource(AllTokens);
349   Line->FirstStartColumn = FirstStartColumn;
350   do {
351     LLVM_DEBUG(llvm::dbgs() << "----\n");
352     reset();
353     Tokens = &TokenSource;
354     TokenSource.reset();
355 
356     readToken();
357     parseFile();
358 
359     // If we found an include guard then all preprocessor directives (other than
360     // the guard) are over-indented by one.
361     if (IncludeGuard == IG_Found)
362       for (auto &Line : Lines)
363         if (Line.InPPDirective && Line.Level > 0)
364           --Line.Level;
365 
366     // Create line with eof token.
367     pushToken(FormatTok);
368     addUnwrappedLine();
369 
370     for (const UnwrappedLine &Line : Lines)
371       Callback.consumeUnwrappedLine(Line);
372 
373     Callback.finishRun();
374     Lines.clear();
375     while (!PPLevelBranchIndex.empty() &&
376            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
377       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
378       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
379     }
380     if (!PPLevelBranchIndex.empty()) {
381       ++PPLevelBranchIndex.back();
382       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
383       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
384     }
385   } while (!PPLevelBranchIndex.empty());
386 }
387 
388 void UnwrappedLineParser::parseFile() {
389   // The top-level context in a file always has declarations, except for pre-
390   // processor directives and JavaScript files.
391   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
392   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
393                                           MustBeDeclaration);
394   if (Style.Language == FormatStyle::LK_TextProto)
395     parseBracedList();
396   else
397     parseLevel(/*HasOpeningBrace=*/false, /*CanContainBracedList=*/true);
398   // Make sure to format the remaining tokens.
399   //
400   // LK_TextProto is special since its top-level is parsed as the body of a
401   // braced list, which does not necessarily have natural line separators such
402   // as a semicolon. Comments after the last entry that have been determined to
403   // not belong to that line, as in:
404   //   key: value
405   //   // endfile comment
406   // do not have a chance to be put on a line of their own until this point.
407   // Here we add this newline before end-of-file comments.
408   if (Style.Language == FormatStyle::LK_TextProto &&
409       !CommentsBeforeNextToken.empty())
410     addUnwrappedLine();
411   flushComments(true);
412   addUnwrappedLine();
413 }
414 
415 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
416   do {
417     switch (FormatTok->Tok.getKind()) {
418     case tok::l_brace:
419       return;
420     default:
421       if (FormatTok->is(Keywords.kw_where)) {
422         addUnwrappedLine();
423         nextToken();
424         parseCSharpGenericTypeConstraint();
425         break;
426       }
427       nextToken();
428       break;
429     }
430   } while (!eof());
431 }
432 
433 void UnwrappedLineParser::parseCSharpAttribute() {
434   int UnpairedSquareBrackets = 1;
435   do {
436     switch (FormatTok->Tok.getKind()) {
437     case tok::r_square:
438       nextToken();
439       --UnpairedSquareBrackets;
440       if (UnpairedSquareBrackets == 0) {
441         addUnwrappedLine();
442         return;
443       }
444       break;
445     case tok::l_square:
446       ++UnpairedSquareBrackets;
447       nextToken();
448       break;
449     default:
450       nextToken();
451       break;
452     }
453   } while (!eof());
454 }
455 
456 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
457   if (!Lines.empty() && Lines.back().InPPDirective)
458     return true;
459 
460   const FormatToken *Previous = Tokens->getPreviousToken();
461   return Previous && Previous->is(tok::comment) &&
462          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
463 }
464 
/// \brief Parses one nesting level: a run of comments, blocks, case labels,
/// attributes and other structural elements. Parsing continues until the end
/// of input or, if \p HasOpeningBrace is set, until the closing brace (or
/// macro block end) of this level is reached; that closing token is not
/// consumed.
/// \param HasOpeningBrace If that level is started by an opening brace.
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param IfKind Forwarded to parseStructuralElement() for elements handled
/// by the default path.
/// \param NextLBracesType The type for left brace found in this level.
/// \returns true if a simple block, or false otherwise. (A simple block has a
/// single statement.) true is only ever returned when Style.RemoveBracesLLVM
/// is set and the level ends at a real '}' that is neither preceded by a
/// comment or preprocessor directive nor followed by a comment on the same
/// line, and the level contains no label.
bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace,
                                     bool CanContainBracedList,
                                     IfStmtKind *IfKind,
                                     TokenType NextLBracesType) {
  // Braces nested one level below a compound requirement are typed as braced
  // lists; otherwise no type is imposed on them.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Evaluated once on entry. It is forced to true when brace removal is off,
  // in which case the value is never consulted (see the r_brace case).
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    // Attribute macros are skipped without starting a statement.
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched as if they were braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it as a statement. The label
    // out-parameter is only passed until the first label has been recorded.
    auto ParseDefault = [this, HasOpeningBrace, IfKind, NextLevelLBracesType,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(IfKind, !HasOpeningBrace, NextLevelLBracesType,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment at this level forms a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      else if (FormatTok->Previous &&
               FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      // Real braces (not macro block begins) may turn out to open a braced
      // list instead of a block.
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*UnindentWhitesmithBraces=*/false,
                 CanContainBracedList,
                 /*NextLBracesType=*/NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace) {
        // End of this level. The closing token is left for the caller.
        if (!Style.RemoveBracesLLVM)
          return false;
        // The block is only "simple" if it ends at a real '}' (not a macro
        // block end), holds exactly one statement and no label, and no
        // comment or preprocessor directive precedes either the level or the
        // closing brace.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective())
          return false;
        // A comment following the '}' on the same line also disqualifies it.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // A closing brace without a matching opener at this level is consumed
      // and put on its own line.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past any comments to see whether a ':' follows, then
      // rewind the token source to where it was.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // The first label seen at this level raises the line level by one when
      // case labels are indented, or when inside a preprocessor directive at
      // level 1.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        // Consume the '[' here; parseCSharpAttribute() handles the rest.
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());
  return false;
}
588 
// Starting at the current token, which must be an l_brace, scans forward and
// sets the block kind (BK_BracedInit or BK_Block) of that brace and of the
// braces nested inside it, based on the tokens found inside and right after
// each brace pair. The scan stops once the starting brace is closed or eof is
// reached; any brace still unclassified is then marked BK_Block. The token
// source is rewound to its initial position before returning.
// \p ExpectClassBody makes a ';' after the closing brace of the outermost
// brace pair no longer count as evidence for a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    // Tok is the token being classified; NextTok is the lookahead used by
    // the heuristics below.
    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only braces that are still undecided get classified here.
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If one of the tokens listed below (comma, period, colon, closing
          // paren or square bracket, opening brace, ellipsis) follows the
          // closing brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis);

          // An identifier after the closing brace counts as well, unless the
          // token before the closing brace is ';', '}' or '{'.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          // A following ';' counts, except for the outermost brace pair when
          // a class body is expected.
          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the verdict on both the closing and the matching opening
        // brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only identifiers typed as statement macros are treated like the
      // statement tokens below.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these inside a still-undecided brace marks it as a block.
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (FormatToken *LBrace : LBraceStack)
    if (LBrace->is(BK_Unknown))
      LBrace->setBlockKind(BK_Block);

  // Rewind so that parsing resumes at the brace we started from.
  FormatTok = Tokens->setPosition(StoredPosition);
}
740 
// Mixes the std::hash of \p v into \p seed in place, using the constant
// 0x9e3779b9 together with shifted copies of the previous seed value.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
746 
747 size_t UnwrappedLineParser::computePPHash() const {
748   size_t h = 0;
749   for (const auto &i : PPStack) {
750     hash_combine(h, size_t(i.Kind));
751     hash_combine(h, i.Line);
752   }
753   return h;
754 }
755 
// Checks whether \p ParsedLine might fit on a single line. We must clone the
// tokens of \p ParsedLine before running the token annotator on it so that we
// can restore them afterward.
//
// Always returns true when the style has no column limit. Otherwise the line
// is annotated, and the result is whether its indentation (Level *
// IndentWidth) plus the TotalLength of its last token stays within the
// column limit. \p ParsedLine must contain at least one token.
bool UnwrappedLineParser::mightFitOnOneLine(UnwrappedLine &ParsedLine) const {
  const auto ColumnLimit = Style.ColumnLimit;
  if (ColumnLimit == 0)
    return true;

  auto &Tokens = ParsedLine.Tokens;
  assert(!Tokens.empty());
  const auto *LastToken = Tokens.back().Tok;
  assert(LastToken);

  // One backup slot per token of the line.
  SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());

  // Back up every token (as a heap-allocated copy) and its children.
  // NOTE(review): Token is a const reference here, so std::move() yields a
  // const rvalue; this presumably selects copy-assignment and leaves
  // Token.Children intact during annotation - confirm before changing it.
  int Index = 0;
  for (const auto &Token : Tokens) {
    assert(Token.Tok);
    auto &SavedToken = SavedTokens[Index++];
    SavedToken.Tok = new FormatToken;
    SavedToken.Tok->copyFrom(*Token.Tok);
    SavedToken.Children = std::move(Token.Children);
  }

  AnnotatedLine Line(ParsedLine);
  assert(Line.Last == LastToken);

  TokenAnnotator Annotator(Style, Keywords);
  Annotator.annotate(Line);
  Annotator.calculateFormattingInformation(Line);

  // Read the length before the tokens are restored below.
  const int Length = LastToken->TotalLength;

  // Restore the tokens from the backups and free the temporary copies.
  // NOTE(review): SavedToken is likewise a const reference, so this
  // std::move() presumably copies as well.
  Index = 0;
  for (auto &Token : Tokens) {
    const auto &SavedToken = SavedTokens[Index++];
    Token.Tok->copyFrom(*SavedToken.Tok);
    Token.Children = std::move(SavedToken.Children);
    delete SavedToken.Tok;
  }

  return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
}
799 
/// Parses a block that starts at the current token, which must be either a
/// '{' or a TT_MacroBlockBegin token, up to and including its closing token.
///
/// \param MustBeDeclaration forwarded to the ScopedDeclarationState that is
///        active while the contents of the block are parsed.
/// \param AddLevels number of indentation levels applied to the contents of
///        the block.
/// \param MunchSemi if true, a ';' following the closing token is consumed as
///        part of the block.
/// \param UnindentWhitesmithsBraces if true, the line level is decremented
///        once the opening line has been added (see the comment below).
/// \param CanContainBracedList forwarded to parseLevel().
/// \param NextLBracesType forwarded to parseLevel().
/// \returns the IfStmtKind written by parseLevel(), which stays
///          IfStmtKind::NotIf if parseLevel() does not set it.
UnwrappedLineParser::IfStmtKind
UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                bool MunchSemi, bool UnindentWhitesmithsBraces,
                                bool CanContainBracedList,
                                TokenType NextLBracesType) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Keep a handle on the opening token; FormatTok advances while parsing.
  FormatToken *Tok = FormatTok;
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Hash taken at the opening token; it is compared with the hash taken at
  // the closing token before the opening and closing lines are linked below.
  size_t PPStartHash = computePPHash();

  // The level to restore once the block has been parsed.
  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  // A macro block opener may be followed by an argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Index of the line that holds the opening token, or kInvalidIndex if no
  // line is available. NOTE(review): NbPreprocessorDirectives is subtracted
  // presumably because addUnwrappedLine() appends pending preprocessor
  // directive lines behind the opening line - confirm against its definition.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  // In Whitesmiths mode the level was already bumped before the brace.
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock = parseLevel(
      /*HasOpeningBrace=*/true, CanContainBracedList, &IfKind, NextLBracesType);

  // Ran out of tokens before the block was closed.
  if (eof())
    return IfKind;

  // parseLevel() stopped on something other than the expected closing token:
  // restore the level and leave the token for the caller.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // For a brace-delimited block that parseLevel() reported as simple, link the
  // two braces through MatchingParen, but only if the token before the closing
  // brace is not a non-optional '}' and the last line might fit on one line.
  if (SimpleBlock && Tok->is(tok::l_brace)) {
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      assert(!CurrentLines->empty());
      if (mightFitOnOneLine(CurrentLines->back())) {
        Tok->MatchingParen = FormatTok;
        FormatTok->MatchingParen = Tok;
      }
    }
  }

  // Hash taken at the closing token (see PPStartHash above).
  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  // A macro block closer may be followed by an argument list as well.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Only cross-reference the opening and closing lines when the hash is the
  // same at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}
906 
907 static bool isGoogScope(const UnwrappedLine &Line) {
908   // FIXME: Closure-library specific stuff should not be hard-coded but be
909   // configurable.
910   if (Line.Tokens.size() < 4)
911     return false;
912   auto I = Line.Tokens.begin();
913   if (I->Tok->TokenText != "goog")
914     return false;
915   ++I;
916   if (I->Tok->isNot(tok::period))
917     return false;
918   ++I;
919   if (I->Tok->TokenText != "scope")
920     return false;
921   ++I;
922   return I->Tok->is(tok::l_paren);
923 }
924 
925 static bool isIIFE(const UnwrappedLine &Line,
926                    const AdditionalKeywords &Keywords) {
927   // Look for the start of an immediately invoked anonymous function.
928   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
929   // This is commonly done in JavaScript to create a new, anonymous scope.
930   // Example: (function() { ... })()
931   if (Line.Tokens.size() < 3)
932     return false;
933   auto I = Line.Tokens.begin();
934   if (I->Tok->isNot(tok::l_paren))
935     return false;
936   ++I;
937   if (I->Tok->isNot(Keywords.kw_function))
938     return false;
939   ++I;
940   return I->Tok->is(tok::l_paren);
941 }
942 
943 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
944                                    const FormatToken &InitialToken) {
945   tok::TokenKind Kind = InitialToken.Tok.getKind();
946   if (InitialToken.is(TT_NamespaceMacro))
947     Kind = tok::kw_namespace;
948 
949   switch (Kind) {
950   case tok::kw_namespace:
951     return Style.BraceWrapping.AfterNamespace;
952   case tok::kw_class:
953     return Style.BraceWrapping.AfterClass;
954   case tok::kw_union:
955     return Style.BraceWrapping.AfterUnion;
956   case tok::kw_struct:
957     return Style.BraceWrapping.AfterStruct;
958   case tok::kw_enum:
959     return Style.BraceWrapping.AfterEnum;
960   default:
961     return false;
962   }
963 }
964 
965 void UnwrappedLineParser::parseChildBlock(
966     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
967   FormatTok->setBlockKind(BK_Block);
968   nextToken();
969   {
970     bool SkipIndent = (Style.isJavaScript() &&
971                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
972     ScopedLineState LineState(*this);
973     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
974                                             /*MustBeDeclaration=*/false);
975     Line->Level += SkipIndent ? 0 : 1;
976     parseLevel(/*HasOpeningBrace=*/true, CanContainBracedList,
977                /*IfKind=*/nullptr, NextLBracesType);
978     flushComments(isOnNewLine(*FormatTok));
979     Line->Level -= SkipIndent ? 0 : 1;
980   }
981   nextToken();
982 }
983 
984 void UnwrappedLineParser::parsePPDirective() {
985   assert(FormatTok->is(tok::hash) && "'#' expected");
986   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
987 
988   nextToken();
989 
990   if (!FormatTok->Tok.getIdentifierInfo()) {
991     parsePPUnknown();
992     return;
993   }
994 
995   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
996   case tok::pp_define:
997     parsePPDefine();
998     return;
999   case tok::pp_if:
1000     parsePPIf(/*IfDef=*/false);
1001     break;
1002   case tok::pp_ifdef:
1003   case tok::pp_ifndef:
1004     parsePPIf(/*IfDef=*/true);
1005     break;
1006   case tok::pp_else:
1007     parsePPElse();
1008     break;
1009   case tok::pp_elifdef:
1010   case tok::pp_elifndef:
1011   case tok::pp_elif:
1012     parsePPElIf();
1013     break;
1014   case tok::pp_endif:
1015     parsePPEndIf();
1016     break;
1017   default:
1018     parsePPUnknown();
1019     break;
1020   }
1021 }
1022 
1023 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1024   size_t Line = CurrentLines->size();
1025   if (CurrentLines == &PreprocessorDirectives)
1026     Line += Lines.size();
1027 
1028   if (Unreachable ||
1029       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
1030     PPStack.push_back({PP_Unreachable, Line});
1031   else
1032     PPStack.push_back({PP_Conditional, Line});
1033 }
1034 
1035 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1036   ++PPBranchLevel;
1037   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1038   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1039     PPLevelBranchIndex.push_back(0);
1040     PPLevelBranchCount.push_back(0);
1041   }
1042   PPChainBranchIndex.push(0);
1043   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1044   conditionalCompilationCondition(Unreachable || Skip);
1045 }
1046 
1047 void UnwrappedLineParser::conditionalCompilationAlternative() {
1048   if (!PPStack.empty())
1049     PPStack.pop_back();
1050   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1051   if (!PPChainBranchIndex.empty())
1052     ++PPChainBranchIndex.top();
1053   conditionalCompilationCondition(
1054       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1055       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1056 }
1057 
1058 void UnwrappedLineParser::conditionalCompilationEnd() {
1059   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1060   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1061     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1062       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1063   }
1064   // Guard against #endif's without #if.
1065   if (PPBranchLevel > -1)
1066     --PPBranchLevel;
1067   if (!PPChainBranchIndex.empty())
1068     PPChainBranchIndex.pop();
1069   if (!PPStack.empty())
1070     PPStack.pop_back();
1071 }
1072 
1073 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1074   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1075   nextToken();
1076   bool Unreachable = false;
1077   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1078     Unreachable = true;
1079   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1080     Unreachable = true;
1081   conditionalCompilationStart(Unreachable);
1082   FormatToken *IfCondition = FormatTok;
1083   // If there's a #ifndef on the first line, and the only lines before it are
1084   // comments, it could be an include guard.
1085   bool MaybeIncludeGuard = IfNDef;
1086   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
1087     for (auto &Line : Lines) {
1088       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1089         MaybeIncludeGuard = false;
1090         IncludeGuard = IG_Rejected;
1091         break;
1092       }
1093     }
1094   --PPBranchLevel;
1095   parsePPUnknown();
1096   ++PPBranchLevel;
1097   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1098     IncludeGuard = IG_IfNdefed;
1099     IncludeGuardToken = IfCondition;
1100   }
1101 }
1102 
1103 void UnwrappedLineParser::parsePPElse() {
1104   // If a potential include guard has an #else, it's not an include guard.
1105   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1106     IncludeGuard = IG_Rejected;
1107   conditionalCompilationAlternative();
1108   if (PPBranchLevel > -1)
1109     --PPBranchLevel;
1110   parsePPUnknown();
1111   ++PPBranchLevel;
1112 }
1113 
1114 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1115 
1116 void UnwrappedLineParser::parsePPEndIf() {
1117   conditionalCompilationEnd();
1118   parsePPUnknown();
1119   // If the #endif of a potential include guard is the last thing in the file,
1120   // then we found an include guard.
1121   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1122       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1123     IncludeGuard = IG_Found;
1124 }
1125 
1126 void UnwrappedLineParser::parsePPDefine() {
1127   nextToken();
1128 
1129   if (!FormatTok->Tok.getIdentifierInfo()) {
1130     IncludeGuard = IG_Rejected;
1131     IncludeGuardToken = nullptr;
1132     parsePPUnknown();
1133     return;
1134   }
1135 
1136   if (IncludeGuard == IG_IfNdefed &&
1137       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1138     IncludeGuard = IG_Defined;
1139     IncludeGuardToken = nullptr;
1140     for (auto &Line : Lines) {
1141       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1142         IncludeGuard = IG_Rejected;
1143         break;
1144       }
1145     }
1146   }
1147 
1148   // In the context of a define, even keywords should be treated as normal
1149   // identifiers. Setting the kind to identifier is not enough, because we need
1150   // to treat additional keywords like __except as well, which are already
1151   // identifiers. Setting the identifier info to null interferes with include
1152   // guard processing above, and changes preprocessing nesting.
1153   FormatTok->Tok.setKind(tok::identifier);
1154   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1155   nextToken();
1156   if (FormatTok->Tok.getKind() == tok::l_paren &&
1157       !FormatTok->hasWhitespaceBefore())
1158     parseParens();
1159   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1160     Line->Level += PPBranchLevel + 1;
1161   addUnwrappedLine();
1162   ++Line->Level;
1163 
1164   // Errors during a preprocessor directive can only affect the layout of the
1165   // preprocessor directive, and thus we ignore them. An alternative approach
1166   // would be to use the same approach we use on the file level (no
1167   // re-indentation if there was a structural error) within the macro
1168   // definition.
1169   parseFile();
1170 }
1171 
1172 void UnwrappedLineParser::parsePPUnknown() {
1173   do {
1174     nextToken();
1175   } while (!eof());
1176   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1177     Line->Level += PPBranchLevel + 1;
1178   addUnwrappedLine();
1179 }
1180 
1181 // Here we exclude certain tokens that are not usually the first token in an
1182 // unwrapped line. This is used in attempt to distinguish macro calls without
1183 // trailing semicolons from other constructs split to several lines.
1184 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1185   // Semicolon can be a null-statement, l_square can be a start of a macro or
1186   // a C++11 attribute, but this doesn't seem to be common.
1187   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1188          Tok.isNot(TT_AttributeSquare) &&
1189          // Tokens that can only be used as binary operators and a part of
1190          // overloaded operator names.
1191          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1192          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1193          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1194          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1195          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1196          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1197          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1198          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1199          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1200          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1201          Tok.isNot(tok::lesslessequal) &&
1202          // Colon is used in labels, base class lists, initializer lists,
1203          // range-based for loops, ternary operator, but should never be the
1204          // first token in an unwrapped line.
1205          Tok.isNot(tok::colon) &&
1206          // 'noexcept' is a trailing annotation.
1207          Tok.isNot(tok::kw_noexcept);
1208 }
1209 
1210 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1211                           const FormatToken *FormatTok) {
1212   // FIXME: This returns true for C/C++ keywords like 'struct'.
1213   return FormatTok->is(tok::identifier) &&
1214          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1215           !FormatTok->isOneOf(
1216               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1217               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1218               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1219               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1220               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1221               Keywords.kw_instanceof, Keywords.kw_interface,
1222               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1223 }
1224 
1225 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1226                                  const FormatToken *FormatTok) {
1227   return FormatTok->Tok.isLiteral() ||
1228          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1229          mustBeJSIdent(Keywords, FormatTok);
1230 }
1231 
1232 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1233 // when encountered after a value (see mustBeJSIdentOrValue).
1234 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1235                            const FormatToken *FormatTok) {
1236   return FormatTok->isOneOf(
1237       tok::kw_return, Keywords.kw_yield,
1238       // conditionals
1239       tok::kw_if, tok::kw_else,
1240       // loops
1241       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1242       // switch/case
1243       tok::kw_switch, tok::kw_case,
1244       // exceptions
1245       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1246       // declaration
1247       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1248       Keywords.kw_async, Keywords.kw_function,
1249       // import/export
1250       Keywords.kw_import, tok::kw_export);
1251 }
1252 
1253 // Checks whether a token is a type in K&R C (aka C78).
1254 static bool isC78Type(const FormatToken &Tok) {
1255   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1256                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1257                      tok::identifier);
1258 }
1259 
1260 // This function checks whether a token starts the first parameter declaration
1261 // in a K&R C (aka C78) function definition, e.g.:
1262 //   int f(a, b)
1263 //   short a, b;
1264 //   {
1265 //      return a + b;
1266 //   }
1267 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1268                                const FormatToken *FuncName) {
1269   assert(Tok);
1270   assert(Next);
1271   assert(FuncName);
1272 
1273   if (FuncName->isNot(tok::identifier))
1274     return false;
1275 
1276   const FormatToken *Prev = FuncName->Previous;
1277   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1278     return false;
1279 
1280   if (!isC78Type(*Tok) &&
1281       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1282     return false;
1283 
1284   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1285     return false;
1286 
1287   Tok = Tok->Previous;
1288   if (!Tok || Tok->isNot(tok::r_paren))
1289     return false;
1290 
1291   Tok = Tok->Previous;
1292   if (!Tok || Tok->isNot(tok::identifier))
1293     return false;
1294 
1295   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1296 }
1297 
1298 void UnwrappedLineParser::parseModuleImport() {
1299   nextToken();
1300   while (!eof()) {
1301     if (FormatTok->is(tok::colon)) {
1302       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1303     }
1304     // Handle import <foo/bar.h> as we would an include statement.
1305     else if (FormatTok->is(tok::less)) {
1306       nextToken();
1307       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1308         // Mark tokens up to the trailing line comments as implicit string
1309         // literals.
1310         if (FormatTok->isNot(tok::comment) &&
1311             !FormatTok->TokenText.startswith("//"))
1312           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1313         nextToken();
1314       }
1315     }
1316     if (FormatTok->is(tok::semi)) {
1317       nextToken();
1318       break;
1319     }
1320     nextToken();
1321   }
1322 
1323   addUnwrappedLine();
1324 }
1325 
// readTokenWithJavaScriptASI reads the next token and terminates the current
// line if JavaScript Automatic Semicolon Insertion must
// happen between the current token and the next token.
//
// This method is conservative - it cannot cover all edge cases of JavaScript,
// but only aims to correctly handle certain well known cases. It *must not*
// terminate the line in speculative cases.
void UnwrappedLineParser::readTokenWithJavaScriptASI() {
  FormatToken *Previous = FormatTok;
  readToken();
  FormatToken *Next = FormatTok;

  // Nothing to do unless there is a line break between the two tokens. If
  // comments precede the next token, the first comment decides.
  bool IsOnSameLine =
      CommentsBeforeNextToken.empty()
          ? Next->NewlinesBefore == 0
          : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
  if (IsOnSameLine)
    return;

  bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
  // A template string chunk ending in "${" opens an embedded expression.
  bool PreviousStartsTemplateExpr =
      Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
  if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
    // If the line contains an '@' sign, the previous token might be an
    // annotation, which can precede another identifier/value.
    bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
      return LineNode.Tok->is(tok::at);
    });
    if (HasAt)
      return;
  }
  // A value followed by '!' on the next line: break the line before the '!'.
  if (Next->is(tok::exclaim) && PreviousMustBeValue)
    return addUnwrappedLine();
  bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
  // A template string chunk starting with "}" closes an embedded expression.
  bool NextEndsTemplateExpr =
      Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
  // A value on the next line that follows a value, ']', ')', '++' or '--'
  // starts a new line, unless either token is a template expression boundary.
  if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
      (PreviousMustBeValue ||
       Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
                         tok::minusminus)))
    return addUnwrappedLine();
  // A declaration or statement keyword after a value or ')' starts a new
  // line.
  if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
      isJSDeclOrStmt(Keywords, Next))
    return addUnwrappedLine();
}
1371 
1372 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
1373                                                  bool IsTopLevel,
1374                                                  TokenType NextLBracesType,
1375                                                  bool *HasLabel) {
1376   if (Style.Language == FormatStyle::LK_TableGen &&
1377       FormatTok->is(tok::pp_include)) {
1378     nextToken();
1379     if (FormatTok->is(tok::string_literal))
1380       nextToken();
1381     addUnwrappedLine();
1382     return;
1383   }
1384   switch (FormatTok->Tok.getKind()) {
1385   case tok::kw_asm:
1386     nextToken();
1387     if (FormatTok->is(tok::l_brace)) {
1388       FormatTok->setFinalizedType(TT_InlineASMBrace);
1389       nextToken();
1390       while (FormatTok && FormatTok->isNot(tok::eof)) {
1391         if (FormatTok->is(tok::r_brace)) {
1392           FormatTok->setFinalizedType(TT_InlineASMBrace);
1393           nextToken();
1394           addUnwrappedLine();
1395           break;
1396         }
1397         FormatTok->Finalized = true;
1398         nextToken();
1399       }
1400     }
1401     break;
1402   case tok::kw_namespace:
1403     parseNamespace();
1404     return;
1405   case tok::kw_public:
1406   case tok::kw_protected:
1407   case tok::kw_private:
1408     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1409         Style.isCSharp())
1410       nextToken();
1411     else
1412       parseAccessSpecifier();
1413     return;
1414   case tok::kw_if:
1415     if (Style.isJavaScript() && Line->MustBeDeclaration)
1416       // field/method declaration.
1417       break;
1418     parseIfThenElse(IfKind);
1419     return;
1420   case tok::kw_for:
1421   case tok::kw_while:
1422     if (Style.isJavaScript() && Line->MustBeDeclaration)
1423       // field/method declaration.
1424       break;
1425     parseForOrWhileLoop();
1426     return;
1427   case tok::kw_do:
1428     if (Style.isJavaScript() && Line->MustBeDeclaration)
1429       // field/method declaration.
1430       break;
1431     parseDoWhile();
1432     return;
1433   case tok::kw_switch:
1434     if (Style.isJavaScript() && Line->MustBeDeclaration)
1435       // 'switch: string' field declaration.
1436       break;
1437     parseSwitch();
1438     return;
1439   case tok::kw_default:
1440     if (Style.isJavaScript() && Line->MustBeDeclaration)
1441       // 'default: string' field declaration.
1442       break;
1443     nextToken();
1444     if (FormatTok->is(tok::colon)) {
1445       parseLabel();
1446       return;
1447     }
1448     // e.g. "default void f() {}" in a Java interface.
1449     break;
1450   case tok::kw_case:
1451     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1452       // 'case: string' field declaration.
1453       nextToken();
1454       break;
1455     }
1456     parseCaseLabel();
1457     return;
1458   case tok::kw_try:
1459   case tok::kw___try:
1460     if (Style.isJavaScript() && Line->MustBeDeclaration)
1461       // field/method declaration.
1462       break;
1463     parseTryCatch();
1464     return;
1465   case tok::kw_extern:
1466     nextToken();
1467     if (FormatTok->is(tok::string_literal)) {
1468       nextToken();
1469       if (FormatTok->is(tok::l_brace)) {
1470         if (Style.BraceWrapping.AfterExternBlock)
1471           addUnwrappedLine();
1472         // Either we indent or for backwards compatibility we follow the
1473         // AfterExternBlock style.
1474         unsigned AddLevels =
1475             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1476                     (Style.BraceWrapping.AfterExternBlock &&
1477                      Style.IndentExternBlock ==
1478                          FormatStyle::IEBS_AfterExternBlock)
1479                 ? 1u
1480                 : 0u;
1481         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1482         addUnwrappedLine();
1483         return;
1484       }
1485     }
1486     break;
1487   case tok::kw_export:
1488     if (Style.isJavaScript()) {
1489       parseJavaScriptEs6ImportExport();
1490       return;
1491     }
1492     if (!Style.isCpp())
1493       break;
1494     // Handle C++ "(inline|export) namespace".
1495     LLVM_FALLTHROUGH;
1496   case tok::kw_inline:
1497     nextToken();
1498     if (FormatTok->is(tok::kw_namespace)) {
1499       parseNamespace();
1500       return;
1501     }
1502     break;
1503   case tok::identifier:
1504     if (FormatTok->is(TT_ForEachMacro)) {
1505       parseForOrWhileLoop();
1506       return;
1507     }
1508     if (FormatTok->is(TT_MacroBlockBegin)) {
1509       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1510                  /*MunchSemi=*/false);
1511       return;
1512     }
1513     if (FormatTok->is(Keywords.kw_import)) {
1514       if (Style.isJavaScript()) {
1515         parseJavaScriptEs6ImportExport();
1516         return;
1517       }
1518       if (Style.Language == FormatStyle::LK_Proto) {
1519         nextToken();
1520         if (FormatTok->is(tok::kw_public))
1521           nextToken();
1522         if (!FormatTok->is(tok::string_literal))
1523           return;
1524         nextToken();
1525         if (FormatTok->is(tok::semi))
1526           nextToken();
1527         addUnwrappedLine();
1528         return;
1529       }
1530       if (Style.isCpp()) {
1531         parseModuleImport();
1532         return;
1533       }
1534     }
1535     if (Style.isCpp() &&
1536         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1537                            Keywords.kw_slots, Keywords.kw_qslots)) {
1538       nextToken();
1539       if (FormatTok->is(tok::colon)) {
1540         nextToken();
1541         addUnwrappedLine();
1542         return;
1543       }
1544     }
1545     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1546       parseStatementMacro();
1547       return;
1548     }
1549     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1550       parseNamespace();
1551       return;
1552     }
1553     // In all other cases, parse the declaration.
1554     break;
1555   default:
1556     break;
1557   }
1558   do {
1559     const FormatToken *Previous = FormatTok->Previous;
1560     switch (FormatTok->Tok.getKind()) {
1561     case tok::at:
1562       nextToken();
1563       if (FormatTok->is(tok::l_brace)) {
1564         nextToken();
1565         parseBracedList();
1566         break;
1567       } else if (Style.Language == FormatStyle::LK_Java &&
1568                  FormatTok->is(Keywords.kw_interface)) {
1569         nextToken();
1570         break;
1571       }
1572       switch (FormatTok->Tok.getObjCKeywordID()) {
1573       case tok::objc_public:
1574       case tok::objc_protected:
1575       case tok::objc_package:
1576       case tok::objc_private:
1577         return parseAccessSpecifier();
1578       case tok::objc_interface:
1579       case tok::objc_implementation:
1580         return parseObjCInterfaceOrImplementation();
1581       case tok::objc_protocol:
1582         if (parseObjCProtocol())
1583           return;
1584         break;
1585       case tok::objc_end:
1586         return; // Handled by the caller.
1587       case tok::objc_optional:
1588       case tok::objc_required:
1589         nextToken();
1590         addUnwrappedLine();
1591         return;
1592       case tok::objc_autoreleasepool:
1593         nextToken();
1594         if (FormatTok->is(tok::l_brace)) {
1595           if (Style.BraceWrapping.AfterControlStatement ==
1596               FormatStyle::BWACS_Always)
1597             addUnwrappedLine();
1598           parseBlock();
1599         }
1600         addUnwrappedLine();
1601         return;
1602       case tok::objc_synchronized:
1603         nextToken();
1604         if (FormatTok->is(tok::l_paren))
1605           // Skip synchronization object
1606           parseParens();
1607         if (FormatTok->is(tok::l_brace)) {
1608           if (Style.BraceWrapping.AfterControlStatement ==
1609               FormatStyle::BWACS_Always)
1610             addUnwrappedLine();
1611           parseBlock();
1612         }
1613         addUnwrappedLine();
1614         return;
1615       case tok::objc_try:
1616         // This branch isn't strictly necessary (the kw_try case below would
1617         // do this too after the tok::at is parsed above).  But be explicit.
1618         parseTryCatch();
1619         return;
1620       default:
1621         break;
1622       }
1623       break;
1624     case tok::kw_concept:
1625       parseConcept();
1626       return;
1627     case tok::kw_requires: {
1628       if (Style.isCpp()) {
1629         bool ParsedClause = parseRequires();
1630         if (ParsedClause)
1631           return;
1632       } else {
1633         nextToken();
1634       }
1635       break;
1636     }
1637     case tok::kw_enum:
1638       // Ignore if this is part of "template <enum ...".
1639       if (Previous && Previous->is(tok::less)) {
1640         nextToken();
1641         break;
1642       }
1643 
1644       // parseEnum falls through and does not yet add an unwrapped line as an
1645       // enum definition can start a structural element.
1646       if (!parseEnum())
1647         break;
1648       // This only applies for C++.
1649       if (!Style.isCpp()) {
1650         addUnwrappedLine();
1651         return;
1652       }
1653       break;
1654     case tok::kw_typedef:
1655       nextToken();
1656       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1657                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1658                              Keywords.kw_CF_CLOSED_ENUM,
1659                              Keywords.kw_NS_CLOSED_ENUM))
1660         parseEnum();
1661       break;
1662     case tok::kw_struct:
1663     case tok::kw_union:
1664     case tok::kw_class:
1665       if (parseStructLike())
1666         return;
1667       break;
1668     case tok::period:
1669       nextToken();
1670       // In Java, classes have an implicit static member "class".
1671       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1672           FormatTok->is(tok::kw_class))
1673         nextToken();
1674       if (Style.isJavaScript() && FormatTok &&
1675           FormatTok->Tok.getIdentifierInfo())
1676         // JavaScript only has pseudo keywords, all keywords are allowed to
1677         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1678         nextToken();
1679       break;
1680     case tok::semi:
1681       nextToken();
1682       addUnwrappedLine();
1683       return;
1684     case tok::r_brace:
1685       addUnwrappedLine();
1686       return;
1687     case tok::l_paren: {
1688       parseParens();
1689       // Break the unwrapped line if a K&R C function definition has a parameter
1690       // declaration.
1691       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1692         break;
1693       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1694         addUnwrappedLine();
1695         return;
1696       }
1697       break;
1698     }
1699     case tok::kw_operator:
1700       nextToken();
1701       if (FormatTok->isBinaryOperator())
1702         nextToken();
1703       break;
1704     case tok::caret:
1705       nextToken();
1706       if (FormatTok->Tok.isAnyIdentifier() ||
1707           FormatTok->isSimpleTypeSpecifier())
1708         nextToken();
1709       if (FormatTok->is(tok::l_paren))
1710         parseParens();
1711       if (FormatTok->is(tok::l_brace))
1712         parseChildBlock();
1713       break;
1714     case tok::l_brace:
1715       if (NextLBracesType != TT_Unknown)
1716         FormatTok->setFinalizedType(NextLBracesType);
1717       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1718         // A block outside of parentheses must be the last part of a
1719         // structural element.
1720         // FIXME: Figure out cases where this is not true, and add projections
1721         // for them (the one we know is missing are lambdas).
1722         if (Style.Language == FormatStyle::LK_Java &&
1723             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1724           // If necessary, we could set the type to something different than
1725           // TT_FunctionLBrace.
1726           if (Style.BraceWrapping.AfterControlStatement ==
1727               FormatStyle::BWACS_Always)
1728             addUnwrappedLine();
1729         } else if (Style.BraceWrapping.AfterFunction) {
1730           addUnwrappedLine();
1731         }
1732         if (!Line->InPPDirective)
1733           FormatTok->setFinalizedType(TT_FunctionLBrace);
1734         parseBlock();
1735         addUnwrappedLine();
1736         return;
1737       }
1738       // Otherwise this was a braced init list, and the structural
1739       // element continues.
1740       break;
1741     case tok::kw_try:
1742       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1743         // field/method declaration.
1744         nextToken();
1745         break;
1746       }
1747       // We arrive here when parsing function-try blocks.
1748       if (Style.BraceWrapping.AfterFunction)
1749         addUnwrappedLine();
1750       parseTryCatch();
1751       return;
1752     case tok::identifier: {
1753       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1754           Line->MustBeDeclaration) {
1755         addUnwrappedLine();
1756         parseCSharpGenericTypeConstraint();
1757         break;
1758       }
1759       if (FormatTok->is(TT_MacroBlockEnd)) {
1760         addUnwrappedLine();
1761         return;
1762       }
1763 
1764       // Function declarations (as opposed to function expressions) are parsed
1765       // on their own unwrapped line by continuing this loop. Function
1766       // expressions (functions that are not on their own line) must not create
1767       // a new unwrapped line, so they are special cased below.
1768       size_t TokenCount = Line->Tokens.size();
1769       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1770           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1771                                                      Keywords.kw_async)))) {
1772         tryToParseJSFunction();
1773         break;
1774       }
1775       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1776           FormatTok->is(Keywords.kw_interface)) {
1777         if (Style.isJavaScript()) {
1778           // In JavaScript/TypeScript, "interface" can be used as a standalone
1779           // identifier, e.g. in `var interface = 1;`. If "interface" is
1780           // followed by another identifier, it is very like to be an actual
1781           // interface declaration.
1782           unsigned StoredPosition = Tokens->getPosition();
1783           FormatToken *Next = Tokens->getNextToken();
1784           FormatTok = Tokens->setPosition(StoredPosition);
1785           if (!mustBeJSIdent(Keywords, Next)) {
1786             nextToken();
1787             break;
1788           }
1789         }
1790         parseRecord();
1791         addUnwrappedLine();
1792         return;
1793       }
1794 
1795       if (FormatTok->is(Keywords.kw_interface)) {
1796         if (parseStructLike())
1797           return;
1798         break;
1799       }
1800 
1801       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1802         parseStatementMacro();
1803         return;
1804       }
1805 
1806       // See if the following token should start a new unwrapped line.
1807       StringRef Text = FormatTok->TokenText;
1808 
1809       FormatToken *PreviousToken = FormatTok;
1810       nextToken();
1811 
1812       // JS doesn't have macros, and within classes colons indicate fields, not
1813       // labels.
1814       if (Style.isJavaScript())
1815         break;
1816 
1817       TokenCount = Line->Tokens.size();
1818       if (TokenCount == 1 ||
1819           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1820         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1821           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1822           parseLabel(!Style.IndentGotoLabels);
1823           if (HasLabel)
1824             *HasLabel = true;
1825           return;
1826         }
1827         // Recognize function-like macro usages without trailing semicolon as
1828         // well as free-standing macros like Q_OBJECT.
1829         bool FunctionLike = FormatTok->is(tok::l_paren);
1830         if (FunctionLike)
1831           parseParens();
1832 
1833         bool FollowedByNewline =
1834             CommentsBeforeNextToken.empty()
1835                 ? FormatTok->NewlinesBefore > 0
1836                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1837 
1838         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1839             tokenCanStartNewLine(*FormatTok) && Text == Text.upper() &&
1840             !PreviousToken->isTypeFinalized()) {
1841           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1842           addUnwrappedLine();
1843           return;
1844         }
1845       }
1846       break;
1847     }
1848     case tok::equal:
1849       if ((Style.isJavaScript() || Style.isCSharp()) &&
1850           FormatTok->is(TT_FatArrow)) {
1851         tryToParseChildBlock();
1852         break;
1853       }
1854 
1855       nextToken();
1856       if (FormatTok->is(tok::l_brace)) {
1857         // Block kind should probably be set to BK_BracedInit for any language.
1858         // C# needs this change to ensure that array initialisers and object
1859         // initialisers are indented the same way.
1860         if (Style.isCSharp())
1861           FormatTok->setBlockKind(BK_BracedInit);
1862         nextToken();
1863         parseBracedList();
1864       } else if (Style.Language == FormatStyle::LK_Proto &&
1865                  FormatTok->is(tok::less)) {
1866         nextToken();
1867         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1868                         /*ClosingBraceKind=*/tok::greater);
1869       }
1870       break;
1871     case tok::l_square:
1872       parseSquare();
1873       break;
1874     case tok::kw_new:
1875       parseNew();
1876       break;
1877     case tok::kw_case:
1878       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1879         // 'case: string' field declaration.
1880         nextToken();
1881         break;
1882       }
1883       parseCaseLabel();
1884       break;
1885     default:
1886       nextToken();
1887       break;
1888     }
1889   } while (!eof());
1890 }
1891 
1892 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1893   assert(FormatTok->is(tok::l_brace));
1894   if (!Style.isCSharp())
1895     return false;
1896   // See if it's a property accessor.
1897   if (FormatTok->Previous->isNot(tok::identifier))
1898     return false;
1899 
1900   // See if we are inside a property accessor.
1901   //
1902   // Record the current tokenPosition so that we can advance and
1903   // reset the current token. `Next` is not set yet so we need
1904   // another way to advance along the token stream.
1905   unsigned int StoredPosition = Tokens->getPosition();
1906   FormatToken *Tok = Tokens->getNextToken();
1907 
1908   // A trivial property accessor is of the form:
1909   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
1910   // Track these as they do not require line breaks to be introduced.
1911   bool HasSpecialAccessor = false;
1912   bool IsTrivialPropertyAccessor = true;
1913   while (!eof()) {
1914     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1915                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1916                      Keywords.kw_init, Keywords.kw_set)) {
1917       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
1918         HasSpecialAccessor = true;
1919       Tok = Tokens->getNextToken();
1920       continue;
1921     }
1922     if (Tok->isNot(tok::r_brace))
1923       IsTrivialPropertyAccessor = false;
1924     break;
1925   }
1926 
1927   if (!HasSpecialAccessor) {
1928     Tokens->setPosition(StoredPosition);
1929     return false;
1930   }
1931 
1932   // Try to parse the property accessor:
1933   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1934   Tokens->setPosition(StoredPosition);
1935   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1936     addUnwrappedLine();
1937   nextToken();
1938   do {
1939     switch (FormatTok->Tok.getKind()) {
1940     case tok::r_brace:
1941       nextToken();
1942       if (FormatTok->is(tok::equal)) {
1943         while (!eof() && FormatTok->isNot(tok::semi))
1944           nextToken();
1945         nextToken();
1946       }
1947       addUnwrappedLine();
1948       return true;
1949     case tok::l_brace:
1950       ++Line->Level;
1951       parseBlock(/*MustBeDeclaration=*/true);
1952       addUnwrappedLine();
1953       --Line->Level;
1954       break;
1955     case tok::equal:
1956       if (FormatTok->is(TT_FatArrow)) {
1957         ++Line->Level;
1958         do {
1959           nextToken();
1960         } while (!eof() && FormatTok->isNot(tok::semi));
1961         nextToken();
1962         addUnwrappedLine();
1963         --Line->Level;
1964         break;
1965       }
1966       nextToken();
1967       break;
1968     default:
1969       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
1970                              Keywords.kw_set) &&
1971           !IsTrivialPropertyAccessor) {
1972         // Non-trivial get/set needs to be on its own line.
1973         addUnwrappedLine();
1974       }
1975       nextToken();
1976     }
1977   } while (!eof());
1978 
1979   // Unreachable for well-formed code (paired '{' and '}').
1980   return true;
1981 }
1982 
1983 bool UnwrappedLineParser::tryToParseLambda() {
1984   assert(FormatTok->is(tok::l_square));
1985   if (!Style.isCpp()) {
1986     nextToken();
1987     return false;
1988   }
1989   FormatToken &LSquare = *FormatTok;
1990   if (!tryToParseLambdaIntroducer())
1991     return false;
1992 
1993   bool SeenArrow = false;
1994   bool InTemplateParameterList = false;
1995 
1996   while (FormatTok->isNot(tok::l_brace)) {
1997     if (FormatTok->isSimpleTypeSpecifier()) {
1998       nextToken();
1999       continue;
2000     }
2001     switch (FormatTok->Tok.getKind()) {
2002     case tok::l_brace:
2003       break;
2004     case tok::l_paren:
2005       parseParens();
2006       break;
2007     case tok::l_square:
2008       parseSquare();
2009       break;
2010     case tok::kw_class:
2011     case tok::kw_template:
2012     case tok::kw_typename:
2013       assert(FormatTok->Previous);
2014       if (FormatTok->Previous->is(tok::less))
2015         InTemplateParameterList = true;
2016       nextToken();
2017       break;
2018     case tok::amp:
2019     case tok::star:
2020     case tok::kw_const:
2021     case tok::comma:
2022     case tok::less:
2023     case tok::greater:
2024     case tok::identifier:
2025     case tok::numeric_constant:
2026     case tok::coloncolon:
2027     case tok::kw_mutable:
2028     case tok::kw_noexcept:
2029       nextToken();
2030       break;
2031     // Specialization of a template with an integer parameter can contain
2032     // arithmetic, logical, comparison and ternary operators.
2033     //
2034     // FIXME: This also accepts sequences of operators that are not in the scope
2035     // of a template argument list.
2036     //
2037     // In a C++ lambda a template type can only occur after an arrow. We use
2038     // this as an heuristic to distinguish between Objective-C expressions
2039     // followed by an `a->b` expression, such as:
2040     // ([obj func:arg] + a->b)
2041     // Otherwise the code below would parse as a lambda.
2042     //
2043     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2044     // explicit template lists: []<bool b = true && false>(U &&u){}
2045     case tok::plus:
2046     case tok::minus:
2047     case tok::exclaim:
2048     case tok::tilde:
2049     case tok::slash:
2050     case tok::percent:
2051     case tok::lessless:
2052     case tok::pipe:
2053     case tok::pipepipe:
2054     case tok::ampamp:
2055     case tok::caret:
2056     case tok::equalequal:
2057     case tok::exclaimequal:
2058     case tok::greaterequal:
2059     case tok::lessequal:
2060     case tok::question:
2061     case tok::colon:
2062     case tok::ellipsis:
2063     case tok::kw_true:
2064     case tok::kw_false:
2065       if (SeenArrow || InTemplateParameterList) {
2066         nextToken();
2067         break;
2068       }
2069       return true;
2070     case tok::arrow:
2071       // This might or might not actually be a lambda arrow (this could be an
2072       // ObjC method invocation followed by a dereferencing arrow). We might
2073       // reset this back to TT_Unknown in TokenAnnotator.
2074       FormatTok->setFinalizedType(TT_LambdaArrow);
2075       SeenArrow = true;
2076       nextToken();
2077       break;
2078     default:
2079       return true;
2080     }
2081   }
2082   FormatTok->setFinalizedType(TT_LambdaLBrace);
2083   LSquare.setFinalizedType(TT_LambdaLSquare);
2084   parseChildBlock();
2085   return true;
2086 }
2087 
2088 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2089   const FormatToken *Previous = FormatTok->Previous;
2090   const FormatToken *LeftSquare = FormatTok;
2091   nextToken();
2092   if (Previous &&
2093       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2094                          tok::kw_delete, tok::l_square) ||
2095        LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2096        Previous->isSimpleTypeSpecifier())) {
2097     return false;
2098   }
2099   if (FormatTok->is(tok::l_square))
2100     return false;
2101   if (FormatTok->is(tok::r_square)) {
2102     const FormatToken *Next = Tokens->peekNextToken();
2103     if (Next->is(tok::greater))
2104       return false;
2105   }
2106   parseSquare(/*LambdaIntroducer=*/true);
2107   return true;
2108 }
2109 
2110 void UnwrappedLineParser::tryToParseJSFunction() {
2111   assert(FormatTok->is(Keywords.kw_function) ||
2112          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2113   if (FormatTok->is(Keywords.kw_async))
2114     nextToken();
2115   // Consume "function".
2116   nextToken();
2117 
2118   // Consume * (generator function). Treat it like C++'s overloaded operators.
2119   if (FormatTok->is(tok::star)) {
2120     FormatTok->setFinalizedType(TT_OverloadedOperator);
2121     nextToken();
2122   }
2123 
2124   // Consume function name.
2125   if (FormatTok->is(tok::identifier))
2126     nextToken();
2127 
2128   if (FormatTok->isNot(tok::l_paren))
2129     return;
2130 
2131   // Parse formal parameter list.
2132   parseParens();
2133 
2134   if (FormatTok->is(tok::colon)) {
2135     // Parse a type definition.
2136     nextToken();
2137 
2138     // Eat the type declaration. For braced inline object types, balance braces,
2139     // otherwise just parse until finding an l_brace for the function body.
2140     if (FormatTok->is(tok::l_brace))
2141       tryToParseBracedList();
2142     else
2143       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2144         nextToken();
2145   }
2146 
2147   if (FormatTok->is(tok::semi))
2148     return;
2149 
2150   parseChildBlock();
2151 }
2152 
2153 bool UnwrappedLineParser::tryToParseBracedList() {
2154   if (FormatTok->is(BK_Unknown))
2155     calculateBraceTypes();
2156   assert(FormatTok->isNot(BK_Unknown));
2157   if (FormatTok->is(BK_Block))
2158     return false;
2159   nextToken();
2160   parseBracedList();
2161   return true;
2162 }
2163 
2164 bool UnwrappedLineParser::tryToParseChildBlock() {
2165   assert(Style.isJavaScript() || Style.isCSharp());
2166   assert(FormatTok->is(TT_FatArrow));
2167   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2168   // They always start an expression or a child block if followed by a curly
2169   // brace.
2170   nextToken();
2171   if (FormatTok->isNot(tok::l_brace))
2172     return false;
2173   parseChildBlock();
2174   return true;
2175 }
2176 
2177 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2178                                           bool IsEnum,
2179                                           tok::TokenKind ClosingBraceKind) {
2180   bool HasError = false;
2181 
2182   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2183   // replace this by using parseAssignmentExpression() inside.
2184   do {
2185     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2186         tryToParseChildBlock())
2187       continue;
2188     if (Style.isJavaScript()) {
2189       if (FormatTok->is(Keywords.kw_function) ||
2190           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2191         tryToParseJSFunction();
2192         continue;
2193       }
2194       if (FormatTok->is(tok::l_brace)) {
2195         // Could be a method inside of a braced list `{a() { return 1; }}`.
2196         if (tryToParseBracedList())
2197           continue;
2198         parseChildBlock();
2199       }
2200     }
2201     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2202       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2203         addUnwrappedLine();
2204       nextToken();
2205       return !HasError;
2206     }
2207     switch (FormatTok->Tok.getKind()) {
2208     case tok::l_square:
2209       if (Style.isCSharp())
2210         parseSquare();
2211       else
2212         tryToParseLambda();
2213       break;
2214     case tok::l_paren:
2215       parseParens();
2216       // JavaScript can just have free standing methods and getters/setters in
2217       // object literals. Detect them by a "{" following ")".
2218       if (Style.isJavaScript()) {
2219         if (FormatTok->is(tok::l_brace))
2220           parseChildBlock();
2221         break;
2222       }
2223       break;
2224     case tok::l_brace:
2225       // Assume there are no blocks inside a braced init list apart
2226       // from the ones we explicitly parse out (like lambdas).
2227       FormatTok->setBlockKind(BK_BracedInit);
2228       nextToken();
2229       parseBracedList();
2230       break;
2231     case tok::less:
2232       if (Style.Language == FormatStyle::LK_Proto ||
2233           ClosingBraceKind == tok::greater) {
2234         nextToken();
2235         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2236                         /*ClosingBraceKind=*/tok::greater);
2237       } else {
2238         nextToken();
2239       }
2240       break;
2241     case tok::semi:
2242       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2243       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2244       // used for error recovery if we have otherwise determined that this is
2245       // a braced list.
2246       if (Style.isJavaScript()) {
2247         nextToken();
2248         break;
2249       }
2250       HasError = true;
2251       if (!ContinueOnSemicolons)
2252         return !HasError;
2253       nextToken();
2254       break;
2255     case tok::comma:
2256       nextToken();
2257       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2258         addUnwrappedLine();
2259       break;
2260     default:
2261       nextToken();
2262       break;
2263     }
2264   } while (!eof());
2265   return false;
2266 }
2267 
2268 /// \brief Parses a pair of parentheses (and everything between them).
2269 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2270 /// double ampersands. This only counts for the current parens scope.
2271 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2272   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2273   nextToken();
2274   do {
2275     switch (FormatTok->Tok.getKind()) {
2276     case tok::l_paren:
2277       parseParens();
2278       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2279         parseChildBlock();
2280       break;
2281     case tok::r_paren:
2282       nextToken();
2283       return;
2284     case tok::r_brace:
2285       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2286       return;
2287     case tok::l_square:
2288       tryToParseLambda();
2289       break;
2290     case tok::l_brace:
2291       if (!tryToParseBracedList())
2292         parseChildBlock();
2293       break;
2294     case tok::at:
2295       nextToken();
2296       if (FormatTok->is(tok::l_brace)) {
2297         nextToken();
2298         parseBracedList();
2299       }
2300       break;
2301     case tok::equal:
2302       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2303         tryToParseChildBlock();
2304       else
2305         nextToken();
2306       break;
2307     case tok::kw_class:
2308       if (Style.isJavaScript())
2309         parseRecord(/*ParseAsExpr=*/true);
2310       else
2311         nextToken();
2312       break;
2313     case tok::identifier:
2314       if (Style.isJavaScript() &&
2315           (FormatTok->is(Keywords.kw_function) ||
2316            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2317         tryToParseJSFunction();
2318       else
2319         nextToken();
2320       break;
2321     case tok::kw_requires: {
2322       auto RequiresToken = FormatTok;
2323       nextToken();
2324       parseRequiresExpression(RequiresToken);
2325       break;
2326     }
2327     case tok::ampamp:
2328       if (AmpAmpTokenType != TT_Unknown)
2329         FormatTok->setFinalizedType(AmpAmpTokenType);
2330       LLVM_FALLTHROUGH;
2331     default:
2332       nextToken();
2333       break;
2334     }
2335   } while (!eof());
2336 }
2337 
2338 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2339   if (!LambdaIntroducer) {
2340     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2341     if (tryToParseLambda())
2342       return;
2343   }
2344   do {
2345     switch (FormatTok->Tok.getKind()) {
2346     case tok::l_paren:
2347       parseParens();
2348       break;
2349     case tok::r_square:
2350       nextToken();
2351       return;
2352     case tok::r_brace:
2353       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2354       return;
2355     case tok::l_square:
2356       parseSquare();
2357       break;
2358     case tok::l_brace: {
2359       if (!tryToParseBracedList())
2360         parseChildBlock();
2361       break;
2362     }
2363     case tok::at:
2364       nextToken();
2365       if (FormatTok->is(tok::l_brace)) {
2366         nextToken();
2367         parseBracedList();
2368       }
2369       break;
2370     default:
2371       nextToken();
2372       break;
2373     }
2374   } while (!eof());
2375 }
2376 
2377 void UnwrappedLineParser::keepAncestorBraces() {
2378   if (!Style.RemoveBracesLLVM)
2379     return;
2380 
2381   const int MaxNestingLevels = 2;
2382   const int Size = NestedTooDeep.size();
2383   if (Size >= MaxNestingLevels)
2384     NestedTooDeep[Size - MaxNestingLevels] = true;
2385   NestedTooDeep.push_back(false);
2386 }
2387 
2388 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2389   for (const auto &Token : llvm::reverse(Line.Tokens))
2390     if (Token.Tok->isNot(tok::comment))
2391       return Token.Tok;
2392 
2393   return nullptr;
2394 }
2395 
2396 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2397   FormatToken *Tok = nullptr;
2398 
2399   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2400       PreprocessorDirectives.empty()) {
2401     Tok = getLastNonComment(*Line);
2402     assert(Tok);
2403     if (Tok->BraceCount < 0) {
2404       assert(Tok->BraceCount == -1);
2405       Tok = nullptr;
2406     } else {
2407       Tok->BraceCount = -1;
2408     }
2409   }
2410 
2411   addUnwrappedLine();
2412   ++Line->Level;
2413   parseStructuralElement();
2414 
2415   if (Tok) {
2416     assert(!Line->InPPDirective);
2417     Tok = nullptr;
2418     for (const auto &L : llvm::reverse(*CurrentLines)) {
2419       if (!L.InPPDirective && getLastNonComment(L)) {
2420         Tok = L.Tokens.back().Tok;
2421         break;
2422       }
2423     }
2424     assert(Tok);
2425     ++Tok->BraceCount;
2426   }
2427 
2428   if (CheckEOF && FormatTok->is(tok::eof))
2429     addUnwrappedLine();
2430 
2431   --Line->Level;
2432 }
2433 
2434 static void markOptionalBraces(FormatToken *LeftBrace) {
2435   if (!LeftBrace)
2436     return;
2437 
2438   assert(LeftBrace->is(tok::l_brace));
2439 
2440   FormatToken *RightBrace = LeftBrace->MatchingParen;
2441   if (!RightBrace) {
2442     assert(!LeftBrace->Optional);
2443     return;
2444   }
2445 
2446   assert(RightBrace->is(tok::r_brace));
2447   assert(RightBrace->MatchingParen == LeftBrace);
2448   assert(LeftBrace->Optional == RightBrace->Optional);
2449 
2450   LeftBrace->Optional = true;
2451   RightBrace->Optional = true;
2452 }
2453 
2454 void UnwrappedLineParser::handleAttributes() {
2455   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2456   if (FormatTok->is(TT_AttributeMacro))
2457     nextToken();
2458   handleCppAttributes();
2459 }
2460 
2461 bool UnwrappedLineParser::handleCppAttributes() {
2462   // Handle [[likely]] / [[unlikely]] attributes.
2463   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2464     parseSquare();
2465     return true;
2466   }
2467   return false;
2468 }
2469 
2470 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2471                                                   bool KeepBraces) {
2472   assert(FormatTok->is(tok::kw_if) && "'if' expected");
2473   nextToken();
2474   if (FormatTok->is(tok::exclaim))
2475     nextToken();
2476   if (FormatTok->is(tok::kw_consteval)) {
2477     nextToken();
2478   } else {
2479     if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2480       nextToken();
2481     if (FormatTok->is(tok::l_paren))
2482       parseParens();
2483   }
2484   handleAttributes();
2485 
2486   bool NeedsUnwrappedLine = false;
2487   keepAncestorBraces();
2488 
2489   FormatToken *IfLeftBrace = nullptr;
2490   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2491 
2492   if (FormatTok->is(tok::l_brace)) {
2493     IfLeftBrace = FormatTok;
2494     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2495     IfBlockKind = parseBlock();
2496     if (Style.BraceWrapping.BeforeElse)
2497       addUnwrappedLine();
2498     else
2499       NeedsUnwrappedLine = true;
2500   } else {
2501     parseUnbracedBody();
2502   }
2503 
2504   bool KeepIfBraces = false;
2505   if (Style.RemoveBracesLLVM) {
2506     assert(!NestedTooDeep.empty());
2507     KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2508                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2509                    IfBlockKind == IfStmtKind::IfElseIf;
2510   }
2511 
2512   FormatToken *ElseLeftBrace = nullptr;
2513   IfStmtKind Kind = IfStmtKind::IfOnly;
2514 
2515   if (FormatTok->is(tok::kw_else)) {
2516     if (Style.RemoveBracesLLVM) {
2517       NestedTooDeep.back() = false;
2518       Kind = IfStmtKind::IfElse;
2519     }
2520     nextToken();
2521     handleAttributes();
2522     if (FormatTok->is(tok::l_brace)) {
2523       ElseLeftBrace = FormatTok;
2524       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2525       if (parseBlock() == IfStmtKind::IfOnly)
2526         Kind = IfStmtKind::IfElseIf;
2527       addUnwrappedLine();
2528     } else if (FormatTok->is(tok::kw_if)) {
2529       const FormatToken *Previous = Tokens->getPreviousToken();
2530       assert(Previous);
2531       const bool IsPrecededByComment = Previous->is(tok::comment);
2532       if (IsPrecededByComment) {
2533         addUnwrappedLine();
2534         ++Line->Level;
2535       }
2536       bool TooDeep = true;
2537       if (Style.RemoveBracesLLVM) {
2538         Kind = IfStmtKind::IfElseIf;
2539         TooDeep = NestedTooDeep.pop_back_val();
2540       }
2541       ElseLeftBrace =
2542           parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
2543       if (Style.RemoveBracesLLVM)
2544         NestedTooDeep.push_back(TooDeep);
2545       if (IsPrecededByComment)
2546         --Line->Level;
2547     } else {
2548       parseUnbracedBody(/*CheckEOF=*/true);
2549     }
2550   } else {
2551     if (Style.RemoveBracesLLVM)
2552       KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2553     if (NeedsUnwrappedLine)
2554       addUnwrappedLine();
2555   }
2556 
2557   if (!Style.RemoveBracesLLVM)
2558     return nullptr;
2559 
2560   assert(!NestedTooDeep.empty());
2561   const bool KeepElseBraces =
2562       (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();
2563 
2564   NestedTooDeep.pop_back();
2565 
2566   if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
2567     markOptionalBraces(IfLeftBrace);
2568     markOptionalBraces(ElseLeftBrace);
2569   } else if (IfLeftBrace) {
2570     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2571     if (IfRightBrace) {
2572       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2573       assert(!IfLeftBrace->Optional);
2574       assert(!IfRightBrace->Optional);
2575       IfLeftBrace->MatchingParen = nullptr;
2576       IfRightBrace->MatchingParen = nullptr;
2577     }
2578   }
2579 
2580   if (IfKind)
2581     *IfKind = Kind;
2582 
2583   return IfLeftBrace;
2584 }
2585 
2586 void UnwrappedLineParser::parseTryCatch() {
2587   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2588   nextToken();
2589   bool NeedsUnwrappedLine = false;
2590   if (FormatTok->is(tok::colon)) {
2591     // We are in a function try block, what comes is an initializer list.
2592     nextToken();
2593 
2594     // In case identifiers were removed by clang-tidy, what might follow is
2595     // multiple commas in sequence - before the first identifier.
2596     while (FormatTok->is(tok::comma))
2597       nextToken();
2598 
2599     while (FormatTok->is(tok::identifier)) {
2600       nextToken();
2601       if (FormatTok->is(tok::l_paren))
2602         parseParens();
2603       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2604           FormatTok->is(tok::l_brace)) {
2605         do {
2606           nextToken();
2607         } while (!FormatTok->is(tok::r_brace));
2608         nextToken();
2609       }
2610 
2611       // In case identifiers were removed by clang-tidy, what might follow is
2612       // multiple commas in sequence - after the first identifier.
2613       while (FormatTok->is(tok::comma))
2614         nextToken();
2615     }
2616   }
2617   // Parse try with resource.
2618   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2619     parseParens();
2620 
2621   keepAncestorBraces();
2622 
2623   if (FormatTok->is(tok::l_brace)) {
2624     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2625     parseBlock();
2626     if (Style.BraceWrapping.BeforeCatch)
2627       addUnwrappedLine();
2628     else
2629       NeedsUnwrappedLine = true;
2630   } else if (!FormatTok->is(tok::kw_catch)) {
2631     // The C++ standard requires a compound-statement after a try.
2632     // If there's none, we try to assume there's a structuralElement
2633     // and try to continue.
2634     addUnwrappedLine();
2635     ++Line->Level;
2636     parseStructuralElement();
2637     --Line->Level;
2638   }
2639   while (true) {
2640     if (FormatTok->is(tok::at))
2641       nextToken();
2642     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2643                              tok::kw___finally) ||
2644           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2645            FormatTok->is(Keywords.kw_finally)) ||
2646           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2647            FormatTok->isObjCAtKeyword(tok::objc_finally))))
2648       break;
2649     nextToken();
2650     while (FormatTok->isNot(tok::l_brace)) {
2651       if (FormatTok->is(tok::l_paren)) {
2652         parseParens();
2653         continue;
2654       }
2655       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2656         if (Style.RemoveBracesLLVM)
2657           NestedTooDeep.pop_back();
2658         return;
2659       }
2660       nextToken();
2661     }
2662     NeedsUnwrappedLine = false;
2663     Line->MustBeDeclaration = false;
2664     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2665     parseBlock();
2666     if (Style.BraceWrapping.BeforeCatch)
2667       addUnwrappedLine();
2668     else
2669       NeedsUnwrappedLine = true;
2670   }
2671 
2672   if (Style.RemoveBracesLLVM)
2673     NestedTooDeep.pop_back();
2674 
2675   if (NeedsUnwrappedLine)
2676     addUnwrappedLine();
2677 }
2678 
2679 void UnwrappedLineParser::parseNamespace() {
2680   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2681          "'namespace' expected");
2682 
2683   const FormatToken &InitialToken = *FormatTok;
2684   nextToken();
2685   if (InitialToken.is(TT_NamespaceMacro)) {
2686     parseParens();
2687   } else {
2688     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2689                               tok::l_square, tok::period, tok::l_paren) ||
2690            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2691       if (FormatTok->is(tok::l_square))
2692         parseSquare();
2693       else if (FormatTok->is(tok::l_paren))
2694         parseParens();
2695       else
2696         nextToken();
2697   }
2698   if (FormatTok->is(tok::l_brace)) {
2699     if (ShouldBreakBeforeBrace(Style, InitialToken))
2700       addUnwrappedLine();
2701 
2702     unsigned AddLevels =
2703         Style.NamespaceIndentation == FormatStyle::NI_All ||
2704                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2705                  DeclarationScopeStack.size() > 1)
2706             ? 1u
2707             : 0u;
2708     bool ManageWhitesmithsBraces =
2709         AddLevels == 0u &&
2710         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2711 
2712     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2713     // the whole block.
2714     if (ManageWhitesmithsBraces)
2715       ++Line->Level;
2716 
2717     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2718                /*MunchSemi=*/true,
2719                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2720 
2721     // Munch the semicolon after a namespace. This is more common than one would
2722     // think. Putting the semicolon into its own line is very ugly.
2723     if (FormatTok->is(tok::semi))
2724       nextToken();
2725 
2726     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2727 
2728     if (ManageWhitesmithsBraces)
2729       --Line->Level;
2730   }
2731   // FIXME: Add error handling.
2732 }
2733 
2734 void UnwrappedLineParser::parseNew() {
2735   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2736   nextToken();
2737 
2738   if (Style.isCSharp()) {
2739     do {
2740       if (FormatTok->is(tok::l_brace))
2741         parseBracedList();
2742 
2743       if (FormatTok->isOneOf(tok::semi, tok::comma))
2744         return;
2745 
2746       nextToken();
2747     } while (!eof());
2748   }
2749 
2750   if (Style.Language != FormatStyle::LK_Java)
2751     return;
2752 
2753   // In Java, we can parse everything up to the parens, which aren't optional.
2754   do {
2755     // There should not be a ;, { or } before the new's open paren.
2756     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2757       return;
2758 
2759     // Consume the parens.
2760     if (FormatTok->is(tok::l_paren)) {
2761       parseParens();
2762 
2763       // If there is a class body of an anonymous class, consume that as child.
2764       if (FormatTok->is(tok::l_brace))
2765         parseChildBlock();
2766       return;
2767     }
2768     nextToken();
2769   } while (!eof());
2770 }
2771 
2772 void UnwrappedLineParser::parseLoopBody(bool TryRemoveBraces,
2773                                         bool WrapRightBrace) {
2774   keepAncestorBraces();
2775 
2776   if (FormatTok->is(tok::l_brace)) {
2777     FormatToken *LeftBrace = FormatTok;
2778     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2779     parseBlock();
2780     if (TryRemoveBraces) {
2781       assert(!NestedTooDeep.empty());
2782       if (!NestedTooDeep.back())
2783         markOptionalBraces(LeftBrace);
2784     }
2785     if (WrapRightBrace)
2786       addUnwrappedLine();
2787   } else {
2788     parseUnbracedBody();
2789   }
2790 
2791   if (TryRemoveBraces)
2792     NestedTooDeep.pop_back();
2793 }
2794 
2795 void UnwrappedLineParser::parseForOrWhileLoop() {
2796   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2797          "'for', 'while' or foreach macro expected");
2798   nextToken();
2799   // JS' for await ( ...
2800   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2801     nextToken();
2802   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2803     nextToken();
2804   if (FormatTok->is(tok::l_paren))
2805     parseParens();
2806 
2807   parseLoopBody(Style.RemoveBracesLLVM, true);
2808 }
2809 
2810 void UnwrappedLineParser::parseDoWhile() {
2811   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2812   nextToken();
2813 
2814   parseLoopBody(false, Style.BraceWrapping.BeforeWhile);
2815 
2816   // FIXME: Add error handling.
2817   if (!FormatTok->is(tok::kw_while)) {
2818     addUnwrappedLine();
2819     return;
2820   }
2821 
2822   // If in Whitesmiths mode, the line with the while() needs to be indented
2823   // to the same level as the block.
2824   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2825     ++Line->Level;
2826 
2827   nextToken();
2828   parseStructuralElement();
2829 }
2830 
2831 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2832   nextToken();
2833   unsigned OldLineLevel = Line->Level;
2834   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2835     --Line->Level;
2836   if (LeftAlignLabel)
2837     Line->Level = 0;
2838 
2839   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2840       FormatTok->is(tok::l_brace)) {
2841 
2842     CompoundStatementIndenter Indenter(this, Line->Level,
2843                                        Style.BraceWrapping.AfterCaseLabel,
2844                                        Style.BraceWrapping.IndentBraces);
2845     parseBlock();
2846     if (FormatTok->is(tok::kw_break)) {
2847       if (Style.BraceWrapping.AfterControlStatement ==
2848           FormatStyle::BWACS_Always) {
2849         addUnwrappedLine();
2850         if (!Style.IndentCaseBlocks &&
2851             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2852           ++Line->Level;
2853       }
2854       parseStructuralElement();
2855     }
2856     addUnwrappedLine();
2857   } else {
2858     if (FormatTok->is(tok::semi))
2859       nextToken();
2860     addUnwrappedLine();
2861   }
2862   Line->Level = OldLineLevel;
2863   if (FormatTok->isNot(tok::l_brace)) {
2864     parseStructuralElement();
2865     addUnwrappedLine();
2866   }
2867 }
2868 
2869 void UnwrappedLineParser::parseCaseLabel() {
2870   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2871 
2872   // FIXME: fix handling of complex expressions here.
2873   do {
2874     nextToken();
2875   } while (!eof() && !FormatTok->is(tok::colon));
2876   parseLabel();
2877 }
2878 
2879 void UnwrappedLineParser::parseSwitch() {
2880   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
2881   nextToken();
2882   if (FormatTok->is(tok::l_paren))
2883     parseParens();
2884 
2885   keepAncestorBraces();
2886 
2887   if (FormatTok->is(tok::l_brace)) {
2888     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2889     parseBlock();
2890     addUnwrappedLine();
2891   } else {
2892     addUnwrappedLine();
2893     ++Line->Level;
2894     parseStructuralElement();
2895     --Line->Level;
2896   }
2897 
2898   if (Style.RemoveBracesLLVM)
2899     NestedTooDeep.pop_back();
2900 }
2901 
2902 // Operators that can follow a C variable.
2903 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
2904   switch (kind) {
2905   case tok::ampamp:
2906   case tok::ampequal:
2907   case tok::arrow:
2908   case tok::caret:
2909   case tok::caretequal:
2910   case tok::comma:
2911   case tok::ellipsis:
2912   case tok::equal:
2913   case tok::equalequal:
2914   case tok::exclaim:
2915   case tok::exclaimequal:
2916   case tok::greater:
2917   case tok::greaterequal:
2918   case tok::greatergreater:
2919   case tok::greatergreaterequal:
2920   case tok::l_paren:
2921   case tok::l_square:
2922   case tok::less:
2923   case tok::lessequal:
2924   case tok::lessless:
2925   case tok::lesslessequal:
2926   case tok::minus:
2927   case tok::minusequal:
2928   case tok::minusminus:
2929   case tok::percent:
2930   case tok::percentequal:
2931   case tok::period:
2932   case tok::pipe:
2933   case tok::pipeequal:
2934   case tok::pipepipe:
2935   case tok::plus:
2936   case tok::plusequal:
2937   case tok::plusplus:
2938   case tok::question:
2939   case tok::r_brace:
2940   case tok::r_paren:
2941   case tok::r_square:
2942   case tok::semi:
2943   case tok::slash:
2944   case tok::slashequal:
2945   case tok::star:
2946   case tok::starequal:
2947     return true;
2948   default:
2949     return false;
2950   }
2951 }
2952 
2953 void UnwrappedLineParser::parseAccessSpecifier() {
2954   FormatToken *AccessSpecifierCandidate = FormatTok;
2955   nextToken();
2956   // Understand Qt's slots.
2957   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2958     nextToken();
2959   // Otherwise, we don't know what it is, and we'd better keep the next token.
2960   if (FormatTok->is(tok::colon)) {
2961     nextToken();
2962     addUnwrappedLine();
2963   } else if (!FormatTok->is(tok::coloncolon) &&
2964              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
2965     // Not a variable name nor namespace name.
2966     addUnwrappedLine();
2967   } else if (AccessSpecifierCandidate) {
2968     // Consider the access specifier to be a C identifier.
2969     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2970   }
2971 }
2972 
2973 /// \brief Parses a concept definition.
2974 /// \pre The current token has to be the concept keyword.
2975 ///
2976 /// Returns if either the concept has been completely parsed, or if it detects
2977 /// that the concept definition is incorrect.
2978 void UnwrappedLineParser::parseConcept() {
2979   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
2980   nextToken();
2981   if (!FormatTok->is(tok::identifier))
2982     return;
2983   nextToken();
2984   if (!FormatTok->is(tok::equal))
2985     return;
2986   nextToken();
2987   parseConstraintExpression();
2988   if (FormatTok->is(tok::semi))
2989     nextToken();
2990   addUnwrappedLine();
2991 }
2992 
2993 /// \brief Parses a requires, decides if it is a clause or an expression.
2994 /// \pre The current token has to be the requires keyword.
2995 /// \returns true if it parsed a clause.
2996 bool clang::format::UnwrappedLineParser::parseRequires() {
2997   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
2998   auto RequiresToken = FormatTok;
2999 
3000   // We try to guess if it is a requires clause, or a requires expression. For
3001   // that we first consume the keyword and check the next token.
3002   nextToken();
3003 
3004   switch (FormatTok->Tok.getKind()) {
3005   case tok::l_brace:
3006     // This can only be an expression, never a clause.
3007     parseRequiresExpression(RequiresToken);
3008     return false;
3009   case tok::l_paren:
3010     // Clauses and expression can start with a paren, it's unclear what we have.
3011     break;
3012   default:
3013     // All other tokens can only be a clause.
3014     parseRequiresClause(RequiresToken);
3015     return true;
3016   }
3017 
3018   // Looking forward we would have to decide if there are function declaration
3019   // like arguments to the requires expression:
3020   // requires (T t) {
3021   // Or there is a constraint expression for the requires clause:
3022   // requires (C<T> && ...
3023 
3024   // But first let's look behind.
3025   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3026 
3027   if (!PreviousNonComment ||
3028       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3029     // If there is no token, or an expression left brace, we are a requires
3030     // clause within a requires expression.
3031     parseRequiresClause(RequiresToken);
3032     return true;
3033   }
3034 
3035   switch (PreviousNonComment->Tok.getKind()) {
3036   case tok::greater:
3037   case tok::r_paren:
3038   case tok::kw_noexcept:
3039   case tok::kw_const:
3040     // This is a requires clause.
3041     parseRequiresClause(RequiresToken);
3042     return true;
3043   case tok::amp:
3044   case tok::ampamp: {
3045     // This can be either:
3046     // if (... && requires (T t) ...)
3047     // Or
3048     // void member(...) && requires (C<T> ...
3049     // We check the one token before that for a const:
3050     // void member(...) const && requires (C<T> ...
3051     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3052     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3053       parseRequiresClause(RequiresToken);
3054       return true;
3055     }
3056     break;
3057   }
3058   default:
3059     // It's an expression.
3060     parseRequiresExpression(RequiresToken);
3061     return false;
3062   }
3063 
3064   // Now we look forward and try to check if the paren content is a parameter
3065   // list. The parameters can be cv-qualified and contain references or
3066   // pointers.
3067   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3068   // of stuff: typename, const, *, &, &&, ::, identifiers.
3069 
3070   int NextTokenOffset = 1;
3071   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3072   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3073     ++NextTokenOffset;
3074     NextToken = Tokens->peekNextToken(NextTokenOffset);
3075   };
3076 
3077   bool FoundType = false;
3078   bool LastWasColonColon = false;
3079   int OpenAngles = 0;
3080 
3081   for (; NextTokenOffset < 50; PeekNext()) {
3082     switch (NextToken->Tok.getKind()) {
3083     case tok::kw_volatile:
3084     case tok::kw_const:
3085     case tok::comma:
3086       parseRequiresExpression(RequiresToken);
3087       return false;
3088     case tok::r_paren:
3089     case tok::pipepipe:
3090       parseRequiresClause(RequiresToken);
3091       return true;
3092     case tok::eof:
3093       // Break out of the loop.
3094       NextTokenOffset = 50;
3095       break;
3096     case tok::coloncolon:
3097       LastWasColonColon = true;
3098       break;
3099     case tok::identifier:
3100       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3101         parseRequiresExpression(RequiresToken);
3102         return false;
3103       }
3104       FoundType = true;
3105       LastWasColonColon = false;
3106       break;
3107     case tok::less:
3108       ++OpenAngles;
3109       break;
3110     case tok::greater:
3111       --OpenAngles;
3112       break;
3113     default:
3114       if (NextToken->isSimpleTypeSpecifier()) {
3115         parseRequiresExpression(RequiresToken);
3116         return false;
3117       }
3118       break;
3119     }
3120   }
3121 
3122   // This seems to be a complicated expression, just assume it's a clause.
3123   parseRequiresClause(RequiresToken);
3124   return true;
3125 }
3126 
3127 /// \brief Parses a requires clause.
3128 /// \param RequiresToken The requires keyword token, which starts this clause.
3129 /// \pre We need to be on the next token after the requires keyword.
3130 /// \sa parseRequiresExpression
3131 ///
3132 /// Returns if it either has finished parsing the clause, or it detects, that
3133 /// the clause is incorrect.
3134 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3135   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3136   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3137 
3138   // If there is no previous token, we are within a requires expression,
3139   // otherwise we will always have the template or function declaration in front
3140   // of it.
3141   bool InRequiresExpression =
3142       !RequiresToken->Previous ||
3143       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3144 
3145   RequiresToken->setFinalizedType(InRequiresExpression
3146                                       ? TT_RequiresClauseInARequiresExpression
3147                                       : TT_RequiresClause);
3148 
3149   parseConstraintExpression();
3150 
3151   if (!InRequiresExpression)
3152     FormatTok->Previous->ClosesRequiresClause = true;
3153 }
3154 
3155 /// \brief Parses a requires expression.
3156 /// \param RequiresToken The requires keyword token, which starts this clause.
3157 /// \pre We need to be on the next token after the requires keyword.
3158 /// \sa parseRequiresClause
3159 ///
3160 /// Returns if it either has finished parsing the expression, or it detects,
3161 /// that the expression is incorrect.
3162 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3163   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3164   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3165 
3166   RequiresToken->setFinalizedType(TT_RequiresExpression);
3167 
3168   if (FormatTok->is(tok::l_paren)) {
3169     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3170     parseParens();
3171   }
3172 
3173   if (FormatTok->is(tok::l_brace)) {
3174     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3175     parseChildBlock(/*CanContainBracedList=*/false,
3176                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3177   }
3178 }
3179 
/// \brief Parses a constraint expression.
///
/// This is either the definition of a concept, or the body of a requires
/// clause. It returns, when the parsing is complete, or the expression is
/// incorrect.
///
/// The expression is consumed piece by piece: every loop iteration looks at
/// the kind of the current token and either consumes a part of the expression
/// or returns because the token cannot belong to it.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;
  do {
    // Take over the permission decided by the previous iteration and reset it;
    // the cases below grant it again where a lambda may follow.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A nested requires is parsed as a requires expression.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      break;

    case tok::l_square:
      // Only a lambda may start with '[' here; anything else ends the
      // expression.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      // These tokens end the constraint expression.
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // Commas and comments pass the current lambda permission on unchanged.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
    case tok::kw_decltype:
      LambdaNextTimeAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // A named cast has to be followed by its template argument list.
      nextToken();
      if (!FormatTok->is(tok::less))
        return;

      nextToken();
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    case tok::kw_bool:
      // bool is only allowed if it is directly followed by a paren for a cast:
      // concept C = bool(...);
      // and bool is the only type, all other types as cast must be inside a
      // cast to bool and thus are handled by the other cases.
      nextToken();
      if (FormatTok->isNot(tok::l_paren))
        return;
      parseParens();
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    }
  } while (!eof());
}
3323 
3324 bool UnwrappedLineParser::parseEnum() {
3325   const FormatToken &InitialToken = *FormatTok;
3326 
3327   // Won't be 'enum' for NS_ENUMs.
3328   if (FormatTok->is(tok::kw_enum))
3329     nextToken();
3330 
3331   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3332   // declarations. An "enum" keyword followed by a colon would be a syntax
3333   // error and thus assume it is just an identifier.
3334   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3335     return false;
3336 
3337   // In protobuf, "enum" can be used as a field name.
3338   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3339     return false;
3340 
3341   // Eat up enum class ...
3342   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3343     nextToken();
3344 
3345   while (FormatTok->Tok.getIdentifierInfo() ||
3346          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3347                             tok::greater, tok::comma, tok::question)) {
3348     nextToken();
3349     // We can have macros or attributes in between 'enum' and the enum name.
3350     if (FormatTok->is(tok::l_paren))
3351       parseParens();
3352     if (FormatTok->is(tok::identifier)) {
3353       nextToken();
3354       // If there are two identifiers in a row, this is likely an elaborate
3355       // return type. In Java, this can be "implements", etc.
3356       if (Style.isCpp() && FormatTok->is(tok::identifier))
3357         return false;
3358     }
3359   }
3360 
3361   // Just a declaration or something is wrong.
3362   if (FormatTok->isNot(tok::l_brace))
3363     return true;
3364   FormatTok->setFinalizedType(TT_EnumLBrace);
3365   FormatTok->setBlockKind(BK_Block);
3366 
3367   if (Style.Language == FormatStyle::LK_Java) {
3368     // Java enums are different.
3369     parseJavaEnumBody();
3370     return true;
3371   }
3372   if (Style.Language == FormatStyle::LK_Proto) {
3373     parseBlock(/*MustBeDeclaration=*/true);
3374     return true;
3375   }
3376 
3377   if (!Style.AllowShortEnumsOnASingleLine &&
3378       ShouldBreakBeforeBrace(Style, InitialToken))
3379     addUnwrappedLine();
3380   // Parse enum body.
3381   nextToken();
3382   if (!Style.AllowShortEnumsOnASingleLine) {
3383     addUnwrappedLine();
3384     Line->Level += 1;
3385   }
3386   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3387                                    /*IsEnum=*/true);
3388   if (!Style.AllowShortEnumsOnASingleLine)
3389     Line->Level -= 1;
3390   if (HasError) {
3391     if (FormatTok->is(tok::semi))
3392       nextToken();
3393     addUnwrappedLine();
3394   }
3395   return true;
3396 
3397   // There is no addUnwrappedLine() here so that we fall through to parsing a
3398   // structural element afterwards. Thus, in "enum A {} n, m;",
3399   // "} n, m;" will end up in one unwrapped line.
3400 }
3401 
3402 bool UnwrappedLineParser::parseStructLike() {
3403   // parseRecord falls through and does not yet add an unwrapped line as a
3404   // record declaration or definition can start a structural element.
3405   parseRecord();
3406   // This does not apply to Java, JavaScript and C#.
3407   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3408       Style.isCSharp()) {
3409     if (FormatTok->is(tok::semi))
3410       nextToken();
3411     addUnwrappedLine();
3412     return true;
3413   }
3414   return false;
3415 }
3416 
3417 namespace {
3418 // A class used to set and restore the Token position when peeking
3419 // ahead in the token source.
3420 class ScopedTokenPosition {
3421   unsigned StoredPosition;
3422   FormatTokenSource *Tokens;
3423 
3424 public:
3425   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3426     assert(Tokens && "Tokens expected to not be null");
3427     StoredPosition = Tokens->getPosition();
3428   }
3429 
3430   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3431 };
3432 } // namespace
3433 
3434 // Look to see if we have [[ by looking ahead, if
3435 // its not then rewind to the original position.
3436 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3437   ScopedTokenPosition AutoPosition(Tokens);
3438   FormatToken *Tok = Tokens->getNextToken();
3439   // We already read the first [ check for the second.
3440   if (!Tok->is(tok::l_square))
3441     return false;
3442   // Double check that the attribute is just something
3443   // fairly simple.
3444   while (Tok->isNot(tok::eof)) {
3445     if (Tok->is(tok::r_square))
3446       break;
3447     Tok = Tokens->getNextToken();
3448   }
3449   if (Tok->is(tok::eof))
3450     return false;
3451   Tok = Tokens->getNextToken();
3452   if (!Tok->is(tok::r_square))
3453     return false;
3454   Tok = Tokens->getNextToken();
3455   if (Tok->is(tok::semi))
3456     return false;
3457   return true;
3458 }
3459 
3460 void UnwrappedLineParser::parseJavaEnumBody() {
3461   // Determine whether the enum is simple, i.e. does not have a semicolon or
3462   // constants with class bodies. Simple enums can be formatted like braced
3463   // lists, contracted to a single line, etc.
3464   unsigned StoredPosition = Tokens->getPosition();
3465   bool IsSimple = true;
3466   FormatToken *Tok = Tokens->getNextToken();
3467   while (!Tok->is(tok::eof)) {
3468     if (Tok->is(tok::r_brace))
3469       break;
3470     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3471       IsSimple = false;
3472       break;
3473     }
3474     // FIXME: This will also mark enums with braces in the arguments to enum
3475     // constants as "not simple". This is probably fine in practice, though.
3476     Tok = Tokens->getNextToken();
3477   }
3478   FormatTok = Tokens->setPosition(StoredPosition);
3479 
3480   if (IsSimple) {
3481     nextToken();
3482     parseBracedList();
3483     addUnwrappedLine();
3484     return;
3485   }
3486 
3487   // Parse the body of a more complex enum.
3488   // First add a line for everything up to the "{".
3489   nextToken();
3490   addUnwrappedLine();
3491   ++Line->Level;
3492 
3493   // Parse the enum constants.
3494   while (FormatTok) {
3495     if (FormatTok->is(tok::l_brace)) {
3496       // Parse the constant's class body.
3497       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3498                  /*MunchSemi=*/false);
3499     } else if (FormatTok->is(tok::l_paren)) {
3500       parseParens();
3501     } else if (FormatTok->is(tok::comma)) {
3502       nextToken();
3503       addUnwrappedLine();
3504     } else if (FormatTok->is(tok::semi)) {
3505       nextToken();
3506       addUnwrappedLine();
3507       break;
3508     } else if (FormatTok->is(tok::r_brace)) {
3509       addUnwrappedLine();
3510       break;
3511     } else {
3512       nextToken();
3513     }
3514   }
3515 
3516   // Parse the class body after the enum's ";" if any.
3517   parseLevel(/*HasOpeningBrace=*/true, /*CanContainBracedList=*/true);
3518   nextToken();
3519   --Line->Level;
3520   addUnwrappedLine();
3521 }
3522 
3523 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3524   const FormatToken &InitialToken = *FormatTok;
3525   nextToken();
3526 
3527   // The actual identifier can be a nested name specifier, and in macros
3528   // it is often token-pasted.
3529   // An [[attribute]] can be before the identifier.
3530   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3531                             tok::kw___attribute, tok::kw___declspec,
3532                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3533          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3534           FormatTok->isOneOf(tok::period, tok::comma))) {
3535     if (Style.isJavaScript() &&
3536         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3537       // JavaScript/TypeScript supports inline object types in
3538       // extends/implements positions:
3539       //     class Foo implements {bar: number} { }
3540       nextToken();
3541       if (FormatTok->is(tok::l_brace)) {
3542         tryToParseBracedList();
3543         continue;
3544       }
3545     }
3546     bool IsNonMacroIdentifier =
3547         FormatTok->is(tok::identifier) &&
3548         FormatTok->TokenText != FormatTok->TokenText.upper();
3549     nextToken();
3550     // We can have macros or attributes in between 'class' and the class name.
3551     if (!IsNonMacroIdentifier) {
3552       if (FormatTok->is(tok::l_paren)) {
3553         parseParens();
3554       } else if (FormatTok->is(TT_AttributeSquare)) {
3555         parseSquare();
3556         // Consume the closing TT_AttributeSquare.
3557         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3558           nextToken();
3559       }
3560     }
3561   }
3562 
3563   // Note that parsing away template declarations here leads to incorrectly
3564   // accepting function declarations as record declarations.
3565   // In general, we cannot solve this problem. Consider:
3566   // class A<int> B() {}
3567   // which can be a function definition or a class definition when B() is a
3568   // macro. If we find enough real-world cases where this is a problem, we
3569   // can parse for the 'template' keyword in the beginning of the statement,
3570   // and thus rule out the record production in case there is no template
3571   // (this would still leave us with an ambiguity between template function
3572   // and class declarations).
3573   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3574     do {
3575       if (FormatTok->is(tok::l_brace)) {
3576         calculateBraceTypes(/*ExpectClassBody=*/true);
3577         if (!tryToParseBracedList())
3578           break;
3579       }
3580       if (FormatTok->is(tok::l_square)) {
3581         FormatToken *Previous = FormatTok->Previous;
3582         if (!Previous ||
3583             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3584           // Don't try parsing a lambda if we had a closing parenthesis before,
3585           // it was probably a pointer to an array: int (*)[].
3586           if (!tryToParseLambda())
3587             break;
3588         } else {
3589           parseSquare();
3590           continue;
3591         }
3592       }
3593       if (FormatTok->is(tok::semi))
3594         return;
3595       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3596         addUnwrappedLine();
3597         nextToken();
3598         parseCSharpGenericTypeConstraint();
3599         break;
3600       }
3601       nextToken();
3602     } while (!eof());
3603   }
3604 
3605   auto GetBraceType = [](const FormatToken &RecordTok) {
3606     switch (RecordTok.Tok.getKind()) {
3607     case tok::kw_class:
3608       return TT_ClassLBrace;
3609     case tok::kw_struct:
3610       return TT_StructLBrace;
3611     case tok::kw_union:
3612       return TT_UnionLBrace;
3613     default:
3614       // Useful for e.g. interface.
3615       return TT_RecordLBrace;
3616     }
3617   };
3618   if (FormatTok->is(tok::l_brace)) {
3619     FormatTok->setFinalizedType(GetBraceType(InitialToken));
3620     if (ParseAsExpr) {
3621       parseChildBlock();
3622     } else {
3623       if (ShouldBreakBeforeBrace(Style, InitialToken))
3624         addUnwrappedLine();
3625 
3626       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3627       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3628     }
3629   }
3630   // There is no addUnwrappedLine() here so that we fall through to parsing a
3631   // structural element afterwards. Thus, in "class A {} n, m;",
3632   // "} n, m;" will end up in one unwrapped line.
3633 }
3634 
3635 void UnwrappedLineParser::parseObjCMethod() {
3636   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3637          "'(' or identifier expected.");
3638   do {
3639     if (FormatTok->is(tok::semi)) {
3640       nextToken();
3641       addUnwrappedLine();
3642       return;
3643     } else if (FormatTok->is(tok::l_brace)) {
3644       if (Style.BraceWrapping.AfterFunction)
3645         addUnwrappedLine();
3646       parseBlock();
3647       addUnwrappedLine();
3648       return;
3649     } else {
3650       nextToken();
3651     }
3652   } while (!eof());
3653 }
3654 
3655 void UnwrappedLineParser::parseObjCProtocolList() {
3656   assert(FormatTok->is(tok::less) && "'<' expected.");
3657   do {
3658     nextToken();
3659     // Early exit in case someone forgot a close angle.
3660     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3661         FormatTok->isObjCAtKeyword(tok::objc_end))
3662       return;
3663   } while (!eof() && FormatTok->isNot(tok::greater));
3664   nextToken(); // Skip '>'.
3665 }
3666 
3667 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3668   do {
3669     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3670       nextToken();
3671       addUnwrappedLine();
3672       break;
3673     }
3674     if (FormatTok->is(tok::l_brace)) {
3675       parseBlock();
3676       // In ObjC interfaces, nothing should be following the "}".
3677       addUnwrappedLine();
3678     } else if (FormatTok->is(tok::r_brace)) {
3679       // Ignore stray "}". parseStructuralElement doesn't consume them.
3680       nextToken();
3681       addUnwrappedLine();
3682     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3683       nextToken();
3684       parseObjCMethod();
3685     } else {
3686       parseStructuralElement();
3687     }
3688   } while (!eof());
3689 }
3690 
3691 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3692   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3693          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3694   nextToken();
3695   nextToken(); // interface name
3696 
3697   // @interface can be followed by a lightweight generic
3698   // specialization list, then either a base class or a category.
3699   if (FormatTok->is(tok::less))
3700     parseObjCLightweightGenerics();
3701   if (FormatTok->is(tok::colon)) {
3702     nextToken();
3703     nextToken(); // base class name
3704     // The base class can also have lightweight generics applied to it.
3705     if (FormatTok->is(tok::less))
3706       parseObjCLightweightGenerics();
3707   } else if (FormatTok->is(tok::l_paren))
3708     // Skip category, if present.
3709     parseParens();
3710 
3711   if (FormatTok->is(tok::less))
3712     parseObjCProtocolList();
3713 
3714   if (FormatTok->is(tok::l_brace)) {
3715     if (Style.BraceWrapping.AfterObjCDeclaration)
3716       addUnwrappedLine();
3717     parseBlock(/*MustBeDeclaration=*/true);
3718   }
3719 
3720   // With instance variables, this puts '}' on its own line.  Without instance
3721   // variables, this ends the @interface line.
3722   addUnwrappedLine();
3723 
3724   parseObjCUntilAtEnd();
3725 }
3726 
3727 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3728   assert(FormatTok->is(tok::less));
3729   // Unlike protocol lists, generic parameterizations support
3730   // nested angles:
3731   //
3732   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3733   //     NSObject <NSCopying, NSSecureCoding>
3734   //
3735   // so we need to count how many open angles we have left.
3736   unsigned NumOpenAngles = 1;
3737   do {
3738     nextToken();
3739     // Early exit in case someone forgot a close angle.
3740     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3741         FormatTok->isObjCAtKeyword(tok::objc_end))
3742       break;
3743     if (FormatTok->is(tok::less))
3744       ++NumOpenAngles;
3745     else if (FormatTok->is(tok::greater)) {
3746       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3747       --NumOpenAngles;
3748     }
3749   } while (!eof() && NumOpenAngles != 0);
3750   nextToken(); // Skip '>'.
3751 }
3752 
3753 // Returns true for the declaration/definition form of @protocol,
3754 // false for the expression form.
3755 bool UnwrappedLineParser::parseObjCProtocol() {
3756   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3757   nextToken();
3758 
3759   if (FormatTok->is(tok::l_paren))
3760     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3761     return false;
3762 
3763   // The definition/declaration form,
3764   // @protocol Foo
3765   // - (int)someMethod;
3766   // @end
3767 
3768   nextToken(); // protocol name
3769 
3770   if (FormatTok->is(tok::less))
3771     parseObjCProtocolList();
3772 
3773   // Check for protocol declaration.
3774   if (FormatTok->is(tok::semi)) {
3775     nextToken();
3776     addUnwrappedLine();
3777     return true;
3778   }
3779 
3780   addUnwrappedLine();
3781   parseObjCUntilAtEnd();
3782   return true;
3783 }
3784 
3785 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3786   bool IsImport = FormatTok->is(Keywords.kw_import);
3787   assert(IsImport || FormatTok->is(tok::kw_export));
3788   nextToken();
3789 
3790   // Consume the "default" in "export default class/function".
3791   if (FormatTok->is(tok::kw_default))
3792     nextToken();
3793 
3794   // Consume "async function", "function" and "default function", so that these
3795   // get parsed as free-standing JS functions, i.e. do not require a trailing
3796   // semicolon.
3797   if (FormatTok->is(Keywords.kw_async))
3798     nextToken();
3799   if (FormatTok->is(Keywords.kw_function)) {
3800     nextToken();
3801     return;
3802   }
3803 
3804   // For imports, `export *`, `export {...}`, consume the rest of the line up
3805   // to the terminating `;`. For everything else, just return and continue
3806   // parsing the structural element, i.e. the declaration or expression for
3807   // `export default`.
3808   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3809       !FormatTok->isStringLiteral())
3810     return;
3811 
3812   while (!eof()) {
3813     if (FormatTok->is(tok::semi))
3814       return;
3815     if (Line->Tokens.empty()) {
3816       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3817       // import statement should terminate.
3818       return;
3819     }
3820     if (FormatTok->is(tok::l_brace)) {
3821       FormatTok->setBlockKind(BK_Block);
3822       nextToken();
3823       parseBracedList();
3824     } else {
3825       nextToken();
3826     }
3827   }
3828 }
3829 
3830 void UnwrappedLineParser::parseStatementMacro() {
3831   nextToken();
3832   if (FormatTok->is(tok::l_paren))
3833     parseParens();
3834   if (FormatTok->is(tok::semi))
3835     nextToken();
3836   addUnwrappedLine();
3837 }
3838 
3839 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3840                                                  StringRef Prefix = "") {
3841   llvm::dbgs() << Prefix << "Line(" << Line.Level
3842                << ", FSC=" << Line.FirstStartColumn << ")"
3843                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3844   for (const auto &Node : Line.Tokens) {
3845     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3846                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3847                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3848   }
3849   for (const auto &Node : Line.Tokens)
3850     for (const auto &ChildNode : Node.Children)
3851       printDebugInfo(ChildNode, "\nChild: ");
3852 
3853   llvm::dbgs() << "\n";
3854 }
3855 
3856 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3857   if (Line->Tokens.empty())
3858     return;
3859   LLVM_DEBUG({
3860     if (CurrentLines == &Lines)
3861       printDebugInfo(*Line);
3862   });
3863 
3864   // If this line closes a block when in Whitesmiths mode, remember that
3865   // information so that the level can be decreased after the line is added.
3866   // This has to happen after the addition of the line since the line itself
3867   // needs to be indented.
3868   bool ClosesWhitesmithsBlock =
3869       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3870       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3871 
3872   CurrentLines->push_back(std::move(*Line));
3873   Line->Tokens.clear();
3874   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3875   Line->FirstStartColumn = 0;
3876 
3877   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3878     --Line->Level;
3879   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3880     CurrentLines->append(
3881         std::make_move_iterator(PreprocessorDirectives.begin()),
3882         std::make_move_iterator(PreprocessorDirectives.end()));
3883     PreprocessorDirectives.clear();
3884   }
3885   // Disconnect the current token from the last token on the previous line.
3886   FormatTok->Previous = nullptr;
3887 }
3888 
3889 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
3890 
3891 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3892   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3893          FormatTok.NewlinesBefore > 0;
3894 }
3895 
3896 // Checks if \p FormatTok is a line comment that continues the line comment
3897 // section on \p Line.
3898 static bool
3899 continuesLineCommentSection(const FormatToken &FormatTok,
3900                             const UnwrappedLine &Line,
3901                             const llvm::Regex &CommentPragmasRegex) {
3902   if (Line.Tokens.empty())
3903     return false;
3904 
3905   StringRef IndentContent = FormatTok.TokenText;
3906   if (FormatTok.TokenText.startswith("//") ||
3907       FormatTok.TokenText.startswith("/*"))
3908     IndentContent = FormatTok.TokenText.substr(2);
3909   if (CommentPragmasRegex.match(IndentContent))
3910     return false;
3911 
3912   // If Line starts with a line comment, then FormatTok continues the comment
3913   // section if its original column is greater or equal to the original start
3914   // column of the line.
3915   //
3916   // Define the min column token of a line as follows: if a line ends in '{' or
3917   // contains a '{' followed by a line comment, then the min column token is
3918   // that '{'. Otherwise, the min column token of the line is the first token of
3919   // the line.
3920   //
3921   // If Line starts with a token other than a line comment, then FormatTok
3922   // continues the comment section if its original column is greater than the
3923   // original start column of the min column token of the line.
3924   //
3925   // For example, the second line comment continues the first in these cases:
3926   //
3927   // // first line
3928   // // second line
3929   //
3930   // and:
3931   //
3932   // // first line
3933   //  // second line
3934   //
3935   // and:
3936   //
3937   // int i; // first line
3938   //  // second line
3939   //
3940   // and:
3941   //
3942   // do { // first line
3943   //      // second line
3944   //   int i;
3945   // } while (true);
3946   //
3947   // and:
3948   //
3949   // enum {
3950   //   a, // first line
3951   //    // second line
3952   //   b
3953   // };
3954   //
3955   // The second line comment doesn't continue the first in these cases:
3956   //
3957   //   // first line
3958   //  // second line
3959   //
3960   // and:
3961   //
3962   // int i; // first line
3963   // // second line
3964   //
3965   // and:
3966   //
3967   // do { // first line
3968   //   // second line
3969   //   int i;
3970   // } while (true);
3971   //
3972   // and:
3973   //
3974   // enum {
3975   //   a, // first line
3976   //   // second line
3977   // };
3978   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3979 
3980   // Scan for '{//'. If found, use the column of '{' as a min column for line
3981   // comment section continuation.
3982   const FormatToken *PreviousToken = nullptr;
3983   for (const UnwrappedLineNode &Node : Line.Tokens) {
3984     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3985         isLineComment(*Node.Tok)) {
3986       MinColumnToken = PreviousToken;
3987       break;
3988     }
3989     PreviousToken = Node.Tok;
3990 
3991     // Grab the last newline preceding a token in this unwrapped line.
3992     if (Node.Tok->NewlinesBefore > 0)
3993       MinColumnToken = Node.Tok;
3994   }
3995   if (PreviousToken && PreviousToken->is(tok::l_brace))
3996     MinColumnToken = PreviousToken;
3997 
3998   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3999                               MinColumnToken);
4000 }
4001 
4002 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4003   bool JustComments = Line->Tokens.empty();
4004   for (FormatToken *Tok : CommentsBeforeNextToken) {
4005     // Line comments that belong to the same line comment section are put on the
4006     // same line since later we might want to reflow content between them.
4007     // Additional fine-grained breaking of line comment sections is controlled
4008     // by the class BreakableLineCommentSection in case it is desirable to keep
4009     // several line comment sections in the same unwrapped line.
4010     //
4011     // FIXME: Consider putting separate line comment sections as children to the
4012     // unwrapped line instead.
4013     Tok->ContinuesLineCommentSection =
4014         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4015     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4016       addUnwrappedLine();
4017     pushToken(Tok);
4018   }
4019   if (NewlineBeforeNext && JustComments)
4020     addUnwrappedLine();
4021   CommentsBeforeNextToken.clear();
4022 }
4023 
4024 void UnwrappedLineParser::nextToken(int LevelDifference) {
4025   if (eof())
4026     return;
4027   flushComments(isOnNewLine(*FormatTok));
4028   pushToken(FormatTok);
4029   FormatToken *Previous = FormatTok;
4030   if (!Style.isJavaScript())
4031     readToken(LevelDifference);
4032   else
4033     readTokenWithJavaScriptASI();
4034   FormatTok->Previous = Previous;
4035 }
4036 
4037 void UnwrappedLineParser::distributeComments(
4038     const SmallVectorImpl<FormatToken *> &Comments,
4039     const FormatToken *NextTok) {
4040   // Whether or not a line comment token continues a line is controlled by
4041   // the method continuesLineCommentSection, with the following caveat:
4042   //
4043   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4044   // that each comment line from the trail is aligned with the next token, if
4045   // the next token exists. If a trail exists, the beginning of the maximal
4046   // trail is marked as a start of a new comment section.
4047   //
4048   // For example in this code:
4049   //
4050   // int a; // line about a
4051   //   // line 1 about b
4052   //   // line 2 about b
4053   //   int b;
4054   //
4055   // the two lines about b form a maximal trail, so there are two sections, the
4056   // first one consisting of the single comment "// line about a" and the
4057   // second one consisting of the next two comments.
4058   if (Comments.empty())
4059     return;
4060   bool ShouldPushCommentsInCurrentLine = true;
4061   bool HasTrailAlignedWithNextToken = false;
4062   unsigned StartOfTrailAlignedWithNextToken = 0;
4063   if (NextTok) {
4064     // We are skipping the first element intentionally.
4065     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4066       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4067         HasTrailAlignedWithNextToken = true;
4068         StartOfTrailAlignedWithNextToken = i;
4069       }
4070     }
4071   }
4072   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4073     FormatToken *FormatTok = Comments[i];
4074     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4075       FormatTok->ContinuesLineCommentSection = false;
4076     } else {
4077       FormatTok->ContinuesLineCommentSection =
4078           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4079     }
4080     if (!FormatTok->ContinuesLineCommentSection &&
4081         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
4082       ShouldPushCommentsInCurrentLine = false;
4083     if (ShouldPushCommentsInCurrentLine)
4084       pushToken(FormatTok);
4085     else
4086       CommentsBeforeNextToken.push_back(FormatTok);
4087   }
4088 }
4089 
4090 void UnwrappedLineParser::readToken(int LevelDifference) {
4091   SmallVector<FormatToken *, 1> Comments;
4092   bool PreviousWasComment = false;
4093   bool FirstNonCommentOnLine = false;
4094   do {
4095     FormatTok = Tokens->getNextToken();
4096     assert(FormatTok);
4097     while (FormatTok->getType() == TT_ConflictStart ||
4098            FormatTok->getType() == TT_ConflictEnd ||
4099            FormatTok->getType() == TT_ConflictAlternative) {
4100       if (FormatTok->getType() == TT_ConflictStart)
4101         conditionalCompilationStart(/*Unreachable=*/false);
4102       else if (FormatTok->getType() == TT_ConflictAlternative)
4103         conditionalCompilationAlternative();
4104       else if (FormatTok->getType() == TT_ConflictEnd)
4105         conditionalCompilationEnd();
4106       FormatTok = Tokens->getNextToken();
4107       FormatTok->MustBreakBefore = true;
4108     }
4109 
4110     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4111                                       const FormatToken &Tok,
4112                                       bool PreviousWasComment) {
4113       auto IsFirstOnLine = [](const FormatToken &Tok) {
4114         return Tok.HasUnescapedNewline || Tok.IsFirst;
4115       };
4116 
4117       // Consider preprocessor directives preceded by block comments as first
4118       // on line.
4119       if (PreviousWasComment)
4120         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4121       return IsFirstOnLine(Tok);
4122     };
4123 
4124     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4125         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4126     PreviousWasComment = FormatTok->is(tok::comment);
4127 
4128     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4129            FirstNonCommentOnLine) {
4130       distributeComments(Comments, FormatTok);
4131       Comments.clear();
4132       // If there is an unfinished unwrapped line, we flush the preprocessor
4133       // directives only after that unwrapped line was finished later.
4134       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4135       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4136       assert((LevelDifference >= 0 ||
4137               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4138              "LevelDifference makes Line->Level negative");
4139       Line->Level += LevelDifference;
4140       // Comments stored before the preprocessor directive need to be output
4141       // before the preprocessor directive, at the same level as the
4142       // preprocessor directive, as we consider them to apply to the directive.
4143       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4144           PPBranchLevel > 0)
4145         Line->Level += PPBranchLevel;
4146       flushComments(isOnNewLine(*FormatTok));
4147       parsePPDirective();
4148       PreviousWasComment = FormatTok->is(tok::comment);
4149       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4150           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4151     }
4152 
4153     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4154         !Line->InPPDirective)
4155       continue;
4156 
4157     if (!FormatTok->is(tok::comment)) {
4158       distributeComments(Comments, FormatTok);
4159       Comments.clear();
4160       return;
4161     }
4162 
4163     Comments.push_back(FormatTok);
4164   } while (!eof());
4165 
4166   distributeComments(Comments, nullptr);
4167   Comments.clear();
4168 }
4169 
4170 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4171   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4172   if (MustBreakBeforeNextToken) {
4173     Line->Tokens.back().Tok->MustBreakBefore = true;
4174     MustBreakBeforeNextToken = false;
4175   }
4176 }
4177 
4178 } // end namespace format
4179 } // end namespace clang
4180