1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 #include <utility>
24 
25 #define DEBUG_TYPE "format-parser"
26 
27 namespace clang {
28 namespace format {
29 
30 class FormatTokenSource {
31 public:
32   virtual ~FormatTokenSource() {}
33 
34   // Returns the next token in the token stream.
35   virtual FormatToken *getNextToken() = 0;
36 
37   // Returns the token preceding the token returned by the last call to
38   // getNextToken() in the token stream, or nullptr if no such token exists.
39   virtual FormatToken *getPreviousToken() = 0;
40 
41   // Returns the token that would be returned by the next call to
42   // getNextToken().
43   virtual FormatToken *peekNextToken() = 0;
44 
45   // Returns the token that would be returned after the next N calls to
46   // getNextToken(). N needs to be greater than zero, and small enough that
47   // there are still tokens. Check for tok::eof with N-1 before calling it with
48   // N.
49   virtual FormatToken *peekNextToken(int N) = 0;
50 
51   // Returns whether we are at the end of the file.
52   // This can be different from whether getNextToken() returned an eof token
53   // when the FormatTokenSource is a view on a part of the token stream.
54   virtual bool isEOF() = 0;
55 
56   // Gets the current position in the token stream, to be used by setPosition().
57   virtual unsigned getPosition() = 0;
58 
59   // Resets the token stream to the state it was in when getPosition() returned
60   // Position, and return the token at that position in the stream.
61   virtual FormatToken *setPosition(unsigned Position) = 0;
62 };
63 
64 namespace {
65 
66 class ScopedDeclarationState {
67 public:
68   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
69                          bool MustBeDeclaration)
70       : Line(Line), Stack(Stack) {
71     Line.MustBeDeclaration = MustBeDeclaration;
72     Stack.push_back(MustBeDeclaration);
73   }
74   ~ScopedDeclarationState() {
75     Stack.pop_back();
76     if (!Stack.empty())
77       Line.MustBeDeclaration = Stack.back();
78     else
79       Line.MustBeDeclaration = true;
80   }
81 
82 private:
83   UnwrappedLine &Line;
84   llvm::BitVector &Stack;
85 };
86 
87 static bool isLineComment(const FormatToken &FormatTok) {
88   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
89 }
90 
91 // Checks if \p FormatTok is a line comment that continues the line comment
92 // \p Previous. The original column of \p MinColumnToken is used to determine
93 // whether \p FormatTok is indented enough to the right to continue \p Previous.
94 static bool continuesLineComment(const FormatToken &FormatTok,
95                                  const FormatToken *Previous,
96                                  const FormatToken *MinColumnToken) {
97   if (!Previous || !MinColumnToken)
98     return false;
99   unsigned MinContinueColumn =
100       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
101   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
102          isLineComment(*Previous) &&
103          FormatTok.OriginalColumn >= MinContinueColumn;
104 }
105 
106 class ScopedMacroState : public FormatTokenSource {
107 public:
108   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
109                    FormatToken *&ResetToken)
110       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
111         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
112         Token(nullptr), PreviousToken(nullptr) {
113     FakeEOF.Tok.startToken();
114     FakeEOF.Tok.setKind(tok::eof);
115     TokenSource = this;
116     Line.Level = 0;
117     Line.InPPDirective = true;
118   }
119 
120   ~ScopedMacroState() override {
121     TokenSource = PreviousTokenSource;
122     ResetToken = Token;
123     Line.InPPDirective = false;
124     Line.Level = PreviousLineLevel;
125   }
126 
127   FormatToken *getNextToken() override {
128     // The \c UnwrappedLineParser guards against this by never calling
129     // \c getNextToken() after it has encountered the first eof token.
130     assert(!eof());
131     PreviousToken = Token;
132     Token = PreviousTokenSource->getNextToken();
133     if (eof())
134       return &FakeEOF;
135     return Token;
136   }
137 
138   FormatToken *getPreviousToken() override {
139     return PreviousTokenSource->getPreviousToken();
140   }
141 
142   FormatToken *peekNextToken() override {
143     if (eof())
144       return &FakeEOF;
145     return PreviousTokenSource->peekNextToken();
146   }
147 
148   FormatToken *peekNextToken(int N) override {
149     assert(N > 0);
150     if (eof())
151       return &FakeEOF;
152     return PreviousTokenSource->peekNextToken(N);
153   }
154 
155   bool isEOF() override { return PreviousTokenSource->isEOF(); }
156 
157   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
158 
159   FormatToken *setPosition(unsigned Position) override {
160     PreviousToken = nullptr;
161     Token = PreviousTokenSource->setPosition(Position);
162     return Token;
163   }
164 
165 private:
166   bool eof() {
167     return Token && Token->HasUnescapedNewline &&
168            !continuesLineComment(*Token, PreviousToken,
169                                  /*MinColumnToken=*/PreviousToken);
170   }
171 
172   FormatToken FakeEOF;
173   UnwrappedLine &Line;
174   FormatTokenSource *&TokenSource;
175   FormatToken *&ResetToken;
176   unsigned PreviousLineLevel;
177   FormatTokenSource *PreviousTokenSource;
178 
179   FormatToken *Token;
180   FormatToken *PreviousToken;
181 };
182 
183 } // end anonymous namespace
184 
185 class ScopedLineState {
186 public:
187   ScopedLineState(UnwrappedLineParser &Parser,
188                   bool SwitchToPreprocessorLines = false)
189       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
190     if (SwitchToPreprocessorLines)
191       Parser.CurrentLines = &Parser.PreprocessorDirectives;
192     else if (!Parser.Line->Tokens.empty())
193       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
194     PreBlockLine = std::move(Parser.Line);
195     Parser.Line = std::make_unique<UnwrappedLine>();
196     Parser.Line->Level = PreBlockLine->Level;
197     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
198   }
199 
200   ~ScopedLineState() {
201     if (!Parser.Line->Tokens.empty())
202       Parser.addUnwrappedLine();
203     assert(Parser.Line->Tokens.empty());
204     Parser.Line = std::move(PreBlockLine);
205     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
206       Parser.MustBreakBeforeNextToken = true;
207     Parser.CurrentLines = OriginalLines;
208   }
209 
210 private:
211   UnwrappedLineParser &Parser;
212 
213   std::unique_ptr<UnwrappedLine> PreBlockLine;
214   SmallVectorImpl<UnwrappedLine> *OriginalLines;
215 };
216 
217 class CompoundStatementIndenter {
218 public:
219   CompoundStatementIndenter(UnwrappedLineParser *Parser,
220                             const FormatStyle &Style, unsigned &LineLevel)
221       : CompoundStatementIndenter(Parser, LineLevel,
222                                   Style.BraceWrapping.AfterControlStatement,
223                                   Style.BraceWrapping.IndentBraces) {}
224   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
225                             bool WrapBrace, bool IndentBrace)
226       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
227     if (WrapBrace)
228       Parser->addUnwrappedLine();
229     if (IndentBrace)
230       ++LineLevel;
231   }
232   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
233 
234 private:
235   unsigned &LineLevel;
236   unsigned OldLineLevel;
237 };
238 
239 namespace {
240 
241 class IndexedTokenSource : public FormatTokenSource {
242 public:
243   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
244       : Tokens(Tokens), Position(-1) {}
245 
246   FormatToken *getNextToken() override {
247     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
248       LLVM_DEBUG({
249         llvm::dbgs() << "Next ";
250         dbgToken(Position);
251       });
252       return Tokens[Position];
253     }
254     ++Position;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Next ";
257       dbgToken(Position);
258     });
259     return Tokens[Position];
260   }
261 
262   FormatToken *getPreviousToken() override {
263     return Position > 0 ? Tokens[Position - 1] : nullptr;
264   }
265 
266   FormatToken *peekNextToken() override {
267     int Next = Position + 1;
268     LLVM_DEBUG({
269       llvm::dbgs() << "Peeking ";
270       dbgToken(Next);
271     });
272     return Tokens[Next];
273   }
274 
275   FormatToken *peekNextToken(int N) override {
276     assert(N > 0);
277     int Next = Position + N;
278     LLVM_DEBUG({
279       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
280       dbgToken(Next);
281     });
282     return Tokens[Next];
283   }
284 
285   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
286 
287   unsigned getPosition() override {
288     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
289     assert(Position >= 0);
290     return Position;
291   }
292 
293   FormatToken *setPosition(unsigned P) override {
294     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
295     Position = P;
296     return Tokens[Position];
297   }
298 
299   void reset() { Position = -1; }
300 
301 private:
302   void dbgToken(int Position, llvm::StringRef Indent = "") {
303     FormatToken *Tok = Tokens[Position];
304     llvm::dbgs() << Indent << "[" << Position
305                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
306                  << ", Macro: " << !!Tok->MacroCtx << "\n";
307   }
308 
309   ArrayRef<FormatToken *> Tokens;
310   int Position;
311 };
312 
313 } // end anonymous namespace
314 
315 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
316                                          const AdditionalKeywords &Keywords,
317                                          unsigned FirstStartColumn,
318                                          ArrayRef<FormatToken *> Tokens,
319                                          UnwrappedLineConsumer &Callback)
320     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
321       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
322       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
323       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
324       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
325                        ? IG_Rejected
326                        : IG_Inited),
327       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
328 
329 void UnwrappedLineParser::reset() {
330   PPBranchLevel = -1;
331   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
332                      ? IG_Rejected
333                      : IG_Inited;
334   IncludeGuardToken = nullptr;
335   Line.reset(new UnwrappedLine);
336   CommentsBeforeNextToken.clear();
337   FormatTok = nullptr;
338   MustBreakBeforeNextToken = false;
339   PreprocessorDirectives.clear();
340   CurrentLines = &Lines;
341   DeclarationScopeStack.clear();
342   NestedTooDeep.clear();
343   PPStack.clear();
344   Line->FirstStartColumn = FirstStartColumn;
345 }
346 
347 void UnwrappedLineParser::parse() {
348   IndexedTokenSource TokenSource(AllTokens);
349   Line->FirstStartColumn = FirstStartColumn;
350   do {
351     LLVM_DEBUG(llvm::dbgs() << "----\n");
352     reset();
353     Tokens = &TokenSource;
354     TokenSource.reset();
355 
356     readToken();
357     parseFile();
358 
359     // If we found an include guard then all preprocessor directives (other than
360     // the guard) are over-indented by one.
361     if (IncludeGuard == IG_Found) {
362       for (auto &Line : Lines)
363         if (Line.InPPDirective && Line.Level > 0)
364           --Line.Level;
365     }
366 
367     // Create line with eof token.
368     pushToken(FormatTok);
369     addUnwrappedLine();
370 
371     for (const UnwrappedLine &Line : Lines)
372       Callback.consumeUnwrappedLine(Line);
373 
374     Callback.finishRun();
375     Lines.clear();
376     while (!PPLevelBranchIndex.empty() &&
377            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
378       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
379       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
380     }
381     if (!PPLevelBranchIndex.empty()) {
382       ++PPLevelBranchIndex.back();
383       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
384       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
385     }
386   } while (!PPLevelBranchIndex.empty());
387 }
388 
389 void UnwrappedLineParser::parseFile() {
390   // The top-level context in a file always has declarations, except for pre-
391   // processor directives and JavaScript files.
392   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
393   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
394                                           MustBeDeclaration);
395   if (Style.Language == FormatStyle::LK_TextProto)
396     parseBracedList();
397   else
398     parseLevel(/*OpeningBrace=*/nullptr, /*CanContainBracedList=*/true);
399   // Make sure to format the remaining tokens.
400   //
401   // LK_TextProto is special since its top-level is parsed as the body of a
402   // braced list, which does not necessarily have natural line separators such
403   // as a semicolon. Comments after the last entry that have been determined to
404   // not belong to that line, as in:
405   //   key: value
406   //   // endfile comment
407   // do not have a chance to be put on a line of their own until this point.
408   // Here we add this newline before end-of-file comments.
409   if (Style.Language == FormatStyle::LK_TextProto &&
410       !CommentsBeforeNextToken.empty()) {
411     addUnwrappedLine();
412   }
413   flushComments(true);
414   addUnwrappedLine();
415 }
416 
417 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
418   do {
419     switch (FormatTok->Tok.getKind()) {
420     case tok::l_brace:
421       return;
422     default:
423       if (FormatTok->is(Keywords.kw_where)) {
424         addUnwrappedLine();
425         nextToken();
426         parseCSharpGenericTypeConstraint();
427         break;
428       }
429       nextToken();
430       break;
431     }
432   } while (!eof());
433 }
434 
435 void UnwrappedLineParser::parseCSharpAttribute() {
436   int UnpairedSquareBrackets = 1;
437   do {
438     switch (FormatTok->Tok.getKind()) {
439     case tok::r_square:
440       nextToken();
441       --UnpairedSquareBrackets;
442       if (UnpairedSquareBrackets == 0) {
443         addUnwrappedLine();
444         return;
445       }
446       break;
447     case tok::l_square:
448       ++UnpairedSquareBrackets;
449       nextToken();
450       break;
451     default:
452       nextToken();
453       break;
454     }
455   } while (!eof());
456 }
457 
458 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
459   if (!Lines.empty() && Lines.back().InPPDirective)
460     return true;
461 
462   const FormatToken *Previous = Tokens->getPreviousToken();
463   return Previous && Previous->is(tok::comment) &&
464          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
465 }
466 
/// \brief Parses a level: the run of structural elements starting at the
/// current token. Parsing continues until eof or, when \p OpeningBrace is
/// given, until a closing brace (or TT_MacroBlockEnd token) is reached at this
/// level; that closing token is left unconsumed for the caller.
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param IfKind Passed through to parseStructuralElement() for elements
/// handled by the default path.
/// \param NextLBracesType The type for left brace found in this level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.) true is only possible when
/// Style.RemoveBracesLLVM is set.
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     bool CanContainBracedList,
                                     IfStmtKind *IfKind,
                                     TokenType NextLBracesType) {
  // Brace type handed down to nested elements: braces inside a compound
  // requirement are treated as braced lists, everything else is left open.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Evaluated once on entry, i.e. for whatever precedes the first token of
  // this level. Forced to true when brace removal is disabled.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    // Attribute macros are skipped without starting a structural element.
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched like '{' and '}'.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Default handling: parse one structural element and count it. Once a
    // label has been recorded, nullptr is passed instead of &HasLabel so the
    // flag cannot be overwritten by later elements.
    auto ParseDefault = [this, OpeningBrace, IfKind, NextLevelLBracesType,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(IfKind, !OpeningBrace, NextLevelLBracesType,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment at this level gets an unwrapped line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown) {
        FormatTok->setFinalizedType(NextLBracesType);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      // Prefer a braced list where allowed; a macro block begin is never
      // tried as one.
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList()) {
        continue;
      }
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*KeepBraces=*/true,
                 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
                 NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // End of this level. The remaining checks only decide whether the
        // block qualifies as "simple"; the closing token is not consumed.
        if (!Style.RemoveBracesLLVM ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // Not simple if: closed by a macro block end rather than a real '}',
        // not exactly one statement, a label was seen, or a comment or
        // preprocessor directive precedes either the body or the closing
        // brace.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment following the closing brace without a newline in between
        // also disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // No opening brace for this level: consume the stray closing brace and
      // carry on.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past any comments to the token after 'default', then
      // rewind to the stored position.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Only the first switch label of this level bumps the line level, and
      // only if case labels are indented or the line is a preprocessor
      // directive at level 1.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        // Consume the '[' here; parseCSharpAttribute() handles the rest.
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());
  return false;
}
596 
// Classifies the '{' at the current token, and every brace nested inside it,
// as either a block (BK_Block) or a braced initializer list (BK_BracedInit).
// The token stream is scanned forward until the starting brace is closed or
// eof is reached, and is then rewound to where it was on entry.
//
// \p ExpectClassBody: when true, a ';' following the '}' that closes the
// outermost brace does not count as evidence for a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    // Invariant inside the switch: PrevTok, Tok and NextTok are three
    // consecutive non-comment tokens, with Tok being the one classified.
    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only decide here if the matching '{' has not been classified yet.
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);

          // JavaScript: '}' followed by 'of', 'in' or 'as'.
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          // C++: '}' directly followed by '('.
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If the closing brace is followed by a comma, period, colon, right
          // paren, right square bracket, left brace or ellipsis, we assume
          // this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis);

          // '}' followed by an identifier, unless the token before the '}' is
          // ';', '}' or '{'.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          // '}' followed by ';', except for the outermost brace when a class
          // body is expected.
          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          // '}' followed by a binary operator that is not taken for the start
          // of an ObjC method.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            // Note that this overrides all of the evidence gathered above.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the verdict on both the closing and the opening brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only identifiers typed as statement macros count as block evidence.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-unclassified brace marks that
      // brace as a block.
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (FormatToken *LBrace : LBraceStack)
    if (LBrace->is(BK_Unknown))
      LBrace->setBlockKind(BK_Block);

  // Rewind the token stream to where the scan started.
  FormatTok = Tokens->setPosition(StoredPosition);
}
749 
// Folds the std::hash of \p v into the running hash \p seed. The new seed
// depends on both the old seed and the hashed value, so the order in which
// values are combined affects the result.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Hashed = std::hash<T>{}(v);
  const std::size_t Mixed = Hashed + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
755 
756 size_t UnwrappedLineParser::computePPHash() const {
757   size_t h = 0;
758   for (const auto &i : PPStack) {
759     hash_combine(h, size_t(i.Kind));
760     hash_combine(h, i.Line);
761   }
762   return h;
763 }
764 
// Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
// is not null, subtracts its length (plus the preceding space) when computing
// the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
// running the token annotator on it so that we can restore them afterward.
//
// Always returns true when the style's ColumnLimit is 0. \p ParsedLine must
// contain at least one token, and \p OpeningBrace (if given) must not be the
// line's first token.
bool UnwrappedLineParser::mightFitOnOneLine(
    UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
  const auto ColumnLimit = Style.ColumnLimit;
  if (ColumnLimit == 0)
    return true;

  auto &Tokens = ParsedLine.Tokens;
  assert(!Tokens.empty());

  const auto *LastToken = Tokens.back().Tok;
  assert(LastToken);

  // One saved node per token of the line, filled in below.
  SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());

  // Snapshot every token (and its children) before annotation modifies them.
  int Index = 0;
  for (const auto &Token : Tokens) {
    assert(Token.Tok);
    auto &SavedToken = SavedTokens[Index++];
    // Heap-allocated copy; released in the restore loop below.
    SavedToken.Tok = new FormatToken;
    SavedToken.Tok->copyFrom(*Token.Tok);
    // NOTE(review): Token is a const reference, so this std::move presumably
    // results in a copy and leaves Token.Children intact for the annotator —
    // confirm that is the intent before turning it into a real move.
    SavedToken.Children = std::move(Token.Children);
  }

  // This local AnnotatedLine shadows the parser's Line member for the rest of
  // the function.
  AnnotatedLine Line(ParsedLine);
  assert(Line.Last == LastToken);

  TokenAnnotator Annotator(Style, Keywords);
  Annotator.annotate(Line);
  Annotator.calculateFormattingInformation(Line);

  // Must be read before the restore loop below overwrites the tokens.
  auto Length = LastToken->TotalLength;
  if (OpeningBrace) {
    assert(OpeningBrace != Tokens.front().Tok);
    // Discount the brace itself plus the space in front of it.
    Length -= OpeningBrace->TokenText.size() + 1;
  }

  // Restore the tokens to their pre-annotation state and free the snapshots.
  Index = 0;
  for (auto &Token : Tokens) {
    const auto &SavedToken = SavedTokens[Index++];
    Token.Tok->copyFrom(*SavedToken.Tok);
    // NOTE(review): SavedToken is const here as well, so this std::move
    // presumably copies too — confirm.
    Token.Children = std::move(SavedToken.Children);
    delete SavedToken.Tok;
  }

  // Indentation width of the line plus its computed length must not exceed
  // the column limit.
  return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
}
815 
// Parses a block that starts at the current token, which must be either a '{'
// or a TT_MacroBlockBegin token, up to and including the matching '}' or
// TT_MacroBlockEnd token (plus a few trailing tokens, see below).
//
// MustBeDeclaration is forwarded to the ScopedDeclarationState that guards the
// block body. AddLevels is the number of indentation levels applied to the
// contents of the block. If MunchSemi is set, a ';' directly following the
// block is consumed as well. KeepBraces suppresses the MatchingParen linking
// that is otherwise attempted for blocks parseLevel() reports as simple.
// UnindentWhitesmithsBraces backs the level up by one once the opening line
// has been added. CanContainBracedList and NextLBracesType are passed through
// to parseLevel().
//
// Returns the IfStmtKind that parseLevel() reported for the block body.
UnwrappedLineParser::IfStmtKind UnwrappedLineParser::parseBlock(
    bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
    bool UnindentWhitesmithsBraces, bool CanContainBracedList,
    TokenType NextLBracesType) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Remember the opening token; FormatTok advances as the block is parsed.
  FormatToken *Tok = FormatTok;
  const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  // Index that the next line added to CurrentLines will receive.
  auto Index = CurrentLines->size();
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Preprocessor hash at the opening token; compared against the hash at the
  // closing token before the opening and closing lines are linked.
  size_t PPStartHash = computePPHash();

  // Level to restore once the block has been parsed.
  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  // A macro block begin may be followed by an argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Preprocessor directives are only subtracted from the opening line index
  // when lines are currently being added to the top-level Lines.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  // In Whitesmiths mode the level was already bumped before the brace.
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock =
      parseLevel(Tok, CanContainBracedList, &IfKind, NextLBracesType);

  // The input ended before the block was closed.
  if (eof())
    return IfKind;

  // The block was not terminated by the expected closing token: restore the
  // level, mark the current token as a block and give up.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // For simple blocks whose braces need not be kept, link the opening and
  // closing brace through MatchingParen if the lines involved might fit on one
  // line each.
  // NOTE(review): presumably a later pass uses this link to decide on brace
  // removal - confirm against the consumer of MatchingParen.
  if (SimpleBlock && !KeepBraces) {
    assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      assert(!CurrentLines->empty());
      const FormatToken *OpeningBrace = Tok;
      if (!Tok->Previous) { // Wrapped l_brace.
        if (FollowedByComment) {
          KeepBraces = true;
        } else {
          assert(Index > 0);
          --Index; // The line above the wrapped l_brace.
          OpeningBrace = nullptr;
        }
      }
      // The last line of the block is checked first; the line at Index is
      // skipped for TT_ElseLBrace.
      if (!KeepBraces && mightFitOnOneLine(CurrentLines->back()) &&
          (Tok->is(TT_ElseLBrace) ||
           mightFitOnOneLine((*CurrentLines)[Index], OpeningBrace))) {
        Tok->MatchingParen = FormatTok;
        FormatTok->MatchingParen = Tok;
      }
    }
  }

  // Preprocessor hash at the closing token, see PPStartHash above.
  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  // A macro block end may be followed by an argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Only cross-link the opening and closing lines when the preprocessor hash
  // is the same at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}
936 
937 static bool isGoogScope(const UnwrappedLine &Line) {
938   // FIXME: Closure-library specific stuff should not be hard-coded but be
939   // configurable.
940   if (Line.Tokens.size() < 4)
941     return false;
942   auto I = Line.Tokens.begin();
943   if (I->Tok->TokenText != "goog")
944     return false;
945   ++I;
946   if (I->Tok->isNot(tok::period))
947     return false;
948   ++I;
949   if (I->Tok->TokenText != "scope")
950     return false;
951   ++I;
952   return I->Tok->is(tok::l_paren);
953 }
954 
955 static bool isIIFE(const UnwrappedLine &Line,
956                    const AdditionalKeywords &Keywords) {
957   // Look for the start of an immediately invoked anonymous function.
958   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
959   // This is commonly done in JavaScript to create a new, anonymous scope.
960   // Example: (function() { ... })()
961   if (Line.Tokens.size() < 3)
962     return false;
963   auto I = Line.Tokens.begin();
964   if (I->Tok->isNot(tok::l_paren))
965     return false;
966   ++I;
967   if (I->Tok->isNot(Keywords.kw_function))
968     return false;
969   ++I;
970   return I->Tok->is(tok::l_paren);
971 }
972 
973 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
974                                    const FormatToken &InitialToken) {
975   tok::TokenKind Kind = InitialToken.Tok.getKind();
976   if (InitialToken.is(TT_NamespaceMacro))
977     Kind = tok::kw_namespace;
978 
979   switch (Kind) {
980   case tok::kw_namespace:
981     return Style.BraceWrapping.AfterNamespace;
982   case tok::kw_class:
983     return Style.BraceWrapping.AfterClass;
984   case tok::kw_union:
985     return Style.BraceWrapping.AfterUnion;
986   case tok::kw_struct:
987     return Style.BraceWrapping.AfterStruct;
988   case tok::kw_enum:
989     return Style.BraceWrapping.AfterEnum;
990   default:
991     return false;
992   }
993 }
994 
995 void UnwrappedLineParser::parseChildBlock(
996     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
997   assert(FormatTok->is(tok::l_brace));
998   FormatTok->setBlockKind(BK_Block);
999   const FormatToken *OpeningBrace = FormatTok;
1000   nextToken();
1001   {
1002     bool SkipIndent = (Style.isJavaScript() &&
1003                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
1004     ScopedLineState LineState(*this);
1005     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
1006                                             /*MustBeDeclaration=*/false);
1007     Line->Level += SkipIndent ? 0 : 1;
1008     parseLevel(OpeningBrace, CanContainBracedList, /*IfKind=*/nullptr,
1009                NextLBracesType);
1010     flushComments(isOnNewLine(*FormatTok));
1011     Line->Level -= SkipIndent ? 0 : 1;
1012   }
1013   nextToken();
1014 }
1015 
1016 void UnwrappedLineParser::parsePPDirective() {
1017   assert(FormatTok->is(tok::hash) && "'#' expected");
1018   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1019 
1020   nextToken();
1021 
1022   if (!FormatTok->Tok.getIdentifierInfo()) {
1023     parsePPUnknown();
1024     return;
1025   }
1026 
1027   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1028   case tok::pp_define:
1029     parsePPDefine();
1030     return;
1031   case tok::pp_if:
1032     parsePPIf(/*IfDef=*/false);
1033     break;
1034   case tok::pp_ifdef:
1035   case tok::pp_ifndef:
1036     parsePPIf(/*IfDef=*/true);
1037     break;
1038   case tok::pp_else:
1039     parsePPElse();
1040     break;
1041   case tok::pp_elifdef:
1042   case tok::pp_elifndef:
1043   case tok::pp_elif:
1044     parsePPElIf();
1045     break;
1046   case tok::pp_endif:
1047     parsePPEndIf();
1048     break;
1049   default:
1050     parsePPUnknown();
1051     break;
1052   }
1053 }
1054 
1055 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1056   size_t Line = CurrentLines->size();
1057   if (CurrentLines == &PreprocessorDirectives)
1058     Line += Lines.size();
1059 
1060   if (Unreachable ||
1061       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1062     PPStack.push_back({PP_Unreachable, Line});
1063   } else {
1064     PPStack.push_back({PP_Conditional, Line});
1065   }
1066 }
1067 
1068 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1069   ++PPBranchLevel;
1070   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1071   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1072     PPLevelBranchIndex.push_back(0);
1073     PPLevelBranchCount.push_back(0);
1074   }
1075   PPChainBranchIndex.push(0);
1076   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1077   conditionalCompilationCondition(Unreachable || Skip);
1078 }
1079 
1080 void UnwrappedLineParser::conditionalCompilationAlternative() {
1081   if (!PPStack.empty())
1082     PPStack.pop_back();
1083   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1084   if (!PPChainBranchIndex.empty())
1085     ++PPChainBranchIndex.top();
1086   conditionalCompilationCondition(
1087       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1088       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1089 }
1090 
1091 void UnwrappedLineParser::conditionalCompilationEnd() {
1092   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1093   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1094     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1095       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1096   }
1097   // Guard against #endif's without #if.
1098   if (PPBranchLevel > -1)
1099     --PPBranchLevel;
1100   if (!PPChainBranchIndex.empty())
1101     PPChainBranchIndex.pop();
1102   if (!PPStack.empty())
1103     PPStack.pop_back();
1104 }
1105 
1106 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1107   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1108   nextToken();
1109   bool Unreachable = false;
1110   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1111     Unreachable = true;
1112   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1113     Unreachable = true;
1114   conditionalCompilationStart(Unreachable);
1115   FormatToken *IfCondition = FormatTok;
1116   // If there's a #ifndef on the first line, and the only lines before it are
1117   // comments, it could be an include guard.
1118   bool MaybeIncludeGuard = IfNDef;
1119   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1120     for (auto &Line : Lines) {
1121       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1122         MaybeIncludeGuard = false;
1123         IncludeGuard = IG_Rejected;
1124         break;
1125       }
1126     }
1127   }
1128   --PPBranchLevel;
1129   parsePPUnknown();
1130   ++PPBranchLevel;
1131   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1132     IncludeGuard = IG_IfNdefed;
1133     IncludeGuardToken = IfCondition;
1134   }
1135 }
1136 
1137 void UnwrappedLineParser::parsePPElse() {
1138   // If a potential include guard has an #else, it's not an include guard.
1139   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1140     IncludeGuard = IG_Rejected;
1141   conditionalCompilationAlternative();
1142   if (PPBranchLevel > -1)
1143     --PPBranchLevel;
1144   parsePPUnknown();
1145   ++PPBranchLevel;
1146 }
1147 
1148 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1149 
1150 void UnwrappedLineParser::parsePPEndIf() {
1151   conditionalCompilationEnd();
1152   parsePPUnknown();
1153   // If the #endif of a potential include guard is the last thing in the file,
1154   // then we found an include guard.
1155   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1156       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1157     IncludeGuard = IG_Found;
1158   }
1159 }
1160 
1161 void UnwrappedLineParser::parsePPDefine() {
1162   nextToken();
1163 
1164   if (!FormatTok->Tok.getIdentifierInfo()) {
1165     IncludeGuard = IG_Rejected;
1166     IncludeGuardToken = nullptr;
1167     parsePPUnknown();
1168     return;
1169   }
1170 
1171   if (IncludeGuard == IG_IfNdefed &&
1172       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1173     IncludeGuard = IG_Defined;
1174     IncludeGuardToken = nullptr;
1175     for (auto &Line : Lines) {
1176       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1177         IncludeGuard = IG_Rejected;
1178         break;
1179       }
1180     }
1181   }
1182 
1183   // In the context of a define, even keywords should be treated as normal
1184   // identifiers. Setting the kind to identifier is not enough, because we need
1185   // to treat additional keywords like __except as well, which are already
1186   // identifiers. Setting the identifier info to null interferes with include
1187   // guard processing above, and changes preprocessing nesting.
1188   FormatTok->Tok.setKind(tok::identifier);
1189   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1190   nextToken();
1191   if (FormatTok->Tok.getKind() == tok::l_paren &&
1192       !FormatTok->hasWhitespaceBefore()) {
1193     parseParens();
1194   }
1195   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1196     Line->Level += PPBranchLevel + 1;
1197   addUnwrappedLine();
1198   ++Line->Level;
1199 
1200   // Errors during a preprocessor directive can only affect the layout of the
1201   // preprocessor directive, and thus we ignore them. An alternative approach
1202   // would be to use the same approach we use on the file level (no
1203   // re-indentation if there was a structural error) within the macro
1204   // definition.
1205   parseFile();
1206 }
1207 
1208 void UnwrappedLineParser::parsePPUnknown() {
1209   do {
1210     nextToken();
1211   } while (!eof());
1212   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1213     Line->Level += PPBranchLevel + 1;
1214   addUnwrappedLine();
1215 }
1216 
1217 // Here we exclude certain tokens that are not usually the first token in an
1218 // unwrapped line. This is used in attempt to distinguish macro calls without
1219 // trailing semicolons from other constructs split to several lines.
1220 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1221   // Semicolon can be a null-statement, l_square can be a start of a macro or
1222   // a C++11 attribute, but this doesn't seem to be common.
1223   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1224          Tok.isNot(TT_AttributeSquare) &&
1225          // Tokens that can only be used as binary operators and a part of
1226          // overloaded operator names.
1227          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1228          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1229          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1230          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1231          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1232          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1233          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1234          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1235          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1236          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1237          Tok.isNot(tok::lesslessequal) &&
1238          // Colon is used in labels, base class lists, initializer lists,
1239          // range-based for loops, ternary operator, but should never be the
1240          // first token in an unwrapped line.
1241          Tok.isNot(tok::colon) &&
1242          // 'noexcept' is a trailing annotation.
1243          Tok.isNot(tok::kw_noexcept);
1244 }
1245 
1246 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1247                           const FormatToken *FormatTok) {
1248   // FIXME: This returns true for C/C++ keywords like 'struct'.
1249   return FormatTok->is(tok::identifier) &&
1250          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1251           !FormatTok->isOneOf(
1252               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1253               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1254               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1255               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1256               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1257               Keywords.kw_instanceof, Keywords.kw_interface,
1258               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1259 }
1260 
1261 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1262                                  const FormatToken *FormatTok) {
1263   return FormatTok->Tok.isLiteral() ||
1264          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1265          mustBeJSIdent(Keywords, FormatTok);
1266 }
1267 
1268 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1269 // when encountered after a value (see mustBeJSIdentOrValue).
1270 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1271                            const FormatToken *FormatTok) {
1272   return FormatTok->isOneOf(
1273       tok::kw_return, Keywords.kw_yield,
1274       // conditionals
1275       tok::kw_if, tok::kw_else,
1276       // loops
1277       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1278       // switch/case
1279       tok::kw_switch, tok::kw_case,
1280       // exceptions
1281       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1282       // declaration
1283       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1284       Keywords.kw_async, Keywords.kw_function,
1285       // import/export
1286       Keywords.kw_import, tok::kw_export);
1287 }
1288 
1289 // Checks whether a token is a type in K&R C (aka C78).
1290 static bool isC78Type(const FormatToken &Tok) {
1291   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1292                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1293                      tok::identifier);
1294 }
1295 
1296 // This function checks whether a token starts the first parameter declaration
1297 // in a K&R C (aka C78) function definition, e.g.:
1298 //   int f(a, b)
1299 //   short a, b;
1300 //   {
1301 //      return a + b;
1302 //   }
1303 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1304                                const FormatToken *FuncName) {
1305   assert(Tok);
1306   assert(Next);
1307   assert(FuncName);
1308 
1309   if (FuncName->isNot(tok::identifier))
1310     return false;
1311 
1312   const FormatToken *Prev = FuncName->Previous;
1313   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1314     return false;
1315 
1316   if (!isC78Type(*Tok) &&
1317       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1318     return false;
1319   }
1320 
1321   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1322     return false;
1323 
1324   Tok = Tok->Previous;
1325   if (!Tok || Tok->isNot(tok::r_paren))
1326     return false;
1327 
1328   Tok = Tok->Previous;
1329   if (!Tok || Tok->isNot(tok::identifier))
1330     return false;
1331 
1332   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1333 }
1334 
1335 void UnwrappedLineParser::parseModuleImport() {
1336   nextToken();
1337   while (!eof()) {
1338     if (FormatTok->is(tok::colon)) {
1339       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1340     }
1341     // Handle import <foo/bar.h> as we would an include statement.
1342     else if (FormatTok->is(tok::less)) {
1343       nextToken();
1344       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1345         // Mark tokens up to the trailing line comments as implicit string
1346         // literals.
1347         if (FormatTok->isNot(tok::comment) &&
1348             !FormatTok->TokenText.startswith("//")) {
1349           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1350         }
1351         nextToken();
1352       }
1353     }
1354     if (FormatTok->is(tok::semi)) {
1355       nextToken();
1356       break;
1357     }
1358     nextToken();
1359   }
1360 
1361   addUnwrappedLine();
1362 }
1363 
1364 // readTokenWithJavaScriptASI reads the next token and terminates the current
1365 // line if JavaScript Automatic Semicolon Insertion must
1366 // happen between the current token and the next token.
1367 //
1368 // This method is conservative - it cannot cover all edge cases of JavaScript,
1369 // but only aims to correctly handle certain well known cases. It *must not*
1370 // return true in speculative cases.
1371 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1372   FormatToken *Previous = FormatTok;
1373   readToken();
1374   FormatToken *Next = FormatTok;
1375 
1376   bool IsOnSameLine =
1377       CommentsBeforeNextToken.empty()
1378           ? Next->NewlinesBefore == 0
1379           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1380   if (IsOnSameLine)
1381     return;
1382 
1383   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1384   bool PreviousStartsTemplateExpr =
1385       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1386   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1387     // If the line contains an '@' sign, the previous token might be an
1388     // annotation, which can precede another identifier/value.
1389     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1390       return LineNode.Tok->is(tok::at);
1391     });
1392     if (HasAt)
1393       return;
1394   }
1395   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1396     return addUnwrappedLine();
1397   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1398   bool NextEndsTemplateExpr =
1399       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1400   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1401       (PreviousMustBeValue ||
1402        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1403                          tok::minusminus))) {
1404     return addUnwrappedLine();
1405   }
1406   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1407       isJSDeclOrStmt(Keywords, Next)) {
1408     return addUnwrappedLine();
1409   }
1410 }
1411 
1412 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
1413                                                  bool IsTopLevel,
1414                                                  TokenType NextLBracesType,
1415                                                  bool *HasLabel) {
1416   if (Style.Language == FormatStyle::LK_TableGen &&
1417       FormatTok->is(tok::pp_include)) {
1418     nextToken();
1419     if (FormatTok->is(tok::string_literal))
1420       nextToken();
1421     addUnwrappedLine();
1422     return;
1423   }
1424   switch (FormatTok->Tok.getKind()) {
1425   case tok::kw_asm:
1426     nextToken();
1427     if (FormatTok->is(tok::l_brace)) {
1428       FormatTok->setFinalizedType(TT_InlineASMBrace);
1429       nextToken();
1430       while (FormatTok && FormatTok->isNot(tok::eof)) {
1431         if (FormatTok->is(tok::r_brace)) {
1432           FormatTok->setFinalizedType(TT_InlineASMBrace);
1433           nextToken();
1434           addUnwrappedLine();
1435           break;
1436         }
1437         FormatTok->Finalized = true;
1438         nextToken();
1439       }
1440     }
1441     break;
1442   case tok::kw_namespace:
1443     parseNamespace();
1444     return;
1445   case tok::kw_public:
1446   case tok::kw_protected:
1447   case tok::kw_private:
1448     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1449         Style.isCSharp()) {
1450       nextToken();
1451     } else {
1452       parseAccessSpecifier();
1453     }
1454     return;
1455   case tok::kw_if:
1456     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1457       // field/method declaration.
1458       break;
1459     }
1460     parseIfThenElse(IfKind);
1461     return;
1462   case tok::kw_for:
1463   case tok::kw_while:
1464     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1465       // field/method declaration.
1466       break;
1467     }
1468     parseForOrWhileLoop();
1469     return;
1470   case tok::kw_do:
1471     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1472       // field/method declaration.
1473       break;
1474     }
1475     parseDoWhile();
1476     return;
1477   case tok::kw_switch:
1478     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1479       // 'switch: string' field declaration.
1480       break;
1481     }
1482     parseSwitch();
1483     return;
1484   case tok::kw_default:
1485     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1486       // 'default: string' field declaration.
1487       break;
1488     }
1489     nextToken();
1490     if (FormatTok->is(tok::colon)) {
1491       parseLabel();
1492       return;
1493     }
1494     // e.g. "default void f() {}" in a Java interface.
1495     break;
1496   case tok::kw_case:
1497     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1498       // 'case: string' field declaration.
1499       nextToken();
1500       break;
1501     }
1502     parseCaseLabel();
1503     return;
1504   case tok::kw_try:
1505   case tok::kw___try:
1506     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1507       // field/method declaration.
1508       break;
1509     }
1510     parseTryCatch();
1511     return;
1512   case tok::kw_extern:
1513     nextToken();
1514     if (FormatTok->is(tok::string_literal)) {
1515       nextToken();
1516       if (FormatTok->is(tok::l_brace)) {
1517         if (Style.BraceWrapping.AfterExternBlock)
1518           addUnwrappedLine();
1519         // Either we indent or for backwards compatibility we follow the
1520         // AfterExternBlock style.
1521         unsigned AddLevels =
1522             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1523                     (Style.BraceWrapping.AfterExternBlock &&
1524                      Style.IndentExternBlock ==
1525                          FormatStyle::IEBS_AfterExternBlock)
1526                 ? 1u
1527                 : 0u;
1528         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1529         addUnwrappedLine();
1530         return;
1531       }
1532     }
1533     break;
1534   case tok::kw_export:
1535     if (Style.isJavaScript()) {
1536       parseJavaScriptEs6ImportExport();
1537       return;
1538     }
1539     if (!Style.isCpp())
1540       break;
1541     // Handle C++ "(inline|export) namespace".
1542     LLVM_FALLTHROUGH;
1543   case tok::kw_inline:
1544     nextToken();
1545     if (FormatTok->is(tok::kw_namespace)) {
1546       parseNamespace();
1547       return;
1548     }
1549     break;
1550   case tok::identifier:
1551     if (FormatTok->is(TT_ForEachMacro)) {
1552       parseForOrWhileLoop();
1553       return;
1554     }
1555     if (FormatTok->is(TT_MacroBlockBegin)) {
1556       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1557                  /*MunchSemi=*/false);
1558       return;
1559     }
1560     if (FormatTok->is(Keywords.kw_import)) {
1561       if (Style.isJavaScript()) {
1562         parseJavaScriptEs6ImportExport();
1563         return;
1564       }
1565       if (Style.Language == FormatStyle::LK_Proto) {
1566         nextToken();
1567         if (FormatTok->is(tok::kw_public))
1568           nextToken();
1569         if (!FormatTok->is(tok::string_literal))
1570           return;
1571         nextToken();
1572         if (FormatTok->is(tok::semi))
1573           nextToken();
1574         addUnwrappedLine();
1575         return;
1576       }
1577       if (Style.isCpp()) {
1578         parseModuleImport();
1579         return;
1580       }
1581     }
1582     if (Style.isCpp() &&
1583         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1584                            Keywords.kw_slots, Keywords.kw_qslots)) {
1585       nextToken();
1586       if (FormatTok->is(tok::colon)) {
1587         nextToken();
1588         addUnwrappedLine();
1589         return;
1590       }
1591     }
1592     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1593       parseStatementMacro();
1594       return;
1595     }
1596     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1597       parseNamespace();
1598       return;
1599     }
1600     // In all other cases, parse the declaration.
1601     break;
1602   default:
1603     break;
1604   }
1605   do {
1606     const FormatToken *Previous = FormatTok->Previous;
1607     switch (FormatTok->Tok.getKind()) {
1608     case tok::at:
1609       nextToken();
1610       if (FormatTok->is(tok::l_brace)) {
1611         nextToken();
1612         parseBracedList();
1613         break;
1614       } else if (Style.Language == FormatStyle::LK_Java &&
1615                  FormatTok->is(Keywords.kw_interface)) {
1616         nextToken();
1617         break;
1618       }
1619       switch (FormatTok->Tok.getObjCKeywordID()) {
1620       case tok::objc_public:
1621       case tok::objc_protected:
1622       case tok::objc_package:
1623       case tok::objc_private:
1624         return parseAccessSpecifier();
1625       case tok::objc_interface:
1626       case tok::objc_implementation:
1627         return parseObjCInterfaceOrImplementation();
1628       case tok::objc_protocol:
1629         if (parseObjCProtocol())
1630           return;
1631         break;
1632       case tok::objc_end:
1633         return; // Handled by the caller.
1634       case tok::objc_optional:
1635       case tok::objc_required:
1636         nextToken();
1637         addUnwrappedLine();
1638         return;
1639       case tok::objc_autoreleasepool:
1640         nextToken();
1641         if (FormatTok->is(tok::l_brace)) {
1642           if (Style.BraceWrapping.AfterControlStatement ==
1643               FormatStyle::BWACS_Always) {
1644             addUnwrappedLine();
1645           }
1646           parseBlock();
1647         }
1648         addUnwrappedLine();
1649         return;
1650       case tok::objc_synchronized:
1651         nextToken();
1652         if (FormatTok->is(tok::l_paren)) {
1653           // Skip synchronization object
1654           parseParens();
1655         }
1656         if (FormatTok->is(tok::l_brace)) {
1657           if (Style.BraceWrapping.AfterControlStatement ==
1658               FormatStyle::BWACS_Always) {
1659             addUnwrappedLine();
1660           }
1661           parseBlock();
1662         }
1663         addUnwrappedLine();
1664         return;
1665       case tok::objc_try:
1666         // This branch isn't strictly necessary (the kw_try case below would
1667         // do this too after the tok::at is parsed above).  But be explicit.
1668         parseTryCatch();
1669         return;
1670       default:
1671         break;
1672       }
1673       break;
1674     case tok::kw_concept:
1675       parseConcept();
1676       return;
1677     case tok::kw_requires: {
1678       if (Style.isCpp()) {
1679         bool ParsedClause = parseRequires();
1680         if (ParsedClause)
1681           return;
1682       } else {
1683         nextToken();
1684       }
1685       break;
1686     }
1687     case tok::kw_enum:
1688       // Ignore if this is part of "template <enum ...".
1689       if (Previous && Previous->is(tok::less)) {
1690         nextToken();
1691         break;
1692       }
1693 
1694       // parseEnum falls through and does not yet add an unwrapped line as an
1695       // enum definition can start a structural element.
1696       if (!parseEnum())
1697         break;
1698       // This only applies for C++.
1699       if (!Style.isCpp()) {
1700         addUnwrappedLine();
1701         return;
1702       }
1703       break;
1704     case tok::kw_typedef:
1705       nextToken();
1706       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1707                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1708                              Keywords.kw_CF_CLOSED_ENUM,
1709                              Keywords.kw_NS_CLOSED_ENUM)) {
1710         parseEnum();
1711       }
1712       break;
1713     case tok::kw_struct:
1714     case tok::kw_union:
1715     case tok::kw_class:
1716       if (parseStructLike())
1717         return;
1718       break;
1719     case tok::period:
1720       nextToken();
1721       // In Java, classes have an implicit static member "class".
1722       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1723           FormatTok->is(tok::kw_class)) {
1724         nextToken();
1725       }
1726       if (Style.isJavaScript() && FormatTok &&
1727           FormatTok->Tok.getIdentifierInfo()) {
1728         // JavaScript only has pseudo keywords, all keywords are allowed to
1729         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1730         nextToken();
1731       }
1732       break;
1733     case tok::semi:
1734       nextToken();
1735       addUnwrappedLine();
1736       return;
1737     case tok::r_brace:
1738       addUnwrappedLine();
1739       return;
1740     case tok::l_paren: {
1741       parseParens();
1742       // Break the unwrapped line if a K&R C function definition has a parameter
1743       // declaration.
1744       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1745         break;
1746       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1747         addUnwrappedLine();
1748         return;
1749       }
1750       break;
1751     }
1752     case tok::kw_operator:
1753       nextToken();
1754       if (FormatTok->isBinaryOperator())
1755         nextToken();
1756       break;
1757     case tok::caret:
1758       nextToken();
1759       if (FormatTok->Tok.isAnyIdentifier() ||
1760           FormatTok->isSimpleTypeSpecifier()) {
1761         nextToken();
1762       }
1763       if (FormatTok->is(tok::l_paren))
1764         parseParens();
1765       if (FormatTok->is(tok::l_brace))
1766         parseChildBlock();
1767       break;
1768     case tok::l_brace:
1769       if (NextLBracesType != TT_Unknown)
1770         FormatTok->setFinalizedType(NextLBracesType);
1771       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1772         // A block outside of parentheses must be the last part of a
1773         // structural element.
1774         // FIXME: Figure out cases where this is not true, and add projections
1775         // for them (the one we know is missing are lambdas).
1776         if (Style.Language == FormatStyle::LK_Java &&
1777             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1778           // If necessary, we could set the type to something different than
1779           // TT_FunctionLBrace.
1780           if (Style.BraceWrapping.AfterControlStatement ==
1781               FormatStyle::BWACS_Always) {
1782             addUnwrappedLine();
1783           }
1784         } else if (Style.BraceWrapping.AfterFunction) {
1785           addUnwrappedLine();
1786         }
1787         if (!Line->InPPDirective)
1788           FormatTok->setFinalizedType(TT_FunctionLBrace);
1789         parseBlock();
1790         addUnwrappedLine();
1791         return;
1792       }
1793       // Otherwise this was a braced init list, and the structural
1794       // element continues.
1795       break;
1796     case tok::kw_try:
1797       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1798         // field/method declaration.
1799         nextToken();
1800         break;
1801       }
1802       // We arrive here when parsing function-try blocks.
1803       if (Style.BraceWrapping.AfterFunction)
1804         addUnwrappedLine();
1805       parseTryCatch();
1806       return;
1807     case tok::identifier: {
1808       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1809           Line->MustBeDeclaration) {
1810         addUnwrappedLine();
1811         parseCSharpGenericTypeConstraint();
1812         break;
1813       }
1814       if (FormatTok->is(TT_MacroBlockEnd)) {
1815         addUnwrappedLine();
1816         return;
1817       }
1818 
1819       // Function declarations (as opposed to function expressions) are parsed
1820       // on their own unwrapped line by continuing this loop. Function
1821       // expressions (functions that are not on their own line) must not create
1822       // a new unwrapped line, so they are special cased below.
1823       size_t TokenCount = Line->Tokens.size();
1824       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1825           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1826                                                      Keywords.kw_async)))) {
1827         tryToParseJSFunction();
1828         break;
1829       }
1830       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1831           FormatTok->is(Keywords.kw_interface)) {
1832         if (Style.isJavaScript()) {
1833           // In JavaScript/TypeScript, "interface" can be used as a standalone
1834           // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
1836           // interface declaration.
1837           unsigned StoredPosition = Tokens->getPosition();
1838           FormatToken *Next = Tokens->getNextToken();
1839           FormatTok = Tokens->setPosition(StoredPosition);
1840           if (!mustBeJSIdent(Keywords, Next)) {
1841             nextToken();
1842             break;
1843           }
1844         }
1845         parseRecord();
1846         addUnwrappedLine();
1847         return;
1848       }
1849 
1850       if (FormatTok->is(Keywords.kw_interface)) {
1851         if (parseStructLike())
1852           return;
1853         break;
1854       }
1855 
1856       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1857         parseStatementMacro();
1858         return;
1859       }
1860 
1861       // See if the following token should start a new unwrapped line.
1862       StringRef Text = FormatTok->TokenText;
1863 
1864       FormatToken *PreviousToken = FormatTok;
1865       nextToken();
1866 
1867       // JS doesn't have macros, and within classes colons indicate fields, not
1868       // labels.
1869       if (Style.isJavaScript())
1870         break;
1871 
1872       TokenCount = Line->Tokens.size();
1873       if (TokenCount == 1 ||
1874           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1875         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1876           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1877           parseLabel(!Style.IndentGotoLabels);
1878           if (HasLabel)
1879             *HasLabel = true;
1880           return;
1881         }
1882         // Recognize function-like macro usages without trailing semicolon as
1883         // well as free-standing macros like Q_OBJECT.
1884         bool FunctionLike = FormatTok->is(tok::l_paren);
1885         if (FunctionLike)
1886           parseParens();
1887 
1888         bool FollowedByNewline =
1889             CommentsBeforeNextToken.empty()
1890                 ? FormatTok->NewlinesBefore > 0
1891                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1892 
1893         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1894             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1895           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1896           addUnwrappedLine();
1897           return;
1898         }
1899       }
1900       break;
1901     }
1902     case tok::equal:
1903       if ((Style.isJavaScript() || Style.isCSharp()) &&
1904           FormatTok->is(TT_FatArrow)) {
1905         tryToParseChildBlock();
1906         break;
1907       }
1908 
1909       nextToken();
1910       if (FormatTok->is(tok::l_brace)) {
1911         // Block kind should probably be set to BK_BracedInit for any language.
1912         // C# needs this change to ensure that array initialisers and object
1913         // initialisers are indented the same way.
1914         if (Style.isCSharp())
1915           FormatTok->setBlockKind(BK_BracedInit);
1916         nextToken();
1917         parseBracedList();
1918       } else if (Style.Language == FormatStyle::LK_Proto &&
1919                  FormatTok->is(tok::less)) {
1920         nextToken();
1921         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1922                         /*ClosingBraceKind=*/tok::greater);
1923       }
1924       break;
1925     case tok::l_square:
1926       parseSquare();
1927       break;
1928     case tok::kw_new:
1929       parseNew();
1930       break;
1931     case tok::kw_case:
1932       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1933         // 'case: string' field declaration.
1934         nextToken();
1935         break;
1936       }
1937       parseCaseLabel();
1938       break;
1939     default:
1940       nextToken();
1941       break;
1942     }
1943   } while (!eof());
1944 }
1945 
1946 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1947   assert(FormatTok->is(tok::l_brace));
1948   if (!Style.isCSharp())
1949     return false;
1950   // See if it's a property accessor.
1951   if (FormatTok->Previous->isNot(tok::identifier))
1952     return false;
1953 
1954   // See if we are inside a property accessor.
1955   //
1956   // Record the current tokenPosition so that we can advance and
1957   // reset the current token. `Next` is not set yet so we need
1958   // another way to advance along the token stream.
1959   unsigned int StoredPosition = Tokens->getPosition();
1960   FormatToken *Tok = Tokens->getNextToken();
1961 
1962   // A trivial property accessor is of the form:
1963   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
1964   // Track these as they do not require line breaks to be introduced.
1965   bool HasSpecialAccessor = false;
1966   bool IsTrivialPropertyAccessor = true;
1967   while (!eof()) {
1968     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1969                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1970                      Keywords.kw_init, Keywords.kw_set)) {
1971       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
1972         HasSpecialAccessor = true;
1973       Tok = Tokens->getNextToken();
1974       continue;
1975     }
1976     if (Tok->isNot(tok::r_brace))
1977       IsTrivialPropertyAccessor = false;
1978     break;
1979   }
1980 
1981   if (!HasSpecialAccessor) {
1982     Tokens->setPosition(StoredPosition);
1983     return false;
1984   }
1985 
1986   // Try to parse the property accessor:
1987   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1988   Tokens->setPosition(StoredPosition);
1989   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1990     addUnwrappedLine();
1991   nextToken();
1992   do {
1993     switch (FormatTok->Tok.getKind()) {
1994     case tok::r_brace:
1995       nextToken();
1996       if (FormatTok->is(tok::equal)) {
1997         while (!eof() && FormatTok->isNot(tok::semi))
1998           nextToken();
1999         nextToken();
2000       }
2001       addUnwrappedLine();
2002       return true;
2003     case tok::l_brace:
2004       ++Line->Level;
2005       parseBlock(/*MustBeDeclaration=*/true);
2006       addUnwrappedLine();
2007       --Line->Level;
2008       break;
2009     case tok::equal:
2010       if (FormatTok->is(TT_FatArrow)) {
2011         ++Line->Level;
2012         do {
2013           nextToken();
2014         } while (!eof() && FormatTok->isNot(tok::semi));
2015         nextToken();
2016         addUnwrappedLine();
2017         --Line->Level;
2018         break;
2019       }
2020       nextToken();
2021       break;
2022     default:
2023       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2024                              Keywords.kw_set) &&
2025           !IsTrivialPropertyAccessor) {
2026         // Non-trivial get/set needs to be on its own line.
2027         addUnwrappedLine();
2028       }
2029       nextToken();
2030     }
2031   } while (!eof());
2032 
2033   // Unreachable for well-formed code (paired '{' and '}').
2034   return true;
2035 }
2036 
2037 bool UnwrappedLineParser::tryToParseLambda() {
2038   assert(FormatTok->is(tok::l_square));
2039   if (!Style.isCpp()) {
2040     nextToken();
2041     return false;
2042   }
2043   FormatToken &LSquare = *FormatTok;
2044   if (!tryToParseLambdaIntroducer())
2045     return false;
2046 
2047   bool SeenArrow = false;
2048   bool InTemplateParameterList = false;
2049 
2050   while (FormatTok->isNot(tok::l_brace)) {
2051     if (FormatTok->isSimpleTypeSpecifier()) {
2052       nextToken();
2053       continue;
2054     }
2055     switch (FormatTok->Tok.getKind()) {
2056     case tok::l_brace:
2057       break;
2058     case tok::l_paren:
2059       parseParens();
2060       break;
2061     case tok::l_square:
2062       parseSquare();
2063       break;
2064     case tok::kw_class:
2065     case tok::kw_template:
2066     case tok::kw_typename:
2067       assert(FormatTok->Previous);
2068       if (FormatTok->Previous->is(tok::less))
2069         InTemplateParameterList = true;
2070       nextToken();
2071       break;
2072     case tok::amp:
2073     case tok::star:
2074     case tok::kw_const:
2075     case tok::comma:
2076     case tok::less:
2077     case tok::greater:
2078     case tok::identifier:
2079     case tok::numeric_constant:
2080     case tok::coloncolon:
2081     case tok::kw_mutable:
2082     case tok::kw_noexcept:
2083       nextToken();
2084       break;
2085     // Specialization of a template with an integer parameter can contain
2086     // arithmetic, logical, comparison and ternary operators.
2087     //
2088     // FIXME: This also accepts sequences of operators that are not in the scope
2089     // of a template argument list.
2090     //
2091     // In a C++ lambda a template type can only occur after an arrow. We use
2092     // this as an heuristic to distinguish between Objective-C expressions
2093     // followed by an `a->b` expression, such as:
2094     // ([obj func:arg] + a->b)
2095     // Otherwise the code below would parse as a lambda.
2096     //
2097     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2098     // explicit template lists: []<bool b = true && false>(U &&u){}
2099     case tok::plus:
2100     case tok::minus:
2101     case tok::exclaim:
2102     case tok::tilde:
2103     case tok::slash:
2104     case tok::percent:
2105     case tok::lessless:
2106     case tok::pipe:
2107     case tok::pipepipe:
2108     case tok::ampamp:
2109     case tok::caret:
2110     case tok::equalequal:
2111     case tok::exclaimequal:
2112     case tok::greaterequal:
2113     case tok::lessequal:
2114     case tok::question:
2115     case tok::colon:
2116     case tok::ellipsis:
2117     case tok::kw_true:
2118     case tok::kw_false:
2119       if (SeenArrow || InTemplateParameterList) {
2120         nextToken();
2121         break;
2122       }
2123       return true;
2124     case tok::arrow:
2125       // This might or might not actually be a lambda arrow (this could be an
2126       // ObjC method invocation followed by a dereferencing arrow). We might
2127       // reset this back to TT_Unknown in TokenAnnotator.
2128       FormatTok->setFinalizedType(TT_LambdaArrow);
2129       SeenArrow = true;
2130       nextToken();
2131       break;
2132     default:
2133       return true;
2134     }
2135   }
2136   FormatTok->setFinalizedType(TT_LambdaLBrace);
2137   LSquare.setFinalizedType(TT_LambdaLSquare);
2138   parseChildBlock();
2139   return true;
2140 }
2141 
2142 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2143   const FormatToken *Previous = FormatTok->Previous;
2144   const FormatToken *LeftSquare = FormatTok;
2145   nextToken();
2146   if (Previous &&
2147       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2148                          tok::kw_delete, tok::l_square) ||
2149        LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2150        Previous->isSimpleTypeSpecifier())) {
2151     return false;
2152   }
2153   if (FormatTok->is(tok::l_square))
2154     return false;
2155   if (FormatTok->is(tok::r_square)) {
2156     const FormatToken *Next = Tokens->peekNextToken();
2157     if (Next->is(tok::greater))
2158       return false;
2159   }
2160   parseSquare(/*LambdaIntroducer=*/true);
2161   return true;
2162 }
2163 
2164 void UnwrappedLineParser::tryToParseJSFunction() {
2165   assert(FormatTok->is(Keywords.kw_function) ||
2166          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2167   if (FormatTok->is(Keywords.kw_async))
2168     nextToken();
2169   // Consume "function".
2170   nextToken();
2171 
2172   // Consume * (generator function). Treat it like C++'s overloaded operators.
2173   if (FormatTok->is(tok::star)) {
2174     FormatTok->setFinalizedType(TT_OverloadedOperator);
2175     nextToken();
2176   }
2177 
2178   // Consume function name.
2179   if (FormatTok->is(tok::identifier))
2180     nextToken();
2181 
2182   if (FormatTok->isNot(tok::l_paren))
2183     return;
2184 
2185   // Parse formal parameter list.
2186   parseParens();
2187 
2188   if (FormatTok->is(tok::colon)) {
2189     // Parse a type definition.
2190     nextToken();
2191 
2192     // Eat the type declaration. For braced inline object types, balance braces,
2193     // otherwise just parse until finding an l_brace for the function body.
2194     if (FormatTok->is(tok::l_brace))
2195       tryToParseBracedList();
2196     else
2197       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2198         nextToken();
2199   }
2200 
2201   if (FormatTok->is(tok::semi))
2202     return;
2203 
2204   parseChildBlock();
2205 }
2206 
2207 bool UnwrappedLineParser::tryToParseBracedList() {
2208   if (FormatTok->is(BK_Unknown))
2209     calculateBraceTypes();
2210   assert(FormatTok->isNot(BK_Unknown));
2211   if (FormatTok->is(BK_Block))
2212     return false;
2213   nextToken();
2214   parseBracedList();
2215   return true;
2216 }
2217 
2218 bool UnwrappedLineParser::tryToParseChildBlock() {
2219   assert(Style.isJavaScript() || Style.isCSharp());
2220   assert(FormatTok->is(TT_FatArrow));
2221   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2222   // They always start an expression or a child block if followed by a curly
2223   // brace.
2224   nextToken();
2225   if (FormatTok->isNot(tok::l_brace))
2226     return false;
2227   parseChildBlock();
2228   return true;
2229 }
2230 
2231 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2232                                           bool IsEnum,
2233                                           tok::TokenKind ClosingBraceKind) {
2234   bool HasError = false;
2235 
2236   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2237   // replace this by using parseAssignmentExpression() inside.
2238   do {
2239     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2240         tryToParseChildBlock()) {
2241       continue;
2242     }
2243     if (Style.isJavaScript()) {
2244       if (FormatTok->is(Keywords.kw_function) ||
2245           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2246         tryToParseJSFunction();
2247         continue;
2248       }
2249       if (FormatTok->is(tok::l_brace)) {
2250         // Could be a method inside of a braced list `{a() { return 1; }}`.
2251         if (tryToParseBracedList())
2252           continue;
2253         parseChildBlock();
2254       }
2255     }
2256     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2257       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2258         addUnwrappedLine();
2259       nextToken();
2260       return !HasError;
2261     }
2262     switch (FormatTok->Tok.getKind()) {
2263     case tok::l_square:
2264       if (Style.isCSharp())
2265         parseSquare();
2266       else
2267         tryToParseLambda();
2268       break;
2269     case tok::l_paren:
2270       parseParens();
2271       // JavaScript can just have free standing methods and getters/setters in
2272       // object literals. Detect them by a "{" following ")".
2273       if (Style.isJavaScript()) {
2274         if (FormatTok->is(tok::l_brace))
2275           parseChildBlock();
2276         break;
2277       }
2278       break;
2279     case tok::l_brace:
2280       // Assume there are no blocks inside a braced init list apart
2281       // from the ones we explicitly parse out (like lambdas).
2282       FormatTok->setBlockKind(BK_BracedInit);
2283       nextToken();
2284       parseBracedList();
2285       break;
2286     case tok::less:
2287       if (Style.Language == FormatStyle::LK_Proto ||
2288           ClosingBraceKind == tok::greater) {
2289         nextToken();
2290         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2291                         /*ClosingBraceKind=*/tok::greater);
2292       } else {
2293         nextToken();
2294       }
2295       break;
2296     case tok::semi:
2297       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2298       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2299       // used for error recovery if we have otherwise determined that this is
2300       // a braced list.
2301       if (Style.isJavaScript()) {
2302         nextToken();
2303         break;
2304       }
2305       HasError = true;
2306       if (!ContinueOnSemicolons)
2307         return !HasError;
2308       nextToken();
2309       break;
2310     case tok::comma:
2311       nextToken();
2312       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2313         addUnwrappedLine();
2314       break;
2315     default:
2316       nextToken();
2317       break;
2318     }
2319   } while (!eof());
2320   return false;
2321 }
2322 
2323 /// \brief Parses a pair of parentheses (and everything between them).
2324 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2325 /// double ampersands. This only counts for the current parens scope.
2326 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2327   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2328   nextToken();
2329   do {
2330     switch (FormatTok->Tok.getKind()) {
2331     case tok::l_paren:
2332       parseParens();
2333       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2334         parseChildBlock();
2335       break;
2336     case tok::r_paren:
2337       nextToken();
2338       return;
2339     case tok::r_brace:
2340       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2341       return;
2342     case tok::l_square:
2343       tryToParseLambda();
2344       break;
2345     case tok::l_brace:
2346       if (!tryToParseBracedList())
2347         parseChildBlock();
2348       break;
2349     case tok::at:
2350       nextToken();
2351       if (FormatTok->is(tok::l_brace)) {
2352         nextToken();
2353         parseBracedList();
2354       }
2355       break;
2356     case tok::equal:
2357       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2358         tryToParseChildBlock();
2359       else
2360         nextToken();
2361       break;
2362     case tok::kw_class:
2363       if (Style.isJavaScript())
2364         parseRecord(/*ParseAsExpr=*/true);
2365       else
2366         nextToken();
2367       break;
2368     case tok::identifier:
2369       if (Style.isJavaScript() &&
2370           (FormatTok->is(Keywords.kw_function) ||
2371            FormatTok->startsSequence(Keywords.kw_async,
2372                                      Keywords.kw_function))) {
2373         tryToParseJSFunction();
2374       } else {
2375         nextToken();
2376       }
2377       break;
2378     case tok::kw_requires: {
2379       auto RequiresToken = FormatTok;
2380       nextToken();
2381       parseRequiresExpression(RequiresToken);
2382       break;
2383     }
2384     case tok::ampamp:
2385       if (AmpAmpTokenType != TT_Unknown)
2386         FormatTok->setFinalizedType(AmpAmpTokenType);
2387       LLVM_FALLTHROUGH;
2388     default:
2389       nextToken();
2390       break;
2391     }
2392   } while (!eof());
2393 }
2394 
2395 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2396   if (!LambdaIntroducer) {
2397     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2398     if (tryToParseLambda())
2399       return;
2400   }
2401   do {
2402     switch (FormatTok->Tok.getKind()) {
2403     case tok::l_paren:
2404       parseParens();
2405       break;
2406     case tok::r_square:
2407       nextToken();
2408       return;
2409     case tok::r_brace:
2410       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2411       return;
2412     case tok::l_square:
2413       parseSquare();
2414       break;
2415     case tok::l_brace: {
2416       if (!tryToParseBracedList())
2417         parseChildBlock();
2418       break;
2419     }
2420     case tok::at:
2421       nextToken();
2422       if (FormatTok->is(tok::l_brace)) {
2423         nextToken();
2424         parseBracedList();
2425       }
2426       break;
2427     default:
2428       nextToken();
2429       break;
2430     }
2431   } while (!eof());
2432 }
2433 
2434 void UnwrappedLineParser::keepAncestorBraces() {
2435   if (!Style.RemoveBracesLLVM)
2436     return;
2437 
2438   const int MaxNestingLevels = 2;
2439   const int Size = NestedTooDeep.size();
2440   if (Size >= MaxNestingLevels)
2441     NestedTooDeep[Size - MaxNestingLevels] = true;
2442   NestedTooDeep.push_back(false);
2443 }
2444 
2445 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2446   for (const auto &Token : llvm::reverse(Line.Tokens))
2447     if (Token.Tok->isNot(tok::comment))
2448       return Token.Tok;
2449 
2450   return nullptr;
2451 }
2452 
2453 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2454   FormatToken *Tok = nullptr;
2455 
2456   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2457       PreprocessorDirectives.empty()) {
2458     Tok = getLastNonComment(*Line);
2459     assert(Tok);
2460     if (Tok->BraceCount < 0) {
2461       assert(Tok->BraceCount == -1);
2462       Tok = nullptr;
2463     } else {
2464       Tok->BraceCount = -1;
2465     }
2466   }
2467 
2468   addUnwrappedLine();
2469   ++Line->Level;
2470   parseStructuralElement();
2471 
2472   if (Tok) {
2473     assert(!Line->InPPDirective);
2474     Tok = nullptr;
2475     for (const auto &L : llvm::reverse(*CurrentLines)) {
2476       if (!L.InPPDirective && getLastNonComment(L)) {
2477         Tok = L.Tokens.back().Tok;
2478         break;
2479       }
2480     }
2481     assert(Tok);
2482     ++Tok->BraceCount;
2483   }
2484 
2485   if (CheckEOF && FormatTok->is(tok::eof))
2486     addUnwrappedLine();
2487 
2488   --Line->Level;
2489 }
2490 
2491 static void markOptionalBraces(FormatToken *LeftBrace) {
2492   if (!LeftBrace)
2493     return;
2494 
2495   assert(LeftBrace->is(tok::l_brace));
2496 
2497   FormatToken *RightBrace = LeftBrace->MatchingParen;
2498   if (!RightBrace) {
2499     assert(!LeftBrace->Optional);
2500     return;
2501   }
2502 
2503   assert(RightBrace->is(tok::r_brace));
2504   assert(RightBrace->MatchingParen == LeftBrace);
2505   assert(LeftBrace->Optional == RightBrace->Optional);
2506 
2507   LeftBrace->Optional = true;
2508   RightBrace->Optional = true;
2509 }
2510 
2511 void UnwrappedLineParser::handleAttributes() {
2512   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2513   if (FormatTok->is(TT_AttributeMacro))
2514     nextToken();
2515   handleCppAttributes();
2516 }
2517 
2518 bool UnwrappedLineParser::handleCppAttributes() {
2519   // Handle [[likely]] / [[unlikely]] attributes.
2520   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2521     parseSquare();
2522     return true;
2523   }
2524   return false;
2525 }
2526 
/// Parses an `if` statement starting at the `if` keyword, including any
/// `else` / `else if` continuation.
///
/// When Style.RemoveBracesLLVM is set, the function also decides whether the
/// braces around the `if` and `else` bodies may be flagged as optional.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is set, receives the
/// kind of statement parsed (IfOnly, IfElse or IfElseIf). It is not written
/// when Style.RemoveBracesLLVM is off.
/// \param KeepBraces With Style.RemoveBracesLLVM set, the initial value of
/// the "keep the braces" decision for this statement (not used for
/// `if consteval`, whose braces are always kept).
/// \returns nullptr if Style.RemoveBracesLLVM is off; otherwise the `{` token
/// of the `if` body, or nullptr if that body has no braces.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces) {
  assert(FormatTok->is(tok::kw_if) && "'if' expected");
  nextToken();
  // Skip a `!` directly after `if`.
  if (FormatTok->is(tok::exclaim))
    nextToken();

  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    // `if consteval` has no condition; KeepIfBraces stays true.
    nextToken();
  } else {
    if (Style.RemoveBracesLLVM)
      KeepIfBraces = KeepBraces;
    // Skip `constexpr` or a single identifier in front of the condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren))
      parseParens();
  }
  handleAttributes();

  // Set when the `if` block was parsed without ending the current line, so
  // that it can still be ended below if no `else` follows.
  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    IfBlockKind = parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                             /*MunchSemi=*/true, KeepIfBraces);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if asked to, if the `{` has no matching `}`, if this
    // level was flagged as nested too deep, or if the block parsed above
    // reported IfOnly or IfElseIf.
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Reset the too-deep flag of this level before parsing the else part.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (FormatTok->is(tok::l_brace)) {
      // `else { ... }`
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      if (parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true,
                     KeepElseBraces) == IfStmtKind::IfOnly) {
        Kind = IfStmtKind::IfElseIf;
      }
      addUnwrappedLine();
    } else if (FormatTok->is(tok::kw_if)) {
      // `else if`: parse the chained `if` recursively.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      // If a comment precedes the `if`, the `if` starts a new unwrapped line
      // one level deeper.
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Set this level's entry aside while the chained `if` is parsed (it
        // pushes and pops an entry of its own); it is restored right after.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      // `else` followed by an unbraced statement.
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No `else` follows.
    if (Style.RemoveBracesLLVM)
      KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is bookkeeping for brace removal only.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  // Drop the entry keepAncestorBraces() pushed for this statement.
  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    // Neither pair of braces has to stay: flag both as optional.
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay: unlink the `if` block's brace pair from each other.
    // NOTE(review): presumably so that a later pass does not treat the pair
    // as removable - confirm against the code consuming MatchingParen.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2653 
2654 void UnwrappedLineParser::parseTryCatch() {
2655   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2656   nextToken();
2657   bool NeedsUnwrappedLine = false;
2658   if (FormatTok->is(tok::colon)) {
2659     // We are in a function try block, what comes is an initializer list.
2660     nextToken();
2661 
2662     // In case identifiers were removed by clang-tidy, what might follow is
2663     // multiple commas in sequence - before the first identifier.
2664     while (FormatTok->is(tok::comma))
2665       nextToken();
2666 
2667     while (FormatTok->is(tok::identifier)) {
2668       nextToken();
2669       if (FormatTok->is(tok::l_paren))
2670         parseParens();
2671       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2672           FormatTok->is(tok::l_brace)) {
2673         do {
2674           nextToken();
2675         } while (!FormatTok->is(tok::r_brace));
2676         nextToken();
2677       }
2678 
2679       // In case identifiers were removed by clang-tidy, what might follow is
2680       // multiple commas in sequence - after the first identifier.
2681       while (FormatTok->is(tok::comma))
2682         nextToken();
2683     }
2684   }
2685   // Parse try with resource.
2686   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2687     parseParens();
2688 
2689   keepAncestorBraces();
2690 
2691   if (FormatTok->is(tok::l_brace)) {
2692     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2693     parseBlock();
2694     if (Style.BraceWrapping.BeforeCatch)
2695       addUnwrappedLine();
2696     else
2697       NeedsUnwrappedLine = true;
2698   } else if (!FormatTok->is(tok::kw_catch)) {
2699     // The C++ standard requires a compound-statement after a try.
2700     // If there's none, we try to assume there's a structuralElement
2701     // and try to continue.
2702     addUnwrappedLine();
2703     ++Line->Level;
2704     parseStructuralElement();
2705     --Line->Level;
2706   }
2707   while (true) {
2708     if (FormatTok->is(tok::at))
2709       nextToken();
2710     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2711                              tok::kw___finally) ||
2712           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2713            FormatTok->is(Keywords.kw_finally)) ||
2714           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2715            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2716       break;
2717     }
2718     nextToken();
2719     while (FormatTok->isNot(tok::l_brace)) {
2720       if (FormatTok->is(tok::l_paren)) {
2721         parseParens();
2722         continue;
2723       }
2724       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2725         if (Style.RemoveBracesLLVM)
2726           NestedTooDeep.pop_back();
2727         return;
2728       }
2729       nextToken();
2730     }
2731     NeedsUnwrappedLine = false;
2732     Line->MustBeDeclaration = false;
2733     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2734     parseBlock();
2735     if (Style.BraceWrapping.BeforeCatch)
2736       addUnwrappedLine();
2737     else
2738       NeedsUnwrappedLine = true;
2739   }
2740 
2741   if (Style.RemoveBracesLLVM)
2742     NestedTooDeep.pop_back();
2743 
2744   if (NeedsUnwrappedLine)
2745     addUnwrappedLine();
2746 }
2747 
2748 void UnwrappedLineParser::parseNamespace() {
2749   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2750          "'namespace' expected");
2751 
2752   const FormatToken &InitialToken = *FormatTok;
2753   nextToken();
2754   if (InitialToken.is(TT_NamespaceMacro)) {
2755     parseParens();
2756   } else {
2757     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2758                               tok::l_square, tok::period, tok::l_paren) ||
2759            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2760       if (FormatTok->is(tok::l_square))
2761         parseSquare();
2762       else if (FormatTok->is(tok::l_paren))
2763         parseParens();
2764       else
2765         nextToken();
2766     }
2767   }
2768   if (FormatTok->is(tok::l_brace)) {
2769     if (ShouldBreakBeforeBrace(Style, InitialToken))
2770       addUnwrappedLine();
2771 
2772     unsigned AddLevels =
2773         Style.NamespaceIndentation == FormatStyle::NI_All ||
2774                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2775                  DeclarationScopeStack.size() > 1)
2776             ? 1u
2777             : 0u;
2778     bool ManageWhitesmithsBraces =
2779         AddLevels == 0u &&
2780         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2781 
2782     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2783     // the whole block.
2784     if (ManageWhitesmithsBraces)
2785       ++Line->Level;
2786 
2787     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2788                /*KeepBraces=*/true, ManageWhitesmithsBraces);
2789 
2790     // Munch the semicolon after a namespace. This is more common than one would
2791     // think. Putting the semicolon into its own line is very ugly.
2792     if (FormatTok->is(tok::semi))
2793       nextToken();
2794 
2795     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2796 
2797     if (ManageWhitesmithsBraces)
2798       --Line->Level;
2799   }
2800   // FIXME: Add error handling.
2801 }
2802 
2803 void UnwrappedLineParser::parseNew() {
2804   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2805   nextToken();
2806 
2807   if (Style.isCSharp()) {
2808     do {
2809       if (FormatTok->is(tok::l_brace))
2810         parseBracedList();
2811 
2812       if (FormatTok->isOneOf(tok::semi, tok::comma))
2813         return;
2814 
2815       nextToken();
2816     } while (!eof());
2817   }
2818 
2819   if (Style.Language != FormatStyle::LK_Java)
2820     return;
2821 
2822   // In Java, we can parse everything up to the parens, which aren't optional.
2823   do {
2824     // There should not be a ;, { or } before the new's open paren.
2825     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2826       return;
2827 
2828     // Consume the parens.
2829     if (FormatTok->is(tok::l_paren)) {
2830       parseParens();
2831 
2832       // If there is a class body of an anonymous class, consume that as child.
2833       if (FormatTok->is(tok::l_brace))
2834         parseChildBlock();
2835       return;
2836     }
2837     nextToken();
2838   } while (!eof());
2839 }
2840 
2841 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
2842   keepAncestorBraces();
2843 
2844   if (FormatTok->is(tok::l_brace)) {
2845     if (!KeepBraces)
2846       FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2847     FormatToken *LeftBrace = FormatTok;
2848     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2849     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2850                /*MunchSemi=*/true, KeepBraces);
2851     if (!KeepBraces) {
2852       assert(!NestedTooDeep.empty());
2853       if (!NestedTooDeep.back())
2854         markOptionalBraces(LeftBrace);
2855     }
2856     if (WrapRightBrace)
2857       addUnwrappedLine();
2858   } else {
2859     parseUnbracedBody();
2860   }
2861 
2862   if (!KeepBraces)
2863     NestedTooDeep.pop_back();
2864 }
2865 
2866 void UnwrappedLineParser::parseForOrWhileLoop() {
2867   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2868          "'for', 'while' or foreach macro expected");
2869   const bool KeepBraces = !Style.RemoveBracesLLVM ||
2870                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
2871 
2872   nextToken();
2873   // JS' for await ( ...
2874   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2875     nextToken();
2876   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2877     nextToken();
2878   if (FormatTok->is(tok::l_paren))
2879     parseParens();
2880 
2881   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
2882 }
2883 
2884 void UnwrappedLineParser::parseDoWhile() {
2885   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2886   nextToken();
2887 
2888   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
2889 
2890   // FIXME: Add error handling.
2891   if (!FormatTok->is(tok::kw_while)) {
2892     addUnwrappedLine();
2893     return;
2894   }
2895 
2896   // If in Whitesmiths mode, the line with the while() needs to be indented
2897   // to the same level as the block.
2898   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2899     ++Line->Level;
2900 
2901   nextToken();
2902   parseStructuralElement();
2903 }
2904 
2905 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2906   nextToken();
2907   unsigned OldLineLevel = Line->Level;
2908   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2909     --Line->Level;
2910   if (LeftAlignLabel)
2911     Line->Level = 0;
2912 
2913   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2914       FormatTok->is(tok::l_brace)) {
2915 
2916     CompoundStatementIndenter Indenter(this, Line->Level,
2917                                        Style.BraceWrapping.AfterCaseLabel,
2918                                        Style.BraceWrapping.IndentBraces);
2919     parseBlock();
2920     if (FormatTok->is(tok::kw_break)) {
2921       if (Style.BraceWrapping.AfterControlStatement ==
2922           FormatStyle::BWACS_Always) {
2923         addUnwrappedLine();
2924         if (!Style.IndentCaseBlocks &&
2925             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2926           ++Line->Level;
2927         }
2928       }
2929       parseStructuralElement();
2930     }
2931     addUnwrappedLine();
2932   } else {
2933     if (FormatTok->is(tok::semi))
2934       nextToken();
2935     addUnwrappedLine();
2936   }
2937   Line->Level = OldLineLevel;
2938   if (FormatTok->isNot(tok::l_brace)) {
2939     parseStructuralElement();
2940     addUnwrappedLine();
2941   }
2942 }
2943 
2944 void UnwrappedLineParser::parseCaseLabel() {
2945   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2946 
2947   // FIXME: fix handling of complex expressions here.
2948   do {
2949     nextToken();
2950   } while (!eof() && !FormatTok->is(tok::colon));
2951   parseLabel();
2952 }
2953 
2954 void UnwrappedLineParser::parseSwitch() {
2955   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
2956   nextToken();
2957   if (FormatTok->is(tok::l_paren))
2958     parseParens();
2959 
2960   keepAncestorBraces();
2961 
2962   if (FormatTok->is(tok::l_brace)) {
2963     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2964     parseBlock();
2965     addUnwrappedLine();
2966   } else {
2967     addUnwrappedLine();
2968     ++Line->Level;
2969     parseStructuralElement();
2970     --Line->Level;
2971   }
2972 
2973   if (Style.RemoveBracesLLVM)
2974     NestedTooDeep.pop_back();
2975 }
2976 
2977 // Operators that can follow a C variable.
2978 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
2979   switch (kind) {
2980   case tok::ampamp:
2981   case tok::ampequal:
2982   case tok::arrow:
2983   case tok::caret:
2984   case tok::caretequal:
2985   case tok::comma:
2986   case tok::ellipsis:
2987   case tok::equal:
2988   case tok::equalequal:
2989   case tok::exclaim:
2990   case tok::exclaimequal:
2991   case tok::greater:
2992   case tok::greaterequal:
2993   case tok::greatergreater:
2994   case tok::greatergreaterequal:
2995   case tok::l_paren:
2996   case tok::l_square:
2997   case tok::less:
2998   case tok::lessequal:
2999   case tok::lessless:
3000   case tok::lesslessequal:
3001   case tok::minus:
3002   case tok::minusequal:
3003   case tok::minusminus:
3004   case tok::percent:
3005   case tok::percentequal:
3006   case tok::period:
3007   case tok::pipe:
3008   case tok::pipeequal:
3009   case tok::pipepipe:
3010   case tok::plus:
3011   case tok::plusequal:
3012   case tok::plusplus:
3013   case tok::question:
3014   case tok::r_brace:
3015   case tok::r_paren:
3016   case tok::r_square:
3017   case tok::semi:
3018   case tok::slash:
3019   case tok::slashequal:
3020   case tok::star:
3021   case tok::starequal:
3022     return true;
3023   default:
3024     return false;
3025   }
3026 }
3027 
3028 void UnwrappedLineParser::parseAccessSpecifier() {
3029   FormatToken *AccessSpecifierCandidate = FormatTok;
3030   nextToken();
3031   // Understand Qt's slots.
3032   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3033     nextToken();
3034   // Otherwise, we don't know what it is, and we'd better keep the next token.
3035   if (FormatTok->is(tok::colon)) {
3036     nextToken();
3037     addUnwrappedLine();
3038   } else if (!FormatTok->is(tok::coloncolon) &&
3039              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3040     // Not a variable name nor namespace name.
3041     addUnwrappedLine();
3042   } else if (AccessSpecifierCandidate) {
3043     // Consider the access specifier to be a C identifier.
3044     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3045   }
3046 }
3047 
3048 /// \brief Parses a concept definition.
3049 /// \pre The current token has to be the concept keyword.
3050 ///
3051 /// Returns if either the concept has been completely parsed, or if it detects
3052 /// that the concept definition is incorrect.
3053 void UnwrappedLineParser::parseConcept() {
3054   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
3055   nextToken();
3056   if (!FormatTok->is(tok::identifier))
3057     return;
3058   nextToken();
3059   if (!FormatTok->is(tok::equal))
3060     return;
3061   nextToken();
3062   parseConstraintExpression();
3063   if (FormatTok->is(tok::semi))
3064     nextToken();
3065   addUnwrappedLine();
3066 }
3067 
3068 /// \brief Parses a requires, decides if it is a clause or an expression.
3069 /// \pre The current token has to be the requires keyword.
3070 /// \returns true if it parsed a clause.
3071 bool clang::format::UnwrappedLineParser::parseRequires() {
3072   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3073   auto RequiresToken = FormatTok;
3074 
3075   // We try to guess if it is a requires clause, or a requires expression. For
3076   // that we first consume the keyword and check the next token.
3077   nextToken();
3078 
3079   switch (FormatTok->Tok.getKind()) {
3080   case tok::l_brace:
3081     // This can only be an expression, never a clause.
3082     parseRequiresExpression(RequiresToken);
3083     return false;
3084   case tok::l_paren:
3085     // Clauses and expression can start with a paren, it's unclear what we have.
3086     break;
3087   default:
3088     // All other tokens can only be a clause.
3089     parseRequiresClause(RequiresToken);
3090     return true;
3091   }
3092 
3093   // Looking forward we would have to decide if there are function declaration
3094   // like arguments to the requires expression:
3095   // requires (T t) {
3096   // Or there is a constraint expression for the requires clause:
3097   // requires (C<T> && ...
3098 
3099   // But first let's look behind.
3100   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3101 
3102   if (!PreviousNonComment ||
3103       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3104     // If there is no token, or an expression left brace, we are a requires
3105     // clause within a requires expression.
3106     parseRequiresClause(RequiresToken);
3107     return true;
3108   }
3109 
3110   switch (PreviousNonComment->Tok.getKind()) {
3111   case tok::greater:
3112   case tok::r_paren:
3113   case tok::kw_noexcept:
3114   case tok::kw_const:
3115     // This is a requires clause.
3116     parseRequiresClause(RequiresToken);
3117     return true;
3118   case tok::amp:
3119   case tok::ampamp: {
3120     // This can be either:
3121     // if (... && requires (T t) ...)
3122     // Or
3123     // void member(...) && requires (C<T> ...
3124     // We check the one token before that for a const:
3125     // void member(...) const && requires (C<T> ...
3126     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3127     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3128       parseRequiresClause(RequiresToken);
3129       return true;
3130     }
3131     break;
3132   }
3133   default:
3134     // It's an expression.
3135     parseRequiresExpression(RequiresToken);
3136     return false;
3137   }
3138 
3139   // Now we look forward and try to check if the paren content is a parameter
3140   // list. The parameters can be cv-qualified and contain references or
3141   // pointers.
3142   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3143   // of stuff: typename, const, *, &, &&, ::, identifiers.
3144 
3145   int NextTokenOffset = 1;
3146   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3147   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3148     ++NextTokenOffset;
3149     NextToken = Tokens->peekNextToken(NextTokenOffset);
3150   };
3151 
3152   bool FoundType = false;
3153   bool LastWasColonColon = false;
3154   int OpenAngles = 0;
3155 
3156   for (; NextTokenOffset < 50; PeekNext()) {
3157     switch (NextToken->Tok.getKind()) {
3158     case tok::kw_volatile:
3159     case tok::kw_const:
3160     case tok::comma:
3161       parseRequiresExpression(RequiresToken);
3162       return false;
3163     case tok::r_paren:
3164     case tok::pipepipe:
3165       parseRequiresClause(RequiresToken);
3166       return true;
3167     case tok::eof:
3168       // Break out of the loop.
3169       NextTokenOffset = 50;
3170       break;
3171     case tok::coloncolon:
3172       LastWasColonColon = true;
3173       break;
3174     case tok::identifier:
3175       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3176         parseRequiresExpression(RequiresToken);
3177         return false;
3178       }
3179       FoundType = true;
3180       LastWasColonColon = false;
3181       break;
3182     case tok::less:
3183       ++OpenAngles;
3184       break;
3185     case tok::greater:
3186       --OpenAngles;
3187       break;
3188     default:
3189       if (NextToken->isSimpleTypeSpecifier()) {
3190         parseRequiresExpression(RequiresToken);
3191         return false;
3192       }
3193       break;
3194     }
3195   }
3196 
3197   // This seems to be a complicated expression, just assume it's a clause.
3198   parseRequiresClause(RequiresToken);
3199   return true;
3200 }
3201 
3202 /// \brief Parses a requires clause.
3203 /// \param RequiresToken The requires keyword token, which starts this clause.
3204 /// \pre We need to be on the next token after the requires keyword.
3205 /// \sa parseRequiresExpression
3206 ///
3207 /// Returns if it either has finished parsing the clause, or it detects, that
3208 /// the clause is incorrect.
3209 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3210   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3211   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3212 
3213   // If there is no previous token, we are within a requires expression,
3214   // otherwise we will always have the template or function declaration in front
3215   // of it.
3216   bool InRequiresExpression =
3217       !RequiresToken->Previous ||
3218       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3219 
3220   RequiresToken->setFinalizedType(InRequiresExpression
3221                                       ? TT_RequiresClauseInARequiresExpression
3222                                       : TT_RequiresClause);
3223 
3224   parseConstraintExpression();
3225 
3226   if (!InRequiresExpression)
3227     FormatTok->Previous->ClosesRequiresClause = true;
3228 }
3229 
3230 /// \brief Parses a requires expression.
3231 /// \param RequiresToken The requires keyword token, which starts this clause.
3232 /// \pre We need to be on the next token after the requires keyword.
3233 /// \sa parseRequiresClause
3234 ///
3235 /// Returns if it either has finished parsing the expression, or it detects,
3236 /// that the expression is incorrect.
3237 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3238   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3239   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3240 
3241   RequiresToken->setFinalizedType(TT_RequiresExpression);
3242 
3243   if (FormatTok->is(tok::l_paren)) {
3244     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3245     parseParens();
3246   }
3247 
3248   if (FormatTok->is(tok::l_brace)) {
3249     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3250     parseChildBlock(/*CanContainBracedList=*/false,
3251                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3252   }
3253 }
3254 
3255 /// \brief Parses a constraint expression.
3256 ///
3257 /// This is either the definition of a concept, or the body of a requires
3258 /// clause. It returns, when the parsing is complete, or the expression is
3259 /// incorrect.
3260 void UnwrappedLineParser::parseConstraintExpression() {
3261   // The special handling for lambdas is needed since tryToParseLambda() eats a
3262   // token and if a requires expression is the last part of a requires clause
3263   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3264   // not set on the correct token. Thus we need to be aware if we even expect a
3265   // lambda to be possible.
3266   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3267   bool LambdaNextTimeAllowed = true;
3268   do {
3269     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3270 
3271     switch (FormatTok->Tok.getKind()) {
3272     case tok::kw_requires: {
3273       auto RequiresToken = FormatTok;
3274       nextToken();
3275       parseRequiresExpression(RequiresToken);
3276       break;
3277     }
3278 
3279     case tok::l_paren:
3280       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3281       break;
3282 
3283     case tok::l_square:
3284       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3285         return;
3286       break;
3287 
3288     case tok::kw_const:
3289     case tok::semi:
3290     case tok::kw_class:
3291     case tok::kw_struct:
3292     case tok::kw_union:
3293       return;
3294 
3295     case tok::l_brace:
3296       // Potential function body.
3297       return;
3298 
3299     case tok::ampamp:
3300     case tok::pipepipe:
3301       FormatTok->setFinalizedType(TT_BinaryOperator);
3302       nextToken();
3303       LambdaNextTimeAllowed = true;
3304       break;
3305 
3306     case tok::comma:
3307     case tok::comment:
3308       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3309       nextToken();
3310       break;
3311 
3312     case tok::kw_sizeof:
3313     case tok::greater:
3314     case tok::greaterequal:
3315     case tok::greatergreater:
3316     case tok::less:
3317     case tok::lessequal:
3318     case tok::lessless:
3319     case tok::equalequal:
3320     case tok::exclaim:
3321     case tok::exclaimequal:
3322     case tok::plus:
3323     case tok::minus:
3324     case tok::star:
3325     case tok::slash:
3326     case tok::kw_decltype:
3327       LambdaNextTimeAllowed = true;
3328       // Just eat them.
3329       nextToken();
3330       break;
3331 
3332     case tok::numeric_constant:
3333     case tok::coloncolon:
3334     case tok::kw_true:
3335     case tok::kw_false:
3336       // Just eat them.
3337       nextToken();
3338       break;
3339 
3340     case tok::kw_static_cast:
3341     case tok::kw_const_cast:
3342     case tok::kw_reinterpret_cast:
3343     case tok::kw_dynamic_cast:
3344       nextToken();
3345       if (!FormatTok->is(tok::less))
3346         return;
3347 
3348       nextToken();
3349       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3350                       /*ClosingBraceKind=*/tok::greater);
3351       break;
3352 
3353     case tok::kw_bool:
3354       // bool is only allowed if it is directly followed by a paren for a cast:
3355       // concept C = bool(...);
3356       // and bool is the only type, all other types as cast must be inside a
3357       // cast to bool an thus are handled by the other cases.
3358       nextToken();
3359       if (FormatTok->isNot(tok::l_paren))
3360         return;
3361       parseParens();
3362       break;
3363 
3364     default:
3365       if (!FormatTok->Tok.getIdentifierInfo()) {
3366         // Identifiers are part of the default case, we check for more then
3367         // tok::identifier to handle builtin type traits.
3368         return;
3369       }
3370 
3371       // We need to differentiate identifiers for a template deduction guide,
3372       // variables, or function return types (the constraint expression has
3373       // ended before that), and basically all other cases. But it's easier to
3374       // check the other way around.
3375       assert(FormatTok->Previous);
3376       switch (FormatTok->Previous->Tok.getKind()) {
3377       case tok::coloncolon:  // Nested identifier.
3378       case tok::ampamp:      // Start of a function or variable for the
3379       case tok::pipepipe:    // constraint expression.
3380       case tok::kw_requires: // Initial identifier of a requires clause.
3381       case tok::equal:       // Initial identifier of a concept declaration.
3382         break;
3383       default:
3384         return;
3385       }
3386 
3387       // Read identifier with optional template declaration.
3388       nextToken();
3389       if (FormatTok->is(tok::less)) {
3390         nextToken();
3391         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3392                         /*ClosingBraceKind=*/tok::greater);
3393       }
3394       break;
3395     }
3396   } while (!eof());
3397 }
3398 
3399 bool UnwrappedLineParser::parseEnum() {
3400   const FormatToken &InitialToken = *FormatTok;
3401 
3402   // Won't be 'enum' for NS_ENUMs.
3403   if (FormatTok->is(tok::kw_enum))
3404     nextToken();
3405 
3406   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3407   // declarations. An "enum" keyword followed by a colon would be a syntax
3408   // error and thus assume it is just an identifier.
3409   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3410     return false;
3411 
3412   // In protobuf, "enum" can be used as a field name.
3413   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3414     return false;
3415 
3416   // Eat up enum class ...
3417   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3418     nextToken();
3419 
3420   while (FormatTok->Tok.getIdentifierInfo() ||
3421          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3422                             tok::greater, tok::comma, tok::question)) {
3423     nextToken();
3424     // We can have macros or attributes in between 'enum' and the enum name.
3425     if (FormatTok->is(tok::l_paren))
3426       parseParens();
3427     if (FormatTok->is(tok::identifier)) {
3428       nextToken();
3429       // If there are two identifiers in a row, this is likely an elaborate
3430       // return type. In Java, this can be "implements", etc.
3431       if (Style.isCpp() && FormatTok->is(tok::identifier))
3432         return false;
3433     }
3434   }
3435 
3436   // Just a declaration or something is wrong.
3437   if (FormatTok->isNot(tok::l_brace))
3438     return true;
3439   FormatTok->setFinalizedType(TT_EnumLBrace);
3440   FormatTok->setBlockKind(BK_Block);
3441 
3442   if (Style.Language == FormatStyle::LK_Java) {
3443     // Java enums are different.
3444     parseJavaEnumBody();
3445     return true;
3446   }
3447   if (Style.Language == FormatStyle::LK_Proto) {
3448     parseBlock(/*MustBeDeclaration=*/true);
3449     return true;
3450   }
3451 
3452   if (!Style.AllowShortEnumsOnASingleLine &&
3453       ShouldBreakBeforeBrace(Style, InitialToken)) {
3454     addUnwrappedLine();
3455   }
3456   // Parse enum body.
3457   nextToken();
3458   if (!Style.AllowShortEnumsOnASingleLine) {
3459     addUnwrappedLine();
3460     Line->Level += 1;
3461   }
3462   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3463                                    /*IsEnum=*/true);
3464   if (!Style.AllowShortEnumsOnASingleLine)
3465     Line->Level -= 1;
3466   if (HasError) {
3467     if (FormatTok->is(tok::semi))
3468       nextToken();
3469     addUnwrappedLine();
3470   }
3471   return true;
3472 
3473   // There is no addUnwrappedLine() here so that we fall through to parsing a
3474   // structural element afterwards. Thus, in "enum A {} n, m;",
3475   // "} n, m;" will end up in one unwrapped line.
3476 }
3477 
3478 bool UnwrappedLineParser::parseStructLike() {
3479   // parseRecord falls through and does not yet add an unwrapped line as a
3480   // record declaration or definition can start a structural element.
3481   parseRecord();
3482   // This does not apply to Java, JavaScript and C#.
3483   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3484       Style.isCSharp()) {
3485     if (FormatTok->is(tok::semi))
3486       nextToken();
3487     addUnwrappedLine();
3488     return true;
3489   }
3490   return false;
3491 }
3492 
3493 namespace {
3494 // A class used to set and restore the Token position when peeking
3495 // ahead in the token source.
3496 class ScopedTokenPosition {
3497   unsigned StoredPosition;
3498   FormatTokenSource *Tokens;
3499 
3500 public:
3501   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3502     assert(Tokens && "Tokens expected to not be null");
3503     StoredPosition = Tokens->getPosition();
3504   }
3505 
3506   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3507 };
3508 } // namespace
3509 
3510 // Look to see if we have [[ by looking ahead, if
3511 // its not then rewind to the original position.
3512 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3513   ScopedTokenPosition AutoPosition(Tokens);
3514   FormatToken *Tok = Tokens->getNextToken();
3515   // We already read the first [ check for the second.
3516   if (!Tok->is(tok::l_square))
3517     return false;
3518   // Double check that the attribute is just something
3519   // fairly simple.
3520   while (Tok->isNot(tok::eof)) {
3521     if (Tok->is(tok::r_square))
3522       break;
3523     Tok = Tokens->getNextToken();
3524   }
3525   if (Tok->is(tok::eof))
3526     return false;
3527   Tok = Tokens->getNextToken();
3528   if (!Tok->is(tok::r_square))
3529     return false;
3530   Tok = Tokens->getNextToken();
3531   if (Tok->is(tok::semi))
3532     return false;
3533   return true;
3534 }
3535 
3536 void UnwrappedLineParser::parseJavaEnumBody() {
3537   assert(FormatTok->is(tok::l_brace));
3538   const FormatToken *OpeningBrace = FormatTok;
3539 
3540   // Determine whether the enum is simple, i.e. does not have a semicolon or
3541   // constants with class bodies. Simple enums can be formatted like braced
3542   // lists, contracted to a single line, etc.
3543   unsigned StoredPosition = Tokens->getPosition();
3544   bool IsSimple = true;
3545   FormatToken *Tok = Tokens->getNextToken();
3546   while (!Tok->is(tok::eof)) {
3547     if (Tok->is(tok::r_brace))
3548       break;
3549     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3550       IsSimple = false;
3551       break;
3552     }
3553     // FIXME: This will also mark enums with braces in the arguments to enum
3554     // constants as "not simple". This is probably fine in practice, though.
3555     Tok = Tokens->getNextToken();
3556   }
3557   FormatTok = Tokens->setPosition(StoredPosition);
3558 
3559   if (IsSimple) {
3560     nextToken();
3561     parseBracedList();
3562     addUnwrappedLine();
3563     return;
3564   }
3565 
3566   // Parse the body of a more complex enum.
3567   // First add a line for everything up to the "{".
3568   nextToken();
3569   addUnwrappedLine();
3570   ++Line->Level;
3571 
3572   // Parse the enum constants.
3573   while (FormatTok->isNot(tok::eof)) {
3574     if (FormatTok->is(tok::l_brace)) {
3575       // Parse the constant's class body.
3576       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3577                  /*MunchSemi=*/false);
3578     } else if (FormatTok->is(tok::l_paren)) {
3579       parseParens();
3580     } else if (FormatTok->is(tok::comma)) {
3581       nextToken();
3582       addUnwrappedLine();
3583     } else if (FormatTok->is(tok::semi)) {
3584       nextToken();
3585       addUnwrappedLine();
3586       break;
3587     } else if (FormatTok->is(tok::r_brace)) {
3588       addUnwrappedLine();
3589       break;
3590     } else {
3591       nextToken();
3592     }
3593   }
3594 
3595   // Parse the class body after the enum's ";" if any.
3596   parseLevel(OpeningBrace, /*CanContainBracedList=*/true);
3597   nextToken();
3598   --Line->Level;
3599   addUnwrappedLine();
3600 }
3601 
3602 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3603   const FormatToken &InitialToken = *FormatTok;
3604   nextToken();
3605 
3606   // The actual identifier can be a nested name specifier, and in macros
3607   // it is often token-pasted.
3608   // An [[attribute]] can be before the identifier.
3609   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3610                             tok::kw___attribute, tok::kw___declspec,
3611                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3612          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3613           FormatTok->isOneOf(tok::period, tok::comma))) {
3614     if (Style.isJavaScript() &&
3615         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3616       // JavaScript/TypeScript supports inline object types in
3617       // extends/implements positions:
3618       //     class Foo implements {bar: number} { }
3619       nextToken();
3620       if (FormatTok->is(tok::l_brace)) {
3621         tryToParseBracedList();
3622         continue;
3623       }
3624     }
3625     bool IsNonMacroIdentifier =
3626         FormatTok->is(tok::identifier) &&
3627         FormatTok->TokenText != FormatTok->TokenText.upper();
3628     nextToken();
3629     // We can have macros or attributes in between 'class' and the class name.
3630     if (!IsNonMacroIdentifier) {
3631       if (FormatTok->is(tok::l_paren)) {
3632         parseParens();
3633       } else if (FormatTok->is(TT_AttributeSquare)) {
3634         parseSquare();
3635         // Consume the closing TT_AttributeSquare.
3636         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3637           nextToken();
3638       }
3639     }
3640   }
3641 
3642   // Note that parsing away template declarations here leads to incorrectly
3643   // accepting function declarations as record declarations.
3644   // In general, we cannot solve this problem. Consider:
3645   // class A<int> B() {}
3646   // which can be a function definition or a class definition when B() is a
3647   // macro. If we find enough real-world cases where this is a problem, we
3648   // can parse for the 'template' keyword in the beginning of the statement,
3649   // and thus rule out the record production in case there is no template
3650   // (this would still leave us with an ambiguity between template function
3651   // and class declarations).
3652   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3653     do {
3654       if (FormatTok->is(tok::l_brace)) {
3655         calculateBraceTypes(/*ExpectClassBody=*/true);
3656         if (!tryToParseBracedList())
3657           break;
3658       }
3659       if (FormatTok->is(tok::l_square)) {
3660         FormatToken *Previous = FormatTok->Previous;
3661         if (!Previous ||
3662             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3663           // Don't try parsing a lambda if we had a closing parenthesis before,
3664           // it was probably a pointer to an array: int (*)[].
3665           if (!tryToParseLambda())
3666             break;
3667         } else {
3668           parseSquare();
3669           continue;
3670         }
3671       }
3672       if (FormatTok->is(tok::semi))
3673         return;
3674       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3675         addUnwrappedLine();
3676         nextToken();
3677         parseCSharpGenericTypeConstraint();
3678         break;
3679       }
3680       nextToken();
3681     } while (!eof());
3682   }
3683 
3684   auto GetBraceType = [](const FormatToken &RecordTok) {
3685     switch (RecordTok.Tok.getKind()) {
3686     case tok::kw_class:
3687       return TT_ClassLBrace;
3688     case tok::kw_struct:
3689       return TT_StructLBrace;
3690     case tok::kw_union:
3691       return TT_UnionLBrace;
3692     default:
3693       // Useful for e.g. interface.
3694       return TT_RecordLBrace;
3695     }
3696   };
3697   if (FormatTok->is(tok::l_brace)) {
3698     FormatTok->setFinalizedType(GetBraceType(InitialToken));
3699     if (ParseAsExpr) {
3700       parseChildBlock();
3701     } else {
3702       if (ShouldBreakBeforeBrace(Style, InitialToken))
3703         addUnwrappedLine();
3704 
3705       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3706       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3707     }
3708   }
3709   // There is no addUnwrappedLine() here so that we fall through to parsing a
3710   // structural element afterwards. Thus, in "class A {} n, m;",
3711   // "} n, m;" will end up in one unwrapped line.
3712 }
3713 
3714 void UnwrappedLineParser::parseObjCMethod() {
3715   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3716          "'(' or identifier expected.");
3717   do {
3718     if (FormatTok->is(tok::semi)) {
3719       nextToken();
3720       addUnwrappedLine();
3721       return;
3722     } else if (FormatTok->is(tok::l_brace)) {
3723       if (Style.BraceWrapping.AfterFunction)
3724         addUnwrappedLine();
3725       parseBlock();
3726       addUnwrappedLine();
3727       return;
3728     } else {
3729       nextToken();
3730     }
3731   } while (!eof());
3732 }
3733 
3734 void UnwrappedLineParser::parseObjCProtocolList() {
3735   assert(FormatTok->is(tok::less) && "'<' expected.");
3736   do {
3737     nextToken();
3738     // Early exit in case someone forgot a close angle.
3739     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3740         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3741       return;
3742     }
3743   } while (!eof() && FormatTok->isNot(tok::greater));
3744   nextToken(); // Skip '>'.
3745 }
3746 
3747 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3748   do {
3749     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3750       nextToken();
3751       addUnwrappedLine();
3752       break;
3753     }
3754     if (FormatTok->is(tok::l_brace)) {
3755       parseBlock();
3756       // In ObjC interfaces, nothing should be following the "}".
3757       addUnwrappedLine();
3758     } else if (FormatTok->is(tok::r_brace)) {
3759       // Ignore stray "}". parseStructuralElement doesn't consume them.
3760       nextToken();
3761       addUnwrappedLine();
3762     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3763       nextToken();
3764       parseObjCMethod();
3765     } else {
3766       parseStructuralElement();
3767     }
3768   } while (!eof());
3769 }
3770 
3771 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3772   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3773          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3774   nextToken();
3775   nextToken(); // interface name
3776 
3777   // @interface can be followed by a lightweight generic
3778   // specialization list, then either a base class or a category.
3779   if (FormatTok->is(tok::less))
3780     parseObjCLightweightGenerics();
3781   if (FormatTok->is(tok::colon)) {
3782     nextToken();
3783     nextToken(); // base class name
3784     // The base class can also have lightweight generics applied to it.
3785     if (FormatTok->is(tok::less))
3786       parseObjCLightweightGenerics();
3787   } else if (FormatTok->is(tok::l_paren)) {
3788     // Skip category, if present.
3789     parseParens();
3790   }
3791 
3792   if (FormatTok->is(tok::less))
3793     parseObjCProtocolList();
3794 
3795   if (FormatTok->is(tok::l_brace)) {
3796     if (Style.BraceWrapping.AfterObjCDeclaration)
3797       addUnwrappedLine();
3798     parseBlock(/*MustBeDeclaration=*/true);
3799   }
3800 
3801   // With instance variables, this puts '}' on its own line.  Without instance
3802   // variables, this ends the @interface line.
3803   addUnwrappedLine();
3804 
3805   parseObjCUntilAtEnd();
3806 }
3807 
3808 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3809   assert(FormatTok->is(tok::less));
3810   // Unlike protocol lists, generic parameterizations support
3811   // nested angles:
3812   //
3813   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3814   //     NSObject <NSCopying, NSSecureCoding>
3815   //
3816   // so we need to count how many open angles we have left.
3817   unsigned NumOpenAngles = 1;
3818   do {
3819     nextToken();
3820     // Early exit in case someone forgot a close angle.
3821     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3822         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3823       break;
3824     }
3825     if (FormatTok->is(tok::less)) {
3826       ++NumOpenAngles;
3827     } else if (FormatTok->is(tok::greater)) {
3828       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3829       --NumOpenAngles;
3830     }
3831   } while (!eof() && NumOpenAngles != 0);
3832   nextToken(); // Skip '>'.
3833 }
3834 
3835 // Returns true for the declaration/definition form of @protocol,
3836 // false for the expression form.
3837 bool UnwrappedLineParser::parseObjCProtocol() {
3838   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3839   nextToken();
3840 
3841   if (FormatTok->is(tok::l_paren)) {
3842     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3843     return false;
3844   }
3845 
3846   // The definition/declaration form,
3847   // @protocol Foo
3848   // - (int)someMethod;
3849   // @end
3850 
3851   nextToken(); // protocol name
3852 
3853   if (FormatTok->is(tok::less))
3854     parseObjCProtocolList();
3855 
3856   // Check for protocol declaration.
3857   if (FormatTok->is(tok::semi)) {
3858     nextToken();
3859     addUnwrappedLine();
3860     return true;
3861   }
3862 
3863   addUnwrappedLine();
3864   parseObjCUntilAtEnd();
3865   return true;
3866 }
3867 
3868 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3869   bool IsImport = FormatTok->is(Keywords.kw_import);
3870   assert(IsImport || FormatTok->is(tok::kw_export));
3871   nextToken();
3872 
3873   // Consume the "default" in "export default class/function".
3874   if (FormatTok->is(tok::kw_default))
3875     nextToken();
3876 
3877   // Consume "async function", "function" and "default function", so that these
3878   // get parsed as free-standing JS functions, i.e. do not require a trailing
3879   // semicolon.
3880   if (FormatTok->is(Keywords.kw_async))
3881     nextToken();
3882   if (FormatTok->is(Keywords.kw_function)) {
3883     nextToken();
3884     return;
3885   }
3886 
3887   // For imports, `export *`, `export {...}`, consume the rest of the line up
3888   // to the terminating `;`. For everything else, just return and continue
3889   // parsing the structural element, i.e. the declaration or expression for
3890   // `export default`.
3891   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3892       !FormatTok->isStringLiteral()) {
3893     return;
3894   }
3895 
3896   while (!eof()) {
3897     if (FormatTok->is(tok::semi))
3898       return;
3899     if (Line->Tokens.empty()) {
3900       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3901       // import statement should terminate.
3902       return;
3903     }
3904     if (FormatTok->is(tok::l_brace)) {
3905       FormatTok->setBlockKind(BK_Block);
3906       nextToken();
3907       parseBracedList();
3908     } else {
3909       nextToken();
3910     }
3911   }
3912 }
3913 
3914 void UnwrappedLineParser::parseStatementMacro() {
3915   nextToken();
3916   if (FormatTok->is(tok::l_paren))
3917     parseParens();
3918   if (FormatTok->is(tok::semi))
3919     nextToken();
3920   addUnwrappedLine();
3921 }
3922 
3923 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3924                                                  StringRef Prefix = "") {
3925   llvm::dbgs() << Prefix << "Line(" << Line.Level
3926                << ", FSC=" << Line.FirstStartColumn << ")"
3927                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3928   for (const auto &Node : Line.Tokens) {
3929     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3930                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3931                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3932   }
3933   for (const auto &Node : Line.Tokens)
3934     for (const auto &ChildNode : Node.Children)
3935       printDebugInfo(ChildNode, "\nChild: ");
3936 
3937   llvm::dbgs() << "\n";
3938 }
3939 
3940 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3941   if (Line->Tokens.empty())
3942     return;
3943   LLVM_DEBUG({
3944     if (CurrentLines == &Lines)
3945       printDebugInfo(*Line);
3946   });
3947 
3948   // If this line closes a block when in Whitesmiths mode, remember that
3949   // information so that the level can be decreased after the line is added.
3950   // This has to happen after the addition of the line since the line itself
3951   // needs to be indented.
3952   bool ClosesWhitesmithsBlock =
3953       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3954       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3955 
3956   CurrentLines->push_back(std::move(*Line));
3957   Line->Tokens.clear();
3958   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3959   Line->FirstStartColumn = 0;
3960 
3961   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3962     --Line->Level;
3963   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3964     CurrentLines->append(
3965         std::make_move_iterator(PreprocessorDirectives.begin()),
3966         std::make_move_iterator(PreprocessorDirectives.end()));
3967     PreprocessorDirectives.clear();
3968   }
3969   // Disconnect the current token from the last token on the previous line.
3970   FormatTok->Previous = nullptr;
3971 }
3972 
3973 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
3974 
3975 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3976   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3977          FormatTok.NewlinesBefore > 0;
3978 }
3979 
3980 // Checks if \p FormatTok is a line comment that continues the line comment
3981 // section on \p Line.
3982 static bool
3983 continuesLineCommentSection(const FormatToken &FormatTok,
3984                             const UnwrappedLine &Line,
3985                             const llvm::Regex &CommentPragmasRegex) {
3986   if (Line.Tokens.empty())
3987     return false;
3988 
3989   StringRef IndentContent = FormatTok.TokenText;
3990   if (FormatTok.TokenText.startswith("//") ||
3991       FormatTok.TokenText.startswith("/*")) {
3992     IndentContent = FormatTok.TokenText.substr(2);
3993   }
3994   if (CommentPragmasRegex.match(IndentContent))
3995     return false;
3996 
3997   // If Line starts with a line comment, then FormatTok continues the comment
3998   // section if its original column is greater or equal to the original start
3999   // column of the line.
4000   //
4001   // Define the min column token of a line as follows: if a line ends in '{' or
4002   // contains a '{' followed by a line comment, then the min column token is
4003   // that '{'. Otherwise, the min column token of the line is the first token of
4004   // the line.
4005   //
4006   // If Line starts with a token other than a line comment, then FormatTok
4007   // continues the comment section if its original column is greater than the
4008   // original start column of the min column token of the line.
4009   //
4010   // For example, the second line comment continues the first in these cases:
4011   //
4012   // // first line
4013   // // second line
4014   //
4015   // and:
4016   //
4017   // // first line
4018   //  // second line
4019   //
4020   // and:
4021   //
4022   // int i; // first line
4023   //  // second line
4024   //
4025   // and:
4026   //
4027   // do { // first line
4028   //      // second line
4029   //   int i;
4030   // } while (true);
4031   //
4032   // and:
4033   //
4034   // enum {
4035   //   a, // first line
4036   //    // second line
4037   //   b
4038   // };
4039   //
4040   // The second line comment doesn't continue the first in these cases:
4041   //
4042   //   // first line
4043   //  // second line
4044   //
4045   // and:
4046   //
4047   // int i; // first line
4048   // // second line
4049   //
4050   // and:
4051   //
4052   // do { // first line
4053   //   // second line
4054   //   int i;
4055   // } while (true);
4056   //
4057   // and:
4058   //
4059   // enum {
4060   //   a, // first line
4061   //   // second line
4062   // };
4063   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4064 
4065   // Scan for '{//'. If found, use the column of '{' as a min column for line
4066   // comment section continuation.
4067   const FormatToken *PreviousToken = nullptr;
4068   for (const UnwrappedLineNode &Node : Line.Tokens) {
4069     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4070         isLineComment(*Node.Tok)) {
4071       MinColumnToken = PreviousToken;
4072       break;
4073     }
4074     PreviousToken = Node.Tok;
4075 
4076     // Grab the last newline preceding a token in this unwrapped line.
4077     if (Node.Tok->NewlinesBefore > 0)
4078       MinColumnToken = Node.Tok;
4079   }
4080   if (PreviousToken && PreviousToken->is(tok::l_brace))
4081     MinColumnToken = PreviousToken;
4082 
4083   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4084                               MinColumnToken);
4085 }
4086 
4087 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4088   bool JustComments = Line->Tokens.empty();
4089   for (FormatToken *Tok : CommentsBeforeNextToken) {
4090     // Line comments that belong to the same line comment section are put on the
4091     // same line since later we might want to reflow content between them.
4092     // Additional fine-grained breaking of line comment sections is controlled
4093     // by the class BreakableLineCommentSection in case it is desirable to keep
4094     // several line comment sections in the same unwrapped line.
4095     //
4096     // FIXME: Consider putting separate line comment sections as children to the
4097     // unwrapped line instead.
4098     Tok->ContinuesLineCommentSection =
4099         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4100     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4101       addUnwrappedLine();
4102     pushToken(Tok);
4103   }
4104   if (NewlineBeforeNext && JustComments)
4105     addUnwrappedLine();
4106   CommentsBeforeNextToken.clear();
4107 }
4108 
4109 void UnwrappedLineParser::nextToken(int LevelDifference) {
4110   if (eof())
4111     return;
4112   flushComments(isOnNewLine(*FormatTok));
4113   pushToken(FormatTok);
4114   FormatToken *Previous = FormatTok;
4115   if (!Style.isJavaScript())
4116     readToken(LevelDifference);
4117   else
4118     readTokenWithJavaScriptASI();
4119   FormatTok->Previous = Previous;
4120 }
4121 
4122 void UnwrappedLineParser::distributeComments(
4123     const SmallVectorImpl<FormatToken *> &Comments,
4124     const FormatToken *NextTok) {
4125   // Whether or not a line comment token continues a line is controlled by
4126   // the method continuesLineCommentSection, with the following caveat:
4127   //
4128   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4129   // that each comment line from the trail is aligned with the next token, if
4130   // the next token exists. If a trail exists, the beginning of the maximal
4131   // trail is marked as a start of a new comment section.
4132   //
4133   // For example in this code:
4134   //
4135   // int a; // line about a
4136   //   // line 1 about b
4137   //   // line 2 about b
4138   //   int b;
4139   //
4140   // the two lines about b form a maximal trail, so there are two sections, the
4141   // first one consisting of the single comment "// line about a" and the
4142   // second one consisting of the next two comments.
4143   if (Comments.empty())
4144     return;
4145   bool ShouldPushCommentsInCurrentLine = true;
4146   bool HasTrailAlignedWithNextToken = false;
4147   unsigned StartOfTrailAlignedWithNextToken = 0;
4148   if (NextTok) {
4149     // We are skipping the first element intentionally.
4150     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4151       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4152         HasTrailAlignedWithNextToken = true;
4153         StartOfTrailAlignedWithNextToken = i;
4154       }
4155     }
4156   }
4157   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4158     FormatToken *FormatTok = Comments[i];
4159     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4160       FormatTok->ContinuesLineCommentSection = false;
4161     } else {
4162       FormatTok->ContinuesLineCommentSection =
4163           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4164     }
4165     if (!FormatTok->ContinuesLineCommentSection &&
4166         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4167       ShouldPushCommentsInCurrentLine = false;
4168     }
4169     if (ShouldPushCommentsInCurrentLine)
4170       pushToken(FormatTok);
4171     else
4172       CommentsBeforeNextToken.push_back(FormatTok);
4173   }
4174 }
4175 
4176 void UnwrappedLineParser::readToken(int LevelDifference) {
4177   SmallVector<FormatToken *, 1> Comments;
4178   bool PreviousWasComment = false;
4179   bool FirstNonCommentOnLine = false;
4180   do {
4181     FormatTok = Tokens->getNextToken();
4182     assert(FormatTok);
4183     while (FormatTok->getType() == TT_ConflictStart ||
4184            FormatTok->getType() == TT_ConflictEnd ||
4185            FormatTok->getType() == TT_ConflictAlternative) {
4186       if (FormatTok->getType() == TT_ConflictStart)
4187         conditionalCompilationStart(/*Unreachable=*/false);
4188       else if (FormatTok->getType() == TT_ConflictAlternative)
4189         conditionalCompilationAlternative();
4190       else if (FormatTok->getType() == TT_ConflictEnd)
4191         conditionalCompilationEnd();
4192       FormatTok = Tokens->getNextToken();
4193       FormatTok->MustBreakBefore = true;
4194     }
4195 
4196     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4197                                       const FormatToken &Tok,
4198                                       bool PreviousWasComment) {
4199       auto IsFirstOnLine = [](const FormatToken &Tok) {
4200         return Tok.HasUnescapedNewline || Tok.IsFirst;
4201       };
4202 
4203       // Consider preprocessor directives preceded by block comments as first
4204       // on line.
4205       if (PreviousWasComment)
4206         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4207       return IsFirstOnLine(Tok);
4208     };
4209 
4210     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4211         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4212     PreviousWasComment = FormatTok->is(tok::comment);
4213 
4214     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4215            FirstNonCommentOnLine) {
4216       distributeComments(Comments, FormatTok);
4217       Comments.clear();
4218       // If there is an unfinished unwrapped line, we flush the preprocessor
4219       // directives only after that unwrapped line was finished later.
4220       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4221       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4222       assert((LevelDifference >= 0 ||
4223               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4224              "LevelDifference makes Line->Level negative");
4225       Line->Level += LevelDifference;
4226       // Comments stored before the preprocessor directive need to be output
4227       // before the preprocessor directive, at the same level as the
4228       // preprocessor directive, as we consider them to apply to the directive.
4229       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4230           PPBranchLevel > 0) {
4231         Line->Level += PPBranchLevel;
4232       }
4233       flushComments(isOnNewLine(*FormatTok));
4234       parsePPDirective();
4235       PreviousWasComment = FormatTok->is(tok::comment);
4236       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4237           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4238     }
4239 
4240     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4241         !Line->InPPDirective) {
4242       continue;
4243     }
4244 
4245     if (!FormatTok->is(tok::comment)) {
4246       distributeComments(Comments, FormatTok);
4247       Comments.clear();
4248       return;
4249     }
4250 
4251     Comments.push_back(FormatTok);
4252   } while (!eof());
4253 
4254   distributeComments(Comments, nullptr);
4255   Comments.clear();
4256 }
4257 
4258 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4259   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4260   if (MustBreakBeforeNextToken) {
4261     Line->Tokens.back().Tok->MustBreakBefore = true;
4262     MustBreakBeforeNextToken = false;
4263   }
4264 }
4265 
4266 } // end namespace format
4267 } // end namespace clang
4268