1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 #include <utility>
24 
25 #define DEBUG_TYPE "format-parser"
26 
27 namespace clang {
28 namespace format {
29 
30 class FormatTokenSource {
31 public:
32   virtual ~FormatTokenSource() {}
33 
34   // Returns the next token in the token stream.
35   virtual FormatToken *getNextToken() = 0;
36 
37   // Returns the token preceding the token returned by the last call to
38   // getNextToken() in the token stream, or nullptr if no such token exists.
39   virtual FormatToken *getPreviousToken() = 0;
40 
41   // Returns the token that would be returned by the next call to
42   // getNextToken().
43   virtual FormatToken *peekNextToken() = 0;
44 
45   // Returns the token that would be returned after the next N calls to
46   // getNextToken(). N needs to be greater than zero, and small enough that
47   // there are still tokens. Check for tok::eof with N-1 before calling it with
48   // N.
49   virtual FormatToken *peekNextToken(int N) = 0;
50 
51   // Returns whether we are at the end of the file.
52   // This can be different from whether getNextToken() returned an eof token
53   // when the FormatTokenSource is a view on a part of the token stream.
54   virtual bool isEOF() = 0;
55 
56   // Gets the current position in the token stream, to be used by setPosition().
57   virtual unsigned getPosition() = 0;
58 
59   // Resets the token stream to the state it was in when getPosition() returned
60   // Position, and return the token at that position in the stream.
61   virtual FormatToken *setPosition(unsigned Position) = 0;
62 };
63 
64 namespace {
65 
66 class ScopedDeclarationState {
67 public:
68   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
69                          bool MustBeDeclaration)
70       : Line(Line), Stack(Stack) {
71     Line.MustBeDeclaration = MustBeDeclaration;
72     Stack.push_back(MustBeDeclaration);
73   }
74   ~ScopedDeclarationState() {
75     Stack.pop_back();
76     if (!Stack.empty())
77       Line.MustBeDeclaration = Stack.back();
78     else
79       Line.MustBeDeclaration = true;
80   }
81 
82 private:
83   UnwrappedLine &Line;
84   llvm::BitVector &Stack;
85 };
86 
87 static bool isLineComment(const FormatToken &FormatTok) {
88   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
89 }
90 
91 // Checks if \p FormatTok is a line comment that continues the line comment
92 // \p Previous. The original column of \p MinColumnToken is used to determine
93 // whether \p FormatTok is indented enough to the right to continue \p Previous.
94 static bool continuesLineComment(const FormatToken &FormatTok,
95                                  const FormatToken *Previous,
96                                  const FormatToken *MinColumnToken) {
97   if (!Previous || !MinColumnToken)
98     return false;
99   unsigned MinContinueColumn =
100       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
101   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
102          isLineComment(*Previous) &&
103          FormatTok.OriginalColumn >= MinContinueColumn;
104 }
105 
106 class ScopedMacroState : public FormatTokenSource {
107 public:
108   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
109                    FormatToken *&ResetToken)
110       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
111         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
112         Token(nullptr), PreviousToken(nullptr) {
113     FakeEOF.Tok.startToken();
114     FakeEOF.Tok.setKind(tok::eof);
115     TokenSource = this;
116     Line.Level = 0;
117     Line.InPPDirective = true;
118   }
119 
120   ~ScopedMacroState() override {
121     TokenSource = PreviousTokenSource;
122     ResetToken = Token;
123     Line.InPPDirective = false;
124     Line.Level = PreviousLineLevel;
125   }
126 
127   FormatToken *getNextToken() override {
128     // The \c UnwrappedLineParser guards against this by never calling
129     // \c getNextToken() after it has encountered the first eof token.
130     assert(!eof());
131     PreviousToken = Token;
132     Token = PreviousTokenSource->getNextToken();
133     if (eof())
134       return &FakeEOF;
135     return Token;
136   }
137 
138   FormatToken *getPreviousToken() override {
139     return PreviousTokenSource->getPreviousToken();
140   }
141 
142   FormatToken *peekNextToken() override {
143     if (eof())
144       return &FakeEOF;
145     return PreviousTokenSource->peekNextToken();
146   }
147 
148   FormatToken *peekNextToken(int N) override {
149     assert(N > 0);
150     if (eof())
151       return &FakeEOF;
152     return PreviousTokenSource->peekNextToken(N);
153   }
154 
155   bool isEOF() override { return PreviousTokenSource->isEOF(); }
156 
157   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
158 
159   FormatToken *setPosition(unsigned Position) override {
160     PreviousToken = nullptr;
161     Token = PreviousTokenSource->setPosition(Position);
162     return Token;
163   }
164 
165 private:
166   bool eof() {
167     return Token && Token->HasUnescapedNewline &&
168            !continuesLineComment(*Token, PreviousToken,
169                                  /*MinColumnToken=*/PreviousToken);
170   }
171 
172   FormatToken FakeEOF;
173   UnwrappedLine &Line;
174   FormatTokenSource *&TokenSource;
175   FormatToken *&ResetToken;
176   unsigned PreviousLineLevel;
177   FormatTokenSource *PreviousTokenSource;
178 
179   FormatToken *Token;
180   FormatToken *PreviousToken;
181 };
182 
183 } // end anonymous namespace
184 
185 class ScopedLineState {
186 public:
187   ScopedLineState(UnwrappedLineParser &Parser,
188                   bool SwitchToPreprocessorLines = false)
189       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
190     if (SwitchToPreprocessorLines)
191       Parser.CurrentLines = &Parser.PreprocessorDirectives;
192     else if (!Parser.Line->Tokens.empty())
193       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
194     PreBlockLine = std::move(Parser.Line);
195     Parser.Line = std::make_unique<UnwrappedLine>();
196     Parser.Line->Level = PreBlockLine->Level;
197     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
198   }
199 
200   ~ScopedLineState() {
201     if (!Parser.Line->Tokens.empty())
202       Parser.addUnwrappedLine();
203     assert(Parser.Line->Tokens.empty());
204     Parser.Line = std::move(PreBlockLine);
205     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
206       Parser.MustBreakBeforeNextToken = true;
207     Parser.CurrentLines = OriginalLines;
208   }
209 
210 private:
211   UnwrappedLineParser &Parser;
212 
213   std::unique_ptr<UnwrappedLine> PreBlockLine;
214   SmallVectorImpl<UnwrappedLine> *OriginalLines;
215 };
216 
217 class CompoundStatementIndenter {
218 public:
219   CompoundStatementIndenter(UnwrappedLineParser *Parser,
220                             const FormatStyle &Style, unsigned &LineLevel)
221       : CompoundStatementIndenter(Parser, LineLevel,
222                                   Style.BraceWrapping.AfterControlStatement,
223                                   Style.BraceWrapping.IndentBraces) {}
224   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
225                             bool WrapBrace, bool IndentBrace)
226       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
227     if (WrapBrace)
228       Parser->addUnwrappedLine();
229     if (IndentBrace)
230       ++LineLevel;
231   }
232   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
233 
234 private:
235   unsigned &LineLevel;
236   unsigned OldLineLevel;
237 };
238 
239 namespace {
240 
241 class IndexedTokenSource : public FormatTokenSource {
242 public:
243   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
244       : Tokens(Tokens), Position(-1) {}
245 
246   FormatToken *getNextToken() override {
247     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
248       LLVM_DEBUG({
249         llvm::dbgs() << "Next ";
250         dbgToken(Position);
251       });
252       return Tokens[Position];
253     }
254     ++Position;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Next ";
257       dbgToken(Position);
258     });
259     return Tokens[Position];
260   }
261 
262   FormatToken *getPreviousToken() override {
263     return Position > 0 ? Tokens[Position - 1] : nullptr;
264   }
265 
266   FormatToken *peekNextToken() override {
267     int Next = Position + 1;
268     LLVM_DEBUG({
269       llvm::dbgs() << "Peeking ";
270       dbgToken(Next);
271     });
272     return Tokens[Next];
273   }
274 
275   FormatToken *peekNextToken(int N) override {
276     assert(N > 0);
277     int Next = Position + N;
278     LLVM_DEBUG({
279       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
280       dbgToken(Next);
281     });
282     return Tokens[Next];
283   }
284 
285   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
286 
287   unsigned getPosition() override {
288     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
289     assert(Position >= 0);
290     return Position;
291   }
292 
293   FormatToken *setPosition(unsigned P) override {
294     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
295     Position = P;
296     return Tokens[Position];
297   }
298 
299   void reset() { Position = -1; }
300 
301 private:
302   void dbgToken(int Position, llvm::StringRef Indent = "") {
303     FormatToken *Tok = Tokens[Position];
304     llvm::dbgs() << Indent << "[" << Position
305                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
306                  << ", Macro: " << !!Tok->MacroCtx << "\n";
307   }
308 
309   ArrayRef<FormatToken *> Tokens;
310   int Position;
311 };
312 
313 } // end anonymous namespace
314 
315 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
316                                          const AdditionalKeywords &Keywords,
317                                          unsigned FirstStartColumn,
318                                          ArrayRef<FormatToken *> Tokens,
319                                          UnwrappedLineConsumer &Callback)
320     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
321       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
322       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
323       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
324       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
325                        ? IG_Rejected
326                        : IG_Inited),
327       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
328 
329 void UnwrappedLineParser::reset() {
330   PPBranchLevel = -1;
331   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
332                      ? IG_Rejected
333                      : IG_Inited;
334   IncludeGuardToken = nullptr;
335   Line.reset(new UnwrappedLine);
336   CommentsBeforeNextToken.clear();
337   FormatTok = nullptr;
338   MustBreakBeforeNextToken = false;
339   PreprocessorDirectives.clear();
340   CurrentLines = &Lines;
341   DeclarationScopeStack.clear();
342   NestedTooDeep.clear();
343   PPStack.clear();
344   Line->FirstStartColumn = FirstStartColumn;
345 }
346 
347 void UnwrappedLineParser::parse() {
348   IndexedTokenSource TokenSource(AllTokens);
349   Line->FirstStartColumn = FirstStartColumn;
350   do {
351     LLVM_DEBUG(llvm::dbgs() << "----\n");
352     reset();
353     Tokens = &TokenSource;
354     TokenSource.reset();
355 
356     readToken();
357     parseFile();
358 
359     // If we found an include guard then all preprocessor directives (other than
360     // the guard) are over-indented by one.
361     if (IncludeGuard == IG_Found) {
362       for (auto &Line : Lines)
363         if (Line.InPPDirective && Line.Level > 0)
364           --Line.Level;
365     }
366 
367     // Create line with eof token.
368     pushToken(FormatTok);
369     addUnwrappedLine();
370 
371     for (const UnwrappedLine &Line : Lines)
372       Callback.consumeUnwrappedLine(Line);
373 
374     Callback.finishRun();
375     Lines.clear();
376     while (!PPLevelBranchIndex.empty() &&
377            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
378       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
379       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
380     }
381     if (!PPLevelBranchIndex.empty()) {
382       ++PPLevelBranchIndex.back();
383       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
384       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
385     }
386   } while (!PPLevelBranchIndex.empty());
387 }
388 
389 void UnwrappedLineParser::parseFile() {
390   // The top-level context in a file always has declarations, except for pre-
391   // processor directives and JavaScript files.
392   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
393   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
394                                           MustBeDeclaration);
395   if (Style.Language == FormatStyle::LK_TextProto)
396     parseBracedList();
397   else
398     parseLevel();
399   // Make sure to format the remaining tokens.
400   //
401   // LK_TextProto is special since its top-level is parsed as the body of a
402   // braced list, which does not necessarily have natural line separators such
403   // as a semicolon. Comments after the last entry that have been determined to
404   // not belong to that line, as in:
405   //   key: value
406   //   // endfile comment
407   // do not have a chance to be put on a line of their own until this point.
408   // Here we add this newline before end-of-file comments.
409   if (Style.Language == FormatStyle::LK_TextProto &&
410       !CommentsBeforeNextToken.empty()) {
411     addUnwrappedLine();
412   }
413   flushComments(true);
414   addUnwrappedLine();
415 }
416 
417 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
418   do {
419     switch (FormatTok->Tok.getKind()) {
420     case tok::l_brace:
421       return;
422     default:
423       if (FormatTok->is(Keywords.kw_where)) {
424         addUnwrappedLine();
425         nextToken();
426         parseCSharpGenericTypeConstraint();
427         break;
428       }
429       nextToken();
430       break;
431     }
432   } while (!eof());
433 }
434 
435 void UnwrappedLineParser::parseCSharpAttribute() {
436   int UnpairedSquareBrackets = 1;
437   do {
438     switch (FormatTok->Tok.getKind()) {
439     case tok::r_square:
440       nextToken();
441       --UnpairedSquareBrackets;
442       if (UnpairedSquareBrackets == 0) {
443         addUnwrappedLine();
444         return;
445       }
446       break;
447     case tok::l_square:
448       ++UnpairedSquareBrackets;
449       nextToken();
450       break;
451     default:
452       nextToken();
453       break;
454     }
455   } while (!eof());
456 }
457 
458 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
459   if (!Lines.empty() && Lines.back().InPPDirective)
460     return true;
461 
462   const FormatToken *Previous = Tokens->getPreviousToken();
463   return Previous && Previous->is(tok::comment) &&
464          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
465 }
466 
/// \brief Parses a level, that is, the run of structural elements that ends at
/// the closing brace belonging to \p OpeningBrace or, when there is no opening
/// brace, at the end of the token stream.
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param NextLBracesType The type for left brace found in this level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     bool CanContainBracedList,
                                     TokenType NextLBracesType,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  // Braces nested inside a compound requirement are typed as braced lists;
  // otherwise nested levels get no predetermined brace type.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Always true when brace removal is disabled, which makes the r_brace case
  // below report "not a simple block".
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    // Attribute macros are skipped without being counted as a statement.
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched as if they were braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it. Once a do-while or a label
    // has been recorded, nullptr is passed for the corresponding out-flag so
    // that later elements cannot change it.
    auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
                         &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
      parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
                             &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown) {
        FormatTok->setFinalizedType(NextLBracesType);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList()) {
        continue;
      }
      // Otherwise the brace opens a nested block, which counts as one
      // statement of this level.
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
                 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
                 NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // The closing brace ends this level and is left for the caller. What
        // remains is to decide whether the level was a simple block.
        if (!Style.RemoveBracesLLVM ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // Not simple if this is a macro block end rather than a real '}', if
        // the level does not hold exactly one statement, or if a label, a
        // do-while, a comment or a preprocessor directive is involved.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment on the same line right after the '}' also rules it out.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // Without an opening brace the '}' is consumed and the current line is
      // finished.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments for the token following 'default', then
      // rewind the token source to where we were.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Only the first switch label of the level may raise the line level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}
608 
/// Looks ahead from the current '{' and assigns a block kind (BK_Block or
/// BK_BracedInit) to it and to the braces nested inside it, then rewinds the
/// token source to the position it had on entry.
/// \param ExpectClassBody If true, a ';' directly after the closing brace of
/// the outermost brace pair is not by itself taken as a sign of a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only braces that are still undecided are classified here.
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis);

          // An identifier after the '}' counts unless the token before the '}'
          // is a ';' or a brace.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          // A ';' after the '}' counts, except for the outermost brace pair
          // when a class body is expected.
          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the decision on both the closing and the opening brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only identifiers typed as statement macros are handled like the
      // statement tokens below.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these inside a still undecided brace pair marks it as a block.
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (FormatToken *LBrace : LBraceStack)
    if (LBrace->is(BK_Unknown))
      LBrace->setBlockKind(BK_Block);

  FormatTok = Tokens->setPosition(StoredPosition);
}
761 
// Folds the std::hash value of \p v into \p seed. The result depends on the
// previous seed, so the order in which values are combined matters.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>()(v);
  const std::size_t Mixed = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
767 
768 size_t UnwrappedLineParser::computePPHash() const {
769   size_t h = 0;
770   for (const auto &i : PPStack) {
771     hash_combine(h, size_t(i.Kind));
772     hash_combine(h, i.Line);
773   }
774   return h;
775 }
776 
777 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
778 // is not null, subtracts its length (plus the preceding space) when computing
779 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
780 // running the token annotator on it so that we can restore them afterward.
781 bool UnwrappedLineParser::mightFitOnOneLine(
782     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
783   const auto ColumnLimit = Style.ColumnLimit;
784   if (ColumnLimit == 0)
785     return true;
786 
787   auto &Tokens = ParsedLine.Tokens;
788   assert(!Tokens.empty());
789 
790   const auto *LastToken = Tokens.back().Tok;
791   assert(LastToken);
792 
793   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
794 
795   int Index = 0;
796   for (const auto &Token : Tokens) {
797     assert(Token.Tok);
798     auto &SavedToken = SavedTokens[Index++];
799     SavedToken.Tok = new FormatToken;
800     SavedToken.Tok->copyFrom(*Token.Tok);
801     SavedToken.Children = std::move(Token.Children);
802   }
803 
804   AnnotatedLine Line(ParsedLine);
805   assert(Line.Last == LastToken);
806 
807   TokenAnnotator Annotator(Style, Keywords);
808   Annotator.annotate(Line);
809   Annotator.calculateFormattingInformation(Line);
810 
811   auto Length = LastToken->TotalLength;
812   if (OpeningBrace) {
813     assert(OpeningBrace != Tokens.front().Tok);
814     Length -= OpeningBrace->TokenText.size() + 1;
815   }
816 
817   Index = 0;
818   for (auto &Token : Tokens) {
819     const auto &SavedToken = SavedTokens[Index++];
820     Token.Tok->copyFrom(*SavedToken.Tok);
821     Token.Children = std::move(SavedToken.Children);
822     delete SavedToken.Tok;
823   }
824 
825   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
826 }
827 
828 FormatToken *UnwrappedLineParser::parseBlock(
829     bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
830     IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
831     bool CanContainBracedList, TokenType NextLBracesType) {
832   auto HandleVerilogBlockLabel = [this]() {
833     // ":" name
834     if (Style.isVerilog() && FormatTok->is(tok::colon)) {
835       nextToken();
836       if (Keywords.isVerilogIdentifier(*FormatTok))
837         nextToken();
838     }
839   };
840 
841   assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
842           (Style.isVerilog() && Keywords.isVerilogBegin(*FormatTok))) &&
843          "'{' or macro block token expected");
844   FormatToken *Tok = FormatTok;
845   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
846   auto Index = CurrentLines->size();
847   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
848   FormatTok->setBlockKind(BK_Block);
849 
850   // For Whitesmiths mode, jump to the next level prior to skipping over the
851   // braces.
852   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
853     ++Line->Level;
854 
855   size_t PPStartHash = computePPHash();
856 
857   const unsigned InitialLevel = Line->Level;
858   nextToken(/*LevelDifference=*/AddLevels);
859   HandleVerilogBlockLabel();
860 
861   // Bail out if there are too many levels. Otherwise, the stack might overflow.
862   if (Line->Level > 300)
863     return nullptr;
864 
865   if (MacroBlock && FormatTok->is(tok::l_paren))
866     parseParens();
867 
868   size_t NbPreprocessorDirectives =
869       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
870   addUnwrappedLine();
871   size_t OpeningLineIndex =
872       CurrentLines->empty()
873           ? (UnwrappedLine::kInvalidIndex)
874           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
875 
876   // Whitesmiths is weird here. The brace needs to be indented for the namespace
877   // block, but the block itself may not be indented depending on the style
878   // settings. This allows the format to back up one level in those cases.
879   if (UnindentWhitesmithsBraces)
880     --Line->Level;
881 
882   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
883                                           MustBeDeclaration);
884   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
885     Line->Level += AddLevels;
886 
887   FormatToken *IfLBrace = nullptr;
888   const bool SimpleBlock =
889       parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);
890 
891   if (eof())
892     return IfLBrace;
893 
894   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
895                  : !FormatTok->is(tok::r_brace)) {
896     Line->Level = InitialLevel;
897     FormatTok->setBlockKind(BK_Block);
898     return IfLBrace;
899   }
900 
901   auto RemoveBraces = [=]() mutable {
902     if (!SimpleBlock)
903       return false;
904     assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
905     assert(FormatTok->is(tok::r_brace));
906     const bool WrappedOpeningBrace = !Tok->Previous;
907     if (WrappedOpeningBrace && FollowedByComment)
908       return false;
909     const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
910     if (KeepBraces && !HasRequiredIfBraces)
911       return false;
912     if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
913       const FormatToken *Previous = Tokens->getPreviousToken();
914       assert(Previous);
915       if (Previous->is(tok::r_brace) && !Previous->Optional)
916         return false;
917     }
918     assert(!CurrentLines->empty());
919     auto &LastLine = CurrentLines->back();
920     if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
921       return false;
922     if (Tok->is(TT_ElseLBrace))
923       return true;
924     if (WrappedOpeningBrace) {
925       assert(Index > 0);
926       --Index; // The line above the wrapped l_brace.
927       Tok = nullptr;
928     }
929     return mightFitOnOneLine((*CurrentLines)[Index], Tok);
930   };
931   if (RemoveBraces()) {
932     Tok->MatchingParen = FormatTok;
933     FormatTok->MatchingParen = Tok;
934   }
935 
936   size_t PPEndHash = computePPHash();
937 
938   // Munch the closing brace.
939   nextToken(/*LevelDifference=*/-AddLevels);
940   HandleVerilogBlockLabel();
941 
942   if (MacroBlock && FormatTok->is(tok::l_paren))
943     parseParens();
944 
945   if (FormatTok->is(tok::kw_noexcept)) {
946     // A noexcept in a requires expression.
947     nextToken();
948   }
949 
950   if (FormatTok->is(tok::arrow)) {
951     // Following the } or noexcept we can find a trailing return type arrow
952     // as part of an implicit conversion constraint.
953     nextToken();
954     parseStructuralElement();
955   }
956 
957   if (MunchSemi && FormatTok->is(tok::semi))
958     nextToken();
959 
960   Line->Level = InitialLevel;
961 
962   if (PPStartHash == PPEndHash) {
963     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
964     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
965       // Update the opening line to add the forward reference as well
966       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
967           CurrentLines->size() - 1;
968     }
969   }
970 
971   return IfLBrace;
972 }
973 
974 static bool isGoogScope(const UnwrappedLine &Line) {
975   // FIXME: Closure-library specific stuff should not be hard-coded but be
976   // configurable.
977   if (Line.Tokens.size() < 4)
978     return false;
979   auto I = Line.Tokens.begin();
980   if (I->Tok->TokenText != "goog")
981     return false;
982   ++I;
983   if (I->Tok->isNot(tok::period))
984     return false;
985   ++I;
986   if (I->Tok->TokenText != "scope")
987     return false;
988   ++I;
989   return I->Tok->is(tok::l_paren);
990 }
991 
992 static bool isIIFE(const UnwrappedLine &Line,
993                    const AdditionalKeywords &Keywords) {
994   // Look for the start of an immediately invoked anonymous function.
995   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
996   // This is commonly done in JavaScript to create a new, anonymous scope.
997   // Example: (function() { ... })()
998   if (Line.Tokens.size() < 3)
999     return false;
1000   auto I = Line.Tokens.begin();
1001   if (I->Tok->isNot(tok::l_paren))
1002     return false;
1003   ++I;
1004   if (I->Tok->isNot(Keywords.kw_function))
1005     return false;
1006   ++I;
1007   return I->Tok->is(tok::l_paren);
1008 }
1009 
1010 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
1011                                    const FormatToken &InitialToken) {
1012   tok::TokenKind Kind = InitialToken.Tok.getKind();
1013   if (InitialToken.is(TT_NamespaceMacro))
1014     Kind = tok::kw_namespace;
1015 
1016   switch (Kind) {
1017   case tok::kw_namespace:
1018     return Style.BraceWrapping.AfterNamespace;
1019   case tok::kw_class:
1020     return Style.BraceWrapping.AfterClass;
1021   case tok::kw_union:
1022     return Style.BraceWrapping.AfterUnion;
1023   case tok::kw_struct:
1024     return Style.BraceWrapping.AfterStruct;
1025   case tok::kw_enum:
1026     return Style.BraceWrapping.AfterEnum;
1027   default:
1028     return false;
1029   }
1030 }
1031 
1032 void UnwrappedLineParser::parseChildBlock(
1033     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
1034   assert(FormatTok->is(tok::l_brace));
1035   FormatTok->setBlockKind(BK_Block);
1036   const FormatToken *OpeningBrace = FormatTok;
1037   nextToken();
1038   {
1039     bool SkipIndent = (Style.isJavaScript() &&
1040                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
1041     ScopedLineState LineState(*this);
1042     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
1043                                             /*MustBeDeclaration=*/false);
1044     Line->Level += SkipIndent ? 0 : 1;
1045     parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
1046     flushComments(isOnNewLine(*FormatTok));
1047     Line->Level -= SkipIndent ? 0 : 1;
1048   }
1049   nextToken();
1050 }
1051 
1052 void UnwrappedLineParser::parsePPDirective() {
1053   assert(FormatTok->is(tok::hash) && "'#' expected");
1054   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1055 
1056   nextToken();
1057 
1058   if (!FormatTok->Tok.getIdentifierInfo()) {
1059     parsePPUnknown();
1060     return;
1061   }
1062 
1063   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1064   case tok::pp_define:
1065     parsePPDefine();
1066     return;
1067   case tok::pp_if:
1068     parsePPIf(/*IfDef=*/false);
1069     break;
1070   case tok::pp_ifdef:
1071   case tok::pp_ifndef:
1072     parsePPIf(/*IfDef=*/true);
1073     break;
1074   case tok::pp_else:
1075     parsePPElse();
1076     break;
1077   case tok::pp_elifdef:
1078   case tok::pp_elifndef:
1079   case tok::pp_elif:
1080     parsePPElIf();
1081     break;
1082   case tok::pp_endif:
1083     parsePPEndIf();
1084     break;
1085   default:
1086     parsePPUnknown();
1087     break;
1088   }
1089 }
1090 
1091 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1092   size_t Line = CurrentLines->size();
1093   if (CurrentLines == &PreprocessorDirectives)
1094     Line += Lines.size();
1095 
1096   if (Unreachable ||
1097       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1098     PPStack.push_back({PP_Unreachable, Line});
1099   } else {
1100     PPStack.push_back({PP_Conditional, Line});
1101   }
1102 }
1103 
1104 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1105   ++PPBranchLevel;
1106   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1107   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1108     PPLevelBranchIndex.push_back(0);
1109     PPLevelBranchCount.push_back(0);
1110   }
1111   PPChainBranchIndex.push(0);
1112   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1113   conditionalCompilationCondition(Unreachable || Skip);
1114 }
1115 
1116 void UnwrappedLineParser::conditionalCompilationAlternative() {
1117   if (!PPStack.empty())
1118     PPStack.pop_back();
1119   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1120   if (!PPChainBranchIndex.empty())
1121     ++PPChainBranchIndex.top();
1122   conditionalCompilationCondition(
1123       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1124       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1125 }
1126 
1127 void UnwrappedLineParser::conditionalCompilationEnd() {
1128   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1129   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1130     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1131       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1132   }
1133   // Guard against #endif's without #if.
1134   if (PPBranchLevel > -1)
1135     --PPBranchLevel;
1136   if (!PPChainBranchIndex.empty())
1137     PPChainBranchIndex.pop();
1138   if (!PPStack.empty())
1139     PPStack.pop_back();
1140 }
1141 
1142 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1143   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1144   nextToken();
1145   bool Unreachable = false;
1146   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1147     Unreachable = true;
1148   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1149     Unreachable = true;
1150   conditionalCompilationStart(Unreachable);
1151   FormatToken *IfCondition = FormatTok;
1152   // If there's a #ifndef on the first line, and the only lines before it are
1153   // comments, it could be an include guard.
1154   bool MaybeIncludeGuard = IfNDef;
1155   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1156     for (auto &Line : Lines) {
1157       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1158         MaybeIncludeGuard = false;
1159         IncludeGuard = IG_Rejected;
1160         break;
1161       }
1162     }
1163   }
1164   --PPBranchLevel;
1165   parsePPUnknown();
1166   ++PPBranchLevel;
1167   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1168     IncludeGuard = IG_IfNdefed;
1169     IncludeGuardToken = IfCondition;
1170   }
1171 }
1172 
1173 void UnwrappedLineParser::parsePPElse() {
1174   // If a potential include guard has an #else, it's not an include guard.
1175   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1176     IncludeGuard = IG_Rejected;
1177   conditionalCompilationAlternative();
1178   if (PPBranchLevel > -1)
1179     --PPBranchLevel;
1180   parsePPUnknown();
1181   ++PPBranchLevel;
1182 }
1183 
1184 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1185 
1186 void UnwrappedLineParser::parsePPEndIf() {
1187   conditionalCompilationEnd();
1188   parsePPUnknown();
1189   // If the #endif of a potential include guard is the last thing in the file,
1190   // then we found an include guard.
1191   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1192       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1193     IncludeGuard = IG_Found;
1194   }
1195 }
1196 
1197 void UnwrappedLineParser::parsePPDefine() {
1198   nextToken();
1199 
1200   if (!FormatTok->Tok.getIdentifierInfo()) {
1201     IncludeGuard = IG_Rejected;
1202     IncludeGuardToken = nullptr;
1203     parsePPUnknown();
1204     return;
1205   }
1206 
1207   if (IncludeGuard == IG_IfNdefed &&
1208       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1209     IncludeGuard = IG_Defined;
1210     IncludeGuardToken = nullptr;
1211     for (auto &Line : Lines) {
1212       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1213         IncludeGuard = IG_Rejected;
1214         break;
1215       }
1216     }
1217   }
1218 
1219   // In the context of a define, even keywords should be treated as normal
1220   // identifiers. Setting the kind to identifier is not enough, because we need
1221   // to treat additional keywords like __except as well, which are already
1222   // identifiers. Setting the identifier info to null interferes with include
1223   // guard processing above, and changes preprocessing nesting.
1224   FormatTok->Tok.setKind(tok::identifier);
1225   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1226   nextToken();
1227   if (FormatTok->Tok.getKind() == tok::l_paren &&
1228       !FormatTok->hasWhitespaceBefore()) {
1229     parseParens();
1230   }
1231   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1232     Line->Level += PPBranchLevel + 1;
1233   addUnwrappedLine();
1234   ++Line->Level;
1235 
1236   // Errors during a preprocessor directive can only affect the layout of the
1237   // preprocessor directive, and thus we ignore them. An alternative approach
1238   // would be to use the same approach we use on the file level (no
1239   // re-indentation if there was a structural error) within the macro
1240   // definition.
1241   parseFile();
1242 }
1243 
1244 void UnwrappedLineParser::parsePPUnknown() {
1245   do {
1246     nextToken();
1247   } while (!eof());
1248   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1249     Line->Level += PPBranchLevel + 1;
1250   addUnwrappedLine();
1251 }
1252 
1253 // Here we exclude certain tokens that are not usually the first token in an
1254 // unwrapped line. This is used in attempt to distinguish macro calls without
1255 // trailing semicolons from other constructs split to several lines.
1256 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1257   // Semicolon can be a null-statement, l_square can be a start of a macro or
1258   // a C++11 attribute, but this doesn't seem to be common.
1259   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1260          Tok.isNot(TT_AttributeSquare) &&
1261          // Tokens that can only be used as binary operators and a part of
1262          // overloaded operator names.
1263          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1264          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1265          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1266          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1267          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1268          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1269          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1270          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1271          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1272          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1273          Tok.isNot(tok::lesslessequal) &&
1274          // Colon is used in labels, base class lists, initializer lists,
1275          // range-based for loops, ternary operator, but should never be the
1276          // first token in an unwrapped line.
1277          Tok.isNot(tok::colon) &&
1278          // 'noexcept' is a trailing annotation.
1279          Tok.isNot(tok::kw_noexcept);
1280 }
1281 
1282 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1283                           const FormatToken *FormatTok) {
1284   // FIXME: This returns true for C/C++ keywords like 'struct'.
1285   return FormatTok->is(tok::identifier) &&
1286          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1287           !FormatTok->isOneOf(
1288               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1289               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1290               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1291               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1292               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1293               Keywords.kw_instanceof, Keywords.kw_interface,
1294               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1295 }
1296 
1297 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1298                                  const FormatToken *FormatTok) {
1299   return FormatTok->Tok.isLiteral() ||
1300          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1301          mustBeJSIdent(Keywords, FormatTok);
1302 }
1303 
1304 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1305 // when encountered after a value (see mustBeJSIdentOrValue).
1306 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1307                            const FormatToken *FormatTok) {
1308   return FormatTok->isOneOf(
1309       tok::kw_return, Keywords.kw_yield,
1310       // conditionals
1311       tok::kw_if, tok::kw_else,
1312       // loops
1313       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1314       // switch/case
1315       tok::kw_switch, tok::kw_case,
1316       // exceptions
1317       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1318       // declaration
1319       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1320       Keywords.kw_async, Keywords.kw_function,
1321       // import/export
1322       Keywords.kw_import, tok::kw_export);
1323 }
1324 
1325 // Checks whether a token is a type in K&R C (aka C78).
1326 static bool isC78Type(const FormatToken &Tok) {
1327   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1328                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1329                      tok::identifier);
1330 }
1331 
1332 // This function checks whether a token starts the first parameter declaration
1333 // in a K&R C (aka C78) function definition, e.g.:
1334 //   int f(a, b)
1335 //   short a, b;
1336 //   {
1337 //      return a + b;
1338 //   }
1339 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1340                                const FormatToken *FuncName) {
1341   assert(Tok);
1342   assert(Next);
1343   assert(FuncName);
1344 
1345   if (FuncName->isNot(tok::identifier))
1346     return false;
1347 
1348   const FormatToken *Prev = FuncName->Previous;
1349   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1350     return false;
1351 
1352   if (!isC78Type(*Tok) &&
1353       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1354     return false;
1355   }
1356 
1357   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1358     return false;
1359 
1360   Tok = Tok->Previous;
1361   if (!Tok || Tok->isNot(tok::r_paren))
1362     return false;
1363 
1364   Tok = Tok->Previous;
1365   if (!Tok || Tok->isNot(tok::identifier))
1366     return false;
1367 
1368   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1369 }
1370 
1371 void UnwrappedLineParser::parseModuleImport() {
1372   nextToken();
1373   while (!eof()) {
1374     if (FormatTok->is(tok::colon)) {
1375       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1376     }
1377     // Handle import <foo/bar.h> as we would an include statement.
1378     else if (FormatTok->is(tok::less)) {
1379       nextToken();
1380       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1381         // Mark tokens up to the trailing line comments as implicit string
1382         // literals.
1383         if (FormatTok->isNot(tok::comment) &&
1384             !FormatTok->TokenText.startswith("//")) {
1385           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1386         }
1387         nextToken();
1388       }
1389     }
1390     if (FormatTok->is(tok::semi)) {
1391       nextToken();
1392       break;
1393     }
1394     nextToken();
1395   }
1396 
1397   addUnwrappedLine();
1398 }
1399 
1400 // readTokenWithJavaScriptASI reads the next token and terminates the current
1401 // line if JavaScript Automatic Semicolon Insertion must
1402 // happen between the current token and the next token.
1403 //
1404 // This method is conservative - it cannot cover all edge cases of JavaScript,
1405 // but only aims to correctly handle certain well known cases. It *must not*
1406 // return true in speculative cases.
1407 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1408   FormatToken *Previous = FormatTok;
1409   readToken();
1410   FormatToken *Next = FormatTok;
1411 
1412   bool IsOnSameLine =
1413       CommentsBeforeNextToken.empty()
1414           ? Next->NewlinesBefore == 0
1415           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1416   if (IsOnSameLine)
1417     return;
1418 
1419   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1420   bool PreviousStartsTemplateExpr =
1421       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1422   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1423     // If the line contains an '@' sign, the previous token might be an
1424     // annotation, which can precede another identifier/value.
1425     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1426       return LineNode.Tok->is(tok::at);
1427     });
1428     if (HasAt)
1429       return;
1430   }
1431   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1432     return addUnwrappedLine();
1433   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1434   bool NextEndsTemplateExpr =
1435       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1436   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1437       (PreviousMustBeValue ||
1438        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1439                          tok::minusminus))) {
1440     return addUnwrappedLine();
1441   }
1442   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1443       isJSDeclOrStmt(Keywords, Next)) {
1444     return addUnwrappedLine();
1445   }
1446 }
1447 
1448 void UnwrappedLineParser::parseStructuralElement(
1449     bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
1450     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1451   if (Style.Language == FormatStyle::LK_TableGen &&
1452       FormatTok->is(tok::pp_include)) {
1453     nextToken();
1454     if (FormatTok->is(tok::string_literal))
1455       nextToken();
1456     addUnwrappedLine();
1457     return;
1458   }
1459   switch (FormatTok->Tok.getKind()) {
1460   case tok::kw_asm:
1461     nextToken();
1462     if (FormatTok->is(tok::l_brace)) {
1463       FormatTok->setFinalizedType(TT_InlineASMBrace);
1464       nextToken();
1465       while (FormatTok && FormatTok->isNot(tok::eof)) {
1466         if (FormatTok->is(tok::r_brace)) {
1467           FormatTok->setFinalizedType(TT_InlineASMBrace);
1468           nextToken();
1469           addUnwrappedLine();
1470           break;
1471         }
1472         FormatTok->Finalized = true;
1473         nextToken();
1474       }
1475     }
1476     break;
1477   case tok::kw_namespace:
1478     parseNamespace();
1479     return;
1480   case tok::kw_public:
1481   case tok::kw_protected:
1482   case tok::kw_private:
1483     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1484         Style.isCSharp()) {
1485       nextToken();
1486     } else {
1487       parseAccessSpecifier();
1488     }
1489     return;
1490   case tok::kw_if: {
1491     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1492       // field/method declaration.
1493       break;
1494     }
1495     FormatToken *Tok = parseIfThenElse(IfKind);
1496     if (IfLeftBrace)
1497       *IfLeftBrace = Tok;
1498     return;
1499   }
1500   case tok::kw_for:
1501   case tok::kw_while:
1502     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1503       // field/method declaration.
1504       break;
1505     }
1506     parseForOrWhileLoop();
1507     return;
1508   case tok::kw_do:
1509     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1510       // field/method declaration.
1511       break;
1512     }
1513     parseDoWhile();
1514     if (HasDoWhile)
1515       *HasDoWhile = true;
1516     return;
1517   case tok::kw_switch:
1518     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1519       // 'switch: string' field declaration.
1520       break;
1521     }
1522     parseSwitch();
1523     return;
1524   case tok::kw_default:
1525     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1526       // 'default: string' field declaration.
1527       break;
1528     }
1529     nextToken();
1530     if (FormatTok->is(tok::colon)) {
1531       parseLabel();
1532       return;
1533     }
1534     // e.g. "default void f() {}" in a Java interface.
1535     break;
1536   case tok::kw_case:
1537     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1538       // 'case: string' field declaration.
1539       nextToken();
1540       break;
1541     }
1542     parseCaseLabel();
1543     return;
1544   case tok::kw_try:
1545   case tok::kw___try:
1546     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1547       // field/method declaration.
1548       break;
1549     }
1550     parseTryCatch();
1551     return;
1552   case tok::kw_extern:
1553     nextToken();
1554     if (FormatTok->is(tok::string_literal)) {
1555       nextToken();
1556       if (FormatTok->is(tok::l_brace)) {
1557         if (Style.BraceWrapping.AfterExternBlock)
1558           addUnwrappedLine();
1559         // Either we indent or for backwards compatibility we follow the
1560         // AfterExternBlock style.
1561         unsigned AddLevels =
1562             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1563                     (Style.BraceWrapping.AfterExternBlock &&
1564                      Style.IndentExternBlock ==
1565                          FormatStyle::IEBS_AfterExternBlock)
1566                 ? 1u
1567                 : 0u;
1568         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1569         addUnwrappedLine();
1570         return;
1571       }
1572     }
1573     break;
1574   case tok::kw_export:
1575     if (Style.isJavaScript()) {
1576       parseJavaScriptEs6ImportExport();
1577       return;
1578     }
1579     if (!Style.isCpp())
1580       break;
1581     // Handle C++ "(inline|export) namespace".
1582     LLVM_FALLTHROUGH;
1583   case tok::kw_inline:
1584     nextToken();
1585     if (FormatTok->is(tok::kw_namespace)) {
1586       parseNamespace();
1587       return;
1588     }
1589     break;
1590   case tok::identifier:
1591     if (FormatTok->is(TT_ForEachMacro)) {
1592       parseForOrWhileLoop();
1593       return;
1594     }
1595     if (FormatTok->is(TT_MacroBlockBegin)) {
1596       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1597                  /*MunchSemi=*/false);
1598       return;
1599     }
1600     if (FormatTok->is(Keywords.kw_import)) {
1601       if (Style.isJavaScript()) {
1602         parseJavaScriptEs6ImportExport();
1603         return;
1604       }
1605       if (Style.Language == FormatStyle::LK_Proto) {
1606         nextToken();
1607         if (FormatTok->is(tok::kw_public))
1608           nextToken();
1609         if (!FormatTok->is(tok::string_literal))
1610           return;
1611         nextToken();
1612         if (FormatTok->is(tok::semi))
1613           nextToken();
1614         addUnwrappedLine();
1615         return;
1616       }
1617       if (Style.isCpp()) {
1618         parseModuleImport();
1619         return;
1620       }
1621     }
1622     if (Style.isCpp() &&
1623         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1624                            Keywords.kw_slots, Keywords.kw_qslots)) {
1625       nextToken();
1626       if (FormatTok->is(tok::colon)) {
1627         nextToken();
1628         addUnwrappedLine();
1629         return;
1630       }
1631     }
1632     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1633       parseStatementMacro();
1634       return;
1635     }
1636     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1637       parseNamespace();
1638       return;
1639     }
1640     // In all other cases, parse the declaration.
1641     break;
1642   default:
1643     break;
1644   }
1645   do {
1646     const FormatToken *Previous = FormatTok->Previous;
1647     switch (FormatTok->Tok.getKind()) {
1648     case tok::at:
1649       nextToken();
1650       if (FormatTok->is(tok::l_brace)) {
1651         nextToken();
1652         parseBracedList();
1653         break;
1654       } else if (Style.Language == FormatStyle::LK_Java &&
1655                  FormatTok->is(Keywords.kw_interface)) {
1656         nextToken();
1657         break;
1658       }
1659       switch (FormatTok->Tok.getObjCKeywordID()) {
1660       case tok::objc_public:
1661       case tok::objc_protected:
1662       case tok::objc_package:
1663       case tok::objc_private:
1664         return parseAccessSpecifier();
1665       case tok::objc_interface:
1666       case tok::objc_implementation:
1667         return parseObjCInterfaceOrImplementation();
1668       case tok::objc_protocol:
1669         if (parseObjCProtocol())
1670           return;
1671         break;
1672       case tok::objc_end:
1673         return; // Handled by the caller.
1674       case tok::objc_optional:
1675       case tok::objc_required:
1676         nextToken();
1677         addUnwrappedLine();
1678         return;
1679       case tok::objc_autoreleasepool:
1680         nextToken();
1681         if (FormatTok->is(tok::l_brace)) {
1682           if (Style.BraceWrapping.AfterControlStatement ==
1683               FormatStyle::BWACS_Always) {
1684             addUnwrappedLine();
1685           }
1686           parseBlock();
1687         }
1688         addUnwrappedLine();
1689         return;
1690       case tok::objc_synchronized:
1691         nextToken();
1692         if (FormatTok->is(tok::l_paren)) {
1693           // Skip synchronization object
1694           parseParens();
1695         }
1696         if (FormatTok->is(tok::l_brace)) {
1697           if (Style.BraceWrapping.AfterControlStatement ==
1698               FormatStyle::BWACS_Always) {
1699             addUnwrappedLine();
1700           }
1701           parseBlock();
1702         }
1703         addUnwrappedLine();
1704         return;
1705       case tok::objc_try:
1706         // This branch isn't strictly necessary (the kw_try case below would
1707         // do this too after the tok::at is parsed above).  But be explicit.
1708         parseTryCatch();
1709         return;
1710       default:
1711         break;
1712       }
1713       break;
1714     case tok::kw_concept:
1715       parseConcept();
1716       return;
1717     case tok::kw_requires: {
1718       if (Style.isCpp()) {
1719         bool ParsedClause = parseRequires();
1720         if (ParsedClause)
1721           return;
1722       } else {
1723         nextToken();
1724       }
1725       break;
1726     }
1727     case tok::kw_enum:
1728       // Ignore if this is part of "template <enum ...".
1729       if (Previous && Previous->is(tok::less)) {
1730         nextToken();
1731         break;
1732       }
1733 
1734       // parseEnum falls through and does not yet add an unwrapped line as an
1735       // enum definition can start a structural element.
1736       if (!parseEnum())
1737         break;
1738       // This only applies for C++.
1739       if (!Style.isCpp()) {
1740         addUnwrappedLine();
1741         return;
1742       }
1743       break;
1744     case tok::kw_typedef:
1745       nextToken();
1746       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1747                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1748                              Keywords.kw_CF_CLOSED_ENUM,
1749                              Keywords.kw_NS_CLOSED_ENUM)) {
1750         parseEnum();
1751       }
1752       break;
1753     case tok::kw_struct:
1754     case tok::kw_union:
1755     case tok::kw_class:
1756       if (parseStructLike())
1757         return;
1758       break;
1759     case tok::period:
1760       nextToken();
1761       // In Java, classes have an implicit static member "class".
1762       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1763           FormatTok->is(tok::kw_class)) {
1764         nextToken();
1765       }
1766       if (Style.isJavaScript() && FormatTok &&
1767           FormatTok->Tok.getIdentifierInfo()) {
1768         // JavaScript only has pseudo keywords, all keywords are allowed to
1769         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1770         nextToken();
1771       }
1772       break;
1773     case tok::semi:
1774       nextToken();
1775       addUnwrappedLine();
1776       return;
1777     case tok::r_brace:
1778       addUnwrappedLine();
1779       return;
1780     case tok::l_paren: {
1781       parseParens();
1782       // Break the unwrapped line if a K&R C function definition has a parameter
1783       // declaration.
1784       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1785         break;
1786       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1787         addUnwrappedLine();
1788         return;
1789       }
1790       break;
1791     }
1792     case tok::kw_operator:
1793       nextToken();
1794       if (FormatTok->isBinaryOperator())
1795         nextToken();
1796       break;
1797     case tok::caret:
1798       nextToken();
1799       if (FormatTok->Tok.isAnyIdentifier() ||
1800           FormatTok->isSimpleTypeSpecifier()) {
1801         nextToken();
1802       }
1803       if (FormatTok->is(tok::l_paren))
1804         parseParens();
1805       if (FormatTok->is(tok::l_brace))
1806         parseChildBlock();
1807       break;
1808     case tok::l_brace:
1809       if (NextLBracesType != TT_Unknown)
1810         FormatTok->setFinalizedType(NextLBracesType);
1811       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1812         // A block outside of parentheses must be the last part of a
1813         // structural element.
1814         // FIXME: Figure out cases where this is not true, and add projections
1815         // for them (the one we know is missing are lambdas).
1816         if (Style.Language == FormatStyle::LK_Java &&
1817             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1818           // If necessary, we could set the type to something different than
1819           // TT_FunctionLBrace.
1820           if (Style.BraceWrapping.AfterControlStatement ==
1821               FormatStyle::BWACS_Always) {
1822             addUnwrappedLine();
1823           }
1824         } else if (Style.BraceWrapping.AfterFunction) {
1825           addUnwrappedLine();
1826         }
1827         if (!Line->InPPDirective)
1828           FormatTok->setFinalizedType(TT_FunctionLBrace);
1829         parseBlock();
1830         addUnwrappedLine();
1831         return;
1832       }
1833       // Otherwise this was a braced init list, and the structural
1834       // element continues.
1835       break;
1836     case tok::kw_try:
1837       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1838         // field/method declaration.
1839         nextToken();
1840         break;
1841       }
1842       // We arrive here when parsing function-try blocks.
1843       if (Style.BraceWrapping.AfterFunction)
1844         addUnwrappedLine();
1845       parseTryCatch();
1846       return;
1847     case tok::identifier: {
1848       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1849           Line->MustBeDeclaration) {
1850         addUnwrappedLine();
1851         parseCSharpGenericTypeConstraint();
1852         break;
1853       }
1854       if (FormatTok->is(TT_MacroBlockEnd)) {
1855         addUnwrappedLine();
1856         return;
1857       }
1858 
1859       // Function declarations (as opposed to function expressions) are parsed
1860       // on their own unwrapped line by continuing this loop. Function
1861       // expressions (functions that are not on their own line) must not create
1862       // a new unwrapped line, so they are special cased below.
1863       size_t TokenCount = Line->Tokens.size();
1864       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1865           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1866                                                      Keywords.kw_async)))) {
1867         tryToParseJSFunction();
1868         break;
1869       }
1870       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1871           FormatTok->is(Keywords.kw_interface)) {
1872         if (Style.isJavaScript()) {
1873           // In JavaScript/TypeScript, "interface" can be used as a standalone
1874           // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
1876           // interface declaration.
1877           unsigned StoredPosition = Tokens->getPosition();
1878           FormatToken *Next = Tokens->getNextToken();
1879           FormatTok = Tokens->setPosition(StoredPosition);
1880           if (!mustBeJSIdent(Keywords, Next)) {
1881             nextToken();
1882             break;
1883           }
1884         }
1885         parseRecord();
1886         addUnwrappedLine();
1887         return;
1888       }
1889 
1890       if (FormatTok->is(Keywords.kw_interface)) {
1891         if (parseStructLike())
1892           return;
1893         break;
1894       }
1895 
1896       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1897         parseStatementMacro();
1898         return;
1899       }
1900 
1901       // See if the following token should start a new unwrapped line.
1902       StringRef Text = FormatTok->TokenText;
1903 
1904       FormatToken *PreviousToken = FormatTok;
1905       nextToken();
1906 
1907       // JS doesn't have macros, and within classes colons indicate fields, not
1908       // labels.
1909       if (Style.isJavaScript())
1910         break;
1911 
1912       auto OneTokenSoFar = [&]() {
1913         const UnwrappedLineNode *Tok = &Line->Tokens.front(),
1914                                 *End = Tok + Line->Tokens.size();
1915         while (Tok != End && Tok->Tok->is(tok::comment))
1916           ++Tok;
1917         // In Verilog, macro invocations start with a backtick which the code
1918         // treats as a hash.  Skip it.
1919         if (Style.isVerilog() && Tok != End && Tok->Tok->is(tok::hash))
1920           ++Tok;
1921         return End - Tok == 1;
1922       };
1923       if (OneTokenSoFar()) {
1924         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1925           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1926           parseLabel(!Style.IndentGotoLabels);
1927           if (HasLabel)
1928             *HasLabel = true;
1929           return;
1930         }
1931         // Recognize function-like macro usages without trailing semicolon as
1932         // well as free-standing macros like Q_OBJECT.
1933         bool FunctionLike = FormatTok->is(tok::l_paren);
1934         if (FunctionLike)
1935           parseParens();
1936 
1937         bool FollowedByNewline =
1938             CommentsBeforeNextToken.empty()
1939                 ? FormatTok->NewlinesBefore > 0
1940                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1941 
1942         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1943             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1944           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1945           addUnwrappedLine();
1946           return;
1947         }
1948       }
1949       break;
1950     }
1951     case tok::equal:
1952       if ((Style.isJavaScript() || Style.isCSharp()) &&
1953           FormatTok->is(TT_FatArrow)) {
1954         tryToParseChildBlock();
1955         break;
1956       }
1957 
1958       nextToken();
1959       if (FormatTok->is(tok::l_brace)) {
1960         // Block kind should probably be set to BK_BracedInit for any language.
1961         // C# needs this change to ensure that array initialisers and object
1962         // initialisers are indented the same way.
1963         if (Style.isCSharp())
1964           FormatTok->setBlockKind(BK_BracedInit);
1965         nextToken();
1966         parseBracedList();
1967       } else if (Style.Language == FormatStyle::LK_Proto &&
1968                  FormatTok->is(tok::less)) {
1969         nextToken();
1970         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1971                         /*ClosingBraceKind=*/tok::greater);
1972       }
1973       break;
1974     case tok::l_square:
1975       parseSquare();
1976       break;
1977     case tok::kw_new:
1978       parseNew();
1979       break;
1980     case tok::kw_case:
1981       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1982         // 'case: string' field declaration.
1983         nextToken();
1984         break;
1985       }
1986       parseCaseLabel();
1987       break;
1988     default:
1989       nextToken();
1990       break;
1991     }
1992   } while (!eof());
1993 }
1994 
1995 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1996   assert(FormatTok->is(tok::l_brace));
1997   if (!Style.isCSharp())
1998     return false;
1999   // See if it's a property accessor.
2000   if (FormatTok->Previous->isNot(tok::identifier))
2001     return false;
2002 
2003   // See if we are inside a property accessor.
2004   //
2005   // Record the current tokenPosition so that we can advance and
2006   // reset the current token. `Next` is not set yet so we need
2007   // another way to advance along the token stream.
2008   unsigned int StoredPosition = Tokens->getPosition();
2009   FormatToken *Tok = Tokens->getNextToken();
2010 
2011   // A trivial property accessor is of the form:
2012   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2013   // Track these as they do not require line breaks to be introduced.
2014   bool HasSpecialAccessor = false;
2015   bool IsTrivialPropertyAccessor = true;
2016   while (!eof()) {
2017     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2018                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2019                      Keywords.kw_init, Keywords.kw_set)) {
2020       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2021         HasSpecialAccessor = true;
2022       Tok = Tokens->getNextToken();
2023       continue;
2024     }
2025     if (Tok->isNot(tok::r_brace))
2026       IsTrivialPropertyAccessor = false;
2027     break;
2028   }
2029 
2030   if (!HasSpecialAccessor) {
2031     Tokens->setPosition(StoredPosition);
2032     return false;
2033   }
2034 
2035   // Try to parse the property accessor:
2036   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2037   Tokens->setPosition(StoredPosition);
2038   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2039     addUnwrappedLine();
2040   nextToken();
2041   do {
2042     switch (FormatTok->Tok.getKind()) {
2043     case tok::r_brace:
2044       nextToken();
2045       if (FormatTok->is(tok::equal)) {
2046         while (!eof() && FormatTok->isNot(tok::semi))
2047           nextToken();
2048         nextToken();
2049       }
2050       addUnwrappedLine();
2051       return true;
2052     case tok::l_brace:
2053       ++Line->Level;
2054       parseBlock(/*MustBeDeclaration=*/true);
2055       addUnwrappedLine();
2056       --Line->Level;
2057       break;
2058     case tok::equal:
2059       if (FormatTok->is(TT_FatArrow)) {
2060         ++Line->Level;
2061         do {
2062           nextToken();
2063         } while (!eof() && FormatTok->isNot(tok::semi));
2064         nextToken();
2065         addUnwrappedLine();
2066         --Line->Level;
2067         break;
2068       }
2069       nextToken();
2070       break;
2071     default:
2072       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2073                              Keywords.kw_set) &&
2074           !IsTrivialPropertyAccessor) {
2075         // Non-trivial get/set needs to be on its own line.
2076         addUnwrappedLine();
2077       }
2078       nextToken();
2079     }
2080   } while (!eof());
2081 
2082   // Unreachable for well-formed code (paired '{' and '}').
2083   return true;
2084 }
2085 
2086 bool UnwrappedLineParser::tryToParseLambda() {
2087   assert(FormatTok->is(tok::l_square));
2088   if (!Style.isCpp()) {
2089     nextToken();
2090     return false;
2091   }
2092   FormatToken &LSquare = *FormatTok;
2093   if (!tryToParseLambdaIntroducer())
2094     return false;
2095 
2096   bool SeenArrow = false;
2097   bool InTemplateParameterList = false;
2098 
2099   while (FormatTok->isNot(tok::l_brace)) {
2100     if (FormatTok->isSimpleTypeSpecifier()) {
2101       nextToken();
2102       continue;
2103     }
2104     switch (FormatTok->Tok.getKind()) {
2105     case tok::l_brace:
2106       break;
2107     case tok::l_paren:
2108       parseParens();
2109       break;
2110     case tok::l_square:
2111       parseSquare();
2112       break;
2113     case tok::kw_class:
2114     case tok::kw_template:
2115     case tok::kw_typename:
2116       assert(FormatTok->Previous);
2117       if (FormatTok->Previous->is(tok::less))
2118         InTemplateParameterList = true;
2119       nextToken();
2120       break;
2121     case tok::amp:
2122     case tok::star:
2123     case tok::kw_const:
2124     case tok::comma:
2125     case tok::less:
2126     case tok::greater:
2127     case tok::identifier:
2128     case tok::numeric_constant:
2129     case tok::coloncolon:
2130     case tok::kw_mutable:
2131     case tok::kw_noexcept:
2132       nextToken();
2133       break;
2134     // Specialization of a template with an integer parameter can contain
2135     // arithmetic, logical, comparison and ternary operators.
2136     //
2137     // FIXME: This also accepts sequences of operators that are not in the scope
2138     // of a template argument list.
2139     //
2140     // In a C++ lambda a template type can only occur after an arrow. We use
2141     // this as an heuristic to distinguish between Objective-C expressions
2142     // followed by an `a->b` expression, such as:
2143     // ([obj func:arg] + a->b)
2144     // Otherwise the code below would parse as a lambda.
2145     //
2146     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2147     // explicit template lists: []<bool b = true && false>(U &&u){}
2148     case tok::plus:
2149     case tok::minus:
2150     case tok::exclaim:
2151     case tok::tilde:
2152     case tok::slash:
2153     case tok::percent:
2154     case tok::lessless:
2155     case tok::pipe:
2156     case tok::pipepipe:
2157     case tok::ampamp:
2158     case tok::caret:
2159     case tok::equalequal:
2160     case tok::exclaimequal:
2161     case tok::greaterequal:
2162     case tok::lessequal:
2163     case tok::question:
2164     case tok::colon:
2165     case tok::ellipsis:
2166     case tok::kw_true:
2167     case tok::kw_false:
2168       if (SeenArrow || InTemplateParameterList) {
2169         nextToken();
2170         break;
2171       }
2172       return true;
2173     case tok::arrow:
2174       // This might or might not actually be a lambda arrow (this could be an
2175       // ObjC method invocation followed by a dereferencing arrow). We might
2176       // reset this back to TT_Unknown in TokenAnnotator.
2177       FormatTok->setFinalizedType(TT_LambdaArrow);
2178       SeenArrow = true;
2179       nextToken();
2180       break;
2181     default:
2182       return true;
2183     }
2184   }
2185   FormatTok->setFinalizedType(TT_LambdaLBrace);
2186   LSquare.setFinalizedType(TT_LambdaLSquare);
2187   parseChildBlock();
2188   return true;
2189 }
2190 
2191 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2192   const FormatToken *Previous = FormatTok->Previous;
2193   const FormatToken *LeftSquare = FormatTok;
2194   nextToken();
2195   if (Previous &&
2196       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2197                          tok::kw_delete, tok::l_square) ||
2198        LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2199        Previous->isSimpleTypeSpecifier())) {
2200     return false;
2201   }
2202   if (FormatTok->is(tok::l_square))
2203     return false;
2204   if (FormatTok->is(tok::r_square)) {
2205     const FormatToken *Next = Tokens->peekNextToken();
2206     if (Next->is(tok::greater))
2207       return false;
2208   }
2209   parseSquare(/*LambdaIntroducer=*/true);
2210   return true;
2211 }
2212 
2213 void UnwrappedLineParser::tryToParseJSFunction() {
2214   assert(FormatTok->is(Keywords.kw_function) ||
2215          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2216   if (FormatTok->is(Keywords.kw_async))
2217     nextToken();
2218   // Consume "function".
2219   nextToken();
2220 
2221   // Consume * (generator function). Treat it like C++'s overloaded operators.
2222   if (FormatTok->is(tok::star)) {
2223     FormatTok->setFinalizedType(TT_OverloadedOperator);
2224     nextToken();
2225   }
2226 
2227   // Consume function name.
2228   if (FormatTok->is(tok::identifier))
2229     nextToken();
2230 
2231   if (FormatTok->isNot(tok::l_paren))
2232     return;
2233 
2234   // Parse formal parameter list.
2235   parseParens();
2236 
2237   if (FormatTok->is(tok::colon)) {
2238     // Parse a type definition.
2239     nextToken();
2240 
2241     // Eat the type declaration. For braced inline object types, balance braces,
2242     // otherwise just parse until finding an l_brace for the function body.
2243     if (FormatTok->is(tok::l_brace))
2244       tryToParseBracedList();
2245     else
2246       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2247         nextToken();
2248   }
2249 
2250   if (FormatTok->is(tok::semi))
2251     return;
2252 
2253   parseChildBlock();
2254 }
2255 
2256 bool UnwrappedLineParser::tryToParseBracedList() {
2257   if (FormatTok->is(BK_Unknown))
2258     calculateBraceTypes();
2259   assert(FormatTok->isNot(BK_Unknown));
2260   if (FormatTok->is(BK_Block))
2261     return false;
2262   nextToken();
2263   parseBracedList();
2264   return true;
2265 }
2266 
2267 bool UnwrappedLineParser::tryToParseChildBlock() {
2268   assert(Style.isJavaScript() || Style.isCSharp());
2269   assert(FormatTok->is(TT_FatArrow));
2270   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2271   // They always start an expression or a child block if followed by a curly
2272   // brace.
2273   nextToken();
2274   if (FormatTok->isNot(tok::l_brace))
2275     return false;
2276   parseChildBlock();
2277   return true;
2278 }
2279 
2280 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2281                                           bool IsEnum,
2282                                           tok::TokenKind ClosingBraceKind) {
2283   bool HasError = false;
2284 
2285   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2286   // replace this by using parseAssignmentExpression() inside.
2287   do {
2288     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2289         tryToParseChildBlock()) {
2290       continue;
2291     }
2292     if (Style.isJavaScript()) {
2293       if (FormatTok->is(Keywords.kw_function) ||
2294           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2295         tryToParseJSFunction();
2296         continue;
2297       }
2298       if (FormatTok->is(tok::l_brace)) {
2299         // Could be a method inside of a braced list `{a() { return 1; }}`.
2300         if (tryToParseBracedList())
2301           continue;
2302         parseChildBlock();
2303       }
2304     }
2305     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2306       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2307         addUnwrappedLine();
2308       nextToken();
2309       return !HasError;
2310     }
2311     switch (FormatTok->Tok.getKind()) {
2312     case tok::l_square:
2313       if (Style.isCSharp())
2314         parseSquare();
2315       else
2316         tryToParseLambda();
2317       break;
2318     case tok::l_paren:
2319       parseParens();
2320       // JavaScript can just have free standing methods and getters/setters in
2321       // object literals. Detect them by a "{" following ")".
2322       if (Style.isJavaScript()) {
2323         if (FormatTok->is(tok::l_brace))
2324           parseChildBlock();
2325         break;
2326       }
2327       break;
2328     case tok::l_brace:
2329       // Assume there are no blocks inside a braced init list apart
2330       // from the ones we explicitly parse out (like lambdas).
2331       FormatTok->setBlockKind(BK_BracedInit);
2332       nextToken();
2333       parseBracedList();
2334       break;
2335     case tok::less:
2336       if (Style.Language == FormatStyle::LK_Proto ||
2337           ClosingBraceKind == tok::greater) {
2338         nextToken();
2339         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2340                         /*ClosingBraceKind=*/tok::greater);
2341       } else {
2342         nextToken();
2343       }
2344       break;
2345     case tok::semi:
2346       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2347       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2348       // used for error recovery if we have otherwise determined that this is
2349       // a braced list.
2350       if (Style.isJavaScript()) {
2351         nextToken();
2352         break;
2353       }
2354       HasError = true;
2355       if (!ContinueOnSemicolons)
2356         return !HasError;
2357       nextToken();
2358       break;
2359     case tok::comma:
2360       nextToken();
2361       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2362         addUnwrappedLine();
2363       break;
2364     default:
2365       nextToken();
2366       break;
2367     }
2368   } while (!eof());
2369   return false;
2370 }
2371 
2372 /// \brief Parses a pair of parentheses (and everything between them).
2373 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2374 /// double ampersands. This only counts for the current parens scope.
2375 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2376   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2377   nextToken();
2378   do {
2379     switch (FormatTok->Tok.getKind()) {
2380     case tok::l_paren:
2381       parseParens();
2382       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2383         parseChildBlock();
2384       break;
2385     case tok::r_paren:
2386       nextToken();
2387       return;
2388     case tok::r_brace:
2389       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2390       return;
2391     case tok::l_square:
2392       tryToParseLambda();
2393       break;
2394     case tok::l_brace:
2395       if (!tryToParseBracedList())
2396         parseChildBlock();
2397       break;
2398     case tok::at:
2399       nextToken();
2400       if (FormatTok->is(tok::l_brace)) {
2401         nextToken();
2402         parseBracedList();
2403       }
2404       break;
2405     case tok::equal:
2406       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2407         tryToParseChildBlock();
2408       else
2409         nextToken();
2410       break;
2411     case tok::kw_class:
2412       if (Style.isJavaScript())
2413         parseRecord(/*ParseAsExpr=*/true);
2414       else
2415         nextToken();
2416       break;
2417     case tok::identifier:
2418       if (Style.isJavaScript() &&
2419           (FormatTok->is(Keywords.kw_function) ||
2420            FormatTok->startsSequence(Keywords.kw_async,
2421                                      Keywords.kw_function))) {
2422         tryToParseJSFunction();
2423       } else {
2424         nextToken();
2425       }
2426       break;
2427     case tok::kw_requires: {
2428       auto RequiresToken = FormatTok;
2429       nextToken();
2430       parseRequiresExpression(RequiresToken);
2431       break;
2432     }
2433     case tok::ampamp:
2434       if (AmpAmpTokenType != TT_Unknown)
2435         FormatTok->setFinalizedType(AmpAmpTokenType);
2436       LLVM_FALLTHROUGH;
2437     default:
2438       nextToken();
2439       break;
2440     }
2441   } while (!eof());
2442 }
2443 
2444 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2445   if (!LambdaIntroducer) {
2446     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2447     if (tryToParseLambda())
2448       return;
2449   }
2450   do {
2451     switch (FormatTok->Tok.getKind()) {
2452     case tok::l_paren:
2453       parseParens();
2454       break;
2455     case tok::r_square:
2456       nextToken();
2457       return;
2458     case tok::r_brace:
2459       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2460       return;
2461     case tok::l_square:
2462       parseSquare();
2463       break;
2464     case tok::l_brace: {
2465       if (!tryToParseBracedList())
2466         parseChildBlock();
2467       break;
2468     }
2469     case tok::at:
2470       nextToken();
2471       if (FormatTok->is(tok::l_brace)) {
2472         nextToken();
2473         parseBracedList();
2474       }
2475       break;
2476     default:
2477       nextToken();
2478       break;
2479     }
2480   } while (!eof());
2481 }
2482 
2483 void UnwrappedLineParser::keepAncestorBraces() {
2484   if (!Style.RemoveBracesLLVM)
2485     return;
2486 
2487   const int MaxNestingLevels = 2;
2488   const int Size = NestedTooDeep.size();
2489   if (Size >= MaxNestingLevels)
2490     NestedTooDeep[Size - MaxNestingLevels] = true;
2491   NestedTooDeep.push_back(false);
2492 }
2493 
2494 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2495   for (const auto &Token : llvm::reverse(Line.Tokens))
2496     if (Token.Tok->isNot(tok::comment))
2497       return Token.Tok;
2498 
2499   return nullptr;
2500 }
2501 
2502 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2503   FormatToken *Tok = nullptr;
2504 
2505   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2506       PreprocessorDirectives.empty()) {
2507     Tok = getLastNonComment(*Line);
2508     assert(Tok);
2509     if (Tok->BraceCount < 0) {
2510       assert(Tok->BraceCount == -1);
2511       Tok = nullptr;
2512     } else {
2513       Tok->BraceCount = -1;
2514     }
2515   }
2516 
2517   addUnwrappedLine();
2518   ++Line->Level;
2519   parseStructuralElement();
2520 
2521   if (Tok) {
2522     assert(!Line->InPPDirective);
2523     Tok = nullptr;
2524     for (const auto &L : llvm::reverse(*CurrentLines)) {
2525       if (!L.InPPDirective && getLastNonComment(L)) {
2526         Tok = L.Tokens.back().Tok;
2527         break;
2528       }
2529     }
2530     assert(Tok);
2531     ++Tok->BraceCount;
2532   }
2533 
2534   if (CheckEOF && FormatTok->is(tok::eof))
2535     addUnwrappedLine();
2536 
2537   --Line->Level;
2538 }
2539 
2540 static void markOptionalBraces(FormatToken *LeftBrace) {
2541   if (!LeftBrace)
2542     return;
2543 
2544   assert(LeftBrace->is(tok::l_brace));
2545 
2546   FormatToken *RightBrace = LeftBrace->MatchingParen;
2547   if (!RightBrace) {
2548     assert(!LeftBrace->Optional);
2549     return;
2550   }
2551 
2552   assert(RightBrace->is(tok::r_brace));
2553   assert(RightBrace->MatchingParen == LeftBrace);
2554   assert(LeftBrace->Optional == RightBrace->Optional);
2555 
2556   LeftBrace->Optional = true;
2557   RightBrace->Optional = true;
2558 }
2559 
2560 void UnwrappedLineParser::handleAttributes() {
2561   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2562   if (FormatTok->is(TT_AttributeMacro))
2563     nextToken();
2564   handleCppAttributes();
2565 }
2566 
2567 bool UnwrappedLineParser::handleCppAttributes() {
2568   // Handle [[likely]] / [[unlikely]] attributes.
2569   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2570     parseSquare();
2571     return true;
2572   }
2573   return false;
2574 }
2575 
2576 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2577                                                   bool KeepBraces) {
2578   assert(FormatTok->is(tok::kw_if) && "'if' expected");
2579   nextToken();
2580   if (FormatTok->is(tok::exclaim))
2581     nextToken();
2582 
2583   bool KeepIfBraces = true;
2584   if (FormatTok->is(tok::kw_consteval)) {
2585     nextToken();
2586   } else {
2587     KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2588     if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2589       nextToken();
2590     if (FormatTok->is(tok::l_paren))
2591       parseParens();
2592   }
2593   handleAttributes();
2594 
2595   bool NeedsUnwrappedLine = false;
2596   keepAncestorBraces();
2597 
2598   FormatToken *IfLeftBrace = nullptr;
2599   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2600 
2601   if (Keywords.isBlockBegin(*FormatTok, Style)) {
2602     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2603     IfLeftBrace = FormatTok;
2604     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2605     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2606                /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2607     if (Style.BraceWrapping.BeforeElse)
2608       addUnwrappedLine();
2609     else
2610       NeedsUnwrappedLine = true;
2611   } else {
2612     parseUnbracedBody();
2613   }
2614 
2615   if (Style.RemoveBracesLLVM) {
2616     assert(!NestedTooDeep.empty());
2617     KeepIfBraces = KeepIfBraces ||
2618                    (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2619                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2620                    IfBlockKind == IfStmtKind::IfElseIf;
2621   }
2622 
2623   bool KeepElseBraces = KeepIfBraces;
2624   FormatToken *ElseLeftBrace = nullptr;
2625   IfStmtKind Kind = IfStmtKind::IfOnly;
2626 
2627   if (FormatTok->is(tok::kw_else)) {
2628     if (Style.RemoveBracesLLVM) {
2629       NestedTooDeep.back() = false;
2630       Kind = IfStmtKind::IfElse;
2631     }
2632     nextToken();
2633     handleAttributes();
2634     if (Keywords.isBlockBegin(*FormatTok, Style)) {
2635       const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2636       FormatTok->setFinalizedType(TT_ElseLBrace);
2637       ElseLeftBrace = FormatTok;
2638       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2639       IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2640       FormatToken *IfLBrace =
2641           parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2642                      /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2643       if (FormatTok->is(tok::kw_else)) {
2644         KeepElseBraces = KeepElseBraces ||
2645                          ElseBlockKind == IfStmtKind::IfOnly ||
2646                          ElseBlockKind == IfStmtKind::IfElseIf;
2647       } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2648         KeepElseBraces = true;
2649         assert(ElseLeftBrace->MatchingParen);
2650         markOptionalBraces(ElseLeftBrace);
2651       }
2652       addUnwrappedLine();
2653     } else if (FormatTok->is(tok::kw_if)) {
2654       const FormatToken *Previous = Tokens->getPreviousToken();
2655       assert(Previous);
2656       const bool IsPrecededByComment = Previous->is(tok::comment);
2657       if (IsPrecededByComment) {
2658         addUnwrappedLine();
2659         ++Line->Level;
2660       }
2661       bool TooDeep = true;
2662       if (Style.RemoveBracesLLVM) {
2663         Kind = IfStmtKind::IfElseIf;
2664         TooDeep = NestedTooDeep.pop_back_val();
2665       }
2666       ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2667       if (Style.RemoveBracesLLVM)
2668         NestedTooDeep.push_back(TooDeep);
2669       if (IsPrecededByComment)
2670         --Line->Level;
2671     } else {
2672       parseUnbracedBody(/*CheckEOF=*/true);
2673     }
2674   } else {
2675     KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2676     if (NeedsUnwrappedLine)
2677       addUnwrappedLine();
2678   }
2679 
2680   if (!Style.RemoveBracesLLVM)
2681     return nullptr;
2682 
2683   assert(!NestedTooDeep.empty());
2684   KeepElseBraces = KeepElseBraces ||
2685                    (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2686                    NestedTooDeep.back();
2687 
2688   NestedTooDeep.pop_back();
2689 
2690   if (!KeepIfBraces && !KeepElseBraces) {
2691     markOptionalBraces(IfLeftBrace);
2692     markOptionalBraces(ElseLeftBrace);
2693   } else if (IfLeftBrace) {
2694     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2695     if (IfRightBrace) {
2696       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2697       assert(!IfLeftBrace->Optional);
2698       assert(!IfRightBrace->Optional);
2699       IfLeftBrace->MatchingParen = nullptr;
2700       IfRightBrace->MatchingParen = nullptr;
2701     }
2702   }
2703 
2704   if (IfKind)
2705     *IfKind = Kind;
2706 
2707   return IfLeftBrace;
2708 }
2709 
2710 void UnwrappedLineParser::parseTryCatch() {
2711   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2712   nextToken();
2713   bool NeedsUnwrappedLine = false;
2714   if (FormatTok->is(tok::colon)) {
2715     // We are in a function try block, what comes is an initializer list.
2716     nextToken();
2717 
2718     // In case identifiers were removed by clang-tidy, what might follow is
2719     // multiple commas in sequence - before the first identifier.
2720     while (FormatTok->is(tok::comma))
2721       nextToken();
2722 
2723     while (FormatTok->is(tok::identifier)) {
2724       nextToken();
2725       if (FormatTok->is(tok::l_paren))
2726         parseParens();
2727       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2728           FormatTok->is(tok::l_brace)) {
2729         do {
2730           nextToken();
2731         } while (!FormatTok->is(tok::r_brace));
2732         nextToken();
2733       }
2734 
2735       // In case identifiers were removed by clang-tidy, what might follow is
2736       // multiple commas in sequence - after the first identifier.
2737       while (FormatTok->is(tok::comma))
2738         nextToken();
2739     }
2740   }
2741   // Parse try with resource.
2742   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2743     parseParens();
2744 
2745   keepAncestorBraces();
2746 
2747   if (FormatTok->is(tok::l_brace)) {
2748     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2749     parseBlock();
2750     if (Style.BraceWrapping.BeforeCatch)
2751       addUnwrappedLine();
2752     else
2753       NeedsUnwrappedLine = true;
2754   } else if (!FormatTok->is(tok::kw_catch)) {
2755     // The C++ standard requires a compound-statement after a try.
2756     // If there's none, we try to assume there's a structuralElement
2757     // and try to continue.
2758     addUnwrappedLine();
2759     ++Line->Level;
2760     parseStructuralElement();
2761     --Line->Level;
2762   }
2763   while (true) {
2764     if (FormatTok->is(tok::at))
2765       nextToken();
2766     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2767                              tok::kw___finally) ||
2768           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2769            FormatTok->is(Keywords.kw_finally)) ||
2770           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2771            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2772       break;
2773     }
2774     nextToken();
2775     while (FormatTok->isNot(tok::l_brace)) {
2776       if (FormatTok->is(tok::l_paren)) {
2777         parseParens();
2778         continue;
2779       }
2780       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2781         if (Style.RemoveBracesLLVM)
2782           NestedTooDeep.pop_back();
2783         return;
2784       }
2785       nextToken();
2786     }
2787     NeedsUnwrappedLine = false;
2788     Line->MustBeDeclaration = false;
2789     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2790     parseBlock();
2791     if (Style.BraceWrapping.BeforeCatch)
2792       addUnwrappedLine();
2793     else
2794       NeedsUnwrappedLine = true;
2795   }
2796 
2797   if (Style.RemoveBracesLLVM)
2798     NestedTooDeep.pop_back();
2799 
2800   if (NeedsUnwrappedLine)
2801     addUnwrappedLine();
2802 }
2803 
2804 void UnwrappedLineParser::parseNamespace() {
2805   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2806          "'namespace' expected");
2807 
2808   const FormatToken &InitialToken = *FormatTok;
2809   nextToken();
2810   if (InitialToken.is(TT_NamespaceMacro)) {
2811     parseParens();
2812   } else {
2813     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2814                               tok::l_square, tok::period, tok::l_paren) ||
2815            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2816       if (FormatTok->is(tok::l_square))
2817         parseSquare();
2818       else if (FormatTok->is(tok::l_paren))
2819         parseParens();
2820       else
2821         nextToken();
2822     }
2823   }
2824   if (FormatTok->is(tok::l_brace)) {
2825     if (ShouldBreakBeforeBrace(Style, InitialToken))
2826       addUnwrappedLine();
2827 
2828     unsigned AddLevels =
2829         Style.NamespaceIndentation == FormatStyle::NI_All ||
2830                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2831                  DeclarationScopeStack.size() > 1)
2832             ? 1u
2833             : 0u;
2834     bool ManageWhitesmithsBraces =
2835         AddLevels == 0u &&
2836         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2837 
2838     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2839     // the whole block.
2840     if (ManageWhitesmithsBraces)
2841       ++Line->Level;
2842 
2843     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2844                /*KeepBraces=*/true, /*IfKind=*/nullptr,
2845                ManageWhitesmithsBraces);
2846 
2847     // Munch the semicolon after a namespace. This is more common than one would
2848     // think. Putting the semicolon into its own line is very ugly.
2849     if (FormatTok->is(tok::semi))
2850       nextToken();
2851 
2852     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2853 
2854     if (ManageWhitesmithsBraces)
2855       --Line->Level;
2856   }
2857   // FIXME: Add error handling.
2858 }
2859 
2860 void UnwrappedLineParser::parseNew() {
2861   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2862   nextToken();
2863 
2864   if (Style.isCSharp()) {
2865     do {
2866       if (FormatTok->is(tok::l_brace))
2867         parseBracedList();
2868 
2869       if (FormatTok->isOneOf(tok::semi, tok::comma))
2870         return;
2871 
2872       nextToken();
2873     } while (!eof());
2874   }
2875 
2876   if (Style.Language != FormatStyle::LK_Java)
2877     return;
2878 
2879   // In Java, we can parse everything up to the parens, which aren't optional.
2880   do {
2881     // There should not be a ;, { or } before the new's open paren.
2882     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2883       return;
2884 
2885     // Consume the parens.
2886     if (FormatTok->is(tok::l_paren)) {
2887       parseParens();
2888 
2889       // If there is a class body of an anonymous class, consume that as child.
2890       if (FormatTok->is(tok::l_brace))
2891         parseChildBlock();
2892       return;
2893     }
2894     nextToken();
2895   } while (!eof());
2896 }
2897 
2898 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
2899   keepAncestorBraces();
2900 
2901   if (Keywords.isBlockBegin(*FormatTok, Style)) {
2902     if (!KeepBraces)
2903       FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2904     FormatToken *LeftBrace = FormatTok;
2905     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2906     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2907                /*MunchSemi=*/true, KeepBraces);
2908     if (!KeepBraces) {
2909       assert(!NestedTooDeep.empty());
2910       if (!NestedTooDeep.back())
2911         markOptionalBraces(LeftBrace);
2912     }
2913     if (WrapRightBrace)
2914       addUnwrappedLine();
2915   } else {
2916     parseUnbracedBody();
2917   }
2918 
2919   if (!KeepBraces)
2920     NestedTooDeep.pop_back();
2921 }
2922 
2923 void UnwrappedLineParser::parseForOrWhileLoop() {
2924   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2925          "'for', 'while' or foreach macro expected");
2926   const bool KeepBraces = !Style.RemoveBracesLLVM ||
2927                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
2928 
2929   nextToken();
2930   // JS' for await ( ...
2931   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2932     nextToken();
2933   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2934     nextToken();
2935   if (FormatTok->is(tok::l_paren))
2936     parseParens();
2937 
2938   handleAttributes();
2939   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
2940 }
2941 
2942 void UnwrappedLineParser::parseDoWhile() {
2943   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2944   nextToken();
2945 
2946   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
2947 
2948   // FIXME: Add error handling.
2949   if (!FormatTok->is(tok::kw_while)) {
2950     addUnwrappedLine();
2951     return;
2952   }
2953 
2954   // If in Whitesmiths mode, the line with the while() needs to be indented
2955   // to the same level as the block.
2956   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2957     ++Line->Level;
2958 
2959   nextToken();
2960   parseStructuralElement();
2961 }
2962 
2963 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2964   nextToken();
2965   unsigned OldLineLevel = Line->Level;
2966   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2967     --Line->Level;
2968   if (LeftAlignLabel)
2969     Line->Level = 0;
2970 
2971   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2972       FormatTok->is(tok::l_brace)) {
2973 
2974     CompoundStatementIndenter Indenter(this, Line->Level,
2975                                        Style.BraceWrapping.AfterCaseLabel,
2976                                        Style.BraceWrapping.IndentBraces);
2977     parseBlock();
2978     if (FormatTok->is(tok::kw_break)) {
2979       if (Style.BraceWrapping.AfterControlStatement ==
2980           FormatStyle::BWACS_Always) {
2981         addUnwrappedLine();
2982         if (!Style.IndentCaseBlocks &&
2983             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2984           ++Line->Level;
2985         }
2986       }
2987       parseStructuralElement();
2988     }
2989     addUnwrappedLine();
2990   } else {
2991     if (FormatTok->is(tok::semi))
2992       nextToken();
2993     addUnwrappedLine();
2994   }
2995   Line->Level = OldLineLevel;
2996   if (FormatTok->isNot(tok::l_brace)) {
2997     parseStructuralElement();
2998     addUnwrappedLine();
2999   }
3000 }
3001 
3002 void UnwrappedLineParser::parseCaseLabel() {
3003   assert(FormatTok->is(tok::kw_case) && "'case' expected");
3004 
3005   // FIXME: fix handling of complex expressions here.
3006   do {
3007     nextToken();
3008   } while (!eof() && !FormatTok->is(tok::colon));
3009   parseLabel();
3010 }
3011 
3012 void UnwrappedLineParser::parseSwitch() {
3013   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3014   nextToken();
3015   if (FormatTok->is(tok::l_paren))
3016     parseParens();
3017 
3018   keepAncestorBraces();
3019 
3020   if (FormatTok->is(tok::l_brace)) {
3021     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3022     parseBlock();
3023     addUnwrappedLine();
3024   } else {
3025     addUnwrappedLine();
3026     ++Line->Level;
3027     parseStructuralElement();
3028     --Line->Level;
3029   }
3030 
3031   if (Style.RemoveBracesLLVM)
3032     NestedTooDeep.pop_back();
3033 }
3034 
3035 // Operators that can follow a C variable.
3036 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3037   switch (kind) {
3038   case tok::ampamp:
3039   case tok::ampequal:
3040   case tok::arrow:
3041   case tok::caret:
3042   case tok::caretequal:
3043   case tok::comma:
3044   case tok::ellipsis:
3045   case tok::equal:
3046   case tok::equalequal:
3047   case tok::exclaim:
3048   case tok::exclaimequal:
3049   case tok::greater:
3050   case tok::greaterequal:
3051   case tok::greatergreater:
3052   case tok::greatergreaterequal:
3053   case tok::l_paren:
3054   case tok::l_square:
3055   case tok::less:
3056   case tok::lessequal:
3057   case tok::lessless:
3058   case tok::lesslessequal:
3059   case tok::minus:
3060   case tok::minusequal:
3061   case tok::minusminus:
3062   case tok::percent:
3063   case tok::percentequal:
3064   case tok::period:
3065   case tok::pipe:
3066   case tok::pipeequal:
3067   case tok::pipepipe:
3068   case tok::plus:
3069   case tok::plusequal:
3070   case tok::plusplus:
3071   case tok::question:
3072   case tok::r_brace:
3073   case tok::r_paren:
3074   case tok::r_square:
3075   case tok::semi:
3076   case tok::slash:
3077   case tok::slashequal:
3078   case tok::star:
3079   case tok::starequal:
3080     return true;
3081   default:
3082     return false;
3083   }
3084 }
3085 
3086 void UnwrappedLineParser::parseAccessSpecifier() {
3087   FormatToken *AccessSpecifierCandidate = FormatTok;
3088   nextToken();
3089   // Understand Qt's slots.
3090   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3091     nextToken();
3092   // Otherwise, we don't know what it is, and we'd better keep the next token.
3093   if (FormatTok->is(tok::colon)) {
3094     nextToken();
3095     addUnwrappedLine();
3096   } else if (!FormatTok->is(tok::coloncolon) &&
3097              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3098     // Not a variable name nor namespace name.
3099     addUnwrappedLine();
3100   } else if (AccessSpecifierCandidate) {
3101     // Consider the access specifier to be a C identifier.
3102     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3103   }
3104 }
3105 
3106 /// \brief Parses a concept definition.
3107 /// \pre The current token has to be the concept keyword.
3108 ///
3109 /// Returns if either the concept has been completely parsed, or if it detects
3110 /// that the concept definition is incorrect.
3111 void UnwrappedLineParser::parseConcept() {
3112   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
3113   nextToken();
3114   if (!FormatTok->is(tok::identifier))
3115     return;
3116   nextToken();
3117   if (!FormatTok->is(tok::equal))
3118     return;
3119   nextToken();
3120   parseConstraintExpression();
3121   if (FormatTok->is(tok::semi))
3122     nextToken();
3123   addUnwrappedLine();
3124 }
3125 
3126 /// \brief Parses a requires, decides if it is a clause or an expression.
3127 /// \pre The current token has to be the requires keyword.
3128 /// \returns true if it parsed a clause.
3129 bool clang::format::UnwrappedLineParser::parseRequires() {
3130   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3131   auto RequiresToken = FormatTok;
3132 
3133   // We try to guess if it is a requires clause, or a requires expression. For
3134   // that we first consume the keyword and check the next token.
3135   nextToken();
3136 
3137   switch (FormatTok->Tok.getKind()) {
3138   case tok::l_brace:
3139     // This can only be an expression, never a clause.
3140     parseRequiresExpression(RequiresToken);
3141     return false;
3142   case tok::l_paren:
3143     // Clauses and expression can start with a paren, it's unclear what we have.
3144     break;
3145   default:
3146     // All other tokens can only be a clause.
3147     parseRequiresClause(RequiresToken);
3148     return true;
3149   }
3150 
3151   // Looking forward we would have to decide if there are function declaration
3152   // like arguments to the requires expression:
3153   // requires (T t) {
3154   // Or there is a constraint expression for the requires clause:
3155   // requires (C<T> && ...
3156 
3157   // But first let's look behind.
3158   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3159 
3160   if (!PreviousNonComment ||
3161       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3162     // If there is no token, or an expression left brace, we are a requires
3163     // clause within a requires expression.
3164     parseRequiresClause(RequiresToken);
3165     return true;
3166   }
3167 
3168   switch (PreviousNonComment->Tok.getKind()) {
3169   case tok::greater:
3170   case tok::r_paren:
3171   case tok::kw_noexcept:
3172   case tok::kw_const:
3173     // This is a requires clause.
3174     parseRequiresClause(RequiresToken);
3175     return true;
3176   case tok::amp:
3177   case tok::ampamp: {
3178     // This can be either:
3179     // if (... && requires (T t) ...)
3180     // Or
3181     // void member(...) && requires (C<T> ...
3182     // We check the one token before that for a const:
3183     // void member(...) const && requires (C<T> ...
3184     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3185     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3186       parseRequiresClause(RequiresToken);
3187       return true;
3188     }
3189     break;
3190   }
3191   default:
3192     if (PreviousNonComment->isTypeOrIdentifier()) {
3193       // This is a requires clause.
3194       parseRequiresClause(RequiresToken);
3195       return true;
3196     }
3197     // It's an expression.
3198     parseRequiresExpression(RequiresToken);
3199     return false;
3200   }
3201 
3202   // Now we look forward and try to check if the paren content is a parameter
3203   // list. The parameters can be cv-qualified and contain references or
3204   // pointers.
3205   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3206   // of stuff: typename, const, *, &, &&, ::, identifiers.
3207 
3208   int NextTokenOffset = 1;
3209   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3210   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3211     ++NextTokenOffset;
3212     NextToken = Tokens->peekNextToken(NextTokenOffset);
3213   };
3214 
3215   bool FoundType = false;
3216   bool LastWasColonColon = false;
3217   int OpenAngles = 0;
3218 
3219   for (; NextTokenOffset < 50; PeekNext()) {
3220     switch (NextToken->Tok.getKind()) {
3221     case tok::kw_volatile:
3222     case tok::kw_const:
3223     case tok::comma:
3224       parseRequiresExpression(RequiresToken);
3225       return false;
3226     case tok::r_paren:
3227     case tok::pipepipe:
3228       parseRequiresClause(RequiresToken);
3229       return true;
3230     case tok::eof:
3231       // Break out of the loop.
3232       NextTokenOffset = 50;
3233       break;
3234     case tok::coloncolon:
3235       LastWasColonColon = true;
3236       break;
3237     case tok::identifier:
3238       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3239         parseRequiresExpression(RequiresToken);
3240         return false;
3241       }
3242       FoundType = true;
3243       LastWasColonColon = false;
3244       break;
3245     case tok::less:
3246       ++OpenAngles;
3247       break;
3248     case tok::greater:
3249       --OpenAngles;
3250       break;
3251     default:
3252       if (NextToken->isSimpleTypeSpecifier()) {
3253         parseRequiresExpression(RequiresToken);
3254         return false;
3255       }
3256       break;
3257     }
3258   }
3259 
3260   // This seems to be a complicated expression, just assume it's a clause.
3261   parseRequiresClause(RequiresToken);
3262   return true;
3263 }
3264 
3265 /// \brief Parses a requires clause.
3266 /// \param RequiresToken The requires keyword token, which starts this clause.
3267 /// \pre We need to be on the next token after the requires keyword.
3268 /// \sa parseRequiresExpression
3269 ///
3270 /// Returns if it either has finished parsing the clause, or it detects, that
3271 /// the clause is incorrect.
3272 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3273   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3274   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3275 
3276   // If there is no previous token, we are within a requires expression,
3277   // otherwise we will always have the template or function declaration in front
3278   // of it.
3279   bool InRequiresExpression =
3280       !RequiresToken->Previous ||
3281       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3282 
3283   RequiresToken->setFinalizedType(InRequiresExpression
3284                                       ? TT_RequiresClauseInARequiresExpression
3285                                       : TT_RequiresClause);
3286 
3287   parseConstraintExpression();
3288 
3289   if (!InRequiresExpression)
3290     FormatTok->Previous->ClosesRequiresClause = true;
3291 }
3292 
3293 /// \brief Parses a requires expression.
3294 /// \param RequiresToken The requires keyword token, which starts this clause.
3295 /// \pre We need to be on the next token after the requires keyword.
3296 /// \sa parseRequiresClause
3297 ///
3298 /// Returns if it either has finished parsing the expression, or it detects,
3299 /// that the expression is incorrect.
3300 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3301   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3302   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3303 
3304   RequiresToken->setFinalizedType(TT_RequiresExpression);
3305 
3306   if (FormatTok->is(tok::l_paren)) {
3307     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3308     parseParens();
3309   }
3310 
3311   if (FormatTok->is(tok::l_brace)) {
3312     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3313     parseChildBlock(/*CanContainBracedList=*/false,
3314                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3315   }
3316 }
3317 
/// \brief Parses a constraint expression.
///
/// This is either the definition of a concept, or the body of a requires
/// clause. It returns, when the parsing is complete, or the expression is
/// incorrect.
///
/// The expression is consumed token by token in a loop; any token that cannot
/// continue a constraint expression ends the parsing and is left as the
/// current token.
void UnwrappedLineParser::parseConstraintExpression() {
  // The special handling for lambdas is needed since tryToParseLambda() eats a
  // token and if a requires expression is the last part of a requires clause
  // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
  // not set on the correct token. Thus we need to be aware if we even expect a
  // lambda to be possible.
  // template <typename T> requires requires { ... } [[nodiscard]] ...;
  bool LambdaNextTimeAllowed = true;
  do {
    // Take the flag for this iteration and reset it for the next one; cases
    // after which a lambda may follow set it again below.
    bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);

    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires: {
      // A requires inside a constraint expression is a requires expression.
      auto RequiresToken = FormatTok;
      nextToken();
      parseRequiresExpression(RequiresToken);
      break;
    }

    case tok::l_paren:
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      break;

    case tok::l_square:
      // Only a lambda may start with '[' here; anything else ends the
      // expression.
      if (!LambdaThisTimeAllowed || !tryToParseLambda())
        return;
      break;

    // Tokens that end the constraint expression.
    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      // Inside a constraint expression these are always binary operators.
      FormatTok->setFinalizedType(TT_BinaryOperator);
      nextToken();
      LambdaNextTimeAllowed = true;
      break;

    case tok::comma:
    case tok::comment:
      // These do not change whether a lambda is allowed next.
      LambdaNextTimeAllowed = LambdaThisTimeAllowed;
      nextToken();
      break;

    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
    case tok::kw_decltype:
      LambdaNextTimeAllowed = true;
      // Just eat them.
      nextToken();
      break;

    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_true:
    case tok::kw_false:
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // A named cast has to be followed by its '<'-delimited target type.
      nextToken();
      if (!FormatTok->is(tok::less))
        return;

      nextToken();
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    case tok::kw_bool:
      // bool is only allowed if it is directly followed by a paren for a cast:
      // concept C = bool(...);
      // and bool is the only type, all other types as cast must be inside a
      // cast to bool and thus are handled by the other cases.
      nextToken();
      if (FormatTok->isNot(tok::l_paren))
        return;
      parseParens();
      break;

    default:
      if (!FormatTok->Tok.getIdentifierInfo()) {
        // Identifiers are part of the default case, we check for more than
        // tok::identifier to handle builtin type traits.
        return;
      }

      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    }
  } while (!eof());
}
3461 
/// \brief Parses an enum declaration or definition.
///
/// The current token is either the enum keyword or, for NS_ENUM-like macros,
/// the first token of the macro invocation.
/// \returns false if the tokens turn out not to form an enum (see the early
/// returns below), true otherwise.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
    nextToken();

  // Skip everything up to the enum body: the name, an underlying type,
  // macros and attributes.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question,
                            tok::l_square, tok::r_square)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(TT_AttributeSquare)) {
      parseSquare();
      // Consume the closing TT_AttributeSquare.
      if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
        nextToken();
    }
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setFinalizedType(TT_EnumLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    // Protobuf enum bodies are parsed as a regular declaration block.
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken)) {
    addUnwrappedLine();
  }
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    // The enumerators start on a new line, one level deeper.
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                   /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  if (HasError) {
    // The braced list could not be parsed: end the line here.
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3547 
3548 bool UnwrappedLineParser::parseStructLike() {
3549   // parseRecord falls through and does not yet add an unwrapped line as a
3550   // record declaration or definition can start a structural element.
3551   parseRecord();
3552   // This does not apply to Java, JavaScript and C#.
3553   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3554       Style.isCSharp()) {
3555     if (FormatTok->is(tok::semi))
3556       nextToken();
3557     addUnwrappedLine();
3558     return true;
3559   }
3560   return false;
3561 }
3562 
3563 namespace {
3564 // A class used to set and restore the Token position when peeking
3565 // ahead in the token source.
3566 class ScopedTokenPosition {
3567   unsigned StoredPosition;
3568   FormatTokenSource *Tokens;
3569 
3570 public:
3571   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3572     assert(Tokens && "Tokens expected to not be null");
3573     StoredPosition = Tokens->getPosition();
3574   }
3575 
3576   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3577 };
3578 } // namespace
3579 
3580 // Look to see if we have [[ by looking ahead, if
3581 // its not then rewind to the original position.
3582 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3583   ScopedTokenPosition AutoPosition(Tokens);
3584   FormatToken *Tok = Tokens->getNextToken();
3585   // We already read the first [ check for the second.
3586   if (!Tok->is(tok::l_square))
3587     return false;
3588   // Double check that the attribute is just something
3589   // fairly simple.
3590   while (Tok->isNot(tok::eof)) {
3591     if (Tok->is(tok::r_square))
3592       break;
3593     Tok = Tokens->getNextToken();
3594   }
3595   if (Tok->is(tok::eof))
3596     return false;
3597   Tok = Tokens->getNextToken();
3598   if (!Tok->is(tok::r_square))
3599     return false;
3600   Tok = Tokens->getNextToken();
3601   if (Tok->is(tok::semi))
3602     return false;
3603   return true;
3604 }
3605 
3606 void UnwrappedLineParser::parseJavaEnumBody() {
3607   assert(FormatTok->is(tok::l_brace));
3608   const FormatToken *OpeningBrace = FormatTok;
3609 
3610   // Determine whether the enum is simple, i.e. does not have a semicolon or
3611   // constants with class bodies. Simple enums can be formatted like braced
3612   // lists, contracted to a single line, etc.
3613   unsigned StoredPosition = Tokens->getPosition();
3614   bool IsSimple = true;
3615   FormatToken *Tok = Tokens->getNextToken();
3616   while (!Tok->is(tok::eof)) {
3617     if (Tok->is(tok::r_brace))
3618       break;
3619     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3620       IsSimple = false;
3621       break;
3622     }
3623     // FIXME: This will also mark enums with braces in the arguments to enum
3624     // constants as "not simple". This is probably fine in practice, though.
3625     Tok = Tokens->getNextToken();
3626   }
3627   FormatTok = Tokens->setPosition(StoredPosition);
3628 
3629   if (IsSimple) {
3630     nextToken();
3631     parseBracedList();
3632     addUnwrappedLine();
3633     return;
3634   }
3635 
3636   // Parse the body of a more complex enum.
3637   // First add a line for everything up to the "{".
3638   nextToken();
3639   addUnwrappedLine();
3640   ++Line->Level;
3641 
3642   // Parse the enum constants.
3643   while (FormatTok->isNot(tok::eof)) {
3644     if (FormatTok->is(tok::l_brace)) {
3645       // Parse the constant's class body.
3646       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3647                  /*MunchSemi=*/false);
3648     } else if (FormatTok->is(tok::l_paren)) {
3649       parseParens();
3650     } else if (FormatTok->is(tok::comma)) {
3651       nextToken();
3652       addUnwrappedLine();
3653     } else if (FormatTok->is(tok::semi)) {
3654       nextToken();
3655       addUnwrappedLine();
3656       break;
3657     } else if (FormatTok->is(tok::r_brace)) {
3658       addUnwrappedLine();
3659       break;
3660     } else {
3661       nextToken();
3662     }
3663   }
3664 
3665   // Parse the class body after the enum's ";" if any.
3666   parseLevel(OpeningBrace);
3667   nextToken();
3668   --Line->Level;
3669   addUnwrappedLine();
3670 }
3671 
3672 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3673   const FormatToken &InitialToken = *FormatTok;
3674   nextToken();
3675 
3676   // The actual identifier can be a nested name specifier, and in macros
3677   // it is often token-pasted.
3678   // An [[attribute]] can be before the identifier.
3679   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3680                             tok::kw___attribute, tok::kw___declspec,
3681                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3682          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3683           FormatTok->isOneOf(tok::period, tok::comma))) {
3684     if (Style.isJavaScript() &&
3685         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3686       // JavaScript/TypeScript supports inline object types in
3687       // extends/implements positions:
3688       //     class Foo implements {bar: number} { }
3689       nextToken();
3690       if (FormatTok->is(tok::l_brace)) {
3691         tryToParseBracedList();
3692         continue;
3693       }
3694     }
3695     bool IsNonMacroIdentifier =
3696         FormatTok->is(tok::identifier) &&
3697         FormatTok->TokenText != FormatTok->TokenText.upper();
3698     nextToken();
3699     // We can have macros or attributes in between 'class' and the class name.
3700     if (!IsNonMacroIdentifier) {
3701       if (FormatTok->is(tok::l_paren)) {
3702         parseParens();
3703       } else if (FormatTok->is(TT_AttributeSquare)) {
3704         parseSquare();
3705         // Consume the closing TT_AttributeSquare.
3706         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3707           nextToken();
3708       }
3709     }
3710   }
3711 
3712   // Note that parsing away template declarations here leads to incorrectly
3713   // accepting function declarations as record declarations.
3714   // In general, we cannot solve this problem. Consider:
3715   // class A<int> B() {}
3716   // which can be a function definition or a class definition when B() is a
3717   // macro. If we find enough real-world cases where this is a problem, we
3718   // can parse for the 'template' keyword in the beginning of the statement,
3719   // and thus rule out the record production in case there is no template
3720   // (this would still leave us with an ambiguity between template function
3721   // and class declarations).
3722   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3723     do {
3724       if (FormatTok->is(tok::l_brace)) {
3725         calculateBraceTypes(/*ExpectClassBody=*/true);
3726         if (!tryToParseBracedList())
3727           break;
3728       }
3729       if (FormatTok->is(tok::l_square)) {
3730         FormatToken *Previous = FormatTok->Previous;
3731         if (!Previous ||
3732             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3733           // Don't try parsing a lambda if we had a closing parenthesis before,
3734           // it was probably a pointer to an array: int (*)[].
3735           if (!tryToParseLambda())
3736             break;
3737         } else {
3738           parseSquare();
3739           continue;
3740         }
3741       }
3742       if (FormatTok->is(tok::semi))
3743         return;
3744       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3745         addUnwrappedLine();
3746         nextToken();
3747         parseCSharpGenericTypeConstraint();
3748         break;
3749       }
3750       nextToken();
3751     } while (!eof());
3752   }
3753 
3754   auto GetBraceType = [](const FormatToken &RecordTok) {
3755     switch (RecordTok.Tok.getKind()) {
3756     case tok::kw_class:
3757       return TT_ClassLBrace;
3758     case tok::kw_struct:
3759       return TT_StructLBrace;
3760     case tok::kw_union:
3761       return TT_UnionLBrace;
3762     default:
3763       // Useful for e.g. interface.
3764       return TT_RecordLBrace;
3765     }
3766   };
3767   if (FormatTok->is(tok::l_brace)) {
3768     FormatTok->setFinalizedType(GetBraceType(InitialToken));
3769     if (ParseAsExpr) {
3770       parseChildBlock();
3771     } else {
3772       if (ShouldBreakBeforeBrace(Style, InitialToken))
3773         addUnwrappedLine();
3774 
3775       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3776       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3777     }
3778   }
3779   // There is no addUnwrappedLine() here so that we fall through to parsing a
3780   // structural element afterwards. Thus, in "class A {} n, m;",
3781   // "} n, m;" will end up in one unwrapped line.
3782 }
3783 
3784 void UnwrappedLineParser::parseObjCMethod() {
3785   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3786          "'(' or identifier expected.");
3787   do {
3788     if (FormatTok->is(tok::semi)) {
3789       nextToken();
3790       addUnwrappedLine();
3791       return;
3792     } else if (FormatTok->is(tok::l_brace)) {
3793       if (Style.BraceWrapping.AfterFunction)
3794         addUnwrappedLine();
3795       parseBlock();
3796       addUnwrappedLine();
3797       return;
3798     } else {
3799       nextToken();
3800     }
3801   } while (!eof());
3802 }
3803 
3804 void UnwrappedLineParser::parseObjCProtocolList() {
3805   assert(FormatTok->is(tok::less) && "'<' expected.");
3806   do {
3807     nextToken();
3808     // Early exit in case someone forgot a close angle.
3809     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3810         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3811       return;
3812     }
3813   } while (!eof() && FormatTok->isNot(tok::greater));
3814   nextToken(); // Skip '>'.
3815 }
3816 
3817 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3818   do {
3819     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3820       nextToken();
3821       addUnwrappedLine();
3822       break;
3823     }
3824     if (FormatTok->is(tok::l_brace)) {
3825       parseBlock();
3826       // In ObjC interfaces, nothing should be following the "}".
3827       addUnwrappedLine();
3828     } else if (FormatTok->is(tok::r_brace)) {
3829       // Ignore stray "}". parseStructuralElement doesn't consume them.
3830       nextToken();
3831       addUnwrappedLine();
3832     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3833       nextToken();
3834       parseObjCMethod();
3835     } else {
3836       parseStructuralElement();
3837     }
3838   } while (!eof());
3839 }
3840 
3841 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3842   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3843          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3844   nextToken();
3845   nextToken(); // interface name
3846 
3847   // @interface can be followed by a lightweight generic
3848   // specialization list, then either a base class or a category.
3849   if (FormatTok->is(tok::less))
3850     parseObjCLightweightGenerics();
3851   if (FormatTok->is(tok::colon)) {
3852     nextToken();
3853     nextToken(); // base class name
3854     // The base class can also have lightweight generics applied to it.
3855     if (FormatTok->is(tok::less))
3856       parseObjCLightweightGenerics();
3857   } else if (FormatTok->is(tok::l_paren)) {
3858     // Skip category, if present.
3859     parseParens();
3860   }
3861 
3862   if (FormatTok->is(tok::less))
3863     parseObjCProtocolList();
3864 
3865   if (FormatTok->is(tok::l_brace)) {
3866     if (Style.BraceWrapping.AfterObjCDeclaration)
3867       addUnwrappedLine();
3868     parseBlock(/*MustBeDeclaration=*/true);
3869   }
3870 
3871   // With instance variables, this puts '}' on its own line.  Without instance
3872   // variables, this ends the @interface line.
3873   addUnwrappedLine();
3874 
3875   parseObjCUntilAtEnd();
3876 }
3877 
3878 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3879   assert(FormatTok->is(tok::less));
3880   // Unlike protocol lists, generic parameterizations support
3881   // nested angles:
3882   //
3883   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3884   //     NSObject <NSCopying, NSSecureCoding>
3885   //
3886   // so we need to count how many open angles we have left.
3887   unsigned NumOpenAngles = 1;
3888   do {
3889     nextToken();
3890     // Early exit in case someone forgot a close angle.
3891     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3892         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3893       break;
3894     }
3895     if (FormatTok->is(tok::less)) {
3896       ++NumOpenAngles;
3897     } else if (FormatTok->is(tok::greater)) {
3898       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3899       --NumOpenAngles;
3900     }
3901   } while (!eof() && NumOpenAngles != 0);
3902   nextToken(); // Skip '>'.
3903 }
3904 
3905 // Returns true for the declaration/definition form of @protocol,
3906 // false for the expression form.
3907 bool UnwrappedLineParser::parseObjCProtocol() {
3908   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3909   nextToken();
3910 
3911   if (FormatTok->is(tok::l_paren)) {
3912     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3913     return false;
3914   }
3915 
3916   // The definition/declaration form,
3917   // @protocol Foo
3918   // - (int)someMethod;
3919   // @end
3920 
3921   nextToken(); // protocol name
3922 
3923   if (FormatTok->is(tok::less))
3924     parseObjCProtocolList();
3925 
3926   // Check for protocol declaration.
3927   if (FormatTok->is(tok::semi)) {
3928     nextToken();
3929     addUnwrappedLine();
3930     return true;
3931   }
3932 
3933   addUnwrappedLine();
3934   parseObjCUntilAtEnd();
3935   return true;
3936 }
3937 
3938 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3939   bool IsImport = FormatTok->is(Keywords.kw_import);
3940   assert(IsImport || FormatTok->is(tok::kw_export));
3941   nextToken();
3942 
3943   // Consume the "default" in "export default class/function".
3944   if (FormatTok->is(tok::kw_default))
3945     nextToken();
3946 
3947   // Consume "async function", "function" and "default function", so that these
3948   // get parsed as free-standing JS functions, i.e. do not require a trailing
3949   // semicolon.
3950   if (FormatTok->is(Keywords.kw_async))
3951     nextToken();
3952   if (FormatTok->is(Keywords.kw_function)) {
3953     nextToken();
3954     return;
3955   }
3956 
3957   // For imports, `export *`, `export {...}`, consume the rest of the line up
3958   // to the terminating `;`. For everything else, just return and continue
3959   // parsing the structural element, i.e. the declaration or expression for
3960   // `export default`.
3961   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3962       !FormatTok->isStringLiteral()) {
3963     return;
3964   }
3965 
3966   while (!eof()) {
3967     if (FormatTok->is(tok::semi))
3968       return;
3969     if (Line->Tokens.empty()) {
3970       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3971       // import statement should terminate.
3972       return;
3973     }
3974     if (FormatTok->is(tok::l_brace)) {
3975       FormatTok->setBlockKind(BK_Block);
3976       nextToken();
3977       parseBracedList();
3978     } else {
3979       nextToken();
3980     }
3981   }
3982 }
3983 
3984 void UnwrappedLineParser::parseStatementMacro() {
3985   nextToken();
3986   if (FormatTok->is(tok::l_paren))
3987     parseParens();
3988   if (FormatTok->is(tok::semi))
3989     nextToken();
3990   addUnwrappedLine();
3991 }
3992 
3993 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3994                                                  StringRef Prefix = "") {
3995   llvm::dbgs() << Prefix << "Line(" << Line.Level
3996                << ", FSC=" << Line.FirstStartColumn << ")"
3997                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3998   for (const auto &Node : Line.Tokens) {
3999     llvm::dbgs() << Node.Tok->Tok.getName() << "["
4000                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
4001                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
4002   }
4003   for (const auto &Node : Line.Tokens)
4004     for (const auto &ChildNode : Node.Children)
4005       printDebugInfo(ChildNode, "\nChild: ");
4006 
4007   llvm::dbgs() << "\n";
4008 }
4009 
4010 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4011   if (Line->Tokens.empty())
4012     return;
4013   LLVM_DEBUG({
4014     if (CurrentLines == &Lines)
4015       printDebugInfo(*Line);
4016   });
4017 
4018   // If this line closes a block when in Whitesmiths mode, remember that
4019   // information so that the level can be decreased after the line is added.
4020   // This has to happen after the addition of the line since the line itself
4021   // needs to be indented.
4022   bool ClosesWhitesmithsBlock =
4023       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4024       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4025 
4026   CurrentLines->push_back(std::move(*Line));
4027   Line->Tokens.clear();
4028   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4029   Line->FirstStartColumn = 0;
4030 
4031   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4032     --Line->Level;
4033   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
4034     CurrentLines->append(
4035         std::make_move_iterator(PreprocessorDirectives.begin()),
4036         std::make_move_iterator(PreprocessorDirectives.end()));
4037     PreprocessorDirectives.clear();
4038   }
4039   // Disconnect the current token from the last token on the previous line.
4040   FormatTok->Previous = nullptr;
4041 }
4042 
4043 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4044 
4045 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4046   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4047          FormatTok.NewlinesBefore > 0;
4048 }
4049 
4050 // Checks if \p FormatTok is a line comment that continues the line comment
4051 // section on \p Line.
4052 static bool
4053 continuesLineCommentSection(const FormatToken &FormatTok,
4054                             const UnwrappedLine &Line,
4055                             const llvm::Regex &CommentPragmasRegex) {
4056   if (Line.Tokens.empty())
4057     return false;
4058 
4059   StringRef IndentContent = FormatTok.TokenText;
4060   if (FormatTok.TokenText.startswith("//") ||
4061       FormatTok.TokenText.startswith("/*")) {
4062     IndentContent = FormatTok.TokenText.substr(2);
4063   }
4064   if (CommentPragmasRegex.match(IndentContent))
4065     return false;
4066 
4067   // If Line starts with a line comment, then FormatTok continues the comment
4068   // section if its original column is greater or equal to the original start
4069   // column of the line.
4070   //
4071   // Define the min column token of a line as follows: if a line ends in '{' or
4072   // contains a '{' followed by a line comment, then the min column token is
4073   // that '{'. Otherwise, the min column token of the line is the first token of
4074   // the line.
4075   //
4076   // If Line starts with a token other than a line comment, then FormatTok
4077   // continues the comment section if its original column is greater than the
4078   // original start column of the min column token of the line.
4079   //
4080   // For example, the second line comment continues the first in these cases:
4081   //
4082   // // first line
4083   // // second line
4084   //
4085   // and:
4086   //
4087   // // first line
4088   //  // second line
4089   //
4090   // and:
4091   //
4092   // int i; // first line
4093   //  // second line
4094   //
4095   // and:
4096   //
4097   // do { // first line
4098   //      // second line
4099   //   int i;
4100   // } while (true);
4101   //
4102   // and:
4103   //
4104   // enum {
4105   //   a, // first line
4106   //    // second line
4107   //   b
4108   // };
4109   //
4110   // The second line comment doesn't continue the first in these cases:
4111   //
4112   //   // first line
4113   //  // second line
4114   //
4115   // and:
4116   //
4117   // int i; // first line
4118   // // second line
4119   //
4120   // and:
4121   //
4122   // do { // first line
4123   //   // second line
4124   //   int i;
4125   // } while (true);
4126   //
4127   // and:
4128   //
4129   // enum {
4130   //   a, // first line
4131   //   // second line
4132   // };
4133   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4134 
4135   // Scan for '{//'. If found, use the column of '{' as a min column for line
4136   // comment section continuation.
4137   const FormatToken *PreviousToken = nullptr;
4138   for (const UnwrappedLineNode &Node : Line.Tokens) {
4139     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4140         isLineComment(*Node.Tok)) {
4141       MinColumnToken = PreviousToken;
4142       break;
4143     }
4144     PreviousToken = Node.Tok;
4145 
4146     // Grab the last newline preceding a token in this unwrapped line.
4147     if (Node.Tok->NewlinesBefore > 0)
4148       MinColumnToken = Node.Tok;
4149   }
4150   if (PreviousToken && PreviousToken->is(tok::l_brace))
4151     MinColumnToken = PreviousToken;
4152 
4153   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4154                               MinColumnToken);
4155 }
4156 
4157 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4158   bool JustComments = Line->Tokens.empty();
4159   for (FormatToken *Tok : CommentsBeforeNextToken) {
4160     // Line comments that belong to the same line comment section are put on the
4161     // same line since later we might want to reflow content between them.
4162     // Additional fine-grained breaking of line comment sections is controlled
4163     // by the class BreakableLineCommentSection in case it is desirable to keep
4164     // several line comment sections in the same unwrapped line.
4165     //
4166     // FIXME: Consider putting separate line comment sections as children to the
4167     // unwrapped line instead.
4168     Tok->ContinuesLineCommentSection =
4169         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4170     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4171       addUnwrappedLine();
4172     pushToken(Tok);
4173   }
4174   if (NewlineBeforeNext && JustComments)
4175     addUnwrappedLine();
4176   CommentsBeforeNextToken.clear();
4177 }
4178 
4179 void UnwrappedLineParser::nextToken(int LevelDifference) {
4180   if (eof())
4181     return;
4182   flushComments(isOnNewLine(*FormatTok));
4183   pushToken(FormatTok);
4184   FormatToken *Previous = FormatTok;
4185   if (!Style.isJavaScript())
4186     readToken(LevelDifference);
4187   else
4188     readTokenWithJavaScriptASI();
4189   FormatTok->Previous = Previous;
4190   if (Style.isVerilog()) {
4191     // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4192     // keywords like `begin`, we can't treat them the same as left braces
4193     // because some contexts require one of them.  For example structs use
4194     // braces and if blocks use keywords, and a left brace can occur in an if
4195     // statement, but it is not a block.  For keywords like `end`, we simply
4196     // treat them the same as right braces.
4197     if (Keywords.isVerilogEnd(*FormatTok))
4198       FormatTok->Tok.setKind(tok::r_brace);
4199   }
4200 }
4201 
4202 void UnwrappedLineParser::distributeComments(
4203     const SmallVectorImpl<FormatToken *> &Comments,
4204     const FormatToken *NextTok) {
4205   // Whether or not a line comment token continues a line is controlled by
4206   // the method continuesLineCommentSection, with the following caveat:
4207   //
4208   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4209   // that each comment line from the trail is aligned with the next token, if
4210   // the next token exists. If a trail exists, the beginning of the maximal
4211   // trail is marked as a start of a new comment section.
4212   //
4213   // For example in this code:
4214   //
4215   // int a; // line about a
4216   //   // line 1 about b
4217   //   // line 2 about b
4218   //   int b;
4219   //
4220   // the two lines about b form a maximal trail, so there are two sections, the
4221   // first one consisting of the single comment "// line about a" and the
4222   // second one consisting of the next two comments.
4223   if (Comments.empty())
4224     return;
4225   bool ShouldPushCommentsInCurrentLine = true;
4226   bool HasTrailAlignedWithNextToken = false;
4227   unsigned StartOfTrailAlignedWithNextToken = 0;
4228   if (NextTok) {
4229     // We are skipping the first element intentionally.
4230     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4231       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4232         HasTrailAlignedWithNextToken = true;
4233         StartOfTrailAlignedWithNextToken = i;
4234       }
4235     }
4236   }
4237   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4238     FormatToken *FormatTok = Comments[i];
4239     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4240       FormatTok->ContinuesLineCommentSection = false;
4241     } else {
4242       FormatTok->ContinuesLineCommentSection =
4243           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4244     }
4245     if (!FormatTok->ContinuesLineCommentSection &&
4246         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4247       ShouldPushCommentsInCurrentLine = false;
4248     }
4249     if (ShouldPushCommentsInCurrentLine)
4250       pushToken(FormatTok);
4251     else
4252       CommentsBeforeNextToken.push_back(FormatTok);
4253   }
4254 }
4255 
4256 void UnwrappedLineParser::readToken(int LevelDifference) {
4257   SmallVector<FormatToken *, 1> Comments;
4258   bool PreviousWasComment = false;
4259   bool FirstNonCommentOnLine = false;
4260   do {
4261     FormatTok = Tokens->getNextToken();
4262     assert(FormatTok);
4263     while (FormatTok->getType() == TT_ConflictStart ||
4264            FormatTok->getType() == TT_ConflictEnd ||
4265            FormatTok->getType() == TT_ConflictAlternative) {
4266       if (FormatTok->getType() == TT_ConflictStart)
4267         conditionalCompilationStart(/*Unreachable=*/false);
4268       else if (FormatTok->getType() == TT_ConflictAlternative)
4269         conditionalCompilationAlternative();
4270       else if (FormatTok->getType() == TT_ConflictEnd)
4271         conditionalCompilationEnd();
4272       FormatTok = Tokens->getNextToken();
4273       FormatTok->MustBreakBefore = true;
4274     }
4275 
4276     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4277                                       const FormatToken &Tok,
4278                                       bool PreviousWasComment) {
4279       auto IsFirstOnLine = [](const FormatToken &Tok) {
4280         return Tok.HasUnescapedNewline || Tok.IsFirst;
4281       };
4282 
4283       // Consider preprocessor directives preceded by block comments as first
4284       // on line.
4285       if (PreviousWasComment)
4286         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4287       return IsFirstOnLine(Tok);
4288     };
4289 
4290     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4291         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4292     PreviousWasComment = FormatTok->is(tok::comment);
4293 
4294     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4295            (!Style.isVerilog() ||
4296             Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4297            FirstNonCommentOnLine) {
4298       distributeComments(Comments, FormatTok);
4299       Comments.clear();
4300       // If there is an unfinished unwrapped line, we flush the preprocessor
4301       // directives only after that unwrapped line was finished later.
4302       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4303       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4304       assert((LevelDifference >= 0 ||
4305               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4306              "LevelDifference makes Line->Level negative");
4307       Line->Level += LevelDifference;
4308       // Comments stored before the preprocessor directive need to be output
4309       // before the preprocessor directive, at the same level as the
4310       // preprocessor directive, as we consider them to apply to the directive.
4311       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4312           PPBranchLevel > 0) {
4313         Line->Level += PPBranchLevel;
4314       }
4315       flushComments(isOnNewLine(*FormatTok));
4316       parsePPDirective();
4317       PreviousWasComment = FormatTok->is(tok::comment);
4318       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4319           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4320     }
4321 
4322     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4323         !Line->InPPDirective) {
4324       continue;
4325     }
4326 
4327     if (!FormatTok->is(tok::comment)) {
4328       distributeComments(Comments, FormatTok);
4329       Comments.clear();
4330       return;
4331     }
4332 
4333     Comments.push_back(FormatTok);
4334   } while (!eof());
4335 
4336   distributeComments(Comments, nullptr);
4337   Comments.clear();
4338 }
4339 
4340 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4341   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4342   if (MustBreakBeforeNextToken) {
4343     Line->Tokens.back().Tok->MustBreakBefore = true;
4344     MustBreakBeforeNextToken = false;
4345   }
4346 }
4347 
4348 } // end namespace format
4349 } // end namespace clang
4350