1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 #include <utility>
24 
25 #define DEBUG_TYPE "format-parser"
26 
27 namespace clang {
28 namespace format {
29 
30 class FormatTokenSource {
31 public:
32   virtual ~FormatTokenSource() {}
33 
34   // Returns the next token in the token stream.
35   virtual FormatToken *getNextToken() = 0;
36 
37   // Returns the token preceding the token returned by the last call to
38   // getNextToken() in the token stream, or nullptr if no such token exists.
39   virtual FormatToken *getPreviousToken() = 0;
40 
41   // Returns the token that would be returned by the next call to
42   // getNextToken().
43   virtual FormatToken *peekNextToken() = 0;
44 
45   // Returns the token that would be returned after the next N calls to
46   // getNextToken(). N needs to be greater than zero, and small enough that
47   // there are still tokens. Check for tok::eof with N-1 before calling it with
48   // N.
49   virtual FormatToken *peekNextToken(int N) = 0;
50 
51   // Returns whether we are at the end of the file.
52   // This can be different from whether getNextToken() returned an eof token
53   // when the FormatTokenSource is a view on a part of the token stream.
54   virtual bool isEOF() = 0;
55 
56   // Gets the current position in the token stream, to be used by setPosition().
57   virtual unsigned getPosition() = 0;
58 
59   // Resets the token stream to the state it was in when getPosition() returned
60   // Position, and return the token at that position in the stream.
61   virtual FormatToken *setPosition(unsigned Position) = 0;
62 };
63 
64 namespace {
65 
66 class ScopedDeclarationState {
67 public:
68   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
69                          bool MustBeDeclaration)
70       : Line(Line), Stack(Stack) {
71     Line.MustBeDeclaration = MustBeDeclaration;
72     Stack.push_back(MustBeDeclaration);
73   }
74   ~ScopedDeclarationState() {
75     Stack.pop_back();
76     if (!Stack.empty())
77       Line.MustBeDeclaration = Stack.back();
78     else
79       Line.MustBeDeclaration = true;
80   }
81 
82 private:
83   UnwrappedLine &Line;
84   llvm::BitVector &Stack;
85 };
86 
87 static bool isLineComment(const FormatToken &FormatTok) {
88   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
89 }
90 
91 // Checks if \p FormatTok is a line comment that continues the line comment
92 // \p Previous. The original column of \p MinColumnToken is used to determine
93 // whether \p FormatTok is indented enough to the right to continue \p Previous.
94 static bool continuesLineComment(const FormatToken &FormatTok,
95                                  const FormatToken *Previous,
96                                  const FormatToken *MinColumnToken) {
97   if (!Previous || !MinColumnToken)
98     return false;
99   unsigned MinContinueColumn =
100       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
101   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
102          isLineComment(*Previous) &&
103          FormatTok.OriginalColumn >= MinContinueColumn;
104 }
105 
106 class ScopedMacroState : public FormatTokenSource {
107 public:
108   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
109                    FormatToken *&ResetToken)
110       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
111         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
112         Token(nullptr), PreviousToken(nullptr) {
113     FakeEOF.Tok.startToken();
114     FakeEOF.Tok.setKind(tok::eof);
115     TokenSource = this;
116     Line.Level = 0;
117     Line.InPPDirective = true;
118   }
119 
120   ~ScopedMacroState() override {
121     TokenSource = PreviousTokenSource;
122     ResetToken = Token;
123     Line.InPPDirective = false;
124     Line.Level = PreviousLineLevel;
125   }
126 
127   FormatToken *getNextToken() override {
128     // The \c UnwrappedLineParser guards against this by never calling
129     // \c getNextToken() after it has encountered the first eof token.
130     assert(!eof());
131     PreviousToken = Token;
132     Token = PreviousTokenSource->getNextToken();
133     if (eof())
134       return &FakeEOF;
135     return Token;
136   }
137 
138   FormatToken *getPreviousToken() override {
139     return PreviousTokenSource->getPreviousToken();
140   }
141 
142   FormatToken *peekNextToken() override {
143     if (eof())
144       return &FakeEOF;
145     return PreviousTokenSource->peekNextToken();
146   }
147 
148   FormatToken *peekNextToken(int N) override {
149     assert(N > 0);
150     if (eof())
151       return &FakeEOF;
152     return PreviousTokenSource->peekNextToken(N);
153   }
154 
155   bool isEOF() override { return PreviousTokenSource->isEOF(); }
156 
157   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
158 
159   FormatToken *setPosition(unsigned Position) override {
160     PreviousToken = nullptr;
161     Token = PreviousTokenSource->setPosition(Position);
162     return Token;
163   }
164 
165 private:
166   bool eof() {
167     return Token && Token->HasUnescapedNewline &&
168            !continuesLineComment(*Token, PreviousToken,
169                                  /*MinColumnToken=*/PreviousToken);
170   }
171 
172   FormatToken FakeEOF;
173   UnwrappedLine &Line;
174   FormatTokenSource *&TokenSource;
175   FormatToken *&ResetToken;
176   unsigned PreviousLineLevel;
177   FormatTokenSource *PreviousTokenSource;
178 
179   FormatToken *Token;
180   FormatToken *PreviousToken;
181 };
182 
183 } // end anonymous namespace
184 
185 class ScopedLineState {
186 public:
187   ScopedLineState(UnwrappedLineParser &Parser,
188                   bool SwitchToPreprocessorLines = false)
189       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
190     if (SwitchToPreprocessorLines)
191       Parser.CurrentLines = &Parser.PreprocessorDirectives;
192     else if (!Parser.Line->Tokens.empty())
193       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
194     PreBlockLine = std::move(Parser.Line);
195     Parser.Line = std::make_unique<UnwrappedLine>();
196     Parser.Line->Level = PreBlockLine->Level;
197     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
198   }
199 
200   ~ScopedLineState() {
201     if (!Parser.Line->Tokens.empty())
202       Parser.addUnwrappedLine();
203     assert(Parser.Line->Tokens.empty());
204     Parser.Line = std::move(PreBlockLine);
205     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
206       Parser.MustBreakBeforeNextToken = true;
207     Parser.CurrentLines = OriginalLines;
208   }
209 
210 private:
211   UnwrappedLineParser &Parser;
212 
213   std::unique_ptr<UnwrappedLine> PreBlockLine;
214   SmallVectorImpl<UnwrappedLine> *OriginalLines;
215 };
216 
217 class CompoundStatementIndenter {
218 public:
219   CompoundStatementIndenter(UnwrappedLineParser *Parser,
220                             const FormatStyle &Style, unsigned &LineLevel)
221       : CompoundStatementIndenter(Parser, LineLevel,
222                                   Style.BraceWrapping.AfterControlStatement,
223                                   Style.BraceWrapping.IndentBraces) {}
224   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
225                             bool WrapBrace, bool IndentBrace)
226       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
227     if (WrapBrace)
228       Parser->addUnwrappedLine();
229     if (IndentBrace)
230       ++LineLevel;
231   }
232   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
233 
234 private:
235   unsigned &LineLevel;
236   unsigned OldLineLevel;
237 };
238 
239 namespace {
240 
241 class IndexedTokenSource : public FormatTokenSource {
242 public:
243   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
244       : Tokens(Tokens), Position(-1) {}
245 
246   FormatToken *getNextToken() override {
247     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
248       LLVM_DEBUG({
249         llvm::dbgs() << "Next ";
250         dbgToken(Position);
251       });
252       return Tokens[Position];
253     }
254     ++Position;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Next ";
257       dbgToken(Position);
258     });
259     return Tokens[Position];
260   }
261 
262   FormatToken *getPreviousToken() override {
263     return Position > 0 ? Tokens[Position - 1] : nullptr;
264   }
265 
266   FormatToken *peekNextToken() override {
267     int Next = Position + 1;
268     LLVM_DEBUG({
269       llvm::dbgs() << "Peeking ";
270       dbgToken(Next);
271     });
272     return Tokens[Next];
273   }
274 
275   FormatToken *peekNextToken(int N) override {
276     assert(N > 0);
277     int Next = Position + N;
278     LLVM_DEBUG({
279       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
280       dbgToken(Next);
281     });
282     return Tokens[Next];
283   }
284 
285   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
286 
287   unsigned getPosition() override {
288     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
289     assert(Position >= 0);
290     return Position;
291   }
292 
293   FormatToken *setPosition(unsigned P) override {
294     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
295     Position = P;
296     return Tokens[Position];
297   }
298 
299   void reset() { Position = -1; }
300 
301 private:
302   void dbgToken(int Position, llvm::StringRef Indent = "") {
303     FormatToken *Tok = Tokens[Position];
304     llvm::dbgs() << Indent << "[" << Position
305                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
306                  << ", Macro: " << !!Tok->MacroCtx << "\n";
307   }
308 
309   ArrayRef<FormatToken *> Tokens;
310   int Position;
311 };
312 
313 } // end anonymous namespace
314 
315 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
316                                          const AdditionalKeywords &Keywords,
317                                          unsigned FirstStartColumn,
318                                          ArrayRef<FormatToken *> Tokens,
319                                          UnwrappedLineConsumer &Callback)
320     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
321       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
322       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
323       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
324       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
325                        ? IG_Rejected
326                        : IG_Inited),
327       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
328 
329 void UnwrappedLineParser::reset() {
330   PPBranchLevel = -1;
331   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
332                      ? IG_Rejected
333                      : IG_Inited;
334   IncludeGuardToken = nullptr;
335   Line.reset(new UnwrappedLine);
336   CommentsBeforeNextToken.clear();
337   FormatTok = nullptr;
338   MustBreakBeforeNextToken = false;
339   PreprocessorDirectives.clear();
340   CurrentLines = &Lines;
341   DeclarationScopeStack.clear();
342   NestedTooDeep.clear();
343   PPStack.clear();
344   Line->FirstStartColumn = FirstStartColumn;
345 }
346 
347 void UnwrappedLineParser::parse() {
348   IndexedTokenSource TokenSource(AllTokens);
349   Line->FirstStartColumn = FirstStartColumn;
350   do {
351     LLVM_DEBUG(llvm::dbgs() << "----\n");
352     reset();
353     Tokens = &TokenSource;
354     TokenSource.reset();
355 
356     readToken();
357     parseFile();
358 
359     // If we found an include guard then all preprocessor directives (other than
360     // the guard) are over-indented by one.
361     if (IncludeGuard == IG_Found) {
362       for (auto &Line : Lines)
363         if (Line.InPPDirective && Line.Level > 0)
364           --Line.Level;
365     }
366 
367     // Create line with eof token.
368     pushToken(FormatTok);
369     addUnwrappedLine();
370 
371     for (const UnwrappedLine &Line : Lines)
372       Callback.consumeUnwrappedLine(Line);
373 
374     Callback.finishRun();
375     Lines.clear();
376     while (!PPLevelBranchIndex.empty() &&
377            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
378       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
379       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
380     }
381     if (!PPLevelBranchIndex.empty()) {
382       ++PPLevelBranchIndex.back();
383       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
384       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
385     }
386   } while (!PPLevelBranchIndex.empty());
387 }
388 
389 void UnwrappedLineParser::parseFile() {
390   // The top-level context in a file always has declarations, except for pre-
391   // processor directives and JavaScript files.
392   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
393   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
394                                           MustBeDeclaration);
395   if (Style.Language == FormatStyle::LK_TextProto)
396     parseBracedList();
397   else
398     parseLevel();
399   // Make sure to format the remaining tokens.
400   //
401   // LK_TextProto is special since its top-level is parsed as the body of a
402   // braced list, which does not necessarily have natural line separators such
403   // as a semicolon. Comments after the last entry that have been determined to
404   // not belong to that line, as in:
405   //   key: value
406   //   // endfile comment
407   // do not have a chance to be put on a line of their own until this point.
408   // Here we add this newline before end-of-file comments.
409   if (Style.Language == FormatStyle::LK_TextProto &&
410       !CommentsBeforeNextToken.empty()) {
411     addUnwrappedLine();
412   }
413   flushComments(true);
414   addUnwrappedLine();
415 }
416 
417 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
418   do {
419     switch (FormatTok->Tok.getKind()) {
420     case tok::l_brace:
421       return;
422     default:
423       if (FormatTok->is(Keywords.kw_where)) {
424         addUnwrappedLine();
425         nextToken();
426         parseCSharpGenericTypeConstraint();
427         break;
428       }
429       nextToken();
430       break;
431     }
432   } while (!eof());
433 }
434 
435 void UnwrappedLineParser::parseCSharpAttribute() {
436   int UnpairedSquareBrackets = 1;
437   do {
438     switch (FormatTok->Tok.getKind()) {
439     case tok::r_square:
440       nextToken();
441       --UnpairedSquareBrackets;
442       if (UnpairedSquareBrackets == 0) {
443         addUnwrappedLine();
444         return;
445       }
446       break;
447     case tok::l_square:
448       ++UnpairedSquareBrackets;
449       nextToken();
450       break;
451     default:
452       nextToken();
453       break;
454     }
455   } while (!eof());
456 }
457 
458 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
459   if (!Lines.empty() && Lines.back().InPPDirective)
460     return true;
461 
462   const FormatToken *Previous = Tokens->getPreviousToken();
463   return Previous && Previous->is(tok::comment) &&
464          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
465 }
466 
/// \brief Parses the tokens of one nesting level: loops over the token
/// stream, dispatching on the kind of each token, until the end of input or,
/// when \p OpeningBrace is given, until the closing brace of the level is
/// reached (that brace is left unconsumed).
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param NextLBracesType The type for left brace found in this level.
/// \param IfKind The \p if statement kind in the level.
/// \param IfLeftBrace The left brace of the \p if block in the level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     bool CanContainBracedList,
                                     TokenType NextLBracesType,
                                     IfStmtKind *IfKind,
                                     FormatToken **IfLeftBrace) {
  // Brace type handed down to the structural elements parsed at this level:
  // inside a compound requirement it is the braced-list type, otherwise none.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Captured on entry, before any token of this level has been consumed.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  FormatToken *IfLBrace = nullptr;
  bool HasDoWhile = false;
  bool HasLabel = false;
  // Number of elements parsed via ParseDefault or as a nested block.
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;

  do {
    // Attribute macros are skipped without starting a structural element.
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched as if they were braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Parses one structural element and counts it. Once HasDoWhile or HasLabel
    // has become true, nullptr is passed instead of its address so that the
    // flag cannot be changed again by later elements.
    auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
                         &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
      parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
                             &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment at this level ends up on a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown) {
        FormatTok->setFinalizedType(NextLBracesType);
      } else if (FormatTok->Previous &&
                 FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      // Try a braced list first (never for macro block begins); fall back to
      // parsing a nested block.
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList()) {
        continue;
      }
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
                 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
                 NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // This closes the level. The return value is only ever true when
        // brace removal is enabled and the level was opened by the brace of a
        // control statement or of an else.
        if (!Style.RemoveBracesLLVM ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
          return false;
        }
        // Not a simple block if the closer is a macro block end rather than a
        // real '}', if the level does not hold exactly one statement, if it
        // contains a label or do-while, or if a comment or preprocessor
        // directive precedes the level or its closing brace.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            HasDoWhile || IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment following the brace on the same line also disqualifies
        // the block.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        if (IfLeftBrace)
          *IfLeftBrace = IfLBrace;
        return true;
      }
      // Without an opening brace the stray '}' is consumed and ends the line.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past any comments to see what follows 'default', then
      // rewind to where we were.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // The first label seen at this level raises the line level by one if
      // case labels are indented, or if we are inside a preprocessor directive
      // at level 1.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels ||
           (Line->InPPDirective && Line->Level == 1))) {
        ++Line->Level;
      }
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());

  return false;
}
608 
/// Starting at the current token, which must be an opening brace, scans ahead
/// and sets the block kind (BK_BracedInit or BK_Block) of the braces it passes
/// over. The scan ends at end of input or once the initial brace has been
/// closed; the token source is then rewound to where it was on entry.
/// \param ExpectClassBody If true, a semicolon directly after the closing
/// brace of the outermost brace pair is not taken as a sign of a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less)) {
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        } else if (PrevTok->is(tok::r_paren)) {
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        }
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only decide here if the matching opening brace is still undecided.
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);

          // In JavaScript, 'of', 'in' or 'as' after the brace indicates a
          // braced list.
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          // In C++, an opening paren after the brace indicates a braced list.
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If a comma, period, colon, closing paren, closing square bracket,
          // opening brace or ellipsis follows the closing brace, we assume
          // this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis);

          // An identifier after the brace counts as well, unless the token
          // before the closing brace is a semicolon or a brace.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          // A semicolon after the brace counts too, except directly after the
          // outermost brace pair when a class body is expected.
          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Mark the closing brace and its matching opening brace alike.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only identifiers typed as statement macros are handled like the
      // statement tokens below.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these inside a still-undecided brace marks it as a block.
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (FormatToken *LBrace : LBraceStack)
    if (LBrace->is(BK_Unknown))
      LBrace->setBlockKind(BK_Block);

  // Rewind so that parsing resumes where this lookahead started.
  FormatTok = Tokens->setPosition(StoredPosition);
}
761 
// Folds the std::hash value of \p v into \p seed: the hash is offset by the
// constant 0x9e3779b9 and by two shifted copies of the current seed, and the
// sum is XOR-ed into the seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  const std::size_t Mix = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mix;
}
767 
768 size_t UnwrappedLineParser::computePPHash() const {
769   size_t h = 0;
770   for (const auto &i : PPStack) {
771     hash_combine(h, size_t(i.Kind));
772     hash_combine(h, i.Line);
773   }
774   return h;
775 }
776 
777 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
778 // is not null, subtracts its length (plus the preceding space) when computing
779 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
780 // running the token annotator on it so that we can restore them afterward.
781 bool UnwrappedLineParser::mightFitOnOneLine(
782     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
783   const auto ColumnLimit = Style.ColumnLimit;
784   if (ColumnLimit == 0)
785     return true;
786 
787   auto &Tokens = ParsedLine.Tokens;
788   assert(!Tokens.empty());
789 
790   const auto *LastToken = Tokens.back().Tok;
791   assert(LastToken);
792 
793   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
794 
795   int Index = 0;
796   for (const auto &Token : Tokens) {
797     assert(Token.Tok);
798     auto &SavedToken = SavedTokens[Index++];
799     SavedToken.Tok = new FormatToken;
800     SavedToken.Tok->copyFrom(*Token.Tok);
801     SavedToken.Children = std::move(Token.Children);
802   }
803 
804   AnnotatedLine Line(ParsedLine);
805   assert(Line.Last == LastToken);
806 
807   TokenAnnotator Annotator(Style, Keywords);
808   Annotator.annotate(Line);
809   Annotator.calculateFormattingInformation(Line);
810 
811   auto Length = LastToken->TotalLength;
812   if (OpeningBrace) {
813     assert(OpeningBrace != Tokens.front().Tok);
814     Length -= OpeningBrace->TokenText.size() + 1;
815   }
816 
817   Index = 0;
818   for (auto &Token : Tokens) {
819     const auto &SavedToken = SavedTokens[Index++];
820     Token.Tok->copyFrom(*SavedToken.Tok);
821     Token.Children = std::move(SavedToken.Children);
822     delete SavedToken.Tok;
823   }
824 
825   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
826 }
827 
828 FormatToken *UnwrappedLineParser::parseBlock(
829     bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
830     IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
831     bool CanContainBracedList, TokenType NextLBracesType) {
832   auto HandleVerilogBlockLabel = [this]() {
833     // ":" name
834     if (Style.isVerilog() && FormatTok->is(tok::colon)) {
835       nextToken();
836       if (Keywords.isVerilogIdentifier(*FormatTok))
837         nextToken();
838     }
839   };
840 
841   assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
842           (Style.isVerilog() && Keywords.isVerilogBegin(*FormatTok))) &&
843          "'{' or macro block token expected");
844   FormatToken *Tok = FormatTok;
845   const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
846   auto Index = CurrentLines->size();
847   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
848   FormatTok->setBlockKind(BK_Block);
849 
850   // For Whitesmiths mode, jump to the next level prior to skipping over the
851   // braces.
852   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
853     ++Line->Level;
854 
855   size_t PPStartHash = computePPHash();
856 
857   const unsigned InitialLevel = Line->Level;
858   nextToken(/*LevelDifference=*/AddLevels);
859   HandleVerilogBlockLabel();
860 
861   // Bail out if there are too many levels. Otherwise, the stack might overflow.
862   if (Line->Level > 300)
863     return nullptr;
864 
865   if (MacroBlock && FormatTok->is(tok::l_paren))
866     parseParens();
867 
868   size_t NbPreprocessorDirectives =
869       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
870   addUnwrappedLine();
871   size_t OpeningLineIndex =
872       CurrentLines->empty()
873           ? (UnwrappedLine::kInvalidIndex)
874           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
875 
876   // Whitesmiths is weird here. The brace needs to be indented for the namespace
877   // block, but the block itself may not be indented depending on the style
878   // settings. This allows the format to back up one level in those cases.
879   if (UnindentWhitesmithsBraces)
880     --Line->Level;
881 
882   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
883                                           MustBeDeclaration);
884   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
885     Line->Level += AddLevels;
886 
887   FormatToken *IfLBrace = nullptr;
888   const bool SimpleBlock =
889       parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);
890 
891   if (eof())
892     return IfLBrace;
893 
894   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
895                  : !FormatTok->is(tok::r_brace)) {
896     Line->Level = InitialLevel;
897     FormatTok->setBlockKind(BK_Block);
898     return IfLBrace;
899   }
900 
901   auto RemoveBraces = [=]() mutable {
902     if (!SimpleBlock)
903       return false;
904     assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
905     assert(FormatTok->is(tok::r_brace));
906     const bool WrappedOpeningBrace = !Tok->Previous;
907     if (WrappedOpeningBrace && FollowedByComment)
908       return false;
909     const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
910     if (KeepBraces && !HasRequiredIfBraces)
911       return false;
912     if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
913       const FormatToken *Previous = Tokens->getPreviousToken();
914       assert(Previous);
915       if (Previous->is(tok::r_brace) && !Previous->Optional)
916         return false;
917     }
918     assert(!CurrentLines->empty());
919     auto &LastLine = CurrentLines->back();
920     if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
921       return false;
922     if (Tok->is(TT_ElseLBrace))
923       return true;
924     if (WrappedOpeningBrace) {
925       assert(Index > 0);
926       --Index; // The line above the wrapped l_brace.
927       Tok = nullptr;
928     }
929     return mightFitOnOneLine((*CurrentLines)[Index], Tok);
930   };
931   if (RemoveBraces()) {
932     Tok->MatchingParen = FormatTok;
933     FormatTok->MatchingParen = Tok;
934   }
935 
936   size_t PPEndHash = computePPHash();
937 
938   // Munch the closing brace.
939   nextToken(/*LevelDifference=*/-AddLevels);
940   HandleVerilogBlockLabel();
941 
942   if (MacroBlock && FormatTok->is(tok::l_paren))
943     parseParens();
944 
945   if (FormatTok->is(tok::kw_noexcept)) {
946     // A noexcept in a requires expression.
947     nextToken();
948   }
949 
950   if (FormatTok->is(tok::arrow)) {
951     // Following the } or noexcept we can find a trailing return type arrow
952     // as part of an implicit conversion constraint.
953     nextToken();
954     parseStructuralElement();
955   }
956 
957   if (MunchSemi && FormatTok->is(tok::semi))
958     nextToken();
959 
960   Line->Level = InitialLevel;
961 
962   if (PPStartHash == PPEndHash) {
963     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
964     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
965       // Update the opening line to add the forward reference as well
966       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
967           CurrentLines->size() - 1;
968     }
969   }
970 
971   return IfLBrace;
972 }
973 
974 static bool isGoogScope(const UnwrappedLine &Line) {
975   // FIXME: Closure-library specific stuff should not be hard-coded but be
976   // configurable.
977   if (Line.Tokens.size() < 4)
978     return false;
979   auto I = Line.Tokens.begin();
980   if (I->Tok->TokenText != "goog")
981     return false;
982   ++I;
983   if (I->Tok->isNot(tok::period))
984     return false;
985   ++I;
986   if (I->Tok->TokenText != "scope")
987     return false;
988   ++I;
989   return I->Tok->is(tok::l_paren);
990 }
991 
992 static bool isIIFE(const UnwrappedLine &Line,
993                    const AdditionalKeywords &Keywords) {
994   // Look for the start of an immediately invoked anonymous function.
995   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
996   // This is commonly done in JavaScript to create a new, anonymous scope.
997   // Example: (function() { ... })()
998   if (Line.Tokens.size() < 3)
999     return false;
1000   auto I = Line.Tokens.begin();
1001   if (I->Tok->isNot(tok::l_paren))
1002     return false;
1003   ++I;
1004   if (I->Tok->isNot(Keywords.kw_function))
1005     return false;
1006   ++I;
1007   return I->Tok->is(tok::l_paren);
1008 }
1009 
1010 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
1011                                    const FormatToken &InitialToken) {
1012   tok::TokenKind Kind = InitialToken.Tok.getKind();
1013   if (InitialToken.is(TT_NamespaceMacro))
1014     Kind = tok::kw_namespace;
1015 
1016   switch (Kind) {
1017   case tok::kw_namespace:
1018     return Style.BraceWrapping.AfterNamespace;
1019   case tok::kw_class:
1020     return Style.BraceWrapping.AfterClass;
1021   case tok::kw_union:
1022     return Style.BraceWrapping.AfterUnion;
1023   case tok::kw_struct:
1024     return Style.BraceWrapping.AfterStruct;
1025   case tok::kw_enum:
1026     return Style.BraceWrapping.AfterEnum;
1027   default:
1028     return false;
1029   }
1030 }
1031 
1032 void UnwrappedLineParser::parseChildBlock(
1033     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
1034   assert(FormatTok->is(tok::l_brace));
1035   FormatTok->setBlockKind(BK_Block);
1036   const FormatToken *OpeningBrace = FormatTok;
1037   nextToken();
1038   {
1039     bool SkipIndent = (Style.isJavaScript() &&
1040                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
1041     ScopedLineState LineState(*this);
1042     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
1043                                             /*MustBeDeclaration=*/false);
1044     Line->Level += SkipIndent ? 0 : 1;
1045     parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
1046     flushComments(isOnNewLine(*FormatTok));
1047     Line->Level -= SkipIndent ? 0 : 1;
1048   }
1049   nextToken();
1050 }
1051 
1052 void UnwrappedLineParser::parsePPDirective() {
1053   assert(FormatTok->is(tok::hash) && "'#' expected");
1054   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1055 
1056   nextToken();
1057 
1058   if (!FormatTok->Tok.getIdentifierInfo()) {
1059     parsePPUnknown();
1060     return;
1061   }
1062 
1063   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1064   case tok::pp_define:
1065     parsePPDefine();
1066     return;
1067   case tok::pp_if:
1068     parsePPIf(/*IfDef=*/false);
1069     break;
1070   case tok::pp_ifdef:
1071   case tok::pp_ifndef:
1072     parsePPIf(/*IfDef=*/true);
1073     break;
1074   case tok::pp_else:
1075     parsePPElse();
1076     break;
1077   case tok::pp_elifdef:
1078   case tok::pp_elifndef:
1079   case tok::pp_elif:
1080     parsePPElIf();
1081     break;
1082   case tok::pp_endif:
1083     parsePPEndIf();
1084     break;
1085   default:
1086     parsePPUnknown();
1087     break;
1088   }
1089 }
1090 
1091 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1092   size_t Line = CurrentLines->size();
1093   if (CurrentLines == &PreprocessorDirectives)
1094     Line += Lines.size();
1095 
1096   if (Unreachable ||
1097       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1098     PPStack.push_back({PP_Unreachable, Line});
1099   } else {
1100     PPStack.push_back({PP_Conditional, Line});
1101   }
1102 }
1103 
1104 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1105   ++PPBranchLevel;
1106   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1107   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1108     PPLevelBranchIndex.push_back(0);
1109     PPLevelBranchCount.push_back(0);
1110   }
1111   PPChainBranchIndex.push(0);
1112   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1113   conditionalCompilationCondition(Unreachable || Skip);
1114 }
1115 
1116 void UnwrappedLineParser::conditionalCompilationAlternative() {
1117   if (!PPStack.empty())
1118     PPStack.pop_back();
1119   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1120   if (!PPChainBranchIndex.empty())
1121     ++PPChainBranchIndex.top();
1122   conditionalCompilationCondition(
1123       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1124       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1125 }
1126 
1127 void UnwrappedLineParser::conditionalCompilationEnd() {
1128   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1129   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1130     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1131       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1132   }
1133   // Guard against #endif's without #if.
1134   if (PPBranchLevel > -1)
1135     --PPBranchLevel;
1136   if (!PPChainBranchIndex.empty())
1137     PPChainBranchIndex.pop();
1138   if (!PPStack.empty())
1139     PPStack.pop_back();
1140 }
1141 
1142 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1143   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1144   nextToken();
1145   bool Unreachable = false;
1146   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1147     Unreachable = true;
1148   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1149     Unreachable = true;
1150   conditionalCompilationStart(Unreachable);
1151   FormatToken *IfCondition = FormatTok;
1152   // If there's a #ifndef on the first line, and the only lines before it are
1153   // comments, it could be an include guard.
1154   bool MaybeIncludeGuard = IfNDef;
1155   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1156     for (auto &Line : Lines) {
1157       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1158         MaybeIncludeGuard = false;
1159         IncludeGuard = IG_Rejected;
1160         break;
1161       }
1162     }
1163   }
1164   --PPBranchLevel;
1165   parsePPUnknown();
1166   ++PPBranchLevel;
1167   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1168     IncludeGuard = IG_IfNdefed;
1169     IncludeGuardToken = IfCondition;
1170   }
1171 }
1172 
1173 void UnwrappedLineParser::parsePPElse() {
1174   // If a potential include guard has an #else, it's not an include guard.
1175   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1176     IncludeGuard = IG_Rejected;
1177   conditionalCompilationAlternative();
1178   if (PPBranchLevel > -1)
1179     --PPBranchLevel;
1180   parsePPUnknown();
1181   ++PPBranchLevel;
1182 }
1183 
1184 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1185 
1186 void UnwrappedLineParser::parsePPEndIf() {
1187   conditionalCompilationEnd();
1188   parsePPUnknown();
1189   // If the #endif of a potential include guard is the last thing in the file,
1190   // then we found an include guard.
1191   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1192       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1193     IncludeGuard = IG_Found;
1194   }
1195 }
1196 
1197 void UnwrappedLineParser::parsePPDefine() {
1198   nextToken();
1199 
1200   if (!FormatTok->Tok.getIdentifierInfo()) {
1201     IncludeGuard = IG_Rejected;
1202     IncludeGuardToken = nullptr;
1203     parsePPUnknown();
1204     return;
1205   }
1206 
1207   if (IncludeGuard == IG_IfNdefed &&
1208       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1209     IncludeGuard = IG_Defined;
1210     IncludeGuardToken = nullptr;
1211     for (auto &Line : Lines) {
1212       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1213         IncludeGuard = IG_Rejected;
1214         break;
1215       }
1216     }
1217   }
1218 
1219   // In the context of a define, even keywords should be treated as normal
1220   // identifiers. Setting the kind to identifier is not enough, because we need
1221   // to treat additional keywords like __except as well, which are already
1222   // identifiers. Setting the identifier info to null interferes with include
1223   // guard processing above, and changes preprocessing nesting.
1224   FormatTok->Tok.setKind(tok::identifier);
1225   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1226   nextToken();
1227   if (FormatTok->Tok.getKind() == tok::l_paren &&
1228       !FormatTok->hasWhitespaceBefore()) {
1229     parseParens();
1230   }
1231   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1232     Line->Level += PPBranchLevel + 1;
1233   addUnwrappedLine();
1234   ++Line->Level;
1235 
1236   // Errors during a preprocessor directive can only affect the layout of the
1237   // preprocessor directive, and thus we ignore them. An alternative approach
1238   // would be to use the same approach we use on the file level (no
1239   // re-indentation if there was a structural error) within the macro
1240   // definition.
1241   parseFile();
1242 }
1243 
1244 void UnwrappedLineParser::parsePPUnknown() {
1245   do {
1246     nextToken();
1247   } while (!eof());
1248   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1249     Line->Level += PPBranchLevel + 1;
1250   addUnwrappedLine();
1251 }
1252 
1253 // Here we exclude certain tokens that are not usually the first token in an
1254 // unwrapped line. This is used in attempt to distinguish macro calls without
1255 // trailing semicolons from other constructs split to several lines.
1256 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1257   // Semicolon can be a null-statement, l_square can be a start of a macro or
1258   // a C++11 attribute, but this doesn't seem to be common.
1259   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1260          Tok.isNot(TT_AttributeSquare) &&
1261          // Tokens that can only be used as binary operators and a part of
1262          // overloaded operator names.
1263          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1264          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1265          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1266          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1267          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1268          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1269          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1270          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1271          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1272          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1273          Tok.isNot(tok::lesslessequal) &&
1274          // Colon is used in labels, base class lists, initializer lists,
1275          // range-based for loops, ternary operator, but should never be the
1276          // first token in an unwrapped line.
1277          Tok.isNot(tok::colon) &&
1278          // 'noexcept' is a trailing annotation.
1279          Tok.isNot(tok::kw_noexcept);
1280 }
1281 
1282 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1283                           const FormatToken *FormatTok) {
1284   // FIXME: This returns true for C/C++ keywords like 'struct'.
1285   return FormatTok->is(tok::identifier) &&
1286          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1287           !FormatTok->isOneOf(
1288               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1289               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1290               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1291               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1292               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1293               Keywords.kw_instanceof, Keywords.kw_interface,
1294               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1295 }
1296 
1297 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1298                                  const FormatToken *FormatTok) {
1299   return FormatTok->Tok.isLiteral() ||
1300          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1301          mustBeJSIdent(Keywords, FormatTok);
1302 }
1303 
1304 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1305 // when encountered after a value (see mustBeJSIdentOrValue).
1306 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1307                            const FormatToken *FormatTok) {
1308   return FormatTok->isOneOf(
1309       tok::kw_return, Keywords.kw_yield,
1310       // conditionals
1311       tok::kw_if, tok::kw_else,
1312       // loops
1313       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1314       // switch/case
1315       tok::kw_switch, tok::kw_case,
1316       // exceptions
1317       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1318       // declaration
1319       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1320       Keywords.kw_async, Keywords.kw_function,
1321       // import/export
1322       Keywords.kw_import, tok::kw_export);
1323 }
1324 
1325 // Checks whether a token is a type in K&R C (aka C78).
1326 static bool isC78Type(const FormatToken &Tok) {
1327   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1328                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1329                      tok::identifier);
1330 }
1331 
1332 // This function checks whether a token starts the first parameter declaration
1333 // in a K&R C (aka C78) function definition, e.g.:
1334 //   int f(a, b)
1335 //   short a, b;
1336 //   {
1337 //      return a + b;
1338 //   }
1339 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1340                                const FormatToken *FuncName) {
1341   assert(Tok);
1342   assert(Next);
1343   assert(FuncName);
1344 
1345   if (FuncName->isNot(tok::identifier))
1346     return false;
1347 
1348   const FormatToken *Prev = FuncName->Previous;
1349   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1350     return false;
1351 
1352   if (!isC78Type(*Tok) &&
1353       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1354     return false;
1355   }
1356 
1357   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1358     return false;
1359 
1360   Tok = Tok->Previous;
1361   if (!Tok || Tok->isNot(tok::r_paren))
1362     return false;
1363 
1364   Tok = Tok->Previous;
1365   if (!Tok || Tok->isNot(tok::identifier))
1366     return false;
1367 
1368   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1369 }
1370 
1371 void UnwrappedLineParser::parseModuleImport() {
1372   nextToken();
1373   while (!eof()) {
1374     if (FormatTok->is(tok::colon)) {
1375       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1376     }
1377     // Handle import <foo/bar.h> as we would an include statement.
1378     else if (FormatTok->is(tok::less)) {
1379       nextToken();
1380       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1381         // Mark tokens up to the trailing line comments as implicit string
1382         // literals.
1383         if (FormatTok->isNot(tok::comment) &&
1384             !FormatTok->TokenText.startswith("//")) {
1385           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1386         }
1387         nextToken();
1388       }
1389     }
1390     if (FormatTok->is(tok::semi)) {
1391       nextToken();
1392       break;
1393     }
1394     nextToken();
1395   }
1396 
1397   addUnwrappedLine();
1398 }
1399 
1400 // readTokenWithJavaScriptASI reads the next token and terminates the current
1401 // line if JavaScript Automatic Semicolon Insertion must
1402 // happen between the current token and the next token.
1403 //
1404 // This method is conservative - it cannot cover all edge cases of JavaScript,
1405 // but only aims to correctly handle certain well known cases. It *must not*
1406 // return true in speculative cases.
1407 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1408   FormatToken *Previous = FormatTok;
1409   readToken();
1410   FormatToken *Next = FormatTok;
1411 
1412   bool IsOnSameLine =
1413       CommentsBeforeNextToken.empty()
1414           ? Next->NewlinesBefore == 0
1415           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1416   if (IsOnSameLine)
1417     return;
1418 
1419   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1420   bool PreviousStartsTemplateExpr =
1421       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1422   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1423     // If the line contains an '@' sign, the previous token might be an
1424     // annotation, which can precede another identifier/value.
1425     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1426       return LineNode.Tok->is(tok::at);
1427     });
1428     if (HasAt)
1429       return;
1430   }
1431   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1432     return addUnwrappedLine();
1433   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1434   bool NextEndsTemplateExpr =
1435       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1436   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1437       (PreviousMustBeValue ||
1438        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1439                          tok::minusminus))) {
1440     return addUnwrappedLine();
1441   }
1442   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1443       isJSDeclOrStmt(Keywords, Next)) {
1444     return addUnwrappedLine();
1445   }
1446 }
1447 
1448 void UnwrappedLineParser::parseStructuralElement(
1449     bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
1450     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1451   if (Style.Language == FormatStyle::LK_TableGen &&
1452       FormatTok->is(tok::pp_include)) {
1453     nextToken();
1454     if (FormatTok->is(tok::string_literal))
1455       nextToken();
1456     addUnwrappedLine();
1457     return;
1458   }
1459   switch (FormatTok->Tok.getKind()) {
1460   case tok::kw_asm:
1461     nextToken();
1462     if (FormatTok->is(tok::l_brace)) {
1463       FormatTok->setFinalizedType(TT_InlineASMBrace);
1464       nextToken();
1465       while (FormatTok && FormatTok->isNot(tok::eof)) {
1466         if (FormatTok->is(tok::r_brace)) {
1467           FormatTok->setFinalizedType(TT_InlineASMBrace);
1468           nextToken();
1469           addUnwrappedLine();
1470           break;
1471         }
1472         FormatTok->Finalized = true;
1473         nextToken();
1474       }
1475     }
1476     break;
1477   case tok::kw_namespace:
1478     parseNamespace();
1479     return;
1480   case tok::kw_public:
1481   case tok::kw_protected:
1482   case tok::kw_private:
1483     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1484         Style.isCSharp()) {
1485       nextToken();
1486     } else {
1487       parseAccessSpecifier();
1488     }
1489     return;
1490   case tok::kw_if: {
1491     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1492       // field/method declaration.
1493       break;
1494     }
1495     FormatToken *Tok = parseIfThenElse(IfKind);
1496     if (IfLeftBrace)
1497       *IfLeftBrace = Tok;
1498     return;
1499   }
1500   case tok::kw_for:
1501   case tok::kw_while:
1502     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1503       // field/method declaration.
1504       break;
1505     }
1506     parseForOrWhileLoop();
1507     return;
1508   case tok::kw_do:
1509     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1510       // field/method declaration.
1511       break;
1512     }
1513     parseDoWhile();
1514     if (HasDoWhile)
1515       *HasDoWhile = true;
1516     return;
1517   case tok::kw_switch:
1518     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1519       // 'switch: string' field declaration.
1520       break;
1521     }
1522     parseSwitch();
1523     return;
1524   case tok::kw_default:
1525     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1526       // 'default: string' field declaration.
1527       break;
1528     }
1529     nextToken();
1530     if (FormatTok->is(tok::colon)) {
1531       parseLabel();
1532       return;
1533     }
1534     // e.g. "default void f() {}" in a Java interface.
1535     break;
1536   case tok::kw_case:
1537     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1538       // 'case: string' field declaration.
1539       nextToken();
1540       break;
1541     }
1542     parseCaseLabel();
1543     return;
1544   case tok::kw_try:
1545   case tok::kw___try:
1546     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1547       // field/method declaration.
1548       break;
1549     }
1550     parseTryCatch();
1551     return;
1552   case tok::kw_extern:
1553     nextToken();
1554     if (FormatTok->is(tok::string_literal)) {
1555       nextToken();
1556       if (FormatTok->is(tok::l_brace)) {
1557         if (Style.BraceWrapping.AfterExternBlock)
1558           addUnwrappedLine();
1559         // Either we indent or for backwards compatibility we follow the
1560         // AfterExternBlock style.
1561         unsigned AddLevels =
1562             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1563                     (Style.BraceWrapping.AfterExternBlock &&
1564                      Style.IndentExternBlock ==
1565                          FormatStyle::IEBS_AfterExternBlock)
1566                 ? 1u
1567                 : 0u;
1568         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1569         addUnwrappedLine();
1570         return;
1571       }
1572     }
1573     break;
1574   case tok::kw_export:
1575     if (Style.isJavaScript()) {
1576       parseJavaScriptEs6ImportExport();
1577       return;
1578     }
1579     if (!Style.isCpp())
1580       break;
1581     // Handle C++ "(inline|export) namespace".
1582     LLVM_FALLTHROUGH;
1583   case tok::kw_inline:
1584     nextToken();
1585     if (FormatTok->is(tok::kw_namespace)) {
1586       parseNamespace();
1587       return;
1588     }
1589     break;
1590   case tok::identifier:
1591     if (FormatTok->is(TT_ForEachMacro)) {
1592       parseForOrWhileLoop();
1593       return;
1594     }
1595     if (FormatTok->is(TT_MacroBlockBegin)) {
1596       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1597                  /*MunchSemi=*/false);
1598       return;
1599     }
1600     if (FormatTok->is(Keywords.kw_import)) {
1601       if (Style.isJavaScript()) {
1602         parseJavaScriptEs6ImportExport();
1603         return;
1604       }
1605       if (Style.Language == FormatStyle::LK_Proto) {
1606         nextToken();
1607         if (FormatTok->is(tok::kw_public))
1608           nextToken();
1609         if (!FormatTok->is(tok::string_literal))
1610           return;
1611         nextToken();
1612         if (FormatTok->is(tok::semi))
1613           nextToken();
1614         addUnwrappedLine();
1615         return;
1616       }
1617       if (Style.isCpp()) {
1618         parseModuleImport();
1619         return;
1620       }
1621     }
1622     if (Style.isCpp() &&
1623         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1624                            Keywords.kw_slots, Keywords.kw_qslots)) {
1625       nextToken();
1626       if (FormatTok->is(tok::colon)) {
1627         nextToken();
1628         addUnwrappedLine();
1629         return;
1630       }
1631     }
1632     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1633       parseStatementMacro();
1634       return;
1635     }
1636     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1637       parseNamespace();
1638       return;
1639     }
1640     // In all other cases, parse the declaration.
1641     break;
1642   default:
1643     break;
1644   }
1645   do {
1646     const FormatToken *Previous = FormatTok->Previous;
1647     switch (FormatTok->Tok.getKind()) {
1648     case tok::at:
1649       nextToken();
1650       if (FormatTok->is(tok::l_brace)) {
1651         nextToken();
1652         parseBracedList();
1653         break;
1654       } else if (Style.Language == FormatStyle::LK_Java &&
1655                  FormatTok->is(Keywords.kw_interface)) {
1656         nextToken();
1657         break;
1658       }
1659       switch (FormatTok->Tok.getObjCKeywordID()) {
1660       case tok::objc_public:
1661       case tok::objc_protected:
1662       case tok::objc_package:
1663       case tok::objc_private:
1664         return parseAccessSpecifier();
1665       case tok::objc_interface:
1666       case tok::objc_implementation:
1667         return parseObjCInterfaceOrImplementation();
1668       case tok::objc_protocol:
1669         if (parseObjCProtocol())
1670           return;
1671         break;
1672       case tok::objc_end:
1673         return; // Handled by the caller.
1674       case tok::objc_optional:
1675       case tok::objc_required:
1676         nextToken();
1677         addUnwrappedLine();
1678         return;
1679       case tok::objc_autoreleasepool:
1680         nextToken();
1681         if (FormatTok->is(tok::l_brace)) {
1682           if (Style.BraceWrapping.AfterControlStatement ==
1683               FormatStyle::BWACS_Always) {
1684             addUnwrappedLine();
1685           }
1686           parseBlock();
1687         }
1688         addUnwrappedLine();
1689         return;
1690       case tok::objc_synchronized:
1691         nextToken();
1692         if (FormatTok->is(tok::l_paren)) {
1693           // Skip synchronization object
1694           parseParens();
1695         }
1696         if (FormatTok->is(tok::l_brace)) {
1697           if (Style.BraceWrapping.AfterControlStatement ==
1698               FormatStyle::BWACS_Always) {
1699             addUnwrappedLine();
1700           }
1701           parseBlock();
1702         }
1703         addUnwrappedLine();
1704         return;
1705       case tok::objc_try:
1706         // This branch isn't strictly necessary (the kw_try case below would
1707         // do this too after the tok::at is parsed above).  But be explicit.
1708         parseTryCatch();
1709         return;
1710       default:
1711         break;
1712       }
1713       break;
1714     case tok::kw_concept:
1715       parseConcept();
1716       return;
1717     case tok::kw_requires: {
1718       if (Style.isCpp()) {
1719         bool ParsedClause = parseRequires();
1720         if (ParsedClause)
1721           return;
1722       } else {
1723         nextToken();
1724       }
1725       break;
1726     }
1727     case tok::kw_enum:
1728       // Ignore if this is part of "template <enum ...".
1729       if (Previous && Previous->is(tok::less)) {
1730         nextToken();
1731         break;
1732       }
1733 
1734       // parseEnum falls through and does not yet add an unwrapped line as an
1735       // enum definition can start a structural element.
1736       if (!parseEnum())
1737         break;
1738       // This only applies for C++.
1739       if (!Style.isCpp()) {
1740         addUnwrappedLine();
1741         return;
1742       }
1743       break;
1744     case tok::kw_typedef:
1745       nextToken();
1746       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1747                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1748                              Keywords.kw_CF_CLOSED_ENUM,
1749                              Keywords.kw_NS_CLOSED_ENUM)) {
1750         parseEnum();
1751       }
1752       break;
1753     case tok::kw_struct:
1754     case tok::kw_union:
1755     case tok::kw_class:
1756       if (parseStructLike())
1757         return;
1758       break;
1759     case tok::period:
1760       nextToken();
1761       // In Java, classes have an implicit static member "class".
1762       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1763           FormatTok->is(tok::kw_class)) {
1764         nextToken();
1765       }
1766       if (Style.isJavaScript() && FormatTok &&
1767           FormatTok->Tok.getIdentifierInfo()) {
1768         // JavaScript only has pseudo keywords, all keywords are allowed to
1769         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1770         nextToken();
1771       }
1772       break;
1773     case tok::semi:
1774       nextToken();
1775       addUnwrappedLine();
1776       return;
1777     case tok::r_brace:
1778       addUnwrappedLine();
1779       return;
1780     case tok::l_paren: {
1781       parseParens();
1782       // Break the unwrapped line if a K&R C function definition has a parameter
1783       // declaration.
1784       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1785         break;
1786       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1787         addUnwrappedLine();
1788         return;
1789       }
1790       break;
1791     }
1792     case tok::kw_operator:
1793       nextToken();
1794       if (FormatTok->isBinaryOperator())
1795         nextToken();
1796       break;
1797     case tok::caret:
1798       nextToken();
1799       if (FormatTok->Tok.isAnyIdentifier() ||
1800           FormatTok->isSimpleTypeSpecifier()) {
1801         nextToken();
1802       }
1803       if (FormatTok->is(tok::l_paren))
1804         parseParens();
1805       if (FormatTok->is(tok::l_brace))
1806         parseChildBlock();
1807       break;
1808     case tok::l_brace:
1809       if (NextLBracesType != TT_Unknown)
1810         FormatTok->setFinalizedType(NextLBracesType);
1811       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1812         // A block outside of parentheses must be the last part of a
1813         // structural element.
1814         // FIXME: Figure out cases where this is not true, and add projections
1815         // for them (the one we know is missing are lambdas).
1816         if (Style.Language == FormatStyle::LK_Java &&
1817             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1818           // If necessary, we could set the type to something different than
1819           // TT_FunctionLBrace.
1820           if (Style.BraceWrapping.AfterControlStatement ==
1821               FormatStyle::BWACS_Always) {
1822             addUnwrappedLine();
1823           }
1824         } else if (Style.BraceWrapping.AfterFunction) {
1825           addUnwrappedLine();
1826         }
1827         if (!Line->InPPDirective)
1828           FormatTok->setFinalizedType(TT_FunctionLBrace);
1829         parseBlock();
1830         addUnwrappedLine();
1831         return;
1832       }
1833       // Otherwise this was a braced init list, and the structural
1834       // element continues.
1835       break;
1836     case tok::kw_try:
1837       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1838         // field/method declaration.
1839         nextToken();
1840         break;
1841       }
1842       // We arrive here when parsing function-try blocks.
1843       if (Style.BraceWrapping.AfterFunction)
1844         addUnwrappedLine();
1845       parseTryCatch();
1846       return;
1847     case tok::identifier: {
1848       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1849           Line->MustBeDeclaration) {
1850         addUnwrappedLine();
1851         parseCSharpGenericTypeConstraint();
1852         break;
1853       }
1854       if (FormatTok->is(TT_MacroBlockEnd)) {
1855         addUnwrappedLine();
1856         return;
1857       }
1858 
1859       // Function declarations (as opposed to function expressions) are parsed
1860       // on their own unwrapped line by continuing this loop. Function
1861       // expressions (functions that are not on their own line) must not create
1862       // a new unwrapped line, so they are special cased below.
1863       size_t TokenCount = Line->Tokens.size();
1864       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1865           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1866                                                      Keywords.kw_async)))) {
1867         tryToParseJSFunction();
1868         break;
1869       }
1870       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1871           FormatTok->is(Keywords.kw_interface)) {
1872         if (Style.isJavaScript()) {
1873           // In JavaScript/TypeScript, "interface" can be used as a standalone
1874           // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
1876           // interface declaration.
1877           unsigned StoredPosition = Tokens->getPosition();
1878           FormatToken *Next = Tokens->getNextToken();
1879           FormatTok = Tokens->setPosition(StoredPosition);
1880           if (!mustBeJSIdent(Keywords, Next)) {
1881             nextToken();
1882             break;
1883           }
1884         }
1885         parseRecord();
1886         addUnwrappedLine();
1887         return;
1888       }
1889 
1890       if (FormatTok->is(Keywords.kw_interface)) {
1891         if (parseStructLike())
1892           return;
1893         break;
1894       }
1895 
1896       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1897         parseStatementMacro();
1898         return;
1899       }
1900 
1901       // See if the following token should start a new unwrapped line.
1902       StringRef Text = FormatTok->TokenText;
1903 
1904       FormatToken *PreviousToken = FormatTok;
1905       nextToken();
1906 
1907       // JS doesn't have macros, and within classes colons indicate fields, not
1908       // labels.
1909       if (Style.isJavaScript())
1910         break;
1911 
1912       TokenCount = Line->Tokens.size();
1913       if (TokenCount == 1 ||
1914           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1915         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1916           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1917           parseLabel(!Style.IndentGotoLabels);
1918           if (HasLabel)
1919             *HasLabel = true;
1920           return;
1921         }
1922         // Recognize function-like macro usages without trailing semicolon as
1923         // well as free-standing macros like Q_OBJECT.
1924         bool FunctionLike = FormatTok->is(tok::l_paren);
1925         if (FunctionLike)
1926           parseParens();
1927 
1928         bool FollowedByNewline =
1929             CommentsBeforeNextToken.empty()
1930                 ? FormatTok->NewlinesBefore > 0
1931                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1932 
1933         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1934             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1935           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1936           addUnwrappedLine();
1937           return;
1938         }
1939       }
1940       break;
1941     }
1942     case tok::equal:
1943       if ((Style.isJavaScript() || Style.isCSharp()) &&
1944           FormatTok->is(TT_FatArrow)) {
1945         tryToParseChildBlock();
1946         break;
1947       }
1948 
1949       nextToken();
1950       if (FormatTok->is(tok::l_brace)) {
1951         // Block kind should probably be set to BK_BracedInit for any language.
1952         // C# needs this change to ensure that array initialisers and object
1953         // initialisers are indented the same way.
1954         if (Style.isCSharp())
1955           FormatTok->setBlockKind(BK_BracedInit);
1956         nextToken();
1957         parseBracedList();
1958       } else if (Style.Language == FormatStyle::LK_Proto &&
1959                  FormatTok->is(tok::less)) {
1960         nextToken();
1961         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1962                         /*ClosingBraceKind=*/tok::greater);
1963       }
1964       break;
1965     case tok::l_square:
1966       parseSquare();
1967       break;
1968     case tok::kw_new:
1969       parseNew();
1970       break;
1971     case tok::kw_case:
1972       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1973         // 'case: string' field declaration.
1974         nextToken();
1975         break;
1976       }
1977       parseCaseLabel();
1978       break;
1979     default:
1980       nextToken();
1981       break;
1982     }
1983   } while (!eof());
1984 }
1985 
1986 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1987   assert(FormatTok->is(tok::l_brace));
1988   if (!Style.isCSharp())
1989     return false;
1990   // See if it's a property accessor.
1991   if (FormatTok->Previous->isNot(tok::identifier))
1992     return false;
1993 
1994   // See if we are inside a property accessor.
1995   //
1996   // Record the current tokenPosition so that we can advance and
1997   // reset the current token. `Next` is not set yet so we need
1998   // another way to advance along the token stream.
1999   unsigned int StoredPosition = Tokens->getPosition();
2000   FormatToken *Tok = Tokens->getNextToken();
2001 
2002   // A trivial property accessor is of the form:
2003   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2004   // Track these as they do not require line breaks to be introduced.
2005   bool HasSpecialAccessor = false;
2006   bool IsTrivialPropertyAccessor = true;
2007   while (!eof()) {
2008     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2009                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2010                      Keywords.kw_init, Keywords.kw_set)) {
2011       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2012         HasSpecialAccessor = true;
2013       Tok = Tokens->getNextToken();
2014       continue;
2015     }
2016     if (Tok->isNot(tok::r_brace))
2017       IsTrivialPropertyAccessor = false;
2018     break;
2019   }
2020 
2021   if (!HasSpecialAccessor) {
2022     Tokens->setPosition(StoredPosition);
2023     return false;
2024   }
2025 
2026   // Try to parse the property accessor:
2027   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2028   Tokens->setPosition(StoredPosition);
2029   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2030     addUnwrappedLine();
2031   nextToken();
2032   do {
2033     switch (FormatTok->Tok.getKind()) {
2034     case tok::r_brace:
2035       nextToken();
2036       if (FormatTok->is(tok::equal)) {
2037         while (!eof() && FormatTok->isNot(tok::semi))
2038           nextToken();
2039         nextToken();
2040       }
2041       addUnwrappedLine();
2042       return true;
2043     case tok::l_brace:
2044       ++Line->Level;
2045       parseBlock(/*MustBeDeclaration=*/true);
2046       addUnwrappedLine();
2047       --Line->Level;
2048       break;
2049     case tok::equal:
2050       if (FormatTok->is(TT_FatArrow)) {
2051         ++Line->Level;
2052         do {
2053           nextToken();
2054         } while (!eof() && FormatTok->isNot(tok::semi));
2055         nextToken();
2056         addUnwrappedLine();
2057         --Line->Level;
2058         break;
2059       }
2060       nextToken();
2061       break;
2062     default:
2063       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2064                              Keywords.kw_set) &&
2065           !IsTrivialPropertyAccessor) {
2066         // Non-trivial get/set needs to be on its own line.
2067         addUnwrappedLine();
2068       }
2069       nextToken();
2070     }
2071   } while (!eof());
2072 
2073   // Unreachable for well-formed code (paired '{' and '}').
2074   return true;
2075 }
2076 
2077 bool UnwrappedLineParser::tryToParseLambda() {
2078   assert(FormatTok->is(tok::l_square));
2079   if (!Style.isCpp()) {
2080     nextToken();
2081     return false;
2082   }
2083   FormatToken &LSquare = *FormatTok;
2084   if (!tryToParseLambdaIntroducer())
2085     return false;
2086 
2087   bool SeenArrow = false;
2088   bool InTemplateParameterList = false;
2089 
2090   while (FormatTok->isNot(tok::l_brace)) {
2091     if (FormatTok->isSimpleTypeSpecifier()) {
2092       nextToken();
2093       continue;
2094     }
2095     switch (FormatTok->Tok.getKind()) {
2096     case tok::l_brace:
2097       break;
2098     case tok::l_paren:
2099       parseParens();
2100       break;
2101     case tok::l_square:
2102       parseSquare();
2103       break;
2104     case tok::kw_class:
2105     case tok::kw_template:
2106     case tok::kw_typename:
2107       assert(FormatTok->Previous);
2108       if (FormatTok->Previous->is(tok::less))
2109         InTemplateParameterList = true;
2110       nextToken();
2111       break;
2112     case tok::amp:
2113     case tok::star:
2114     case tok::kw_const:
2115     case tok::comma:
2116     case tok::less:
2117     case tok::greater:
2118     case tok::identifier:
2119     case tok::numeric_constant:
2120     case tok::coloncolon:
2121     case tok::kw_mutable:
2122     case tok::kw_noexcept:
2123       nextToken();
2124       break;
2125     // Specialization of a template with an integer parameter can contain
2126     // arithmetic, logical, comparison and ternary operators.
2127     //
2128     // FIXME: This also accepts sequences of operators that are not in the scope
2129     // of a template argument list.
2130     //
2131     // In a C++ lambda a template type can only occur after an arrow. We use
2132     // this as an heuristic to distinguish between Objective-C expressions
2133     // followed by an `a->b` expression, such as:
2134     // ([obj func:arg] + a->b)
2135     // Otherwise the code below would parse as a lambda.
2136     //
2137     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2138     // explicit template lists: []<bool b = true && false>(U &&u){}
2139     case tok::plus:
2140     case tok::minus:
2141     case tok::exclaim:
2142     case tok::tilde:
2143     case tok::slash:
2144     case tok::percent:
2145     case tok::lessless:
2146     case tok::pipe:
2147     case tok::pipepipe:
2148     case tok::ampamp:
2149     case tok::caret:
2150     case tok::equalequal:
2151     case tok::exclaimequal:
2152     case tok::greaterequal:
2153     case tok::lessequal:
2154     case tok::question:
2155     case tok::colon:
2156     case tok::ellipsis:
2157     case tok::kw_true:
2158     case tok::kw_false:
2159       if (SeenArrow || InTemplateParameterList) {
2160         nextToken();
2161         break;
2162       }
2163       return true;
2164     case tok::arrow:
2165       // This might or might not actually be a lambda arrow (this could be an
2166       // ObjC method invocation followed by a dereferencing arrow). We might
2167       // reset this back to TT_Unknown in TokenAnnotator.
2168       FormatTok->setFinalizedType(TT_LambdaArrow);
2169       SeenArrow = true;
2170       nextToken();
2171       break;
2172     default:
2173       return true;
2174     }
2175   }
2176   FormatTok->setFinalizedType(TT_LambdaLBrace);
2177   LSquare.setFinalizedType(TT_LambdaLSquare);
2178   parseChildBlock();
2179   return true;
2180 }
2181 
2182 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2183   const FormatToken *Previous = FormatTok->Previous;
2184   const FormatToken *LeftSquare = FormatTok;
2185   nextToken();
2186   if (Previous &&
2187       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2188                          tok::kw_delete, tok::l_square) ||
2189        LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2190        Previous->isSimpleTypeSpecifier())) {
2191     return false;
2192   }
2193   if (FormatTok->is(tok::l_square))
2194     return false;
2195   if (FormatTok->is(tok::r_square)) {
2196     const FormatToken *Next = Tokens->peekNextToken();
2197     if (Next->is(tok::greater))
2198       return false;
2199   }
2200   parseSquare(/*LambdaIntroducer=*/true);
2201   return true;
2202 }
2203 
2204 void UnwrappedLineParser::tryToParseJSFunction() {
2205   assert(FormatTok->is(Keywords.kw_function) ||
2206          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2207   if (FormatTok->is(Keywords.kw_async))
2208     nextToken();
2209   // Consume "function".
2210   nextToken();
2211 
2212   // Consume * (generator function). Treat it like C++'s overloaded operators.
2213   if (FormatTok->is(tok::star)) {
2214     FormatTok->setFinalizedType(TT_OverloadedOperator);
2215     nextToken();
2216   }
2217 
2218   // Consume function name.
2219   if (FormatTok->is(tok::identifier))
2220     nextToken();
2221 
2222   if (FormatTok->isNot(tok::l_paren))
2223     return;
2224 
2225   // Parse formal parameter list.
2226   parseParens();
2227 
2228   if (FormatTok->is(tok::colon)) {
2229     // Parse a type definition.
2230     nextToken();
2231 
2232     // Eat the type declaration. For braced inline object types, balance braces,
2233     // otherwise just parse until finding an l_brace for the function body.
2234     if (FormatTok->is(tok::l_brace))
2235       tryToParseBracedList();
2236     else
2237       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2238         nextToken();
2239   }
2240 
2241   if (FormatTok->is(tok::semi))
2242     return;
2243 
2244   parseChildBlock();
2245 }
2246 
2247 bool UnwrappedLineParser::tryToParseBracedList() {
2248   if (FormatTok->is(BK_Unknown))
2249     calculateBraceTypes();
2250   assert(FormatTok->isNot(BK_Unknown));
2251   if (FormatTok->is(BK_Block))
2252     return false;
2253   nextToken();
2254   parseBracedList();
2255   return true;
2256 }
2257 
2258 bool UnwrappedLineParser::tryToParseChildBlock() {
2259   assert(Style.isJavaScript() || Style.isCSharp());
2260   assert(FormatTok->is(TT_FatArrow));
2261   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2262   // They always start an expression or a child block if followed by a curly
2263   // brace.
2264   nextToken();
2265   if (FormatTok->isNot(tok::l_brace))
2266     return false;
2267   parseChildBlock();
2268   return true;
2269 }
2270 
2271 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2272                                           bool IsEnum,
2273                                           tok::TokenKind ClosingBraceKind) {
2274   bool HasError = false;
2275 
2276   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2277   // replace this by using parseAssignmentExpression() inside.
2278   do {
2279     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2280         tryToParseChildBlock()) {
2281       continue;
2282     }
2283     if (Style.isJavaScript()) {
2284       if (FormatTok->is(Keywords.kw_function) ||
2285           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2286         tryToParseJSFunction();
2287         continue;
2288       }
2289       if (FormatTok->is(tok::l_brace)) {
2290         // Could be a method inside of a braced list `{a() { return 1; }}`.
2291         if (tryToParseBracedList())
2292           continue;
2293         parseChildBlock();
2294       }
2295     }
2296     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2297       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2298         addUnwrappedLine();
2299       nextToken();
2300       return !HasError;
2301     }
2302     switch (FormatTok->Tok.getKind()) {
2303     case tok::l_square:
2304       if (Style.isCSharp())
2305         parseSquare();
2306       else
2307         tryToParseLambda();
2308       break;
2309     case tok::l_paren:
2310       parseParens();
2311       // JavaScript can just have free standing methods and getters/setters in
2312       // object literals. Detect them by a "{" following ")".
2313       if (Style.isJavaScript()) {
2314         if (FormatTok->is(tok::l_brace))
2315           parseChildBlock();
2316         break;
2317       }
2318       break;
2319     case tok::l_brace:
2320       // Assume there are no blocks inside a braced init list apart
2321       // from the ones we explicitly parse out (like lambdas).
2322       FormatTok->setBlockKind(BK_BracedInit);
2323       nextToken();
2324       parseBracedList();
2325       break;
2326     case tok::less:
2327       if (Style.Language == FormatStyle::LK_Proto ||
2328           ClosingBraceKind == tok::greater) {
2329         nextToken();
2330         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2331                         /*ClosingBraceKind=*/tok::greater);
2332       } else {
2333         nextToken();
2334       }
2335       break;
2336     case tok::semi:
2337       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2338       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2339       // used for error recovery if we have otherwise determined that this is
2340       // a braced list.
2341       if (Style.isJavaScript()) {
2342         nextToken();
2343         break;
2344       }
2345       HasError = true;
2346       if (!ContinueOnSemicolons)
2347         return !HasError;
2348       nextToken();
2349       break;
2350     case tok::comma:
2351       nextToken();
2352       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2353         addUnwrappedLine();
2354       break;
2355     default:
2356       nextToken();
2357       break;
2358     }
2359   } while (!eof());
2360   return false;
2361 }
2362 
2363 /// \brief Parses a pair of parentheses (and everything between them).
2364 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2365 /// double ampersands. This only counts for the current parens scope.
2366 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2367   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2368   nextToken();
2369   do {
2370     switch (FormatTok->Tok.getKind()) {
2371     case tok::l_paren:
2372       parseParens();
2373       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2374         parseChildBlock();
2375       break;
2376     case tok::r_paren:
2377       nextToken();
2378       return;
2379     case tok::r_brace:
2380       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2381       return;
2382     case tok::l_square:
2383       tryToParseLambda();
2384       break;
2385     case tok::l_brace:
2386       if (!tryToParseBracedList())
2387         parseChildBlock();
2388       break;
2389     case tok::at:
2390       nextToken();
2391       if (FormatTok->is(tok::l_brace)) {
2392         nextToken();
2393         parseBracedList();
2394       }
2395       break;
2396     case tok::equal:
2397       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2398         tryToParseChildBlock();
2399       else
2400         nextToken();
2401       break;
2402     case tok::kw_class:
2403       if (Style.isJavaScript())
2404         parseRecord(/*ParseAsExpr=*/true);
2405       else
2406         nextToken();
2407       break;
2408     case tok::identifier:
2409       if (Style.isJavaScript() &&
2410           (FormatTok->is(Keywords.kw_function) ||
2411            FormatTok->startsSequence(Keywords.kw_async,
2412                                      Keywords.kw_function))) {
2413         tryToParseJSFunction();
2414       } else {
2415         nextToken();
2416       }
2417       break;
2418     case tok::kw_requires: {
2419       auto RequiresToken = FormatTok;
2420       nextToken();
2421       parseRequiresExpression(RequiresToken);
2422       break;
2423     }
2424     case tok::ampamp:
2425       if (AmpAmpTokenType != TT_Unknown)
2426         FormatTok->setFinalizedType(AmpAmpTokenType);
2427       LLVM_FALLTHROUGH;
2428     default:
2429       nextToken();
2430       break;
2431     }
2432   } while (!eof());
2433 }
2434 
2435 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2436   if (!LambdaIntroducer) {
2437     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2438     if (tryToParseLambda())
2439       return;
2440   }
2441   do {
2442     switch (FormatTok->Tok.getKind()) {
2443     case tok::l_paren:
2444       parseParens();
2445       break;
2446     case tok::r_square:
2447       nextToken();
2448       return;
2449     case tok::r_brace:
2450       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2451       return;
2452     case tok::l_square:
2453       parseSquare();
2454       break;
2455     case tok::l_brace: {
2456       if (!tryToParseBracedList())
2457         parseChildBlock();
2458       break;
2459     }
2460     case tok::at:
2461       nextToken();
2462       if (FormatTok->is(tok::l_brace)) {
2463         nextToken();
2464         parseBracedList();
2465       }
2466       break;
2467     default:
2468       nextToken();
2469       break;
2470     }
2471   } while (!eof());
2472 }
2473 
2474 void UnwrappedLineParser::keepAncestorBraces() {
2475   if (!Style.RemoveBracesLLVM)
2476     return;
2477 
2478   const int MaxNestingLevels = 2;
2479   const int Size = NestedTooDeep.size();
2480   if (Size >= MaxNestingLevels)
2481     NestedTooDeep[Size - MaxNestingLevels] = true;
2482   NestedTooDeep.push_back(false);
2483 }
2484 
2485 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2486   for (const auto &Token : llvm::reverse(Line.Tokens))
2487     if (Token.Tok->isNot(tok::comment))
2488       return Token.Tok;
2489 
2490   return nullptr;
2491 }
2492 
2493 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2494   FormatToken *Tok = nullptr;
2495 
2496   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2497       PreprocessorDirectives.empty()) {
2498     Tok = getLastNonComment(*Line);
2499     assert(Tok);
2500     if (Tok->BraceCount < 0) {
2501       assert(Tok->BraceCount == -1);
2502       Tok = nullptr;
2503     } else {
2504       Tok->BraceCount = -1;
2505     }
2506   }
2507 
2508   addUnwrappedLine();
2509   ++Line->Level;
2510   parseStructuralElement();
2511 
2512   if (Tok) {
2513     assert(!Line->InPPDirective);
2514     Tok = nullptr;
2515     for (const auto &L : llvm::reverse(*CurrentLines)) {
2516       if (!L.InPPDirective && getLastNonComment(L)) {
2517         Tok = L.Tokens.back().Tok;
2518         break;
2519       }
2520     }
2521     assert(Tok);
2522     ++Tok->BraceCount;
2523   }
2524 
2525   if (CheckEOF && FormatTok->is(tok::eof))
2526     addUnwrappedLine();
2527 
2528   --Line->Level;
2529 }
2530 
2531 static void markOptionalBraces(FormatToken *LeftBrace) {
2532   if (!LeftBrace)
2533     return;
2534 
2535   assert(LeftBrace->is(tok::l_brace));
2536 
2537   FormatToken *RightBrace = LeftBrace->MatchingParen;
2538   if (!RightBrace) {
2539     assert(!LeftBrace->Optional);
2540     return;
2541   }
2542 
2543   assert(RightBrace->is(tok::r_brace));
2544   assert(RightBrace->MatchingParen == LeftBrace);
2545   assert(LeftBrace->Optional == RightBrace->Optional);
2546 
2547   LeftBrace->Optional = true;
2548   RightBrace->Optional = true;
2549 }
2550 
2551 void UnwrappedLineParser::handleAttributes() {
2552   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2553   if (FormatTok->is(TT_AttributeMacro))
2554     nextToken();
2555   handleCppAttributes();
2556 }
2557 
2558 bool UnwrappedLineParser::handleCppAttributes() {
2559   // Handle [[likely]] / [[unlikely]] attributes.
2560   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2561     parseSquare();
2562     return true;
2563   }
2564   return false;
2565 }
2566 
2567 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2568                                                   bool KeepBraces) {
2569   assert(FormatTok->is(tok::kw_if) && "'if' expected");
2570   nextToken();
2571   if (FormatTok->is(tok::exclaim))
2572     nextToken();
2573 
2574   bool KeepIfBraces = true;
2575   if (FormatTok->is(tok::kw_consteval)) {
2576     nextToken();
2577   } else {
2578     KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
2579     if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2580       nextToken();
2581     if (FormatTok->is(tok::l_paren))
2582       parseParens();
2583   }
2584   handleAttributes();
2585 
2586   bool NeedsUnwrappedLine = false;
2587   keepAncestorBraces();
2588 
2589   FormatToken *IfLeftBrace = nullptr;
2590   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2591 
2592   if (Keywords.isBlockBegin(*FormatTok, Style)) {
2593     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2594     IfLeftBrace = FormatTok;
2595     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2596     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2597                /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
2598     if (Style.BraceWrapping.BeforeElse)
2599       addUnwrappedLine();
2600     else
2601       NeedsUnwrappedLine = true;
2602   } else {
2603     parseUnbracedBody();
2604   }
2605 
2606   if (Style.RemoveBracesLLVM) {
2607     assert(!NestedTooDeep.empty());
2608     KeepIfBraces = KeepIfBraces ||
2609                    (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2610                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2611                    IfBlockKind == IfStmtKind::IfElseIf;
2612   }
2613 
2614   bool KeepElseBraces = KeepIfBraces;
2615   FormatToken *ElseLeftBrace = nullptr;
2616   IfStmtKind Kind = IfStmtKind::IfOnly;
2617 
2618   if (FormatTok->is(tok::kw_else)) {
2619     if (Style.RemoveBracesLLVM) {
2620       NestedTooDeep.back() = false;
2621       Kind = IfStmtKind::IfElse;
2622     }
2623     nextToken();
2624     handleAttributes();
2625     if (Keywords.isBlockBegin(*FormatTok, Style)) {
2626       const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
2627       FormatTok->setFinalizedType(TT_ElseLBrace);
2628       ElseLeftBrace = FormatTok;
2629       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2630       IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
2631       FormatToken *IfLBrace =
2632           parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2633                      /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
2634       if (FormatTok->is(tok::kw_else)) {
2635         KeepElseBraces = KeepElseBraces ||
2636                          ElseBlockKind == IfStmtKind::IfOnly ||
2637                          ElseBlockKind == IfStmtKind::IfElseIf;
2638       } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
2639         KeepElseBraces = true;
2640         assert(ElseLeftBrace->MatchingParen);
2641         markOptionalBraces(ElseLeftBrace);
2642       }
2643       addUnwrappedLine();
2644     } else if (FormatTok->is(tok::kw_if)) {
2645       const FormatToken *Previous = Tokens->getPreviousToken();
2646       assert(Previous);
2647       const bool IsPrecededByComment = Previous->is(tok::comment);
2648       if (IsPrecededByComment) {
2649         addUnwrappedLine();
2650         ++Line->Level;
2651       }
2652       bool TooDeep = true;
2653       if (Style.RemoveBracesLLVM) {
2654         Kind = IfStmtKind::IfElseIf;
2655         TooDeep = NestedTooDeep.pop_back_val();
2656       }
2657       ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2658       if (Style.RemoveBracesLLVM)
2659         NestedTooDeep.push_back(TooDeep);
2660       if (IsPrecededByComment)
2661         --Line->Level;
2662     } else {
2663       parseUnbracedBody(/*CheckEOF=*/true);
2664     }
2665   } else {
2666     KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2667     if (NeedsUnwrappedLine)
2668       addUnwrappedLine();
2669   }
2670 
2671   if (!Style.RemoveBracesLLVM)
2672     return nullptr;
2673 
2674   assert(!NestedTooDeep.empty());
2675   KeepElseBraces = KeepElseBraces ||
2676                    (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2677                    NestedTooDeep.back();
2678 
2679   NestedTooDeep.pop_back();
2680 
2681   if (!KeepIfBraces && !KeepElseBraces) {
2682     markOptionalBraces(IfLeftBrace);
2683     markOptionalBraces(ElseLeftBrace);
2684   } else if (IfLeftBrace) {
2685     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2686     if (IfRightBrace) {
2687       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2688       assert(!IfLeftBrace->Optional);
2689       assert(!IfRightBrace->Optional);
2690       IfLeftBrace->MatchingParen = nullptr;
2691       IfRightBrace->MatchingParen = nullptr;
2692     }
2693   }
2694 
2695   if (IfKind)
2696     *IfKind = Kind;
2697 
2698   return IfLeftBrace;
2699 }
2700 
2701 void UnwrappedLineParser::parseTryCatch() {
2702   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2703   nextToken();
2704   bool NeedsUnwrappedLine = false;
2705   if (FormatTok->is(tok::colon)) {
2706     // We are in a function try block, what comes is an initializer list.
2707     nextToken();
2708 
2709     // In case identifiers were removed by clang-tidy, what might follow is
2710     // multiple commas in sequence - before the first identifier.
2711     while (FormatTok->is(tok::comma))
2712       nextToken();
2713 
2714     while (FormatTok->is(tok::identifier)) {
2715       nextToken();
2716       if (FormatTok->is(tok::l_paren))
2717         parseParens();
2718       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2719           FormatTok->is(tok::l_brace)) {
2720         do {
2721           nextToken();
2722         } while (!FormatTok->is(tok::r_brace));
2723         nextToken();
2724       }
2725 
2726       // In case identifiers were removed by clang-tidy, what might follow is
2727       // multiple commas in sequence - after the first identifier.
2728       while (FormatTok->is(tok::comma))
2729         nextToken();
2730     }
2731   }
2732   // Parse try with resource.
2733   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2734     parseParens();
2735 
2736   keepAncestorBraces();
2737 
2738   if (FormatTok->is(tok::l_brace)) {
2739     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2740     parseBlock();
2741     if (Style.BraceWrapping.BeforeCatch)
2742       addUnwrappedLine();
2743     else
2744       NeedsUnwrappedLine = true;
2745   } else if (!FormatTok->is(tok::kw_catch)) {
2746     // The C++ standard requires a compound-statement after a try.
2747     // If there's none, we try to assume there's a structuralElement
2748     // and try to continue.
2749     addUnwrappedLine();
2750     ++Line->Level;
2751     parseStructuralElement();
2752     --Line->Level;
2753   }
2754   while (true) {
2755     if (FormatTok->is(tok::at))
2756       nextToken();
2757     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2758                              tok::kw___finally) ||
2759           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2760            FormatTok->is(Keywords.kw_finally)) ||
2761           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2762            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2763       break;
2764     }
2765     nextToken();
2766     while (FormatTok->isNot(tok::l_brace)) {
2767       if (FormatTok->is(tok::l_paren)) {
2768         parseParens();
2769         continue;
2770       }
2771       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2772         if (Style.RemoveBracesLLVM)
2773           NestedTooDeep.pop_back();
2774         return;
2775       }
2776       nextToken();
2777     }
2778     NeedsUnwrappedLine = false;
2779     Line->MustBeDeclaration = false;
2780     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2781     parseBlock();
2782     if (Style.BraceWrapping.BeforeCatch)
2783       addUnwrappedLine();
2784     else
2785       NeedsUnwrappedLine = true;
2786   }
2787 
2788   if (Style.RemoveBracesLLVM)
2789     NestedTooDeep.pop_back();
2790 
2791   if (NeedsUnwrappedLine)
2792     addUnwrappedLine();
2793 }
2794 
2795 void UnwrappedLineParser::parseNamespace() {
2796   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2797          "'namespace' expected");
2798 
2799   const FormatToken &InitialToken = *FormatTok;
2800   nextToken();
2801   if (InitialToken.is(TT_NamespaceMacro)) {
2802     parseParens();
2803   } else {
2804     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2805                               tok::l_square, tok::period, tok::l_paren) ||
2806            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2807       if (FormatTok->is(tok::l_square))
2808         parseSquare();
2809       else if (FormatTok->is(tok::l_paren))
2810         parseParens();
2811       else
2812         nextToken();
2813     }
2814   }
2815   if (FormatTok->is(tok::l_brace)) {
2816     if (ShouldBreakBeforeBrace(Style, InitialToken))
2817       addUnwrappedLine();
2818 
2819     unsigned AddLevels =
2820         Style.NamespaceIndentation == FormatStyle::NI_All ||
2821                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2822                  DeclarationScopeStack.size() > 1)
2823             ? 1u
2824             : 0u;
2825     bool ManageWhitesmithsBraces =
2826         AddLevels == 0u &&
2827         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2828 
2829     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2830     // the whole block.
2831     if (ManageWhitesmithsBraces)
2832       ++Line->Level;
2833 
2834     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2835                /*KeepBraces=*/true, /*IfKind=*/nullptr,
2836                ManageWhitesmithsBraces);
2837 
2838     // Munch the semicolon after a namespace. This is more common than one would
2839     // think. Putting the semicolon into its own line is very ugly.
2840     if (FormatTok->is(tok::semi))
2841       nextToken();
2842 
2843     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2844 
2845     if (ManageWhitesmithsBraces)
2846       --Line->Level;
2847   }
2848   // FIXME: Add error handling.
2849 }
2850 
2851 void UnwrappedLineParser::parseNew() {
2852   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2853   nextToken();
2854 
2855   if (Style.isCSharp()) {
2856     do {
2857       if (FormatTok->is(tok::l_brace))
2858         parseBracedList();
2859 
2860       if (FormatTok->isOneOf(tok::semi, tok::comma))
2861         return;
2862 
2863       nextToken();
2864     } while (!eof());
2865   }
2866 
2867   if (Style.Language != FormatStyle::LK_Java)
2868     return;
2869 
2870   // In Java, we can parse everything up to the parens, which aren't optional.
2871   do {
2872     // There should not be a ;, { or } before the new's open paren.
2873     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2874       return;
2875 
2876     // Consume the parens.
2877     if (FormatTok->is(tok::l_paren)) {
2878       parseParens();
2879 
2880       // If there is a class body of an anonymous class, consume that as child.
2881       if (FormatTok->is(tok::l_brace))
2882         parseChildBlock();
2883       return;
2884     }
2885     nextToken();
2886   } while (!eof());
2887 }
2888 
2889 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
2890   keepAncestorBraces();
2891 
2892   if (Keywords.isBlockBegin(*FormatTok, Style)) {
2893     if (!KeepBraces)
2894       FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2895     FormatToken *LeftBrace = FormatTok;
2896     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2897     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2898                /*MunchSemi=*/true, KeepBraces);
2899     if (!KeepBraces) {
2900       assert(!NestedTooDeep.empty());
2901       if (!NestedTooDeep.back())
2902         markOptionalBraces(LeftBrace);
2903     }
2904     if (WrapRightBrace)
2905       addUnwrappedLine();
2906   } else {
2907     parseUnbracedBody();
2908   }
2909 
2910   if (!KeepBraces)
2911     NestedTooDeep.pop_back();
2912 }
2913 
2914 void UnwrappedLineParser::parseForOrWhileLoop() {
2915   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2916          "'for', 'while' or foreach macro expected");
2917   const bool KeepBraces = !Style.RemoveBracesLLVM ||
2918                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
2919 
2920   nextToken();
2921   // JS' for await ( ...
2922   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2923     nextToken();
2924   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2925     nextToken();
2926   if (FormatTok->is(tok::l_paren))
2927     parseParens();
2928 
2929   handleAttributes();
2930   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
2931 }
2932 
2933 void UnwrappedLineParser::parseDoWhile() {
2934   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2935   nextToken();
2936 
2937   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
2938 
2939   // FIXME: Add error handling.
2940   if (!FormatTok->is(tok::kw_while)) {
2941     addUnwrappedLine();
2942     return;
2943   }
2944 
2945   // If in Whitesmiths mode, the line with the while() needs to be indented
2946   // to the same level as the block.
2947   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2948     ++Line->Level;
2949 
2950   nextToken();
2951   parseStructuralElement();
2952 }
2953 
2954 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2955   nextToken();
2956   unsigned OldLineLevel = Line->Level;
2957   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2958     --Line->Level;
2959   if (LeftAlignLabel)
2960     Line->Level = 0;
2961 
2962   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2963       FormatTok->is(tok::l_brace)) {
2964 
2965     CompoundStatementIndenter Indenter(this, Line->Level,
2966                                        Style.BraceWrapping.AfterCaseLabel,
2967                                        Style.BraceWrapping.IndentBraces);
2968     parseBlock();
2969     if (FormatTok->is(tok::kw_break)) {
2970       if (Style.BraceWrapping.AfterControlStatement ==
2971           FormatStyle::BWACS_Always) {
2972         addUnwrappedLine();
2973         if (!Style.IndentCaseBlocks &&
2974             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2975           ++Line->Level;
2976         }
2977       }
2978       parseStructuralElement();
2979     }
2980     addUnwrappedLine();
2981   } else {
2982     if (FormatTok->is(tok::semi))
2983       nextToken();
2984     addUnwrappedLine();
2985   }
2986   Line->Level = OldLineLevel;
2987   if (FormatTok->isNot(tok::l_brace)) {
2988     parseStructuralElement();
2989     addUnwrappedLine();
2990   }
2991 }
2992 
2993 void UnwrappedLineParser::parseCaseLabel() {
2994   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2995 
2996   // FIXME: fix handling of complex expressions here.
2997   do {
2998     nextToken();
2999   } while (!eof() && !FormatTok->is(tok::colon));
3000   parseLabel();
3001 }
3002 
3003 void UnwrappedLineParser::parseSwitch() {
3004   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3005   nextToken();
3006   if (FormatTok->is(tok::l_paren))
3007     parseParens();
3008 
3009   keepAncestorBraces();
3010 
3011   if (FormatTok->is(tok::l_brace)) {
3012     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3013     parseBlock();
3014     addUnwrappedLine();
3015   } else {
3016     addUnwrappedLine();
3017     ++Line->Level;
3018     parseStructuralElement();
3019     --Line->Level;
3020   }
3021 
3022   if (Style.RemoveBracesLLVM)
3023     NestedTooDeep.pop_back();
3024 }
3025 
3026 // Operators that can follow a C variable.
3027 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3028   switch (kind) {
3029   case tok::ampamp:
3030   case tok::ampequal:
3031   case tok::arrow:
3032   case tok::caret:
3033   case tok::caretequal:
3034   case tok::comma:
3035   case tok::ellipsis:
3036   case tok::equal:
3037   case tok::equalequal:
3038   case tok::exclaim:
3039   case tok::exclaimequal:
3040   case tok::greater:
3041   case tok::greaterequal:
3042   case tok::greatergreater:
3043   case tok::greatergreaterequal:
3044   case tok::l_paren:
3045   case tok::l_square:
3046   case tok::less:
3047   case tok::lessequal:
3048   case tok::lessless:
3049   case tok::lesslessequal:
3050   case tok::minus:
3051   case tok::minusequal:
3052   case tok::minusminus:
3053   case tok::percent:
3054   case tok::percentequal:
3055   case tok::period:
3056   case tok::pipe:
3057   case tok::pipeequal:
3058   case tok::pipepipe:
3059   case tok::plus:
3060   case tok::plusequal:
3061   case tok::plusplus:
3062   case tok::question:
3063   case tok::r_brace:
3064   case tok::r_paren:
3065   case tok::r_square:
3066   case tok::semi:
3067   case tok::slash:
3068   case tok::slashequal:
3069   case tok::star:
3070   case tok::starequal:
3071     return true;
3072   default:
3073     return false;
3074   }
3075 }
3076 
3077 void UnwrappedLineParser::parseAccessSpecifier() {
3078   FormatToken *AccessSpecifierCandidate = FormatTok;
3079   nextToken();
3080   // Understand Qt's slots.
3081   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3082     nextToken();
3083   // Otherwise, we don't know what it is, and we'd better keep the next token.
3084   if (FormatTok->is(tok::colon)) {
3085     nextToken();
3086     addUnwrappedLine();
3087   } else if (!FormatTok->is(tok::coloncolon) &&
3088              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3089     // Not a variable name nor namespace name.
3090     addUnwrappedLine();
3091   } else if (AccessSpecifierCandidate) {
3092     // Consider the access specifier to be a C identifier.
3093     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3094   }
3095 }
3096 
3097 /// \brief Parses a concept definition.
3098 /// \pre The current token has to be the concept keyword.
3099 ///
3100 /// Returns if either the concept has been completely parsed, or if it detects
3101 /// that the concept definition is incorrect.
3102 void UnwrappedLineParser::parseConcept() {
3103   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
3104   nextToken();
3105   if (!FormatTok->is(tok::identifier))
3106     return;
3107   nextToken();
3108   if (!FormatTok->is(tok::equal))
3109     return;
3110   nextToken();
3111   parseConstraintExpression();
3112   if (FormatTok->is(tok::semi))
3113     nextToken();
3114   addUnwrappedLine();
3115 }
3116 
3117 /// \brief Parses a requires, decides if it is a clause or an expression.
3118 /// \pre The current token has to be the requires keyword.
3119 /// \returns true if it parsed a clause.
3120 bool clang::format::UnwrappedLineParser::parseRequires() {
3121   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3122   auto RequiresToken = FormatTok;
3123 
3124   // We try to guess if it is a requires clause, or a requires expression. For
3125   // that we first consume the keyword and check the next token.
3126   nextToken();
3127 
3128   switch (FormatTok->Tok.getKind()) {
3129   case tok::l_brace:
3130     // This can only be an expression, never a clause.
3131     parseRequiresExpression(RequiresToken);
3132     return false;
3133   case tok::l_paren:
3134     // Clauses and expression can start with a paren, it's unclear what we have.
3135     break;
3136   default:
3137     // All other tokens can only be a clause.
3138     parseRequiresClause(RequiresToken);
3139     return true;
3140   }
3141 
3142   // Looking forward we would have to decide if there are function declaration
3143   // like arguments to the requires expression:
3144   // requires (T t) {
3145   // Or there is a constraint expression for the requires clause:
3146   // requires (C<T> && ...
3147 
3148   // But first let's look behind.
3149   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3150 
3151   if (!PreviousNonComment ||
3152       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3153     // If there is no token, or an expression left brace, we are a requires
3154     // clause within a requires expression.
3155     parseRequiresClause(RequiresToken);
3156     return true;
3157   }
3158 
3159   switch (PreviousNonComment->Tok.getKind()) {
3160   case tok::greater:
3161   case tok::r_paren:
3162   case tok::kw_noexcept:
3163   case tok::kw_const:
3164     // This is a requires clause.
3165     parseRequiresClause(RequiresToken);
3166     return true;
3167   case tok::amp:
3168   case tok::ampamp: {
3169     // This can be either:
3170     // if (... && requires (T t) ...)
3171     // Or
3172     // void member(...) && requires (C<T> ...
3173     // We check the one token before that for a const:
3174     // void member(...) const && requires (C<T> ...
3175     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3176     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3177       parseRequiresClause(RequiresToken);
3178       return true;
3179     }
3180     break;
3181   }
3182   default:
3183     if (PreviousNonComment->isTypeOrIdentifier()) {
3184       // This is a requires clause.
3185       parseRequiresClause(RequiresToken);
3186       return true;
3187     }
3188     // It's an expression.
3189     parseRequiresExpression(RequiresToken);
3190     return false;
3191   }
3192 
3193   // Now we look forward and try to check if the paren content is a parameter
3194   // list. The parameters can be cv-qualified and contain references or
3195   // pointers.
3196   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3197   // of stuff: typename, const, *, &, &&, ::, identifiers.
3198 
3199   int NextTokenOffset = 1;
3200   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3201   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3202     ++NextTokenOffset;
3203     NextToken = Tokens->peekNextToken(NextTokenOffset);
3204   };
3205 
3206   bool FoundType = false;
3207   bool LastWasColonColon = false;
3208   int OpenAngles = 0;
3209 
3210   for (; NextTokenOffset < 50; PeekNext()) {
3211     switch (NextToken->Tok.getKind()) {
3212     case tok::kw_volatile:
3213     case tok::kw_const:
3214     case tok::comma:
3215       parseRequiresExpression(RequiresToken);
3216       return false;
3217     case tok::r_paren:
3218     case tok::pipepipe:
3219       parseRequiresClause(RequiresToken);
3220       return true;
3221     case tok::eof:
3222       // Break out of the loop.
3223       NextTokenOffset = 50;
3224       break;
3225     case tok::coloncolon:
3226       LastWasColonColon = true;
3227       break;
3228     case tok::identifier:
3229       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3230         parseRequiresExpression(RequiresToken);
3231         return false;
3232       }
3233       FoundType = true;
3234       LastWasColonColon = false;
3235       break;
3236     case tok::less:
3237       ++OpenAngles;
3238       break;
3239     case tok::greater:
3240       --OpenAngles;
3241       break;
3242     default:
3243       if (NextToken->isSimpleTypeSpecifier()) {
3244         parseRequiresExpression(RequiresToken);
3245         return false;
3246       }
3247       break;
3248     }
3249   }
3250 
3251   // This seems to be a complicated expression, just assume it's a clause.
3252   parseRequiresClause(RequiresToken);
3253   return true;
3254 }
3255 
3256 /// \brief Parses a requires clause.
3257 /// \param RequiresToken The requires keyword token, which starts this clause.
3258 /// \pre We need to be on the next token after the requires keyword.
3259 /// \sa parseRequiresExpression
3260 ///
3261 /// Returns if it either has finished parsing the clause, or it detects, that
3262 /// the clause is incorrect.
3263 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3264   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3265   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3266 
3267   // If there is no previous token, we are within a requires expression,
3268   // otherwise we will always have the template or function declaration in front
3269   // of it.
3270   bool InRequiresExpression =
3271       !RequiresToken->Previous ||
3272       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3273 
3274   RequiresToken->setFinalizedType(InRequiresExpression
3275                                       ? TT_RequiresClauseInARequiresExpression
3276                                       : TT_RequiresClause);
3277 
3278   parseConstraintExpression();
3279 
3280   if (!InRequiresExpression)
3281     FormatTok->Previous->ClosesRequiresClause = true;
3282 }
3283 
3284 /// \brief Parses a requires expression.
3285 /// \param RequiresToken The requires keyword token, which starts this clause.
3286 /// \pre We need to be on the next token after the requires keyword.
3287 /// \sa parseRequiresClause
3288 ///
3289 /// Returns if it either has finished parsing the expression, or it detects,
3290 /// that the expression is incorrect.
3291 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3292   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3293   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3294 
3295   RequiresToken->setFinalizedType(TT_RequiresExpression);
3296 
3297   if (FormatTok->is(tok::l_paren)) {
3298     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3299     parseParens();
3300   }
3301 
3302   if (FormatTok->is(tok::l_brace)) {
3303     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3304     parseChildBlock(/*CanContainBracedList=*/false,
3305                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3306   }
3307 }
3308 
3309 /// \brief Parses a constraint expression.
3310 ///
3311 /// This is either the definition of a concept, or the body of a requires
3312 /// clause. It returns, when the parsing is complete, or the expression is
3313 /// incorrect.
3314 void UnwrappedLineParser::parseConstraintExpression() {
3315   // The special handling for lambdas is needed since tryToParseLambda() eats a
3316   // token and if a requires expression is the last part of a requires clause
3317   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3318   // not set on the correct token. Thus we need to be aware if we even expect a
3319   // lambda to be possible.
3320   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3321   bool LambdaNextTimeAllowed = true;
3322   do {
3323     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3324 
3325     switch (FormatTok->Tok.getKind()) {
3326     case tok::kw_requires: {
3327       auto RequiresToken = FormatTok;
3328       nextToken();
3329       parseRequiresExpression(RequiresToken);
3330       break;
3331     }
3332 
3333     case tok::l_paren:
3334       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3335       break;
3336 
3337     case tok::l_square:
3338       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3339         return;
3340       break;
3341 
3342     case tok::kw_const:
3343     case tok::semi:
3344     case tok::kw_class:
3345     case tok::kw_struct:
3346     case tok::kw_union:
3347       return;
3348 
3349     case tok::l_brace:
3350       // Potential function body.
3351       return;
3352 
3353     case tok::ampamp:
3354     case tok::pipepipe:
3355       FormatTok->setFinalizedType(TT_BinaryOperator);
3356       nextToken();
3357       LambdaNextTimeAllowed = true;
3358       break;
3359 
3360     case tok::comma:
3361     case tok::comment:
3362       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3363       nextToken();
3364       break;
3365 
3366     case tok::kw_sizeof:
3367     case tok::greater:
3368     case tok::greaterequal:
3369     case tok::greatergreater:
3370     case tok::less:
3371     case tok::lessequal:
3372     case tok::lessless:
3373     case tok::equalequal:
3374     case tok::exclaim:
3375     case tok::exclaimequal:
3376     case tok::plus:
3377     case tok::minus:
3378     case tok::star:
3379     case tok::slash:
3380     case tok::kw_decltype:
3381       LambdaNextTimeAllowed = true;
3382       // Just eat them.
3383       nextToken();
3384       break;
3385 
3386     case tok::numeric_constant:
3387     case tok::coloncolon:
3388     case tok::kw_true:
3389     case tok::kw_false:
3390       // Just eat them.
3391       nextToken();
3392       break;
3393 
3394     case tok::kw_static_cast:
3395     case tok::kw_const_cast:
3396     case tok::kw_reinterpret_cast:
3397     case tok::kw_dynamic_cast:
3398       nextToken();
3399       if (!FormatTok->is(tok::less))
3400         return;
3401 
3402       nextToken();
3403       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3404                       /*ClosingBraceKind=*/tok::greater);
3405       break;
3406 
3407     case tok::kw_bool:
3408       // bool is only allowed if it is directly followed by a paren for a cast:
3409       // concept C = bool(...);
3410       // and bool is the only type, all other types as cast must be inside a
3411       // cast to bool an thus are handled by the other cases.
3412       nextToken();
3413       if (FormatTok->isNot(tok::l_paren))
3414         return;
3415       parseParens();
3416       break;
3417 
3418     default:
3419       if (!FormatTok->Tok.getIdentifierInfo()) {
3420         // Identifiers are part of the default case, we check for more then
3421         // tok::identifier to handle builtin type traits.
3422         return;
3423       }
3424 
3425       // We need to differentiate identifiers for a template deduction guide,
3426       // variables, or function return types (the constraint expression has
3427       // ended before that), and basically all other cases. But it's easier to
3428       // check the other way around.
3429       assert(FormatTok->Previous);
3430       switch (FormatTok->Previous->Tok.getKind()) {
3431       case tok::coloncolon:  // Nested identifier.
3432       case tok::ampamp:      // Start of a function or variable for the
3433       case tok::pipepipe:    // constraint expression.
3434       case tok::kw_requires: // Initial identifier of a requires clause.
3435       case tok::equal:       // Initial identifier of a concept declaration.
3436         break;
3437       default:
3438         return;
3439       }
3440 
3441       // Read identifier with optional template declaration.
3442       nextToken();
3443       if (FormatTok->is(tok::less)) {
3444         nextToken();
3445         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3446                         /*ClosingBraceKind=*/tok::greater);
3447       }
3448       break;
3449     }
3450   } while (!eof());
3451 }
3452 
3453 bool UnwrappedLineParser::parseEnum() {
3454   const FormatToken &InitialToken = *FormatTok;
3455 
3456   // Won't be 'enum' for NS_ENUMs.
3457   if (FormatTok->is(tok::kw_enum))
3458     nextToken();
3459 
3460   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3461   // declarations. An "enum" keyword followed by a colon would be a syntax
3462   // error and thus assume it is just an identifier.
3463   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3464     return false;
3465 
3466   // In protobuf, "enum" can be used as a field name.
3467   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3468     return false;
3469 
3470   // Eat up enum class ...
3471   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3472     nextToken();
3473 
3474   while (FormatTok->Tok.getIdentifierInfo() ||
3475          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3476                             tok::greater, tok::comma, tok::question,
3477                             tok::l_square, tok::r_square)) {
3478     nextToken();
3479     // We can have macros or attributes in between 'enum' and the enum name.
3480     if (FormatTok->is(tok::l_paren))
3481       parseParens();
3482     if (FormatTok->is(TT_AttributeSquare)) {
3483       parseSquare();
3484       // Consume the closing TT_AttributeSquare.
3485       if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3486         nextToken();
3487     }
3488     if (FormatTok->is(tok::identifier)) {
3489       nextToken();
3490       // If there are two identifiers in a row, this is likely an elaborate
3491       // return type. In Java, this can be "implements", etc.
3492       if (Style.isCpp() && FormatTok->is(tok::identifier))
3493         return false;
3494     }
3495   }
3496 
3497   // Just a declaration or something is wrong.
3498   if (FormatTok->isNot(tok::l_brace))
3499     return true;
3500   FormatTok->setFinalizedType(TT_EnumLBrace);
3501   FormatTok->setBlockKind(BK_Block);
3502 
3503   if (Style.Language == FormatStyle::LK_Java) {
3504     // Java enums are different.
3505     parseJavaEnumBody();
3506     return true;
3507   }
3508   if (Style.Language == FormatStyle::LK_Proto) {
3509     parseBlock(/*MustBeDeclaration=*/true);
3510     return true;
3511   }
3512 
3513   if (!Style.AllowShortEnumsOnASingleLine &&
3514       ShouldBreakBeforeBrace(Style, InitialToken)) {
3515     addUnwrappedLine();
3516   }
3517   // Parse enum body.
3518   nextToken();
3519   if (!Style.AllowShortEnumsOnASingleLine) {
3520     addUnwrappedLine();
3521     Line->Level += 1;
3522   }
3523   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3524                                    /*IsEnum=*/true);
3525   if (!Style.AllowShortEnumsOnASingleLine)
3526     Line->Level -= 1;
3527   if (HasError) {
3528     if (FormatTok->is(tok::semi))
3529       nextToken();
3530     addUnwrappedLine();
3531   }
3532   return true;
3533 
3534   // There is no addUnwrappedLine() here so that we fall through to parsing a
3535   // structural element afterwards. Thus, in "enum A {} n, m;",
3536   // "} n, m;" will end up in one unwrapped line.
3537 }
3538 
3539 bool UnwrappedLineParser::parseStructLike() {
3540   // parseRecord falls through and does not yet add an unwrapped line as a
3541   // record declaration or definition can start a structural element.
3542   parseRecord();
3543   // This does not apply to Java, JavaScript and C#.
3544   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3545       Style.isCSharp()) {
3546     if (FormatTok->is(tok::semi))
3547       nextToken();
3548     addUnwrappedLine();
3549     return true;
3550   }
3551   return false;
3552 }
3553 
3554 namespace {
3555 // A class used to set and restore the Token position when peeking
3556 // ahead in the token source.
3557 class ScopedTokenPosition {
3558   unsigned StoredPosition;
3559   FormatTokenSource *Tokens;
3560 
3561 public:
3562   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3563     assert(Tokens && "Tokens expected to not be null");
3564     StoredPosition = Tokens->getPosition();
3565   }
3566 
3567   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3568 };
3569 } // namespace
3570 
3571 // Look to see if we have [[ by looking ahead, if
3572 // its not then rewind to the original position.
3573 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3574   ScopedTokenPosition AutoPosition(Tokens);
3575   FormatToken *Tok = Tokens->getNextToken();
3576   // We already read the first [ check for the second.
3577   if (!Tok->is(tok::l_square))
3578     return false;
3579   // Double check that the attribute is just something
3580   // fairly simple.
3581   while (Tok->isNot(tok::eof)) {
3582     if (Tok->is(tok::r_square))
3583       break;
3584     Tok = Tokens->getNextToken();
3585   }
3586   if (Tok->is(tok::eof))
3587     return false;
3588   Tok = Tokens->getNextToken();
3589   if (!Tok->is(tok::r_square))
3590     return false;
3591   Tok = Tokens->getNextToken();
3592   if (Tok->is(tok::semi))
3593     return false;
3594   return true;
3595 }
3596 
3597 void UnwrappedLineParser::parseJavaEnumBody() {
3598   assert(FormatTok->is(tok::l_brace));
3599   const FormatToken *OpeningBrace = FormatTok;
3600 
3601   // Determine whether the enum is simple, i.e. does not have a semicolon or
3602   // constants with class bodies. Simple enums can be formatted like braced
3603   // lists, contracted to a single line, etc.
3604   unsigned StoredPosition = Tokens->getPosition();
3605   bool IsSimple = true;
3606   FormatToken *Tok = Tokens->getNextToken();
3607   while (!Tok->is(tok::eof)) {
3608     if (Tok->is(tok::r_brace))
3609       break;
3610     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3611       IsSimple = false;
3612       break;
3613     }
3614     // FIXME: This will also mark enums with braces in the arguments to enum
3615     // constants as "not simple". This is probably fine in practice, though.
3616     Tok = Tokens->getNextToken();
3617   }
3618   FormatTok = Tokens->setPosition(StoredPosition);
3619 
3620   if (IsSimple) {
3621     nextToken();
3622     parseBracedList();
3623     addUnwrappedLine();
3624     return;
3625   }
3626 
3627   // Parse the body of a more complex enum.
3628   // First add a line for everything up to the "{".
3629   nextToken();
3630   addUnwrappedLine();
3631   ++Line->Level;
3632 
3633   // Parse the enum constants.
3634   while (FormatTok->isNot(tok::eof)) {
3635     if (FormatTok->is(tok::l_brace)) {
3636       // Parse the constant's class body.
3637       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3638                  /*MunchSemi=*/false);
3639     } else if (FormatTok->is(tok::l_paren)) {
3640       parseParens();
3641     } else if (FormatTok->is(tok::comma)) {
3642       nextToken();
3643       addUnwrappedLine();
3644     } else if (FormatTok->is(tok::semi)) {
3645       nextToken();
3646       addUnwrappedLine();
3647       break;
3648     } else if (FormatTok->is(tok::r_brace)) {
3649       addUnwrappedLine();
3650       break;
3651     } else {
3652       nextToken();
3653     }
3654   }
3655 
3656   // Parse the class body after the enum's ";" if any.
3657   parseLevel(OpeningBrace);
3658   nextToken();
3659   --Line->Level;
3660   addUnwrappedLine();
3661 }
3662 
3663 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3664   const FormatToken &InitialToken = *FormatTok;
3665   nextToken();
3666 
3667   // The actual identifier can be a nested name specifier, and in macros
3668   // it is often token-pasted.
3669   // An [[attribute]] can be before the identifier.
3670   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3671                             tok::kw___attribute, tok::kw___declspec,
3672                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3673          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3674           FormatTok->isOneOf(tok::period, tok::comma))) {
3675     if (Style.isJavaScript() &&
3676         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3677       // JavaScript/TypeScript supports inline object types in
3678       // extends/implements positions:
3679       //     class Foo implements {bar: number} { }
3680       nextToken();
3681       if (FormatTok->is(tok::l_brace)) {
3682         tryToParseBracedList();
3683         continue;
3684       }
3685     }
3686     bool IsNonMacroIdentifier =
3687         FormatTok->is(tok::identifier) &&
3688         FormatTok->TokenText != FormatTok->TokenText.upper();
3689     nextToken();
3690     // We can have macros or attributes in between 'class' and the class name.
3691     if (!IsNonMacroIdentifier) {
3692       if (FormatTok->is(tok::l_paren)) {
3693         parseParens();
3694       } else if (FormatTok->is(TT_AttributeSquare)) {
3695         parseSquare();
3696         // Consume the closing TT_AttributeSquare.
3697         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3698           nextToken();
3699       }
3700     }
3701   }
3702 
3703   // Note that parsing away template declarations here leads to incorrectly
3704   // accepting function declarations as record declarations.
3705   // In general, we cannot solve this problem. Consider:
3706   // class A<int> B() {}
3707   // which can be a function definition or a class definition when B() is a
3708   // macro. If we find enough real-world cases where this is a problem, we
3709   // can parse for the 'template' keyword in the beginning of the statement,
3710   // and thus rule out the record production in case there is no template
3711   // (this would still leave us with an ambiguity between template function
3712   // and class declarations).
3713   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3714     do {
3715       if (FormatTok->is(tok::l_brace)) {
3716         calculateBraceTypes(/*ExpectClassBody=*/true);
3717         if (!tryToParseBracedList())
3718           break;
3719       }
3720       if (FormatTok->is(tok::l_square)) {
3721         FormatToken *Previous = FormatTok->Previous;
3722         if (!Previous ||
3723             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3724           // Don't try parsing a lambda if we had a closing parenthesis before,
3725           // it was probably a pointer to an array: int (*)[].
3726           if (!tryToParseLambda())
3727             break;
3728         } else {
3729           parseSquare();
3730           continue;
3731         }
3732       }
3733       if (FormatTok->is(tok::semi))
3734         return;
3735       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3736         addUnwrappedLine();
3737         nextToken();
3738         parseCSharpGenericTypeConstraint();
3739         break;
3740       }
3741       nextToken();
3742     } while (!eof());
3743   }
3744 
3745   auto GetBraceType = [](const FormatToken &RecordTok) {
3746     switch (RecordTok.Tok.getKind()) {
3747     case tok::kw_class:
3748       return TT_ClassLBrace;
3749     case tok::kw_struct:
3750       return TT_StructLBrace;
3751     case tok::kw_union:
3752       return TT_UnionLBrace;
3753     default:
3754       // Useful for e.g. interface.
3755       return TT_RecordLBrace;
3756     }
3757   };
3758   if (FormatTok->is(tok::l_brace)) {
3759     FormatTok->setFinalizedType(GetBraceType(InitialToken));
3760     if (ParseAsExpr) {
3761       parseChildBlock();
3762     } else {
3763       if (ShouldBreakBeforeBrace(Style, InitialToken))
3764         addUnwrappedLine();
3765 
3766       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3767       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3768     }
3769   }
3770   // There is no addUnwrappedLine() here so that we fall through to parsing a
3771   // structural element afterwards. Thus, in "class A {} n, m;",
3772   // "} n, m;" will end up in one unwrapped line.
3773 }
3774 
3775 void UnwrappedLineParser::parseObjCMethod() {
3776   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3777          "'(' or identifier expected.");
3778   do {
3779     if (FormatTok->is(tok::semi)) {
3780       nextToken();
3781       addUnwrappedLine();
3782       return;
3783     } else if (FormatTok->is(tok::l_brace)) {
3784       if (Style.BraceWrapping.AfterFunction)
3785         addUnwrappedLine();
3786       parseBlock();
3787       addUnwrappedLine();
3788       return;
3789     } else {
3790       nextToken();
3791     }
3792   } while (!eof());
3793 }
3794 
3795 void UnwrappedLineParser::parseObjCProtocolList() {
3796   assert(FormatTok->is(tok::less) && "'<' expected.");
3797   do {
3798     nextToken();
3799     // Early exit in case someone forgot a close angle.
3800     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3801         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3802       return;
3803     }
3804   } while (!eof() && FormatTok->isNot(tok::greater));
3805   nextToken(); // Skip '>'.
3806 }
3807 
3808 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3809   do {
3810     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3811       nextToken();
3812       addUnwrappedLine();
3813       break;
3814     }
3815     if (FormatTok->is(tok::l_brace)) {
3816       parseBlock();
3817       // In ObjC interfaces, nothing should be following the "}".
3818       addUnwrappedLine();
3819     } else if (FormatTok->is(tok::r_brace)) {
3820       // Ignore stray "}". parseStructuralElement doesn't consume them.
3821       nextToken();
3822       addUnwrappedLine();
3823     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3824       nextToken();
3825       parseObjCMethod();
3826     } else {
3827       parseStructuralElement();
3828     }
3829   } while (!eof());
3830 }
3831 
3832 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3833   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3834          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3835   nextToken();
3836   nextToken(); // interface name
3837 
3838   // @interface can be followed by a lightweight generic
3839   // specialization list, then either a base class or a category.
3840   if (FormatTok->is(tok::less))
3841     parseObjCLightweightGenerics();
3842   if (FormatTok->is(tok::colon)) {
3843     nextToken();
3844     nextToken(); // base class name
3845     // The base class can also have lightweight generics applied to it.
3846     if (FormatTok->is(tok::less))
3847       parseObjCLightweightGenerics();
3848   } else if (FormatTok->is(tok::l_paren)) {
3849     // Skip category, if present.
3850     parseParens();
3851   }
3852 
3853   if (FormatTok->is(tok::less))
3854     parseObjCProtocolList();
3855 
3856   if (FormatTok->is(tok::l_brace)) {
3857     if (Style.BraceWrapping.AfterObjCDeclaration)
3858       addUnwrappedLine();
3859     parseBlock(/*MustBeDeclaration=*/true);
3860   }
3861 
3862   // With instance variables, this puts '}' on its own line.  Without instance
3863   // variables, this ends the @interface line.
3864   addUnwrappedLine();
3865 
3866   parseObjCUntilAtEnd();
3867 }
3868 
3869 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3870   assert(FormatTok->is(tok::less));
3871   // Unlike protocol lists, generic parameterizations support
3872   // nested angles:
3873   //
3874   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3875   //     NSObject <NSCopying, NSSecureCoding>
3876   //
3877   // so we need to count how many open angles we have left.
3878   unsigned NumOpenAngles = 1;
3879   do {
3880     nextToken();
3881     // Early exit in case someone forgot a close angle.
3882     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3883         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3884       break;
3885     }
3886     if (FormatTok->is(tok::less)) {
3887       ++NumOpenAngles;
3888     } else if (FormatTok->is(tok::greater)) {
3889       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3890       --NumOpenAngles;
3891     }
3892   } while (!eof() && NumOpenAngles != 0);
3893   nextToken(); // Skip '>'.
3894 }
3895 
3896 // Returns true for the declaration/definition form of @protocol,
3897 // false for the expression form.
3898 bool UnwrappedLineParser::parseObjCProtocol() {
3899   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3900   nextToken();
3901 
3902   if (FormatTok->is(tok::l_paren)) {
3903     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3904     return false;
3905   }
3906 
3907   // The definition/declaration form,
3908   // @protocol Foo
3909   // - (int)someMethod;
3910   // @end
3911 
3912   nextToken(); // protocol name
3913 
3914   if (FormatTok->is(tok::less))
3915     parseObjCProtocolList();
3916 
3917   // Check for protocol declaration.
3918   if (FormatTok->is(tok::semi)) {
3919     nextToken();
3920     addUnwrappedLine();
3921     return true;
3922   }
3923 
3924   addUnwrappedLine();
3925   parseObjCUntilAtEnd();
3926   return true;
3927 }
3928 
3929 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3930   bool IsImport = FormatTok->is(Keywords.kw_import);
3931   assert(IsImport || FormatTok->is(tok::kw_export));
3932   nextToken();
3933 
3934   // Consume the "default" in "export default class/function".
3935   if (FormatTok->is(tok::kw_default))
3936     nextToken();
3937 
3938   // Consume "async function", "function" and "default function", so that these
3939   // get parsed as free-standing JS functions, i.e. do not require a trailing
3940   // semicolon.
3941   if (FormatTok->is(Keywords.kw_async))
3942     nextToken();
3943   if (FormatTok->is(Keywords.kw_function)) {
3944     nextToken();
3945     return;
3946   }
3947 
3948   // For imports, `export *`, `export {...}`, consume the rest of the line up
3949   // to the terminating `;`. For everything else, just return and continue
3950   // parsing the structural element, i.e. the declaration or expression for
3951   // `export default`.
3952   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3953       !FormatTok->isStringLiteral()) {
3954     return;
3955   }
3956 
3957   while (!eof()) {
3958     if (FormatTok->is(tok::semi))
3959       return;
3960     if (Line->Tokens.empty()) {
3961       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3962       // import statement should terminate.
3963       return;
3964     }
3965     if (FormatTok->is(tok::l_brace)) {
3966       FormatTok->setBlockKind(BK_Block);
3967       nextToken();
3968       parseBracedList();
3969     } else {
3970       nextToken();
3971     }
3972   }
3973 }
3974 
3975 void UnwrappedLineParser::parseStatementMacro() {
3976   nextToken();
3977   if (FormatTok->is(tok::l_paren))
3978     parseParens();
3979   if (FormatTok->is(tok::semi))
3980     nextToken();
3981   addUnwrappedLine();
3982 }
3983 
3984 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3985                                                  StringRef Prefix = "") {
3986   llvm::dbgs() << Prefix << "Line(" << Line.Level
3987                << ", FSC=" << Line.FirstStartColumn << ")"
3988                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3989   for (const auto &Node : Line.Tokens) {
3990     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3991                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3992                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3993   }
3994   for (const auto &Node : Line.Tokens)
3995     for (const auto &ChildNode : Node.Children)
3996       printDebugInfo(ChildNode, "\nChild: ");
3997 
3998   llvm::dbgs() << "\n";
3999 }
4000 
4001 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4002   if (Line->Tokens.empty())
4003     return;
4004   LLVM_DEBUG({
4005     if (CurrentLines == &Lines)
4006       printDebugInfo(*Line);
4007   });
4008 
4009   // If this line closes a block when in Whitesmiths mode, remember that
4010   // information so that the level can be decreased after the line is added.
4011   // This has to happen after the addition of the line since the line itself
4012   // needs to be indented.
4013   bool ClosesWhitesmithsBlock =
4014       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4015       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4016 
4017   CurrentLines->push_back(std::move(*Line));
4018   Line->Tokens.clear();
4019   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4020   Line->FirstStartColumn = 0;
4021 
4022   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4023     --Line->Level;
4024   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
4025     CurrentLines->append(
4026         std::make_move_iterator(PreprocessorDirectives.begin()),
4027         std::make_move_iterator(PreprocessorDirectives.end()));
4028     PreprocessorDirectives.clear();
4029   }
4030   // Disconnect the current token from the last token on the previous line.
4031   FormatTok->Previous = nullptr;
4032 }
4033 
4034 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4035 
4036 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4037   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4038          FormatTok.NewlinesBefore > 0;
4039 }
4040 
4041 // Checks if \p FormatTok is a line comment that continues the line comment
4042 // section on \p Line.
4043 static bool
4044 continuesLineCommentSection(const FormatToken &FormatTok,
4045                             const UnwrappedLine &Line,
4046                             const llvm::Regex &CommentPragmasRegex) {
4047   if (Line.Tokens.empty())
4048     return false;
4049 
4050   StringRef IndentContent = FormatTok.TokenText;
4051   if (FormatTok.TokenText.startswith("//") ||
4052       FormatTok.TokenText.startswith("/*")) {
4053     IndentContent = FormatTok.TokenText.substr(2);
4054   }
4055   if (CommentPragmasRegex.match(IndentContent))
4056     return false;
4057 
4058   // If Line starts with a line comment, then FormatTok continues the comment
4059   // section if its original column is greater or equal to the original start
4060   // column of the line.
4061   //
4062   // Define the min column token of a line as follows: if a line ends in '{' or
4063   // contains a '{' followed by a line comment, then the min column token is
4064   // that '{'. Otherwise, the min column token of the line is the first token of
4065   // the line.
4066   //
4067   // If Line starts with a token other than a line comment, then FormatTok
4068   // continues the comment section if its original column is greater than the
4069   // original start column of the min column token of the line.
4070   //
4071   // For example, the second line comment continues the first in these cases:
4072   //
4073   // // first line
4074   // // second line
4075   //
4076   // and:
4077   //
4078   // // first line
4079   //  // second line
4080   //
4081   // and:
4082   //
4083   // int i; // first line
4084   //  // second line
4085   //
4086   // and:
4087   //
4088   // do { // first line
4089   //      // second line
4090   //   int i;
4091   // } while (true);
4092   //
4093   // and:
4094   //
4095   // enum {
4096   //   a, // first line
4097   //    // second line
4098   //   b
4099   // };
4100   //
4101   // The second line comment doesn't continue the first in these cases:
4102   //
4103   //   // first line
4104   //  // second line
4105   //
4106   // and:
4107   //
4108   // int i; // first line
4109   // // second line
4110   //
4111   // and:
4112   //
4113   // do { // first line
4114   //   // second line
4115   //   int i;
4116   // } while (true);
4117   //
4118   // and:
4119   //
4120   // enum {
4121   //   a, // first line
4122   //   // second line
4123   // };
4124   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4125 
4126   // Scan for '{//'. If found, use the column of '{' as a min column for line
4127   // comment section continuation.
4128   const FormatToken *PreviousToken = nullptr;
4129   for (const UnwrappedLineNode &Node : Line.Tokens) {
4130     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4131         isLineComment(*Node.Tok)) {
4132       MinColumnToken = PreviousToken;
4133       break;
4134     }
4135     PreviousToken = Node.Tok;
4136 
4137     // Grab the last newline preceding a token in this unwrapped line.
4138     if (Node.Tok->NewlinesBefore > 0)
4139       MinColumnToken = Node.Tok;
4140   }
4141   if (PreviousToken && PreviousToken->is(tok::l_brace))
4142     MinColumnToken = PreviousToken;
4143 
4144   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4145                               MinColumnToken);
4146 }
4147 
4148 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4149   bool JustComments = Line->Tokens.empty();
4150   for (FormatToken *Tok : CommentsBeforeNextToken) {
4151     // Line comments that belong to the same line comment section are put on the
4152     // same line since later we might want to reflow content between them.
4153     // Additional fine-grained breaking of line comment sections is controlled
4154     // by the class BreakableLineCommentSection in case it is desirable to keep
4155     // several line comment sections in the same unwrapped line.
4156     //
4157     // FIXME: Consider putting separate line comment sections as children to the
4158     // unwrapped line instead.
4159     Tok->ContinuesLineCommentSection =
4160         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4161     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4162       addUnwrappedLine();
4163     pushToken(Tok);
4164   }
4165   if (NewlineBeforeNext && JustComments)
4166     addUnwrappedLine();
4167   CommentsBeforeNextToken.clear();
4168 }
4169 
4170 void UnwrappedLineParser::nextToken(int LevelDifference) {
4171   if (eof())
4172     return;
4173   flushComments(isOnNewLine(*FormatTok));
4174   pushToken(FormatTok);
4175   FormatToken *Previous = FormatTok;
4176   if (!Style.isJavaScript())
4177     readToken(LevelDifference);
4178   else
4179     readTokenWithJavaScriptASI();
4180   FormatTok->Previous = Previous;
4181   if (Style.isVerilog()) {
4182     // Blocks in Verilog can have `begin` and `end` instead of braces.  For
4183     // keywords like `begin`, we can't treat them the same as left braces
4184     // because some contexts require one of them.  For example structs use
4185     // braces and if blocks use keywords, and a left brace can occur in an if
4186     // statement, but it is not a block.  For keywords like `end`, we simply
4187     // treat them the same as right braces.
4188     if (Keywords.isVerilogEnd(*FormatTok))
4189       FormatTok->Tok.setKind(tok::r_brace);
4190   }
4191 }
4192 
4193 void UnwrappedLineParser::distributeComments(
4194     const SmallVectorImpl<FormatToken *> &Comments,
4195     const FormatToken *NextTok) {
4196   // Whether or not a line comment token continues a line is controlled by
4197   // the method continuesLineCommentSection, with the following caveat:
4198   //
4199   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4200   // that each comment line from the trail is aligned with the next token, if
4201   // the next token exists. If a trail exists, the beginning of the maximal
4202   // trail is marked as a start of a new comment section.
4203   //
4204   // For example in this code:
4205   //
4206   // int a; // line about a
4207   //   // line 1 about b
4208   //   // line 2 about b
4209   //   int b;
4210   //
4211   // the two lines about b form a maximal trail, so there are two sections, the
4212   // first one consisting of the single comment "// line about a" and the
4213   // second one consisting of the next two comments.
4214   if (Comments.empty())
4215     return;
4216   bool ShouldPushCommentsInCurrentLine = true;
4217   bool HasTrailAlignedWithNextToken = false;
4218   unsigned StartOfTrailAlignedWithNextToken = 0;
4219   if (NextTok) {
4220     // We are skipping the first element intentionally.
4221     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4222       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4223         HasTrailAlignedWithNextToken = true;
4224         StartOfTrailAlignedWithNextToken = i;
4225       }
4226     }
4227   }
4228   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4229     FormatToken *FormatTok = Comments[i];
4230     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4231       FormatTok->ContinuesLineCommentSection = false;
4232     } else {
4233       FormatTok->ContinuesLineCommentSection =
4234           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4235     }
4236     if (!FormatTok->ContinuesLineCommentSection &&
4237         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4238       ShouldPushCommentsInCurrentLine = false;
4239     }
4240     if (ShouldPushCommentsInCurrentLine)
4241       pushToken(FormatTok);
4242     else
4243       CommentsBeforeNextToken.push_back(FormatTok);
4244   }
4245 }
4246 
4247 void UnwrappedLineParser::readToken(int LevelDifference) {
4248   SmallVector<FormatToken *, 1> Comments;
4249   bool PreviousWasComment = false;
4250   bool FirstNonCommentOnLine = false;
4251   do {
4252     FormatTok = Tokens->getNextToken();
4253     assert(FormatTok);
4254     while (FormatTok->getType() == TT_ConflictStart ||
4255            FormatTok->getType() == TT_ConflictEnd ||
4256            FormatTok->getType() == TT_ConflictAlternative) {
4257       if (FormatTok->getType() == TT_ConflictStart)
4258         conditionalCompilationStart(/*Unreachable=*/false);
4259       else if (FormatTok->getType() == TT_ConflictAlternative)
4260         conditionalCompilationAlternative();
4261       else if (FormatTok->getType() == TT_ConflictEnd)
4262         conditionalCompilationEnd();
4263       FormatTok = Tokens->getNextToken();
4264       FormatTok->MustBreakBefore = true;
4265     }
4266 
4267     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4268                                       const FormatToken &Tok,
4269                                       bool PreviousWasComment) {
4270       auto IsFirstOnLine = [](const FormatToken &Tok) {
4271         return Tok.HasUnescapedNewline || Tok.IsFirst;
4272       };
4273 
4274       // Consider preprocessor directives preceded by block comments as first
4275       // on line.
4276       if (PreviousWasComment)
4277         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4278       return IsFirstOnLine(Tok);
4279     };
4280 
4281     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4282         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4283     PreviousWasComment = FormatTok->is(tok::comment);
4284 
4285     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4286            FirstNonCommentOnLine) {
4287       distributeComments(Comments, FormatTok);
4288       Comments.clear();
4289       // If there is an unfinished unwrapped line, we flush the preprocessor
4290       // directives only after that unwrapped line was finished later.
4291       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4292       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4293       assert((LevelDifference >= 0 ||
4294               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4295              "LevelDifference makes Line->Level negative");
4296       Line->Level += LevelDifference;
4297       // Comments stored before the preprocessor directive need to be output
4298       // before the preprocessor directive, at the same level as the
4299       // preprocessor directive, as we consider them to apply to the directive.
4300       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4301           PPBranchLevel > 0) {
4302         Line->Level += PPBranchLevel;
4303       }
4304       flushComments(isOnNewLine(*FormatTok));
4305       parsePPDirective();
4306       PreviousWasComment = FormatTok->is(tok::comment);
4307       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4308           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4309     }
4310 
4311     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4312         !Line->InPPDirective) {
4313       continue;
4314     }
4315 
4316     if (!FormatTok->is(tok::comment)) {
4317       distributeComments(Comments, FormatTok);
4318       Comments.clear();
4319       return;
4320     }
4321 
4322     Comments.push_back(FormatTok);
4323   } while (!eof());
4324 
4325   distributeComments(Comments, nullptr);
4326   Comments.clear();
4327 }
4328 
4329 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4330   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4331   if (MustBreakBeforeNextToken) {
4332     Line->Tokens.back().Tok->MustBreakBefore = true;
4333     MustBreakBeforeNextToken = false;
4334   }
4335 }
4336 
4337 } // end namespace format
4338 } // end namespace clang
4339