1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 #include <utility>
24 
25 #define DEBUG_TYPE "format-parser"
26 
27 namespace clang {
28 namespace format {
29 
30 class FormatTokenSource {
31 public:
32   virtual ~FormatTokenSource() {}
33 
34   // Returns the next token in the token stream.
35   virtual FormatToken *getNextToken() = 0;
36 
37   // Returns the token preceding the token returned by the last call to
38   // getNextToken() in the token stream, or nullptr if no such token exists.
39   virtual FormatToken *getPreviousToken() = 0;
40 
41   // Returns the token that would be returned by the next call to
42   // getNextToken().
43   virtual FormatToken *peekNextToken() = 0;
44 
45   // Returns the token that would be returned after the next N calls to
46   // getNextToken(). N needs to be greater than zero, and small enough that
47   // there are still tokens. Check for tok::eof with N-1 before calling it with
48   // N.
49   virtual FormatToken *peekNextToken(int N) = 0;
50 
51   // Returns whether we are at the end of the file.
52   // This can be different from whether getNextToken() returned an eof token
53   // when the FormatTokenSource is a view on a part of the token stream.
54   virtual bool isEOF() = 0;
55 
56   // Gets the current position in the token stream, to be used by setPosition().
57   virtual unsigned getPosition() = 0;
58 
59   // Resets the token stream to the state it was in when getPosition() returned
60   // Position, and return the token at that position in the stream.
61   virtual FormatToken *setPosition(unsigned Position) = 0;
62 };
63 
64 namespace {
65 
66 class ScopedDeclarationState {
67 public:
68   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
69                          bool MustBeDeclaration)
70       : Line(Line), Stack(Stack) {
71     Line.MustBeDeclaration = MustBeDeclaration;
72     Stack.push_back(MustBeDeclaration);
73   }
74   ~ScopedDeclarationState() {
75     Stack.pop_back();
76     if (!Stack.empty())
77       Line.MustBeDeclaration = Stack.back();
78     else
79       Line.MustBeDeclaration = true;
80   }
81 
82 private:
83   UnwrappedLine &Line;
84   llvm::BitVector &Stack;
85 };
86 
87 static bool isLineComment(const FormatToken &FormatTok) {
88   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
89 }
90 
91 // Checks if \p FormatTok is a line comment that continues the line comment
92 // \p Previous. The original column of \p MinColumnToken is used to determine
93 // whether \p FormatTok is indented enough to the right to continue \p Previous.
94 static bool continuesLineComment(const FormatToken &FormatTok,
95                                  const FormatToken *Previous,
96                                  const FormatToken *MinColumnToken) {
97   if (!Previous || !MinColumnToken)
98     return false;
99   unsigned MinContinueColumn =
100       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
101   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
102          isLineComment(*Previous) &&
103          FormatTok.OriginalColumn >= MinContinueColumn;
104 }
105 
106 class ScopedMacroState : public FormatTokenSource {
107 public:
108   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
109                    FormatToken *&ResetToken)
110       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
111         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
112         Token(nullptr), PreviousToken(nullptr) {
113     FakeEOF.Tok.startToken();
114     FakeEOF.Tok.setKind(tok::eof);
115     TokenSource = this;
116     Line.Level = 0;
117     Line.InPPDirective = true;
118   }
119 
120   ~ScopedMacroState() override {
121     TokenSource = PreviousTokenSource;
122     ResetToken = Token;
123     Line.InPPDirective = false;
124     Line.Level = PreviousLineLevel;
125   }
126 
127   FormatToken *getNextToken() override {
128     // The \c UnwrappedLineParser guards against this by never calling
129     // \c getNextToken() after it has encountered the first eof token.
130     assert(!eof());
131     PreviousToken = Token;
132     Token = PreviousTokenSource->getNextToken();
133     if (eof())
134       return &FakeEOF;
135     return Token;
136   }
137 
138   FormatToken *getPreviousToken() override {
139     return PreviousTokenSource->getPreviousToken();
140   }
141 
142   FormatToken *peekNextToken() override {
143     if (eof())
144       return &FakeEOF;
145     return PreviousTokenSource->peekNextToken();
146   }
147 
148   FormatToken *peekNextToken(int N) override {
149     assert(N > 0);
150     if (eof())
151       return &FakeEOF;
152     return PreviousTokenSource->peekNextToken(N);
153   }
154 
155   bool isEOF() override { return PreviousTokenSource->isEOF(); }
156 
157   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
158 
159   FormatToken *setPosition(unsigned Position) override {
160     PreviousToken = nullptr;
161     Token = PreviousTokenSource->setPosition(Position);
162     return Token;
163   }
164 
165 private:
166   bool eof() {
167     return Token && Token->HasUnescapedNewline &&
168            !continuesLineComment(*Token, PreviousToken,
169                                  /*MinColumnToken=*/PreviousToken);
170   }
171 
172   FormatToken FakeEOF;
173   UnwrappedLine &Line;
174   FormatTokenSource *&TokenSource;
175   FormatToken *&ResetToken;
176   unsigned PreviousLineLevel;
177   FormatTokenSource *PreviousTokenSource;
178 
179   FormatToken *Token;
180   FormatToken *PreviousToken;
181 };
182 
183 } // end anonymous namespace
184 
185 class ScopedLineState {
186 public:
187   ScopedLineState(UnwrappedLineParser &Parser,
188                   bool SwitchToPreprocessorLines = false)
189       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
190     if (SwitchToPreprocessorLines)
191       Parser.CurrentLines = &Parser.PreprocessorDirectives;
192     else if (!Parser.Line->Tokens.empty())
193       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
194     PreBlockLine = std::move(Parser.Line);
195     Parser.Line = std::make_unique<UnwrappedLine>();
196     Parser.Line->Level = PreBlockLine->Level;
197     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
198   }
199 
200   ~ScopedLineState() {
201     if (!Parser.Line->Tokens.empty())
202       Parser.addUnwrappedLine();
203     assert(Parser.Line->Tokens.empty());
204     Parser.Line = std::move(PreBlockLine);
205     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
206       Parser.MustBreakBeforeNextToken = true;
207     Parser.CurrentLines = OriginalLines;
208   }
209 
210 private:
211   UnwrappedLineParser &Parser;
212 
213   std::unique_ptr<UnwrappedLine> PreBlockLine;
214   SmallVectorImpl<UnwrappedLine> *OriginalLines;
215 };
216 
217 class CompoundStatementIndenter {
218 public:
219   CompoundStatementIndenter(UnwrappedLineParser *Parser,
220                             const FormatStyle &Style, unsigned &LineLevel)
221       : CompoundStatementIndenter(Parser, LineLevel,
222                                   Style.BraceWrapping.AfterControlStatement,
223                                   Style.BraceWrapping.IndentBraces) {}
224   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
225                             bool WrapBrace, bool IndentBrace)
226       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
227     if (WrapBrace)
228       Parser->addUnwrappedLine();
229     if (IndentBrace)
230       ++LineLevel;
231   }
232   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
233 
234 private:
235   unsigned &LineLevel;
236   unsigned OldLineLevel;
237 };
238 
239 namespace {
240 
241 class IndexedTokenSource : public FormatTokenSource {
242 public:
243   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
244       : Tokens(Tokens), Position(-1) {}
245 
246   FormatToken *getNextToken() override {
247     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
248       LLVM_DEBUG({
249         llvm::dbgs() << "Next ";
250         dbgToken(Position);
251       });
252       return Tokens[Position];
253     }
254     ++Position;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Next ";
257       dbgToken(Position);
258     });
259     return Tokens[Position];
260   }
261 
262   FormatToken *getPreviousToken() override {
263     return Position > 0 ? Tokens[Position - 1] : nullptr;
264   }
265 
266   FormatToken *peekNextToken() override {
267     int Next = Position + 1;
268     LLVM_DEBUG({
269       llvm::dbgs() << "Peeking ";
270       dbgToken(Next);
271     });
272     return Tokens[Next];
273   }
274 
275   FormatToken *peekNextToken(int N) override {
276     assert(N > 0);
277     int Next = Position + N;
278     LLVM_DEBUG({
279       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
280       dbgToken(Next);
281     });
282     return Tokens[Next];
283   }
284 
285   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
286 
287   unsigned getPosition() override {
288     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
289     assert(Position >= 0);
290     return Position;
291   }
292 
293   FormatToken *setPosition(unsigned P) override {
294     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
295     Position = P;
296     return Tokens[Position];
297   }
298 
299   void reset() { Position = -1; }
300 
301 private:
302   void dbgToken(int Position, llvm::StringRef Indent = "") {
303     FormatToken *Tok = Tokens[Position];
304     llvm::dbgs() << Indent << "[" << Position
305                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
306                  << ", Macro: " << !!Tok->MacroCtx << "\n";
307   }
308 
309   ArrayRef<FormatToken *> Tokens;
310   int Position;
311 };
312 
313 } // end anonymous namespace
314 
315 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
316                                          const AdditionalKeywords &Keywords,
317                                          unsigned FirstStartColumn,
318                                          ArrayRef<FormatToken *> Tokens,
319                                          UnwrappedLineConsumer &Callback)
320     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
321       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
322       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
323       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
324       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
325                        ? IG_Rejected
326                        : IG_Inited),
327       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
328 
329 void UnwrappedLineParser::reset() {
330   PPBranchLevel = -1;
331   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
332                      ? IG_Rejected
333                      : IG_Inited;
334   IncludeGuardToken = nullptr;
335   Line.reset(new UnwrappedLine);
336   CommentsBeforeNextToken.clear();
337   FormatTok = nullptr;
338   MustBreakBeforeNextToken = false;
339   PreprocessorDirectives.clear();
340   CurrentLines = &Lines;
341   DeclarationScopeStack.clear();
342   NestedTooDeep.clear();
343   PPStack.clear();
344   Line->FirstStartColumn = FirstStartColumn;
345 }
346 
347 void UnwrappedLineParser::parse() {
348   IndexedTokenSource TokenSource(AllTokens);
349   Line->FirstStartColumn = FirstStartColumn;
350   do {
351     LLVM_DEBUG(llvm::dbgs() << "----\n");
352     reset();
353     Tokens = &TokenSource;
354     TokenSource.reset();
355 
356     readToken();
357     parseFile();
358 
359     // If we found an include guard then all preprocessor directives (other than
360     // the guard) are over-indented by one.
361     if (IncludeGuard == IG_Found) {
362       for (auto &Line : Lines)
363         if (Line.InPPDirective && Line.Level > 0)
364           --Line.Level;
365     }
366 
367     // Create line with eof token.
368     pushToken(FormatTok);
369     addUnwrappedLine();
370 
371     for (const UnwrappedLine &Line : Lines)
372       Callback.consumeUnwrappedLine(Line);
373 
374     Callback.finishRun();
375     Lines.clear();
376     while (!PPLevelBranchIndex.empty() &&
377            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
378       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
379       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
380     }
381     if (!PPLevelBranchIndex.empty()) {
382       ++PPLevelBranchIndex.back();
383       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
384       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
385     }
386   } while (!PPLevelBranchIndex.empty());
387 }
388 
389 void UnwrappedLineParser::parseFile() {
390   // The top-level context in a file always has declarations, except for pre-
391   // processor directives and JavaScript files.
392   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
393   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
394                                           MustBeDeclaration);
395   if (Style.Language == FormatStyle::LK_TextProto)
396     parseBracedList();
397   else
398     parseLevel();
399   // Make sure to format the remaining tokens.
400   //
401   // LK_TextProto is special since its top-level is parsed as the body of a
402   // braced list, which does not necessarily have natural line separators such
403   // as a semicolon. Comments after the last entry that have been determined to
404   // not belong to that line, as in:
405   //   key: value
406   //   // endfile comment
407   // do not have a chance to be put on a line of their own until this point.
408   // Here we add this newline before end-of-file comments.
409   if (Style.Language == FormatStyle::LK_TextProto &&
410       !CommentsBeforeNextToken.empty()) {
411     addUnwrappedLine();
412   }
413   flushComments(true);
414   addUnwrappedLine();
415 }
416 
417 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
418   do {
419     switch (FormatTok->Tok.getKind()) {
420     case tok::l_brace:
421       return;
422     default:
423       if (FormatTok->is(Keywords.kw_where)) {
424         addUnwrappedLine();
425         nextToken();
426         parseCSharpGenericTypeConstraint();
427         break;
428       }
429       nextToken();
430       break;
431     }
432   } while (!eof());
433 }
434 
435 void UnwrappedLineParser::parseCSharpAttribute() {
436   int UnpairedSquareBrackets = 1;
437   do {
438     switch (FormatTok->Tok.getKind()) {
439     case tok::r_square:
440       nextToken();
441       --UnpairedSquareBrackets;
442       if (UnpairedSquareBrackets == 0) {
443         addUnwrappedLine();
444         return;
445       }
446       break;
447     case tok::l_square:
448       ++UnpairedSquareBrackets;
449       nextToken();
450       break;
451     default:
452       nextToken();
453       break;
454     }
455   } while (!eof());
456 }
457 
458 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
459   if (!Lines.empty() && Lines.back().InPPDirective)
460     return true;
461 
462   const FormatToken *Previous = Tokens->getPreviousToken();
463   return Previous && Previous->is(tok::comment) &&
464          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
465 }
466 
467 /// \brief Parses a level, that is ???.
468 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
469 /// \param CanContainBracedList If the content can contain (at any level) a
470 /// braced list.
471 /// \param NextLBracesType The type for left brace found in this level.
472 /// \param IfKind The \p if statement kind in the level.
473 /// \param IfLeftBrace The left brace of the \p if block in the level.
474 /// \returns true if a simple block of if/else/for/while, or false otherwise.
475 /// (A simple block has a single statement.)
476 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
477                                      bool CanContainBracedList,
478                                      TokenType NextLBracesType,
479                                      IfStmtKind *IfKind,
480                                      FormatToken **IfLeftBrace) {
481   auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
482                                   ? TT_BracedListLBrace
483                                   : TT_Unknown;
484   const bool IsPrecededByCommentOrPPDirective =
485       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
486   FormatToken *IfLBrace = nullptr;
487   bool HasDoWhile = false;
488   bool HasLabel = false;
489   unsigned StatementCount = 0;
490   bool SwitchLabelEncountered = false;
491 
492   do {
493     if (FormatTok->getType() == TT_AttributeMacro) {
494       nextToken();
495       continue;
496     }
497     tok::TokenKind kind = FormatTok->Tok.getKind();
498     if (FormatTok->getType() == TT_MacroBlockBegin)
499       kind = tok::l_brace;
500     else if (FormatTok->getType() == TT_MacroBlockEnd)
501       kind = tok::r_brace;
502 
503     auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
504                          &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
505       parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
506                              &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
507                              HasLabel ? nullptr : &HasLabel);
508       ++StatementCount;
509       assert(StatementCount > 0 && "StatementCount overflow!");
510     };
511 
512     switch (kind) {
513     case tok::comment:
514       nextToken();
515       addUnwrappedLine();
516       break;
517     case tok::l_brace:
518       if (NextLBracesType != TT_Unknown) {
519         FormatTok->setFinalizedType(NextLBracesType);
520       } else if (FormatTok->Previous &&
521                  FormatTok->Previous->ClosesRequiresClause) {
522         // We need the 'default' case here to correctly parse a function
523         // l_brace.
524         ParseDefault();
525         continue;
526       }
527       if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
528           tryToParseBracedList()) {
529         continue;
530       }
531       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
532                  /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
533                  /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
534                  NextLBracesType);
535       ++StatementCount;
536       assert(StatementCount > 0 && "StatementCount overflow!");
537       addUnwrappedLine();
538       break;
539     case tok::r_brace:
540       if (OpeningBrace) {
541         if (!Style.RemoveBracesLLVM ||
542             !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
543           return false;
544         }
545         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
546             HasDoWhile || IsPrecededByCommentOrPPDirective ||
547             precededByCommentOrPPDirective()) {
548           return false;
549         }
550         const FormatToken *Next = Tokens->peekNextToken();
551         if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
552           return false;
553         if (IfLeftBrace)
554           *IfLeftBrace = IfLBrace;
555         return true;
556       }
557       nextToken();
558       addUnwrappedLine();
559       break;
560     case tok::kw_default: {
561       unsigned StoredPosition = Tokens->getPosition();
562       FormatToken *Next;
563       do {
564         Next = Tokens->getNextToken();
565         assert(Next);
566       } while (Next->is(tok::comment));
567       FormatTok = Tokens->setPosition(StoredPosition);
568       if (Next->isNot(tok::colon)) {
569         // default not followed by ':' is not a case label; treat it like
570         // an identifier.
571         parseStructuralElement();
572         break;
573       }
574       // Else, if it is 'default:', fall through to the case handling.
575       LLVM_FALLTHROUGH;
576     }
577     case tok::kw_case:
578       if (Style.isJavaScript() && Line->MustBeDeclaration) {
579         // A 'case: string' style field declaration.
580         parseStructuralElement();
581         break;
582       }
583       if (!SwitchLabelEncountered &&
584           (Style.IndentCaseLabels ||
585            (Line->InPPDirective && Line->Level == 1))) {
586         ++Line->Level;
587       }
588       SwitchLabelEncountered = true;
589       parseStructuralElement();
590       break;
591     case tok::l_square:
592       if (Style.isCSharp()) {
593         nextToken();
594         parseCSharpAttribute();
595         break;
596       }
597       if (handleCppAttributes())
598         break;
599       LLVM_FALLTHROUGH;
600     default:
601       ParseDefault();
602       break;
603     }
604   } while (!eof());
605 
606   return false;
607 }
608 
609 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
610   // We'll parse forward through the tokens until we hit
611   // a closing brace or eof - note that getNextToken() will
612   // parse macros, so this will magically work inside macro
613   // definitions, too.
614   unsigned StoredPosition = Tokens->getPosition();
615   FormatToken *Tok = FormatTok;
616   const FormatToken *PrevTok = Tok->Previous;
617   // Keep a stack of positions of lbrace tokens. We will
618   // update information about whether an lbrace starts a
619   // braced init list or a different block during the loop.
620   SmallVector<FormatToken *, 8> LBraceStack;
621   assert(Tok->is(tok::l_brace));
622   do {
623     // Get next non-comment token.
624     FormatToken *NextTok;
625     do {
626       NextTok = Tokens->getNextToken();
627     } while (NextTok->is(tok::comment));
628 
629     switch (Tok->Tok.getKind()) {
630     case tok::l_brace:
631       if (Style.isJavaScript() && PrevTok) {
632         if (PrevTok->isOneOf(tok::colon, tok::less)) {
633           // A ':' indicates this code is in a type, or a braced list
634           // following a label in an object literal ({a: {b: 1}}).
635           // A '<' could be an object used in a comparison, but that is nonsense
636           // code (can never return true), so more likely it is a generic type
637           // argument (`X<{a: string; b: number}>`).
638           // The code below could be confused by semicolons between the
639           // individual members in a type member list, which would normally
640           // trigger BK_Block. In both cases, this must be parsed as an inline
641           // braced init.
642           Tok->setBlockKind(BK_BracedInit);
643         } else if (PrevTok->is(tok::r_paren)) {
644           // `) { }` can only occur in function or method declarations in JS.
645           Tok->setBlockKind(BK_Block);
646         }
647       } else {
648         Tok->setBlockKind(BK_Unknown);
649       }
650       LBraceStack.push_back(Tok);
651       break;
652     case tok::r_brace:
653       if (LBraceStack.empty())
654         break;
655       if (LBraceStack.back()->is(BK_Unknown)) {
656         bool ProbablyBracedList = false;
657         if (Style.Language == FormatStyle::LK_Proto) {
658           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
659         } else {
660           // Skip NextTok over preprocessor lines, otherwise we may not
661           // properly diagnose the block as a braced intializer
662           // if the comma separator appears after the pp directive.
663           while (NextTok->is(tok::hash)) {
664             ScopedMacroState MacroState(*Line, Tokens, NextTok);
665             do {
666               NextTok = Tokens->getNextToken();
667             } while (NextTok->isNot(tok::eof));
668           }
669 
670           // Using OriginalColumn to distinguish between ObjC methods and
671           // binary operators is a bit hacky.
672           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
673                                   NextTok->OriginalColumn == 0;
674 
675           // Try to detect a braced list. Note that regardless how we mark inner
676           // braces here, we will overwrite the BlockKind later if we parse a
677           // braced list (where all blocks inside are by default braced lists),
678           // or when we explicitly detect blocks (for example while parsing
679           // lambdas).
680 
681           // If we already marked the opening brace as braced list, the closing
682           // must also be part of it.
683           ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);
684 
685           ProbablyBracedList = ProbablyBracedList ||
686                                (Style.isJavaScript() &&
687                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
688                                                  Keywords.kw_as));
689           ProbablyBracedList = ProbablyBracedList ||
690                                (Style.isCpp() && NextTok->is(tok::l_paren));
691 
692           // If there is a comma, semicolon or right paren after the closing
693           // brace, we assume this is a braced initializer list.
694           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
695           // braced list in JS.
696           ProbablyBracedList =
697               ProbablyBracedList ||
698               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
699                                tok::r_paren, tok::r_square, tok::l_brace,
700                                tok::ellipsis);
701 
702           ProbablyBracedList =
703               ProbablyBracedList ||
704               (NextTok->is(tok::identifier) &&
705                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
706 
707           ProbablyBracedList = ProbablyBracedList ||
708                                (NextTok->is(tok::semi) &&
709                                 (!ExpectClassBody || LBraceStack.size() != 1));
710 
711           ProbablyBracedList =
712               ProbablyBracedList ||
713               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
714 
715           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
716             // We can have an array subscript after a braced init
717             // list, but C++11 attributes are expected after blocks.
718             NextTok = Tokens->getNextToken();
719             ProbablyBracedList = NextTok->isNot(tok::l_square);
720           }
721         }
722         if (ProbablyBracedList) {
723           Tok->setBlockKind(BK_BracedInit);
724           LBraceStack.back()->setBlockKind(BK_BracedInit);
725         } else {
726           Tok->setBlockKind(BK_Block);
727           LBraceStack.back()->setBlockKind(BK_Block);
728         }
729       }
730       LBraceStack.pop_back();
731       break;
732     case tok::identifier:
733       if (!Tok->is(TT_StatementMacro))
734         break;
735       LLVM_FALLTHROUGH;
736     case tok::at:
737     case tok::semi:
738     case tok::kw_if:
739     case tok::kw_while:
740     case tok::kw_for:
741     case tok::kw_switch:
742     case tok::kw_try:
743     case tok::kw___try:
744       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
745         LBraceStack.back()->setBlockKind(BK_Block);
746       break;
747     default:
748       break;
749     }
750     PrevTok = Tok;
751     Tok = NextTok;
752   } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
753 
754   // Assume other blocks for all unclosed opening braces.
755   for (FormatToken *LBrace : LBraceStack)
756     if (LBrace->is(BK_Unknown))
757       LBrace->setBlockKind(BK_Block);
758 
759   FormatTok = Tokens->setPosition(StoredPosition);
760 }
761 
/// Mixes the std::hash of \p v into \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
767 
768 size_t UnwrappedLineParser::computePPHash() const {
769   size_t h = 0;
770   for (const auto &i : PPStack) {
771     hash_combine(h, size_t(i.Kind));
772     hash_combine(h, i.Line);
773   }
774   return h;
775 }
776 
777 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
778 // is not null, subtracts its length (plus the preceding space) when computing
779 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
780 // running the token annotator on it so that we can restore them afterward.
781 bool UnwrappedLineParser::mightFitOnOneLine(
782     UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
783   const auto ColumnLimit = Style.ColumnLimit;
784   if (ColumnLimit == 0)
785     return true;
786 
787   auto &Tokens = ParsedLine.Tokens;
788   assert(!Tokens.empty());
789 
790   const auto *LastToken = Tokens.back().Tok;
791   assert(LastToken);
792 
793   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
794 
795   int Index = 0;
796   for (const auto &Token : Tokens) {
797     assert(Token.Tok);
798     auto &SavedToken = SavedTokens[Index++];
799     SavedToken.Tok = new FormatToken;
800     SavedToken.Tok->copyFrom(*Token.Tok);
801     SavedToken.Children = std::move(Token.Children);
802   }
803 
804   AnnotatedLine Line(ParsedLine);
805   assert(Line.Last == LastToken);
806 
807   TokenAnnotator Annotator(Style, Keywords);
808   Annotator.annotate(Line);
809   Annotator.calculateFormattingInformation(Line);
810 
811   auto Length = LastToken->TotalLength;
812   if (OpeningBrace) {
813     assert(OpeningBrace != Tokens.front().Tok);
814     Length -= OpeningBrace->TokenText.size() + 1;
815   }
816 
817   Index = 0;
818   for (auto &Token : Tokens) {
819     const auto &SavedToken = SavedTokens[Index++];
820     Token.Tok->copyFrom(*SavedToken.Tok);
821     Token.Children = std::move(SavedToken.Children);
822     delete SavedToken.Tok;
823   }
824 
825   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
826 }
827 
// Parses a block that starts at the current token, which must be either a
// '{' or a macro-block-begin token, up to and including its closing token.
//
// \param MustBeDeclaration forwarded to the ScopedDeclarationState that is
//        active while the block's contents are parsed.
// \param AddLevels number of indentation levels added for the block's
//        contents.
// \param MunchSemi if true, a ';' directly following the block is consumed.
// \param KeepBraces consulted by the brace-removal check below.
// \param IfKind forwarded to parseLevel().
// \param UnindentWhitesmithsBraces if true, the line level is decremented by
//        one after the opening line has been added.
// \param CanContainBracedList forwarded to parseLevel().
// \param NextLBracesType forwarded to parseLevel().
// \returns nullptr if the nesting is too deep; otherwise the IfLBrace pointer
//          as filled in by parseLevel() (which may also be nullptr).
FormatToken *UnwrappedLineParser::parseBlock(
    bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
    IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
    bool CanContainBracedList, TokenType NextLBracesType) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Remember the opening token and some facts about it before advancing.
  FormatToken *Tok = FormatTok;
  const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
  auto Index = CurrentLines->size();
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Compared against the hash taken at the closing token to decide whether
  // the opening and closing lines may be linked to each other.
  size_t PPStartHash = computePPHash();

  // The level restored when leaving the block.
  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  // Bail out if there are too many levels. Otherwise, the stack might overflow.
  if (Line->Level > 300)
    return nullptr;

  // A macro block opener may be followed by a parenthesized argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Sampled before addUnwrappedLine() and subtracted from the opening line's
  // index below; only taken into account when lines go to the top-level list.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  // In Whitesmiths mode the level was already bumped before the brace.
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  FormatToken *IfLBrace = nullptr;
  const bool SimpleBlock =
      parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);

  // Input ended before the block was closed.
  if (eof())
    return IfLBrace;

  // The block did not end with the expected closing token: restore the level,
  // mark the current token as a block and give up on the remaining handling.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfLBrace;
  }

  // Decides whether the opening and closing brace get linked through
  // MatchingParen below. The lambda captures by copy and is mutable, so its
  // changes to Index and Tok affect only its own copies.
  auto RemoveBraces = [=]() mutable {
    if (!SimpleBlock)
      return false;
    assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
    assert(FormatTok->is(tok::r_brace));
    // The opening brace has no previous token, i.e. it starts its own line.
    const bool WrappedOpeningBrace = !Tok->Previous;
    if (WrappedOpeningBrace && FollowedByComment)
      return false;
    const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
    if (KeepBraces && !HasRequiredIfBraces)
      return false;
    if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
      // The token before the closing brace is a non-optional '}'.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      if (Previous->is(tok::r_brace) && !Previous->Optional)
        return false;
    }
    assert(!CurrentLines->empty());
    // Check the most recently added line first.
    if (!mightFitOnOneLine(CurrentLines->back()))
      return false;
    if (Tok->is(TT_ElseLBrace))
      return true;
    if (WrappedOpeningBrace) {
      assert(Index > 0);
      --Index; // The line above the wrapped l_brace.
      Tok = nullptr;
    }
    // Finally check the line recorded at block entry (or the one above it
    // when the opening brace was wrapped).
    return mightFitOnOneLine((*CurrentLines)[Index], Tok);
  };
  if (RemoveBraces()) {
    Tok->MatchingParen = FormatTok;
    FormatTok->MatchingParen = Tok;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  // NOTE(review): AddLevels is unsigned, so -AddLevels wraps before being
  // passed on; presumably nextToken() takes a signed LevelDifference - confirm.
  nextToken(/*LevelDifference=*/-AddLevels);

  // A macro block closer may also be followed by a parenthesized list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Only link the opening and closing lines when the preprocessor hash is the
  // same at both ends of the block.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfLBrace;
}
960 
961 static bool isGoogScope(const UnwrappedLine &Line) {
962   // FIXME: Closure-library specific stuff should not be hard-coded but be
963   // configurable.
964   if (Line.Tokens.size() < 4)
965     return false;
966   auto I = Line.Tokens.begin();
967   if (I->Tok->TokenText != "goog")
968     return false;
969   ++I;
970   if (I->Tok->isNot(tok::period))
971     return false;
972   ++I;
973   if (I->Tok->TokenText != "scope")
974     return false;
975   ++I;
976   return I->Tok->is(tok::l_paren);
977 }
978 
979 static bool isIIFE(const UnwrappedLine &Line,
980                    const AdditionalKeywords &Keywords) {
981   // Look for the start of an immediately invoked anonymous function.
982   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
983   // This is commonly done in JavaScript to create a new, anonymous scope.
984   // Example: (function() { ... })()
985   if (Line.Tokens.size() < 3)
986     return false;
987   auto I = Line.Tokens.begin();
988   if (I->Tok->isNot(tok::l_paren))
989     return false;
990   ++I;
991   if (I->Tok->isNot(Keywords.kw_function))
992     return false;
993   ++I;
994   return I->Tok->is(tok::l_paren);
995 }
996 
997 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
998                                    const FormatToken &InitialToken) {
999   tok::TokenKind Kind = InitialToken.Tok.getKind();
1000   if (InitialToken.is(TT_NamespaceMacro))
1001     Kind = tok::kw_namespace;
1002 
1003   switch (Kind) {
1004   case tok::kw_namespace:
1005     return Style.BraceWrapping.AfterNamespace;
1006   case tok::kw_class:
1007     return Style.BraceWrapping.AfterClass;
1008   case tok::kw_union:
1009     return Style.BraceWrapping.AfterUnion;
1010   case tok::kw_struct:
1011     return Style.BraceWrapping.AfterStruct;
1012   case tok::kw_enum:
1013     return Style.BraceWrapping.AfterEnum;
1014   default:
1015     return false;
1016   }
1017 }
1018 
1019 void UnwrappedLineParser::parseChildBlock(
1020     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
1021   assert(FormatTok->is(tok::l_brace));
1022   FormatTok->setBlockKind(BK_Block);
1023   const FormatToken *OpeningBrace = FormatTok;
1024   nextToken();
1025   {
1026     bool SkipIndent = (Style.isJavaScript() &&
1027                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
1028     ScopedLineState LineState(*this);
1029     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
1030                                             /*MustBeDeclaration=*/false);
1031     Line->Level += SkipIndent ? 0 : 1;
1032     parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
1033     flushComments(isOnNewLine(*FormatTok));
1034     Line->Level -= SkipIndent ? 0 : 1;
1035   }
1036   nextToken();
1037 }
1038 
1039 void UnwrappedLineParser::parsePPDirective() {
1040   assert(FormatTok->is(tok::hash) && "'#' expected");
1041   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1042 
1043   nextToken();
1044 
1045   if (!FormatTok->Tok.getIdentifierInfo()) {
1046     parsePPUnknown();
1047     return;
1048   }
1049 
1050   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1051   case tok::pp_define:
1052     parsePPDefine();
1053     return;
1054   case tok::pp_if:
1055     parsePPIf(/*IfDef=*/false);
1056     break;
1057   case tok::pp_ifdef:
1058   case tok::pp_ifndef:
1059     parsePPIf(/*IfDef=*/true);
1060     break;
1061   case tok::pp_else:
1062     parsePPElse();
1063     break;
1064   case tok::pp_elifdef:
1065   case tok::pp_elifndef:
1066   case tok::pp_elif:
1067     parsePPElIf();
1068     break;
1069   case tok::pp_endif:
1070     parsePPEndIf();
1071     break;
1072   default:
1073     parsePPUnknown();
1074     break;
1075   }
1076 }
1077 
1078 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1079   size_t Line = CurrentLines->size();
1080   if (CurrentLines == &PreprocessorDirectives)
1081     Line += Lines.size();
1082 
1083   if (Unreachable ||
1084       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1085     PPStack.push_back({PP_Unreachable, Line});
1086   } else {
1087     PPStack.push_back({PP_Conditional, Line});
1088   }
1089 }
1090 
1091 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1092   ++PPBranchLevel;
1093   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1094   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1095     PPLevelBranchIndex.push_back(0);
1096     PPLevelBranchCount.push_back(0);
1097   }
1098   PPChainBranchIndex.push(0);
1099   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1100   conditionalCompilationCondition(Unreachable || Skip);
1101 }
1102 
1103 void UnwrappedLineParser::conditionalCompilationAlternative() {
1104   if (!PPStack.empty())
1105     PPStack.pop_back();
1106   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1107   if (!PPChainBranchIndex.empty())
1108     ++PPChainBranchIndex.top();
1109   conditionalCompilationCondition(
1110       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1111       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1112 }
1113 
1114 void UnwrappedLineParser::conditionalCompilationEnd() {
1115   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1116   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1117     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1118       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1119   }
1120   // Guard against #endif's without #if.
1121   if (PPBranchLevel > -1)
1122     --PPBranchLevel;
1123   if (!PPChainBranchIndex.empty())
1124     PPChainBranchIndex.pop();
1125   if (!PPStack.empty())
1126     PPStack.pop_back();
1127 }
1128 
1129 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1130   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1131   nextToken();
1132   bool Unreachable = false;
1133   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1134     Unreachable = true;
1135   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1136     Unreachable = true;
1137   conditionalCompilationStart(Unreachable);
1138   FormatToken *IfCondition = FormatTok;
1139   // If there's a #ifndef on the first line, and the only lines before it are
1140   // comments, it could be an include guard.
1141   bool MaybeIncludeGuard = IfNDef;
1142   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1143     for (auto &Line : Lines) {
1144       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1145         MaybeIncludeGuard = false;
1146         IncludeGuard = IG_Rejected;
1147         break;
1148       }
1149     }
1150   }
1151   --PPBranchLevel;
1152   parsePPUnknown();
1153   ++PPBranchLevel;
1154   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1155     IncludeGuard = IG_IfNdefed;
1156     IncludeGuardToken = IfCondition;
1157   }
1158 }
1159 
1160 void UnwrappedLineParser::parsePPElse() {
1161   // If a potential include guard has an #else, it's not an include guard.
1162   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1163     IncludeGuard = IG_Rejected;
1164   conditionalCompilationAlternative();
1165   if (PPBranchLevel > -1)
1166     --PPBranchLevel;
1167   parsePPUnknown();
1168   ++PPBranchLevel;
1169 }
1170 
1171 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1172 
1173 void UnwrappedLineParser::parsePPEndIf() {
1174   conditionalCompilationEnd();
1175   parsePPUnknown();
1176   // If the #endif of a potential include guard is the last thing in the file,
1177   // then we found an include guard.
1178   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1179       Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1180     IncludeGuard = IG_Found;
1181   }
1182 }
1183 
1184 void UnwrappedLineParser::parsePPDefine() {
1185   nextToken();
1186 
1187   if (!FormatTok->Tok.getIdentifierInfo()) {
1188     IncludeGuard = IG_Rejected;
1189     IncludeGuardToken = nullptr;
1190     parsePPUnknown();
1191     return;
1192   }
1193 
1194   if (IncludeGuard == IG_IfNdefed &&
1195       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1196     IncludeGuard = IG_Defined;
1197     IncludeGuardToken = nullptr;
1198     for (auto &Line : Lines) {
1199       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1200         IncludeGuard = IG_Rejected;
1201         break;
1202       }
1203     }
1204   }
1205 
1206   // In the context of a define, even keywords should be treated as normal
1207   // identifiers. Setting the kind to identifier is not enough, because we need
1208   // to treat additional keywords like __except as well, which are already
1209   // identifiers. Setting the identifier info to null interferes with include
1210   // guard processing above, and changes preprocessing nesting.
1211   FormatTok->Tok.setKind(tok::identifier);
1212   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1213   nextToken();
1214   if (FormatTok->Tok.getKind() == tok::l_paren &&
1215       !FormatTok->hasWhitespaceBefore()) {
1216     parseParens();
1217   }
1218   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1219     Line->Level += PPBranchLevel + 1;
1220   addUnwrappedLine();
1221   ++Line->Level;
1222 
1223   // Errors during a preprocessor directive can only affect the layout of the
1224   // preprocessor directive, and thus we ignore them. An alternative approach
1225   // would be to use the same approach we use on the file level (no
1226   // re-indentation if there was a structural error) within the macro
1227   // definition.
1228   parseFile();
1229 }
1230 
1231 void UnwrappedLineParser::parsePPUnknown() {
1232   do {
1233     nextToken();
1234   } while (!eof());
1235   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1236     Line->Level += PPBranchLevel + 1;
1237   addUnwrappedLine();
1238 }
1239 
1240 // Here we exclude certain tokens that are not usually the first token in an
1241 // unwrapped line. This is used in attempt to distinguish macro calls without
1242 // trailing semicolons from other constructs split to several lines.
1243 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1244   // Semicolon can be a null-statement, l_square can be a start of a macro or
1245   // a C++11 attribute, but this doesn't seem to be common.
1246   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1247          Tok.isNot(TT_AttributeSquare) &&
1248          // Tokens that can only be used as binary operators and a part of
1249          // overloaded operator names.
1250          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1251          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1252          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1253          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1254          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1255          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1256          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1257          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1258          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1259          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1260          Tok.isNot(tok::lesslessequal) &&
1261          // Colon is used in labels, base class lists, initializer lists,
1262          // range-based for loops, ternary operator, but should never be the
1263          // first token in an unwrapped line.
1264          Tok.isNot(tok::colon) &&
1265          // 'noexcept' is a trailing annotation.
1266          Tok.isNot(tok::kw_noexcept);
1267 }
1268 
1269 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1270                           const FormatToken *FormatTok) {
1271   // FIXME: This returns true for C/C++ keywords like 'struct'.
1272   return FormatTok->is(tok::identifier) &&
1273          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1274           !FormatTok->isOneOf(
1275               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1276               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1277               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1278               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1279               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1280               Keywords.kw_instanceof, Keywords.kw_interface,
1281               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1282 }
1283 
1284 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1285                                  const FormatToken *FormatTok) {
1286   return FormatTok->Tok.isLiteral() ||
1287          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1288          mustBeJSIdent(Keywords, FormatTok);
1289 }
1290 
1291 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1292 // when encountered after a value (see mustBeJSIdentOrValue).
1293 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1294                            const FormatToken *FormatTok) {
1295   return FormatTok->isOneOf(
1296       tok::kw_return, Keywords.kw_yield,
1297       // conditionals
1298       tok::kw_if, tok::kw_else,
1299       // loops
1300       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1301       // switch/case
1302       tok::kw_switch, tok::kw_case,
1303       // exceptions
1304       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1305       // declaration
1306       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1307       Keywords.kw_async, Keywords.kw_function,
1308       // import/export
1309       Keywords.kw_import, tok::kw_export);
1310 }
1311 
1312 // Checks whether a token is a type in K&R C (aka C78).
1313 static bool isC78Type(const FormatToken &Tok) {
1314   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1315                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1316                      tok::identifier);
1317 }
1318 
1319 // This function checks whether a token starts the first parameter declaration
1320 // in a K&R C (aka C78) function definition, e.g.:
1321 //   int f(a, b)
1322 //   short a, b;
1323 //   {
1324 //      return a + b;
1325 //   }
1326 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1327                                const FormatToken *FuncName) {
1328   assert(Tok);
1329   assert(Next);
1330   assert(FuncName);
1331 
1332   if (FuncName->isNot(tok::identifier))
1333     return false;
1334 
1335   const FormatToken *Prev = FuncName->Previous;
1336   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1337     return false;
1338 
1339   if (!isC78Type(*Tok) &&
1340       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1341     return false;
1342   }
1343 
1344   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1345     return false;
1346 
1347   Tok = Tok->Previous;
1348   if (!Tok || Tok->isNot(tok::r_paren))
1349     return false;
1350 
1351   Tok = Tok->Previous;
1352   if (!Tok || Tok->isNot(tok::identifier))
1353     return false;
1354 
1355   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1356 }
1357 
1358 void UnwrappedLineParser::parseModuleImport() {
1359   nextToken();
1360   while (!eof()) {
1361     if (FormatTok->is(tok::colon)) {
1362       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1363     }
1364     // Handle import <foo/bar.h> as we would an include statement.
1365     else if (FormatTok->is(tok::less)) {
1366       nextToken();
1367       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1368         // Mark tokens up to the trailing line comments as implicit string
1369         // literals.
1370         if (FormatTok->isNot(tok::comment) &&
1371             !FormatTok->TokenText.startswith("//")) {
1372           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1373         }
1374         nextToken();
1375       }
1376     }
1377     if (FormatTok->is(tok::semi)) {
1378       nextToken();
1379       break;
1380     }
1381     nextToken();
1382   }
1383 
1384   addUnwrappedLine();
1385 }
1386 
1387 // readTokenWithJavaScriptASI reads the next token and terminates the current
1388 // line if JavaScript Automatic Semicolon Insertion must
1389 // happen between the current token and the next token.
1390 //
1391 // This method is conservative - it cannot cover all edge cases of JavaScript,
1392 // but only aims to correctly handle certain well known cases. It *must not*
1393 // return true in speculative cases.
1394 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1395   FormatToken *Previous = FormatTok;
1396   readToken();
1397   FormatToken *Next = FormatTok;
1398 
1399   bool IsOnSameLine =
1400       CommentsBeforeNextToken.empty()
1401           ? Next->NewlinesBefore == 0
1402           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1403   if (IsOnSameLine)
1404     return;
1405 
1406   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1407   bool PreviousStartsTemplateExpr =
1408       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1409   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1410     // If the line contains an '@' sign, the previous token might be an
1411     // annotation, which can precede another identifier/value.
1412     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1413       return LineNode.Tok->is(tok::at);
1414     });
1415     if (HasAt)
1416       return;
1417   }
1418   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1419     return addUnwrappedLine();
1420   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1421   bool NextEndsTemplateExpr =
1422       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1423   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1424       (PreviousMustBeValue ||
1425        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1426                          tok::minusminus))) {
1427     return addUnwrappedLine();
1428   }
1429   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1430       isJSDeclOrStmt(Keywords, Next)) {
1431     return addUnwrappedLine();
1432   }
1433 }
1434 
1435 void UnwrappedLineParser::parseStructuralElement(
1436     bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
1437     FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1438   if (Style.Language == FormatStyle::LK_TableGen &&
1439       FormatTok->is(tok::pp_include)) {
1440     nextToken();
1441     if (FormatTok->is(tok::string_literal))
1442       nextToken();
1443     addUnwrappedLine();
1444     return;
1445   }
1446   switch (FormatTok->Tok.getKind()) {
1447   case tok::kw_asm:
1448     nextToken();
1449     if (FormatTok->is(tok::l_brace)) {
1450       FormatTok->setFinalizedType(TT_InlineASMBrace);
1451       nextToken();
1452       while (FormatTok && FormatTok->isNot(tok::eof)) {
1453         if (FormatTok->is(tok::r_brace)) {
1454           FormatTok->setFinalizedType(TT_InlineASMBrace);
1455           nextToken();
1456           addUnwrappedLine();
1457           break;
1458         }
1459         FormatTok->Finalized = true;
1460         nextToken();
1461       }
1462     }
1463     break;
1464   case tok::kw_namespace:
1465     parseNamespace();
1466     return;
1467   case tok::kw_public:
1468   case tok::kw_protected:
1469   case tok::kw_private:
1470     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1471         Style.isCSharp()) {
1472       nextToken();
1473     } else {
1474       parseAccessSpecifier();
1475     }
1476     return;
1477   case tok::kw_if: {
1478     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1479       // field/method declaration.
1480       break;
1481     }
1482     FormatToken *Tok = parseIfThenElse(IfKind);
1483     if (IfLeftBrace)
1484       *IfLeftBrace = Tok;
1485     return;
1486   }
1487   case tok::kw_for:
1488   case tok::kw_while:
1489     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1490       // field/method declaration.
1491       break;
1492     }
1493     parseForOrWhileLoop();
1494     return;
1495   case tok::kw_do:
1496     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1497       // field/method declaration.
1498       break;
1499     }
1500     parseDoWhile();
1501     if (HasDoWhile)
1502       *HasDoWhile = true;
1503     return;
1504   case tok::kw_switch:
1505     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1506       // 'switch: string' field declaration.
1507       break;
1508     }
1509     parseSwitch();
1510     return;
1511   case tok::kw_default:
1512     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1513       // 'default: string' field declaration.
1514       break;
1515     }
1516     nextToken();
1517     if (FormatTok->is(tok::colon)) {
1518       parseLabel();
1519       return;
1520     }
1521     // e.g. "default void f() {}" in a Java interface.
1522     break;
1523   case tok::kw_case:
1524     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1525       // 'case: string' field declaration.
1526       nextToken();
1527       break;
1528     }
1529     parseCaseLabel();
1530     return;
1531   case tok::kw_try:
1532   case tok::kw___try:
1533     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1534       // field/method declaration.
1535       break;
1536     }
1537     parseTryCatch();
1538     return;
1539   case tok::kw_extern:
1540     nextToken();
1541     if (FormatTok->is(tok::string_literal)) {
1542       nextToken();
1543       if (FormatTok->is(tok::l_brace)) {
1544         if (Style.BraceWrapping.AfterExternBlock)
1545           addUnwrappedLine();
1546         // Either we indent or for backwards compatibility we follow the
1547         // AfterExternBlock style.
1548         unsigned AddLevels =
1549             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1550                     (Style.BraceWrapping.AfterExternBlock &&
1551                      Style.IndentExternBlock ==
1552                          FormatStyle::IEBS_AfterExternBlock)
1553                 ? 1u
1554                 : 0u;
1555         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1556         addUnwrappedLine();
1557         return;
1558       }
1559     }
1560     break;
1561   case tok::kw_export:
1562     if (Style.isJavaScript()) {
1563       parseJavaScriptEs6ImportExport();
1564       return;
1565     }
1566     if (!Style.isCpp())
1567       break;
1568     // Handle C++ "(inline|export) namespace".
1569     LLVM_FALLTHROUGH;
1570   case tok::kw_inline:
1571     nextToken();
1572     if (FormatTok->is(tok::kw_namespace)) {
1573       parseNamespace();
1574       return;
1575     }
1576     break;
1577   case tok::identifier:
1578     if (FormatTok->is(TT_ForEachMacro)) {
1579       parseForOrWhileLoop();
1580       return;
1581     }
1582     if (FormatTok->is(TT_MacroBlockBegin)) {
1583       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1584                  /*MunchSemi=*/false);
1585       return;
1586     }
1587     if (FormatTok->is(Keywords.kw_import)) {
1588       if (Style.isJavaScript()) {
1589         parseJavaScriptEs6ImportExport();
1590         return;
1591       }
1592       if (Style.Language == FormatStyle::LK_Proto) {
1593         nextToken();
1594         if (FormatTok->is(tok::kw_public))
1595           nextToken();
1596         if (!FormatTok->is(tok::string_literal))
1597           return;
1598         nextToken();
1599         if (FormatTok->is(tok::semi))
1600           nextToken();
1601         addUnwrappedLine();
1602         return;
1603       }
1604       if (Style.isCpp()) {
1605         parseModuleImport();
1606         return;
1607       }
1608     }
1609     if (Style.isCpp() &&
1610         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1611                            Keywords.kw_slots, Keywords.kw_qslots)) {
1612       nextToken();
1613       if (FormatTok->is(tok::colon)) {
1614         nextToken();
1615         addUnwrappedLine();
1616         return;
1617       }
1618     }
1619     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1620       parseStatementMacro();
1621       return;
1622     }
1623     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1624       parseNamespace();
1625       return;
1626     }
1627     // In all other cases, parse the declaration.
1628     break;
1629   default:
1630     break;
1631   }
1632   do {
1633     const FormatToken *Previous = FormatTok->Previous;
1634     switch (FormatTok->Tok.getKind()) {
1635     case tok::at:
1636       nextToken();
1637       if (FormatTok->is(tok::l_brace)) {
1638         nextToken();
1639         parseBracedList();
1640         break;
1641       } else if (Style.Language == FormatStyle::LK_Java &&
1642                  FormatTok->is(Keywords.kw_interface)) {
1643         nextToken();
1644         break;
1645       }
1646       switch (FormatTok->Tok.getObjCKeywordID()) {
1647       case tok::objc_public:
1648       case tok::objc_protected:
1649       case tok::objc_package:
1650       case tok::objc_private:
1651         return parseAccessSpecifier();
1652       case tok::objc_interface:
1653       case tok::objc_implementation:
1654         return parseObjCInterfaceOrImplementation();
1655       case tok::objc_protocol:
1656         if (parseObjCProtocol())
1657           return;
1658         break;
1659       case tok::objc_end:
1660         return; // Handled by the caller.
1661       case tok::objc_optional:
1662       case tok::objc_required:
1663         nextToken();
1664         addUnwrappedLine();
1665         return;
1666       case tok::objc_autoreleasepool:
1667         nextToken();
1668         if (FormatTok->is(tok::l_brace)) {
1669           if (Style.BraceWrapping.AfterControlStatement ==
1670               FormatStyle::BWACS_Always) {
1671             addUnwrappedLine();
1672           }
1673           parseBlock();
1674         }
1675         addUnwrappedLine();
1676         return;
1677       case tok::objc_synchronized:
1678         nextToken();
1679         if (FormatTok->is(tok::l_paren)) {
1680           // Skip synchronization object
1681           parseParens();
1682         }
1683         if (FormatTok->is(tok::l_brace)) {
1684           if (Style.BraceWrapping.AfterControlStatement ==
1685               FormatStyle::BWACS_Always) {
1686             addUnwrappedLine();
1687           }
1688           parseBlock();
1689         }
1690         addUnwrappedLine();
1691         return;
1692       case tok::objc_try:
1693         // This branch isn't strictly necessary (the kw_try case below would
1694         // do this too after the tok::at is parsed above).  But be explicit.
1695         parseTryCatch();
1696         return;
1697       default:
1698         break;
1699       }
1700       break;
1701     case tok::kw_concept:
1702       parseConcept();
1703       return;
1704     case tok::kw_requires: {
1705       if (Style.isCpp()) {
1706         bool ParsedClause = parseRequires();
1707         if (ParsedClause)
1708           return;
1709       } else {
1710         nextToken();
1711       }
1712       break;
1713     }
1714     case tok::kw_enum:
1715       // Ignore if this is part of "template <enum ...".
1716       if (Previous && Previous->is(tok::less)) {
1717         nextToken();
1718         break;
1719       }
1720 
1721       // parseEnum falls through and does not yet add an unwrapped line as an
1722       // enum definition can start a structural element.
1723       if (!parseEnum())
1724         break;
1725       // This only applies for C++.
1726       if (!Style.isCpp()) {
1727         addUnwrappedLine();
1728         return;
1729       }
1730       break;
1731     case tok::kw_typedef:
1732       nextToken();
1733       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1734                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1735                              Keywords.kw_CF_CLOSED_ENUM,
1736                              Keywords.kw_NS_CLOSED_ENUM)) {
1737         parseEnum();
1738       }
1739       break;
1740     case tok::kw_struct:
1741     case tok::kw_union:
1742     case tok::kw_class:
1743       if (parseStructLike())
1744         return;
1745       break;
1746     case tok::period:
1747       nextToken();
1748       // In Java, classes have an implicit static member "class".
1749       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1750           FormatTok->is(tok::kw_class)) {
1751         nextToken();
1752       }
1753       if (Style.isJavaScript() && FormatTok &&
1754           FormatTok->Tok.getIdentifierInfo()) {
1755         // JavaScript only has pseudo keywords, all keywords are allowed to
1756         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1757         nextToken();
1758       }
1759       break;
1760     case tok::semi:
1761       nextToken();
1762       addUnwrappedLine();
1763       return;
1764     case tok::r_brace:
1765       addUnwrappedLine();
1766       return;
1767     case tok::l_paren: {
1768       parseParens();
1769       // Break the unwrapped line if a K&R C function definition has a parameter
1770       // declaration.
1771       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1772         break;
1773       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1774         addUnwrappedLine();
1775         return;
1776       }
1777       break;
1778     }
1779     case tok::kw_operator:
1780       nextToken();
1781       if (FormatTok->isBinaryOperator())
1782         nextToken();
1783       break;
1784     case tok::caret:
1785       nextToken();
1786       if (FormatTok->Tok.isAnyIdentifier() ||
1787           FormatTok->isSimpleTypeSpecifier()) {
1788         nextToken();
1789       }
1790       if (FormatTok->is(tok::l_paren))
1791         parseParens();
1792       if (FormatTok->is(tok::l_brace))
1793         parseChildBlock();
1794       break;
1795     case tok::l_brace:
1796       if (NextLBracesType != TT_Unknown)
1797         FormatTok->setFinalizedType(NextLBracesType);
1798       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1799         // A block outside of parentheses must be the last part of a
1800         // structural element.
1801         // FIXME: Figure out cases where this is not true, and add projections
1802         // for them (the one we know is missing are lambdas).
1803         if (Style.Language == FormatStyle::LK_Java &&
1804             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1805           // If necessary, we could set the type to something different than
1806           // TT_FunctionLBrace.
1807           if (Style.BraceWrapping.AfterControlStatement ==
1808               FormatStyle::BWACS_Always) {
1809             addUnwrappedLine();
1810           }
1811         } else if (Style.BraceWrapping.AfterFunction) {
1812           addUnwrappedLine();
1813         }
1814         if (!Line->InPPDirective)
1815           FormatTok->setFinalizedType(TT_FunctionLBrace);
1816         parseBlock();
1817         addUnwrappedLine();
1818         return;
1819       }
1820       // Otherwise this was a braced init list, and the structural
1821       // element continues.
1822       break;
1823     case tok::kw_try:
1824       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1825         // field/method declaration.
1826         nextToken();
1827         break;
1828       }
1829       // We arrive here when parsing function-try blocks.
1830       if (Style.BraceWrapping.AfterFunction)
1831         addUnwrappedLine();
1832       parseTryCatch();
1833       return;
1834     case tok::identifier: {
1835       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1836           Line->MustBeDeclaration) {
1837         addUnwrappedLine();
1838         parseCSharpGenericTypeConstraint();
1839         break;
1840       }
1841       if (FormatTok->is(TT_MacroBlockEnd)) {
1842         addUnwrappedLine();
1843         return;
1844       }
1845 
1846       // Function declarations (as opposed to function expressions) are parsed
1847       // on their own unwrapped line by continuing this loop. Function
1848       // expressions (functions that are not on their own line) must not create
1849       // a new unwrapped line, so they are special cased below.
1850       size_t TokenCount = Line->Tokens.size();
1851       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1852           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1853                                                      Keywords.kw_async)))) {
1854         tryToParseJSFunction();
1855         break;
1856       }
1857       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1858           FormatTok->is(Keywords.kw_interface)) {
1859         if (Style.isJavaScript()) {
1860           // In JavaScript/TypeScript, "interface" can be used as a standalone
1861           // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
1863           // interface declaration.
1864           unsigned StoredPosition = Tokens->getPosition();
1865           FormatToken *Next = Tokens->getNextToken();
1866           FormatTok = Tokens->setPosition(StoredPosition);
1867           if (!mustBeJSIdent(Keywords, Next)) {
1868             nextToken();
1869             break;
1870           }
1871         }
1872         parseRecord();
1873         addUnwrappedLine();
1874         return;
1875       }
1876 
1877       if (FormatTok->is(Keywords.kw_interface)) {
1878         if (parseStructLike())
1879           return;
1880         break;
1881       }
1882 
1883       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1884         parseStatementMacro();
1885         return;
1886       }
1887 
1888       // See if the following token should start a new unwrapped line.
1889       StringRef Text = FormatTok->TokenText;
1890 
1891       FormatToken *PreviousToken = FormatTok;
1892       nextToken();
1893 
1894       // JS doesn't have macros, and within classes colons indicate fields, not
1895       // labels.
1896       if (Style.isJavaScript())
1897         break;
1898 
1899       TokenCount = Line->Tokens.size();
1900       if (TokenCount == 1 ||
1901           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1902         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1903           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1904           parseLabel(!Style.IndentGotoLabels);
1905           if (HasLabel)
1906             *HasLabel = true;
1907           return;
1908         }
1909         // Recognize function-like macro usages without trailing semicolon as
1910         // well as free-standing macros like Q_OBJECT.
1911         bool FunctionLike = FormatTok->is(tok::l_paren);
1912         if (FunctionLike)
1913           parseParens();
1914 
1915         bool FollowedByNewline =
1916             CommentsBeforeNextToken.empty()
1917                 ? FormatTok->NewlinesBefore > 0
1918                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1919 
1920         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1921             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1922           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1923           addUnwrappedLine();
1924           return;
1925         }
1926       }
1927       break;
1928     }
1929     case tok::equal:
1930       if ((Style.isJavaScript() || Style.isCSharp()) &&
1931           FormatTok->is(TT_FatArrow)) {
1932         tryToParseChildBlock();
1933         break;
1934       }
1935 
1936       nextToken();
1937       if (FormatTok->is(tok::l_brace)) {
1938         // Block kind should probably be set to BK_BracedInit for any language.
1939         // C# needs this change to ensure that array initialisers and object
1940         // initialisers are indented the same way.
1941         if (Style.isCSharp())
1942           FormatTok->setBlockKind(BK_BracedInit);
1943         nextToken();
1944         parseBracedList();
1945       } else if (Style.Language == FormatStyle::LK_Proto &&
1946                  FormatTok->is(tok::less)) {
1947         nextToken();
1948         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1949                         /*ClosingBraceKind=*/tok::greater);
1950       }
1951       break;
1952     case tok::l_square:
1953       parseSquare();
1954       break;
1955     case tok::kw_new:
1956       parseNew();
1957       break;
1958     case tok::kw_case:
1959       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1960         // 'case: string' field declaration.
1961         nextToken();
1962         break;
1963       }
1964       parseCaseLabel();
1965       break;
1966     default:
1967       nextToken();
1968       break;
1969     }
1970   } while (!eof());
1971 }
1972 
1973 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1974   assert(FormatTok->is(tok::l_brace));
1975   if (!Style.isCSharp())
1976     return false;
1977   // See if it's a property accessor.
1978   if (FormatTok->Previous->isNot(tok::identifier))
1979     return false;
1980 
1981   // See if we are inside a property accessor.
1982   //
1983   // Record the current tokenPosition so that we can advance and
1984   // reset the current token. `Next` is not set yet so we need
1985   // another way to advance along the token stream.
1986   unsigned int StoredPosition = Tokens->getPosition();
1987   FormatToken *Tok = Tokens->getNextToken();
1988 
1989   // A trivial property accessor is of the form:
1990   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
1991   // Track these as they do not require line breaks to be introduced.
1992   bool HasSpecialAccessor = false;
1993   bool IsTrivialPropertyAccessor = true;
1994   while (!eof()) {
1995     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1996                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1997                      Keywords.kw_init, Keywords.kw_set)) {
1998       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
1999         HasSpecialAccessor = true;
2000       Tok = Tokens->getNextToken();
2001       continue;
2002     }
2003     if (Tok->isNot(tok::r_brace))
2004       IsTrivialPropertyAccessor = false;
2005     break;
2006   }
2007 
2008   if (!HasSpecialAccessor) {
2009     Tokens->setPosition(StoredPosition);
2010     return false;
2011   }
2012 
2013   // Try to parse the property accessor:
2014   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2015   Tokens->setPosition(StoredPosition);
2016   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2017     addUnwrappedLine();
2018   nextToken();
2019   do {
2020     switch (FormatTok->Tok.getKind()) {
2021     case tok::r_brace:
2022       nextToken();
2023       if (FormatTok->is(tok::equal)) {
2024         while (!eof() && FormatTok->isNot(tok::semi))
2025           nextToken();
2026         nextToken();
2027       }
2028       addUnwrappedLine();
2029       return true;
2030     case tok::l_brace:
2031       ++Line->Level;
2032       parseBlock(/*MustBeDeclaration=*/true);
2033       addUnwrappedLine();
2034       --Line->Level;
2035       break;
2036     case tok::equal:
2037       if (FormatTok->is(TT_FatArrow)) {
2038         ++Line->Level;
2039         do {
2040           nextToken();
2041         } while (!eof() && FormatTok->isNot(tok::semi));
2042         nextToken();
2043         addUnwrappedLine();
2044         --Line->Level;
2045         break;
2046       }
2047       nextToken();
2048       break;
2049     default:
2050       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2051                              Keywords.kw_set) &&
2052           !IsTrivialPropertyAccessor) {
2053         // Non-trivial get/set needs to be on its own line.
2054         addUnwrappedLine();
2055       }
2056       nextToken();
2057     }
2058   } while (!eof());
2059 
2060   // Unreachable for well-formed code (paired '{' and '}').
2061   return true;
2062 }
2063 
2064 bool UnwrappedLineParser::tryToParseLambda() {
2065   assert(FormatTok->is(tok::l_square));
2066   if (!Style.isCpp()) {
2067     nextToken();
2068     return false;
2069   }
2070   FormatToken &LSquare = *FormatTok;
2071   if (!tryToParseLambdaIntroducer())
2072     return false;
2073 
2074   bool SeenArrow = false;
2075   bool InTemplateParameterList = false;
2076 
2077   while (FormatTok->isNot(tok::l_brace)) {
2078     if (FormatTok->isSimpleTypeSpecifier()) {
2079       nextToken();
2080       continue;
2081     }
2082     switch (FormatTok->Tok.getKind()) {
2083     case tok::l_brace:
2084       break;
2085     case tok::l_paren:
2086       parseParens();
2087       break;
2088     case tok::l_square:
2089       parseSquare();
2090       break;
2091     case tok::kw_class:
2092     case tok::kw_template:
2093     case tok::kw_typename:
2094       assert(FormatTok->Previous);
2095       if (FormatTok->Previous->is(tok::less))
2096         InTemplateParameterList = true;
2097       nextToken();
2098       break;
2099     case tok::amp:
2100     case tok::star:
2101     case tok::kw_const:
2102     case tok::comma:
2103     case tok::less:
2104     case tok::greater:
2105     case tok::identifier:
2106     case tok::numeric_constant:
2107     case tok::coloncolon:
2108     case tok::kw_mutable:
2109     case tok::kw_noexcept:
2110       nextToken();
2111       break;
2112     // Specialization of a template with an integer parameter can contain
2113     // arithmetic, logical, comparison and ternary operators.
2114     //
2115     // FIXME: This also accepts sequences of operators that are not in the scope
2116     // of a template argument list.
2117     //
2118     // In a C++ lambda a template type can only occur after an arrow. We use
2119     // this as an heuristic to distinguish between Objective-C expressions
2120     // followed by an `a->b` expression, such as:
2121     // ([obj func:arg] + a->b)
2122     // Otherwise the code below would parse as a lambda.
2123     //
2124     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2125     // explicit template lists: []<bool b = true && false>(U &&u){}
2126     case tok::plus:
2127     case tok::minus:
2128     case tok::exclaim:
2129     case tok::tilde:
2130     case tok::slash:
2131     case tok::percent:
2132     case tok::lessless:
2133     case tok::pipe:
2134     case tok::pipepipe:
2135     case tok::ampamp:
2136     case tok::caret:
2137     case tok::equalequal:
2138     case tok::exclaimequal:
2139     case tok::greaterequal:
2140     case tok::lessequal:
2141     case tok::question:
2142     case tok::colon:
2143     case tok::ellipsis:
2144     case tok::kw_true:
2145     case tok::kw_false:
2146       if (SeenArrow || InTemplateParameterList) {
2147         nextToken();
2148         break;
2149       }
2150       return true;
2151     case tok::arrow:
2152       // This might or might not actually be a lambda arrow (this could be an
2153       // ObjC method invocation followed by a dereferencing arrow). We might
2154       // reset this back to TT_Unknown in TokenAnnotator.
2155       FormatTok->setFinalizedType(TT_LambdaArrow);
2156       SeenArrow = true;
2157       nextToken();
2158       break;
2159     default:
2160       return true;
2161     }
2162   }
2163   FormatTok->setFinalizedType(TT_LambdaLBrace);
2164   LSquare.setFinalizedType(TT_LambdaLSquare);
2165   parseChildBlock();
2166   return true;
2167 }
2168 
2169 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2170   const FormatToken *Previous = FormatTok->Previous;
2171   const FormatToken *LeftSquare = FormatTok;
2172   nextToken();
2173   if (Previous &&
2174       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2175                          tok::kw_delete, tok::l_square) ||
2176        LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2177        Previous->isSimpleTypeSpecifier())) {
2178     return false;
2179   }
2180   if (FormatTok->is(tok::l_square))
2181     return false;
2182   if (FormatTok->is(tok::r_square)) {
2183     const FormatToken *Next = Tokens->peekNextToken();
2184     if (Next->is(tok::greater))
2185       return false;
2186   }
2187   parseSquare(/*LambdaIntroducer=*/true);
2188   return true;
2189 }
2190 
2191 void UnwrappedLineParser::tryToParseJSFunction() {
2192   assert(FormatTok->is(Keywords.kw_function) ||
2193          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2194   if (FormatTok->is(Keywords.kw_async))
2195     nextToken();
2196   // Consume "function".
2197   nextToken();
2198 
2199   // Consume * (generator function). Treat it like C++'s overloaded operators.
2200   if (FormatTok->is(tok::star)) {
2201     FormatTok->setFinalizedType(TT_OverloadedOperator);
2202     nextToken();
2203   }
2204 
2205   // Consume function name.
2206   if (FormatTok->is(tok::identifier))
2207     nextToken();
2208 
2209   if (FormatTok->isNot(tok::l_paren))
2210     return;
2211 
2212   // Parse formal parameter list.
2213   parseParens();
2214 
2215   if (FormatTok->is(tok::colon)) {
2216     // Parse a type definition.
2217     nextToken();
2218 
2219     // Eat the type declaration. For braced inline object types, balance braces,
2220     // otherwise just parse until finding an l_brace for the function body.
2221     if (FormatTok->is(tok::l_brace))
2222       tryToParseBracedList();
2223     else
2224       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2225         nextToken();
2226   }
2227 
2228   if (FormatTok->is(tok::semi))
2229     return;
2230 
2231   parseChildBlock();
2232 }
2233 
2234 bool UnwrappedLineParser::tryToParseBracedList() {
2235   if (FormatTok->is(BK_Unknown))
2236     calculateBraceTypes();
2237   assert(FormatTok->isNot(BK_Unknown));
2238   if (FormatTok->is(BK_Block))
2239     return false;
2240   nextToken();
2241   parseBracedList();
2242   return true;
2243 }
2244 
2245 bool UnwrappedLineParser::tryToParseChildBlock() {
2246   assert(Style.isJavaScript() || Style.isCSharp());
2247   assert(FormatTok->is(TT_FatArrow));
2248   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2249   // They always start an expression or a child block if followed by a curly
2250   // brace.
2251   nextToken();
2252   if (FormatTok->isNot(tok::l_brace))
2253     return false;
2254   parseChildBlock();
2255   return true;
2256 }
2257 
2258 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2259                                           bool IsEnum,
2260                                           tok::TokenKind ClosingBraceKind) {
2261   bool HasError = false;
2262 
2263   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2264   // replace this by using parseAssignmentExpression() inside.
2265   do {
2266     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2267         tryToParseChildBlock()) {
2268       continue;
2269     }
2270     if (Style.isJavaScript()) {
2271       if (FormatTok->is(Keywords.kw_function) ||
2272           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2273         tryToParseJSFunction();
2274         continue;
2275       }
2276       if (FormatTok->is(tok::l_brace)) {
2277         // Could be a method inside of a braced list `{a() { return 1; }}`.
2278         if (tryToParseBracedList())
2279           continue;
2280         parseChildBlock();
2281       }
2282     }
2283     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2284       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2285         addUnwrappedLine();
2286       nextToken();
2287       return !HasError;
2288     }
2289     switch (FormatTok->Tok.getKind()) {
2290     case tok::l_square:
2291       if (Style.isCSharp())
2292         parseSquare();
2293       else
2294         tryToParseLambda();
2295       break;
2296     case tok::l_paren:
2297       parseParens();
2298       // JavaScript can just have free standing methods and getters/setters in
2299       // object literals. Detect them by a "{" following ")".
2300       if (Style.isJavaScript()) {
2301         if (FormatTok->is(tok::l_brace))
2302           parseChildBlock();
2303         break;
2304       }
2305       break;
2306     case tok::l_brace:
2307       // Assume there are no blocks inside a braced init list apart
2308       // from the ones we explicitly parse out (like lambdas).
2309       FormatTok->setBlockKind(BK_BracedInit);
2310       nextToken();
2311       parseBracedList();
2312       break;
2313     case tok::less:
2314       if (Style.Language == FormatStyle::LK_Proto ||
2315           ClosingBraceKind == tok::greater) {
2316         nextToken();
2317         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2318                         /*ClosingBraceKind=*/tok::greater);
2319       } else {
2320         nextToken();
2321       }
2322       break;
2323     case tok::semi:
2324       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2325       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2326       // used for error recovery if we have otherwise determined that this is
2327       // a braced list.
2328       if (Style.isJavaScript()) {
2329         nextToken();
2330         break;
2331       }
2332       HasError = true;
2333       if (!ContinueOnSemicolons)
2334         return !HasError;
2335       nextToken();
2336       break;
2337     case tok::comma:
2338       nextToken();
2339       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2340         addUnwrappedLine();
2341       break;
2342     default:
2343       nextToken();
2344       break;
2345     }
2346   } while (!eof());
2347   return false;
2348 }
2349 
2350 /// \brief Parses a pair of parentheses (and everything between them).
2351 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2352 /// double ampersands. This only counts for the current parens scope.
2353 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2354   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2355   nextToken();
2356   do {
2357     switch (FormatTok->Tok.getKind()) {
2358     case tok::l_paren:
2359       parseParens();
2360       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2361         parseChildBlock();
2362       break;
2363     case tok::r_paren:
2364       nextToken();
2365       return;
2366     case tok::r_brace:
2367       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2368       return;
2369     case tok::l_square:
2370       tryToParseLambda();
2371       break;
2372     case tok::l_brace:
2373       if (!tryToParseBracedList())
2374         parseChildBlock();
2375       break;
2376     case tok::at:
2377       nextToken();
2378       if (FormatTok->is(tok::l_brace)) {
2379         nextToken();
2380         parseBracedList();
2381       }
2382       break;
2383     case tok::equal:
2384       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2385         tryToParseChildBlock();
2386       else
2387         nextToken();
2388       break;
2389     case tok::kw_class:
2390       if (Style.isJavaScript())
2391         parseRecord(/*ParseAsExpr=*/true);
2392       else
2393         nextToken();
2394       break;
2395     case tok::identifier:
2396       if (Style.isJavaScript() &&
2397           (FormatTok->is(Keywords.kw_function) ||
2398            FormatTok->startsSequence(Keywords.kw_async,
2399                                      Keywords.kw_function))) {
2400         tryToParseJSFunction();
2401       } else {
2402         nextToken();
2403       }
2404       break;
2405     case tok::kw_requires: {
2406       auto RequiresToken = FormatTok;
2407       nextToken();
2408       parseRequiresExpression(RequiresToken);
2409       break;
2410     }
2411     case tok::ampamp:
2412       if (AmpAmpTokenType != TT_Unknown)
2413         FormatTok->setFinalizedType(AmpAmpTokenType);
2414       LLVM_FALLTHROUGH;
2415     default:
2416       nextToken();
2417       break;
2418     }
2419   } while (!eof());
2420 }
2421 
2422 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2423   if (!LambdaIntroducer) {
2424     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2425     if (tryToParseLambda())
2426       return;
2427   }
2428   do {
2429     switch (FormatTok->Tok.getKind()) {
2430     case tok::l_paren:
2431       parseParens();
2432       break;
2433     case tok::r_square:
2434       nextToken();
2435       return;
2436     case tok::r_brace:
2437       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2438       return;
2439     case tok::l_square:
2440       parseSquare();
2441       break;
2442     case tok::l_brace: {
2443       if (!tryToParseBracedList())
2444         parseChildBlock();
2445       break;
2446     }
2447     case tok::at:
2448       nextToken();
2449       if (FormatTok->is(tok::l_brace)) {
2450         nextToken();
2451         parseBracedList();
2452       }
2453       break;
2454     default:
2455       nextToken();
2456       break;
2457     }
2458   } while (!eof());
2459 }
2460 
2461 void UnwrappedLineParser::keepAncestorBraces() {
2462   if (!Style.RemoveBracesLLVM)
2463     return;
2464 
2465   const int MaxNestingLevels = 2;
2466   const int Size = NestedTooDeep.size();
2467   if (Size >= MaxNestingLevels)
2468     NestedTooDeep[Size - MaxNestingLevels] = true;
2469   NestedTooDeep.push_back(false);
2470 }
2471 
2472 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2473   for (const auto &Token : llvm::reverse(Line.Tokens))
2474     if (Token.Tok->isNot(tok::comment))
2475       return Token.Tok;
2476 
2477   return nullptr;
2478 }
2479 
2480 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2481   FormatToken *Tok = nullptr;
2482 
2483   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2484       PreprocessorDirectives.empty()) {
2485     Tok = getLastNonComment(*Line);
2486     assert(Tok);
2487     if (Tok->BraceCount < 0) {
2488       assert(Tok->BraceCount == -1);
2489       Tok = nullptr;
2490     } else {
2491       Tok->BraceCount = -1;
2492     }
2493   }
2494 
2495   addUnwrappedLine();
2496   ++Line->Level;
2497   parseStructuralElement();
2498 
2499   if (Tok) {
2500     assert(!Line->InPPDirective);
2501     Tok = nullptr;
2502     for (const auto &L : llvm::reverse(*CurrentLines)) {
2503       if (!L.InPPDirective && getLastNonComment(L)) {
2504         Tok = L.Tokens.back().Tok;
2505         break;
2506       }
2507     }
2508     assert(Tok);
2509     ++Tok->BraceCount;
2510   }
2511 
2512   if (CheckEOF && FormatTok->is(tok::eof))
2513     addUnwrappedLine();
2514 
2515   --Line->Level;
2516 }
2517 
2518 static void markOptionalBraces(FormatToken *LeftBrace) {
2519   if (!LeftBrace)
2520     return;
2521 
2522   assert(LeftBrace->is(tok::l_brace));
2523 
2524   FormatToken *RightBrace = LeftBrace->MatchingParen;
2525   if (!RightBrace) {
2526     assert(!LeftBrace->Optional);
2527     return;
2528   }
2529 
2530   assert(RightBrace->is(tok::r_brace));
2531   assert(RightBrace->MatchingParen == LeftBrace);
2532   assert(LeftBrace->Optional == RightBrace->Optional);
2533 
2534   LeftBrace->Optional = true;
2535   RightBrace->Optional = true;
2536 }
2537 
2538 void UnwrappedLineParser::handleAttributes() {
2539   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2540   if (FormatTok->is(TT_AttributeMacro))
2541     nextToken();
2542   handleCppAttributes();
2543 }
2544 
2545 bool UnwrappedLineParser::handleCppAttributes() {
2546   // Handle [[likely]] / [[unlikely]] attributes.
2547   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2548     parseSquare();
2549     return true;
2550   }
2551   return false;
2552 }
2553 
/// Parses an `if` statement together with its optional `else` branch. An
/// `else if` is handled by a recursive call.
///
/// When Style.RemoveBracesLLVM is set, this also decides whether the braces of
/// the `if` and `else` blocks may be marked optional.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is set, receives the
/// kind of statement that was parsed (IfOnly, IfElse or IfElseIf). It is not
/// written when Style.RemoveBracesLLVM is off.
/// \param KeepBraces If true, the braces of the `if` block are kept even when
/// Style.RemoveBracesLLVM is set.
/// \returns nullptr if Style.RemoveBracesLLVM is off. Otherwise the left brace
/// of the `if` block, or nullptr if the `if` body is not a braced block.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces) {
  assert(FormatTok->is(tok::kw_if) && "'if' expected");
  nextToken();
  // Skip a `!` directly after `if` (presumably for `if !consteval` -- confirm).
  if (FormatTok->is(tok::exclaim))
    nextToken();

  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    // `if consteval` has no condition; KeepIfBraces stays true for it.
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Skip `constexpr` or a single identifier in front of the condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren))
      parseParens();
  }
  handleAttributes();

  // Set when the unwrapped line after the `if` block still has to be added,
  // which is only done once it is known that no `else` follows.
  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Also keep the braces if the block has no matching brace recorded, if
    // this statement was flagged in NestedTooDeep, or if the block was
    // reported as an `if` without a final `else` (IfOnly / IfElseIf).
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Start the `else` branch with a cleared flag.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (FormatTok->is(tok::l_brace)) {
      // Remember whether the block starts with `if` before it is consumed.
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      if (FormatTok->is(tok::kw_else)) {
        // Another `else` directly follows this `else` block.
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // The block starts with an `if` whose own braces are not optional:
        // mark the braces of the `else` block itself as optional here.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (FormatTok->is(tok::kw_if)) {
      // `else if`: parse the nested `if` recursively. If a comment sits
      // between `else` and `if`, the nested `if` goes onto its own line, one
      // level deeper.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take this statement's entry off the stack for the duration of the
        // recursive call and restore it afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No `else`: keep the braces if the block was reported as an if-else, and
    // add the unwrapped line that was deferred above.
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is bookkeeping for brace removal only.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  // Drop the entry that keepAncestorBraces() pushed for this statement.
  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    // Braces are only made optional if that holds for both branches.
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces of the `if` block are kept: unlink the pair from each other.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2687 
2688 void UnwrappedLineParser::parseTryCatch() {
2689   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2690   nextToken();
2691   bool NeedsUnwrappedLine = false;
2692   if (FormatTok->is(tok::colon)) {
2693     // We are in a function try block, what comes is an initializer list.
2694     nextToken();
2695 
2696     // In case identifiers were removed by clang-tidy, what might follow is
2697     // multiple commas in sequence - before the first identifier.
2698     while (FormatTok->is(tok::comma))
2699       nextToken();
2700 
2701     while (FormatTok->is(tok::identifier)) {
2702       nextToken();
2703       if (FormatTok->is(tok::l_paren))
2704         parseParens();
2705       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2706           FormatTok->is(tok::l_brace)) {
2707         do {
2708           nextToken();
2709         } while (!FormatTok->is(tok::r_brace));
2710         nextToken();
2711       }
2712 
2713       // In case identifiers were removed by clang-tidy, what might follow is
2714       // multiple commas in sequence - after the first identifier.
2715       while (FormatTok->is(tok::comma))
2716         nextToken();
2717     }
2718   }
2719   // Parse try with resource.
2720   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2721     parseParens();
2722 
2723   keepAncestorBraces();
2724 
2725   if (FormatTok->is(tok::l_brace)) {
2726     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2727     parseBlock();
2728     if (Style.BraceWrapping.BeforeCatch)
2729       addUnwrappedLine();
2730     else
2731       NeedsUnwrappedLine = true;
2732   } else if (!FormatTok->is(tok::kw_catch)) {
2733     // The C++ standard requires a compound-statement after a try.
2734     // If there's none, we try to assume there's a structuralElement
2735     // and try to continue.
2736     addUnwrappedLine();
2737     ++Line->Level;
2738     parseStructuralElement();
2739     --Line->Level;
2740   }
2741   while (true) {
2742     if (FormatTok->is(tok::at))
2743       nextToken();
2744     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2745                              tok::kw___finally) ||
2746           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2747            FormatTok->is(Keywords.kw_finally)) ||
2748           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2749            FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2750       break;
2751     }
2752     nextToken();
2753     while (FormatTok->isNot(tok::l_brace)) {
2754       if (FormatTok->is(tok::l_paren)) {
2755         parseParens();
2756         continue;
2757       }
2758       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2759         if (Style.RemoveBracesLLVM)
2760           NestedTooDeep.pop_back();
2761         return;
2762       }
2763       nextToken();
2764     }
2765     NeedsUnwrappedLine = false;
2766     Line->MustBeDeclaration = false;
2767     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2768     parseBlock();
2769     if (Style.BraceWrapping.BeforeCatch)
2770       addUnwrappedLine();
2771     else
2772       NeedsUnwrappedLine = true;
2773   }
2774 
2775   if (Style.RemoveBracesLLVM)
2776     NestedTooDeep.pop_back();
2777 
2778   if (NeedsUnwrappedLine)
2779     addUnwrappedLine();
2780 }
2781 
2782 void UnwrappedLineParser::parseNamespace() {
2783   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2784          "'namespace' expected");
2785 
2786   const FormatToken &InitialToken = *FormatTok;
2787   nextToken();
2788   if (InitialToken.is(TT_NamespaceMacro)) {
2789     parseParens();
2790   } else {
2791     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2792                               tok::l_square, tok::period, tok::l_paren) ||
2793            (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2794       if (FormatTok->is(tok::l_square))
2795         parseSquare();
2796       else if (FormatTok->is(tok::l_paren))
2797         parseParens();
2798       else
2799         nextToken();
2800     }
2801   }
2802   if (FormatTok->is(tok::l_brace)) {
2803     if (ShouldBreakBeforeBrace(Style, InitialToken))
2804       addUnwrappedLine();
2805 
2806     unsigned AddLevels =
2807         Style.NamespaceIndentation == FormatStyle::NI_All ||
2808                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2809                  DeclarationScopeStack.size() > 1)
2810             ? 1u
2811             : 0u;
2812     bool ManageWhitesmithsBraces =
2813         AddLevels == 0u &&
2814         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2815 
2816     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2817     // the whole block.
2818     if (ManageWhitesmithsBraces)
2819       ++Line->Level;
2820 
2821     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2822                /*KeepBraces=*/true, /*IfKind=*/nullptr,
2823                ManageWhitesmithsBraces);
2824 
2825     // Munch the semicolon after a namespace. This is more common than one would
2826     // think. Putting the semicolon into its own line is very ugly.
2827     if (FormatTok->is(tok::semi))
2828       nextToken();
2829 
2830     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2831 
2832     if (ManageWhitesmithsBraces)
2833       --Line->Level;
2834   }
2835   // FIXME: Add error handling.
2836 }
2837 
2838 void UnwrappedLineParser::parseNew() {
2839   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2840   nextToken();
2841 
2842   if (Style.isCSharp()) {
2843     do {
2844       if (FormatTok->is(tok::l_brace))
2845         parseBracedList();
2846 
2847       if (FormatTok->isOneOf(tok::semi, tok::comma))
2848         return;
2849 
2850       nextToken();
2851     } while (!eof());
2852   }
2853 
2854   if (Style.Language != FormatStyle::LK_Java)
2855     return;
2856 
2857   // In Java, we can parse everything up to the parens, which aren't optional.
2858   do {
2859     // There should not be a ;, { or } before the new's open paren.
2860     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2861       return;
2862 
2863     // Consume the parens.
2864     if (FormatTok->is(tok::l_paren)) {
2865       parseParens();
2866 
2867       // If there is a class body of an anonymous class, consume that as child.
2868       if (FormatTok->is(tok::l_brace))
2869         parseChildBlock();
2870       return;
2871     }
2872     nextToken();
2873   } while (!eof());
2874 }
2875 
2876 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
2877   keepAncestorBraces();
2878 
2879   if (FormatTok->is(tok::l_brace)) {
2880     if (!KeepBraces)
2881       FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2882     FormatToken *LeftBrace = FormatTok;
2883     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2884     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2885                /*MunchSemi=*/true, KeepBraces);
2886     if (!KeepBraces) {
2887       assert(!NestedTooDeep.empty());
2888       if (!NestedTooDeep.back())
2889         markOptionalBraces(LeftBrace);
2890     }
2891     if (WrapRightBrace)
2892       addUnwrappedLine();
2893   } else {
2894     parseUnbracedBody();
2895   }
2896 
2897   if (!KeepBraces)
2898     NestedTooDeep.pop_back();
2899 }
2900 
2901 void UnwrappedLineParser::parseForOrWhileLoop() {
2902   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2903          "'for', 'while' or foreach macro expected");
2904   const bool KeepBraces = !Style.RemoveBracesLLVM ||
2905                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
2906 
2907   nextToken();
2908   // JS' for await ( ...
2909   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2910     nextToken();
2911   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2912     nextToken();
2913   if (FormatTok->is(tok::l_paren))
2914     parseParens();
2915 
2916   handleAttributes();
2917   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
2918 }
2919 
2920 void UnwrappedLineParser::parseDoWhile() {
2921   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2922   nextToken();
2923 
2924   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
2925 
2926   // FIXME: Add error handling.
2927   if (!FormatTok->is(tok::kw_while)) {
2928     addUnwrappedLine();
2929     return;
2930   }
2931 
2932   // If in Whitesmiths mode, the line with the while() needs to be indented
2933   // to the same level as the block.
2934   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2935     ++Line->Level;
2936 
2937   nextToken();
2938   parseStructuralElement();
2939 }
2940 
2941 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2942   nextToken();
2943   unsigned OldLineLevel = Line->Level;
2944   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2945     --Line->Level;
2946   if (LeftAlignLabel)
2947     Line->Level = 0;
2948 
2949   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2950       FormatTok->is(tok::l_brace)) {
2951 
2952     CompoundStatementIndenter Indenter(this, Line->Level,
2953                                        Style.BraceWrapping.AfterCaseLabel,
2954                                        Style.BraceWrapping.IndentBraces);
2955     parseBlock();
2956     if (FormatTok->is(tok::kw_break)) {
2957       if (Style.BraceWrapping.AfterControlStatement ==
2958           FormatStyle::BWACS_Always) {
2959         addUnwrappedLine();
2960         if (!Style.IndentCaseBlocks &&
2961             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2962           ++Line->Level;
2963         }
2964       }
2965       parseStructuralElement();
2966     }
2967     addUnwrappedLine();
2968   } else {
2969     if (FormatTok->is(tok::semi))
2970       nextToken();
2971     addUnwrappedLine();
2972   }
2973   Line->Level = OldLineLevel;
2974   if (FormatTok->isNot(tok::l_brace)) {
2975     parseStructuralElement();
2976     addUnwrappedLine();
2977   }
2978 }
2979 
2980 void UnwrappedLineParser::parseCaseLabel() {
2981   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2982 
2983   // FIXME: fix handling of complex expressions here.
2984   do {
2985     nextToken();
2986   } while (!eof() && !FormatTok->is(tok::colon));
2987   parseLabel();
2988 }
2989 
2990 void UnwrappedLineParser::parseSwitch() {
2991   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
2992   nextToken();
2993   if (FormatTok->is(tok::l_paren))
2994     parseParens();
2995 
2996   keepAncestorBraces();
2997 
2998   if (FormatTok->is(tok::l_brace)) {
2999     CompoundStatementIndenter Indenter(this, Style, Line->Level);
3000     parseBlock();
3001     addUnwrappedLine();
3002   } else {
3003     addUnwrappedLine();
3004     ++Line->Level;
3005     parseStructuralElement();
3006     --Line->Level;
3007   }
3008 
3009   if (Style.RemoveBracesLLVM)
3010     NestedTooDeep.pop_back();
3011 }
3012 
3013 // Operators that can follow a C variable.
3014 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3015   switch (kind) {
3016   case tok::ampamp:
3017   case tok::ampequal:
3018   case tok::arrow:
3019   case tok::caret:
3020   case tok::caretequal:
3021   case tok::comma:
3022   case tok::ellipsis:
3023   case tok::equal:
3024   case tok::equalequal:
3025   case tok::exclaim:
3026   case tok::exclaimequal:
3027   case tok::greater:
3028   case tok::greaterequal:
3029   case tok::greatergreater:
3030   case tok::greatergreaterequal:
3031   case tok::l_paren:
3032   case tok::l_square:
3033   case tok::less:
3034   case tok::lessequal:
3035   case tok::lessless:
3036   case tok::lesslessequal:
3037   case tok::minus:
3038   case tok::minusequal:
3039   case tok::minusminus:
3040   case tok::percent:
3041   case tok::percentequal:
3042   case tok::period:
3043   case tok::pipe:
3044   case tok::pipeequal:
3045   case tok::pipepipe:
3046   case tok::plus:
3047   case tok::plusequal:
3048   case tok::plusplus:
3049   case tok::question:
3050   case tok::r_brace:
3051   case tok::r_paren:
3052   case tok::r_square:
3053   case tok::semi:
3054   case tok::slash:
3055   case tok::slashequal:
3056   case tok::star:
3057   case tok::starequal:
3058     return true;
3059   default:
3060     return false;
3061   }
3062 }
3063 
3064 void UnwrappedLineParser::parseAccessSpecifier() {
3065   FormatToken *AccessSpecifierCandidate = FormatTok;
3066   nextToken();
3067   // Understand Qt's slots.
3068   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3069     nextToken();
3070   // Otherwise, we don't know what it is, and we'd better keep the next token.
3071   if (FormatTok->is(tok::colon)) {
3072     nextToken();
3073     addUnwrappedLine();
3074   } else if (!FormatTok->is(tok::coloncolon) &&
3075              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3076     // Not a variable name nor namespace name.
3077     addUnwrappedLine();
3078   } else if (AccessSpecifierCandidate) {
3079     // Consider the access specifier to be a C identifier.
3080     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3081   }
3082 }
3083 
3084 /// \brief Parses a concept definition.
3085 /// \pre The current token has to be the concept keyword.
3086 ///
3087 /// Returns if either the concept has been completely parsed, or if it detects
3088 /// that the concept definition is incorrect.
3089 void UnwrappedLineParser::parseConcept() {
3090   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
3091   nextToken();
3092   if (!FormatTok->is(tok::identifier))
3093     return;
3094   nextToken();
3095   if (!FormatTok->is(tok::equal))
3096     return;
3097   nextToken();
3098   parseConstraintExpression();
3099   if (FormatTok->is(tok::semi))
3100     nextToken();
3101   addUnwrappedLine();
3102 }
3103 
3104 /// \brief Parses a requires, decides if it is a clause or an expression.
3105 /// \pre The current token has to be the requires keyword.
3106 /// \returns true if it parsed a clause.
3107 bool clang::format::UnwrappedLineParser::parseRequires() {
3108   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3109   auto RequiresToken = FormatTok;
3110 
3111   // We try to guess if it is a requires clause, or a requires expression. For
3112   // that we first consume the keyword and check the next token.
3113   nextToken();
3114 
3115   switch (FormatTok->Tok.getKind()) {
3116   case tok::l_brace:
3117     // This can only be an expression, never a clause.
3118     parseRequiresExpression(RequiresToken);
3119     return false;
3120   case tok::l_paren:
3121     // Clauses and expression can start with a paren, it's unclear what we have.
3122     break;
3123   default:
3124     // All other tokens can only be a clause.
3125     parseRequiresClause(RequiresToken);
3126     return true;
3127   }
3128 
3129   // Looking forward we would have to decide if there are function declaration
3130   // like arguments to the requires expression:
3131   // requires (T t) {
3132   // Or there is a constraint expression for the requires clause:
3133   // requires (C<T> && ...
3134 
3135   // But first let's look behind.
3136   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3137 
3138   if (!PreviousNonComment ||
3139       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3140     // If there is no token, or an expression left brace, we are a requires
3141     // clause within a requires expression.
3142     parseRequiresClause(RequiresToken);
3143     return true;
3144   }
3145 
3146   switch (PreviousNonComment->Tok.getKind()) {
3147   case tok::greater:
3148   case tok::r_paren:
3149   case tok::kw_noexcept:
3150   case tok::kw_const:
3151     // This is a requires clause.
3152     parseRequiresClause(RequiresToken);
3153     return true;
3154   case tok::amp:
3155   case tok::ampamp: {
3156     // This can be either:
3157     // if (... && requires (T t) ...)
3158     // Or
3159     // void member(...) && requires (C<T> ...
3160     // We check the one token before that for a const:
3161     // void member(...) const && requires (C<T> ...
3162     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3163     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3164       parseRequiresClause(RequiresToken);
3165       return true;
3166     }
3167     break;
3168   }
3169   default:
3170     // It's an expression.
3171     parseRequiresExpression(RequiresToken);
3172     return false;
3173   }
3174 
3175   // Now we look forward and try to check if the paren content is a parameter
3176   // list. The parameters can be cv-qualified and contain references or
3177   // pointers.
3178   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3179   // of stuff: typename, const, *, &, &&, ::, identifiers.
3180 
3181   int NextTokenOffset = 1;
3182   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3183   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3184     ++NextTokenOffset;
3185     NextToken = Tokens->peekNextToken(NextTokenOffset);
3186   };
3187 
3188   bool FoundType = false;
3189   bool LastWasColonColon = false;
3190   int OpenAngles = 0;
3191 
3192   for (; NextTokenOffset < 50; PeekNext()) {
3193     switch (NextToken->Tok.getKind()) {
3194     case tok::kw_volatile:
3195     case tok::kw_const:
3196     case tok::comma:
3197       parseRequiresExpression(RequiresToken);
3198       return false;
3199     case tok::r_paren:
3200     case tok::pipepipe:
3201       parseRequiresClause(RequiresToken);
3202       return true;
3203     case tok::eof:
3204       // Break out of the loop.
3205       NextTokenOffset = 50;
3206       break;
3207     case tok::coloncolon:
3208       LastWasColonColon = true;
3209       break;
3210     case tok::identifier:
3211       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3212         parseRequiresExpression(RequiresToken);
3213         return false;
3214       }
3215       FoundType = true;
3216       LastWasColonColon = false;
3217       break;
3218     case tok::less:
3219       ++OpenAngles;
3220       break;
3221     case tok::greater:
3222       --OpenAngles;
3223       break;
3224     default:
3225       if (NextToken->isSimpleTypeSpecifier()) {
3226         parseRequiresExpression(RequiresToken);
3227         return false;
3228       }
3229       break;
3230     }
3231   }
3232 
3233   // This seems to be a complicated expression, just assume it's a clause.
3234   parseRequiresClause(RequiresToken);
3235   return true;
3236 }
3237 
3238 /// \brief Parses a requires clause.
3239 /// \param RequiresToken The requires keyword token, which starts this clause.
3240 /// \pre We need to be on the next token after the requires keyword.
3241 /// \sa parseRequiresExpression
3242 ///
3243 /// Returns if it either has finished parsing the clause, or it detects, that
3244 /// the clause is incorrect.
3245 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3246   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3247   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3248 
3249   // If there is no previous token, we are within a requires expression,
3250   // otherwise we will always have the template or function declaration in front
3251   // of it.
3252   bool InRequiresExpression =
3253       !RequiresToken->Previous ||
3254       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3255 
3256   RequiresToken->setFinalizedType(InRequiresExpression
3257                                       ? TT_RequiresClauseInARequiresExpression
3258                                       : TT_RequiresClause);
3259 
3260   parseConstraintExpression();
3261 
3262   if (!InRequiresExpression)
3263     FormatTok->Previous->ClosesRequiresClause = true;
3264 }
3265 
3266 /// \brief Parses a requires expression.
3267 /// \param RequiresToken The requires keyword token, which starts this clause.
3268 /// \pre We need to be on the next token after the requires keyword.
3269 /// \sa parseRequiresClause
3270 ///
3271 /// Returns if it either has finished parsing the expression, or it detects,
3272 /// that the expression is incorrect.
3273 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3274   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3275   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3276 
3277   RequiresToken->setFinalizedType(TT_RequiresExpression);
3278 
3279   if (FormatTok->is(tok::l_paren)) {
3280     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3281     parseParens();
3282   }
3283 
3284   if (FormatTok->is(tok::l_brace)) {
3285     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3286     parseChildBlock(/*CanContainBracedList=*/false,
3287                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3288   }
3289 }
3290 
3291 /// \brief Parses a constraint expression.
3292 ///
3293 /// This is either the definition of a concept, or the body of a requires
3294 /// clause. It returns, when the parsing is complete, or the expression is
3295 /// incorrect.
3296 void UnwrappedLineParser::parseConstraintExpression() {
3297   // The special handling for lambdas is needed since tryToParseLambda() eats a
3298   // token and if a requires expression is the last part of a requires clause
3299   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3300   // not set on the correct token. Thus we need to be aware if we even expect a
3301   // lambda to be possible.
3302   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3303   bool LambdaNextTimeAllowed = true;
3304   do {
3305     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3306 
3307     switch (FormatTok->Tok.getKind()) {
3308     case tok::kw_requires: {
3309       auto RequiresToken = FormatTok;
3310       nextToken();
3311       parseRequiresExpression(RequiresToken);
3312       break;
3313     }
3314 
3315     case tok::l_paren:
3316       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3317       break;
3318 
3319     case tok::l_square:
3320       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3321         return;
3322       break;
3323 
3324     case tok::kw_const:
3325     case tok::semi:
3326     case tok::kw_class:
3327     case tok::kw_struct:
3328     case tok::kw_union:
3329       return;
3330 
3331     case tok::l_brace:
3332       // Potential function body.
3333       return;
3334 
3335     case tok::ampamp:
3336     case tok::pipepipe:
3337       FormatTok->setFinalizedType(TT_BinaryOperator);
3338       nextToken();
3339       LambdaNextTimeAllowed = true;
3340       break;
3341 
3342     case tok::comma:
3343     case tok::comment:
3344       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3345       nextToken();
3346       break;
3347 
3348     case tok::kw_sizeof:
3349     case tok::greater:
3350     case tok::greaterequal:
3351     case tok::greatergreater:
3352     case tok::less:
3353     case tok::lessequal:
3354     case tok::lessless:
3355     case tok::equalequal:
3356     case tok::exclaim:
3357     case tok::exclaimequal:
3358     case tok::plus:
3359     case tok::minus:
3360     case tok::star:
3361     case tok::slash:
3362     case tok::kw_decltype:
3363       LambdaNextTimeAllowed = true;
3364       // Just eat them.
3365       nextToken();
3366       break;
3367 
3368     case tok::numeric_constant:
3369     case tok::coloncolon:
3370     case tok::kw_true:
3371     case tok::kw_false:
3372       // Just eat them.
3373       nextToken();
3374       break;
3375 
3376     case tok::kw_static_cast:
3377     case tok::kw_const_cast:
3378     case tok::kw_reinterpret_cast:
3379     case tok::kw_dynamic_cast:
3380       nextToken();
3381       if (!FormatTok->is(tok::less))
3382         return;
3383 
3384       nextToken();
3385       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3386                       /*ClosingBraceKind=*/tok::greater);
3387       break;
3388 
3389     case tok::kw_bool:
3390       // bool is only allowed if it is directly followed by a paren for a cast:
3391       // concept C = bool(...);
3392       // and bool is the only type, all other types as cast must be inside a
3393       // cast to bool an thus are handled by the other cases.
3394       nextToken();
3395       if (FormatTok->isNot(tok::l_paren))
3396         return;
3397       parseParens();
3398       break;
3399 
3400     default:
3401       if (!FormatTok->Tok.getIdentifierInfo()) {
3402         // Identifiers are part of the default case, we check for more then
3403         // tok::identifier to handle builtin type traits.
3404         return;
3405       }
3406 
3407       // We need to differentiate identifiers for a template deduction guide,
3408       // variables, or function return types (the constraint expression has
3409       // ended before that), and basically all other cases. But it's easier to
3410       // check the other way around.
3411       assert(FormatTok->Previous);
3412       switch (FormatTok->Previous->Tok.getKind()) {
3413       case tok::coloncolon:  // Nested identifier.
3414       case tok::ampamp:      // Start of a function or variable for the
3415       case tok::pipepipe:    // constraint expression.
3416       case tok::kw_requires: // Initial identifier of a requires clause.
3417       case tok::equal:       // Initial identifier of a concept declaration.
3418         break;
3419       default:
3420         return;
3421       }
3422 
3423       // Read identifier with optional template declaration.
3424       nextToken();
3425       if (FormatTok->is(tok::less)) {
3426         nextToken();
3427         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3428                         /*ClosingBraceKind=*/tok::greater);
3429       }
3430       break;
3431     }
3432   } while (!eof());
3433 }
3434 
3435 bool UnwrappedLineParser::parseEnum() {
3436   const FormatToken &InitialToken = *FormatTok;
3437 
3438   // Won't be 'enum' for NS_ENUMs.
3439   if (FormatTok->is(tok::kw_enum))
3440     nextToken();
3441 
3442   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3443   // declarations. An "enum" keyword followed by a colon would be a syntax
3444   // error and thus assume it is just an identifier.
3445   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3446     return false;
3447 
3448   // In protobuf, "enum" can be used as a field name.
3449   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3450     return false;
3451 
3452   // Eat up enum class ...
3453   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3454     nextToken();
3455 
3456   while (FormatTok->Tok.getIdentifierInfo() ||
3457          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3458                             tok::greater, tok::comma, tok::question,
3459                             tok::l_square, tok::r_square)) {
3460     nextToken();
3461     // We can have macros or attributes in between 'enum' and the enum name.
3462     if (FormatTok->is(tok::l_paren))
3463       parseParens();
3464     if (FormatTok->is(TT_AttributeSquare)) {
3465       parseSquare();
3466       // Consume the closing TT_AttributeSquare.
3467       if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3468         nextToken();
3469     }
3470     if (FormatTok->is(tok::identifier)) {
3471       nextToken();
3472       // If there are two identifiers in a row, this is likely an elaborate
3473       // return type. In Java, this can be "implements", etc.
3474       if (Style.isCpp() && FormatTok->is(tok::identifier))
3475         return false;
3476     }
3477   }
3478 
3479   // Just a declaration or something is wrong.
3480   if (FormatTok->isNot(tok::l_brace))
3481     return true;
3482   FormatTok->setFinalizedType(TT_EnumLBrace);
3483   FormatTok->setBlockKind(BK_Block);
3484 
3485   if (Style.Language == FormatStyle::LK_Java) {
3486     // Java enums are different.
3487     parseJavaEnumBody();
3488     return true;
3489   }
3490   if (Style.Language == FormatStyle::LK_Proto) {
3491     parseBlock(/*MustBeDeclaration=*/true);
3492     return true;
3493   }
3494 
3495   if (!Style.AllowShortEnumsOnASingleLine &&
3496       ShouldBreakBeforeBrace(Style, InitialToken)) {
3497     addUnwrappedLine();
3498   }
3499   // Parse enum body.
3500   nextToken();
3501   if (!Style.AllowShortEnumsOnASingleLine) {
3502     addUnwrappedLine();
3503     Line->Level += 1;
3504   }
3505   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3506                                    /*IsEnum=*/true);
3507   if (!Style.AllowShortEnumsOnASingleLine)
3508     Line->Level -= 1;
3509   if (HasError) {
3510     if (FormatTok->is(tok::semi))
3511       nextToken();
3512     addUnwrappedLine();
3513   }
3514   return true;
3515 
3516   // There is no addUnwrappedLine() here so that we fall through to parsing a
3517   // structural element afterwards. Thus, in "enum A {} n, m;",
3518   // "} n, m;" will end up in one unwrapped line.
3519 }
3520 
3521 bool UnwrappedLineParser::parseStructLike() {
3522   // parseRecord falls through and does not yet add an unwrapped line as a
3523   // record declaration or definition can start a structural element.
3524   parseRecord();
3525   // This does not apply to Java, JavaScript and C#.
3526   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3527       Style.isCSharp()) {
3528     if (FormatTok->is(tok::semi))
3529       nextToken();
3530     addUnwrappedLine();
3531     return true;
3532   }
3533   return false;
3534 }
3535 
3536 namespace {
3537 // A class used to set and restore the Token position when peeking
3538 // ahead in the token source.
3539 class ScopedTokenPosition {
3540   unsigned StoredPosition;
3541   FormatTokenSource *Tokens;
3542 
3543 public:
3544   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3545     assert(Tokens && "Tokens expected to not be null");
3546     StoredPosition = Tokens->getPosition();
3547   }
3548 
3549   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3550 };
3551 } // namespace
3552 
3553 // Look to see if we have [[ by looking ahead, if
3554 // its not then rewind to the original position.
3555 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3556   ScopedTokenPosition AutoPosition(Tokens);
3557   FormatToken *Tok = Tokens->getNextToken();
3558   // We already read the first [ check for the second.
3559   if (!Tok->is(tok::l_square))
3560     return false;
3561   // Double check that the attribute is just something
3562   // fairly simple.
3563   while (Tok->isNot(tok::eof)) {
3564     if (Tok->is(tok::r_square))
3565       break;
3566     Tok = Tokens->getNextToken();
3567   }
3568   if (Tok->is(tok::eof))
3569     return false;
3570   Tok = Tokens->getNextToken();
3571   if (!Tok->is(tok::r_square))
3572     return false;
3573   Tok = Tokens->getNextToken();
3574   if (Tok->is(tok::semi))
3575     return false;
3576   return true;
3577 }
3578 
3579 void UnwrappedLineParser::parseJavaEnumBody() {
3580   assert(FormatTok->is(tok::l_brace));
3581   const FormatToken *OpeningBrace = FormatTok;
3582 
3583   // Determine whether the enum is simple, i.e. does not have a semicolon or
3584   // constants with class bodies. Simple enums can be formatted like braced
3585   // lists, contracted to a single line, etc.
3586   unsigned StoredPosition = Tokens->getPosition();
3587   bool IsSimple = true;
3588   FormatToken *Tok = Tokens->getNextToken();
3589   while (!Tok->is(tok::eof)) {
3590     if (Tok->is(tok::r_brace))
3591       break;
3592     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3593       IsSimple = false;
3594       break;
3595     }
3596     // FIXME: This will also mark enums with braces in the arguments to enum
3597     // constants as "not simple". This is probably fine in practice, though.
3598     Tok = Tokens->getNextToken();
3599   }
3600   FormatTok = Tokens->setPosition(StoredPosition);
3601 
3602   if (IsSimple) {
3603     nextToken();
3604     parseBracedList();
3605     addUnwrappedLine();
3606     return;
3607   }
3608 
3609   // Parse the body of a more complex enum.
3610   // First add a line for everything up to the "{".
3611   nextToken();
3612   addUnwrappedLine();
3613   ++Line->Level;
3614 
3615   // Parse the enum constants.
3616   while (FormatTok->isNot(tok::eof)) {
3617     if (FormatTok->is(tok::l_brace)) {
3618       // Parse the constant's class body.
3619       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3620                  /*MunchSemi=*/false);
3621     } else if (FormatTok->is(tok::l_paren)) {
3622       parseParens();
3623     } else if (FormatTok->is(tok::comma)) {
3624       nextToken();
3625       addUnwrappedLine();
3626     } else if (FormatTok->is(tok::semi)) {
3627       nextToken();
3628       addUnwrappedLine();
3629       break;
3630     } else if (FormatTok->is(tok::r_brace)) {
3631       addUnwrappedLine();
3632       break;
3633     } else {
3634       nextToken();
3635     }
3636   }
3637 
3638   // Parse the class body after the enum's ";" if any.
3639   parseLevel(OpeningBrace);
3640   nextToken();
3641   --Line->Level;
3642   addUnwrappedLine();
3643 }
3644 
/// Parses a record declaration or definition, starting at the token held in
/// FormatTok on entry (its kind later selects the type given to the opening
/// brace: class, struct, union or a generic record).
///
/// The name, including attributes and macros around it, is skipped first,
/// then whatever follows a ':' or '<' up to the body. If a '{' is found, the
/// body is parsed with parseChildBlock() when \p ParseAsExpr is true and with
/// parseBlock() as a declaration block otherwise. No unwrapped line is added
/// at the end; see the comment at the bottom of the function.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  // For Java and JavaScript, '.' and ',' are skipped as part of the name too.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier that differs from its upper-cased spelling is taken to be
    // a regular name; anything else may be a macro or attribute that can be
    // followed by arguments.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    do {
      if (FormatTok->is(tok::l_brace)) {
        // A '{' here is either a braced list, which is parsed and skipped, or
        // the record body, in which case the loop ends with FormatTok still
        // on the brace.
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Only try parsing a lambda when the '[' does not follow a closing
          // parenthesis or a type/identifier; after a closing parenthesis it
          // was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            break;
        } else {
          parseSquare();
          continue;
        }
      }
      // A ';' means there is no body; leave it for the caller.
      if (FormatTok->is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // The C# generic type constraint clause starts on a line of its own.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    } while (!eof());
  }

  // Maps the token that introduced the record to the type given to the
  // opening brace of its body.
  auto GetBraceType = [](const FormatToken &RecordTok) {
    switch (RecordTok.Tok.getKind()) {
    case tok::kw_class:
      return TT_ClassLBrace;
    case tok::kw_struct:
      return TT_StructLBrace;
    case tok::kw_union:
      return TT_UnionLBrace;
    default:
      // Useful for e.g. interface.
      return TT_RecordLBrace;
    }
  };
  if (FormatTok->is(tok::l_brace)) {
    FormatTok->setFinalizedType(GetBraceType(InitialToken));
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With IndentAccessModifiers the body is indented by two levels instead
      // of one.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3756 
3757 void UnwrappedLineParser::parseObjCMethod() {
3758   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3759          "'(' or identifier expected.");
3760   do {
3761     if (FormatTok->is(tok::semi)) {
3762       nextToken();
3763       addUnwrappedLine();
3764       return;
3765     } else if (FormatTok->is(tok::l_brace)) {
3766       if (Style.BraceWrapping.AfterFunction)
3767         addUnwrappedLine();
3768       parseBlock();
3769       addUnwrappedLine();
3770       return;
3771     } else {
3772       nextToken();
3773     }
3774   } while (!eof());
3775 }
3776 
3777 void UnwrappedLineParser::parseObjCProtocolList() {
3778   assert(FormatTok->is(tok::less) && "'<' expected.");
3779   do {
3780     nextToken();
3781     // Early exit in case someone forgot a close angle.
3782     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3783         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3784       return;
3785     }
3786   } while (!eof() && FormatTok->isNot(tok::greater));
3787   nextToken(); // Skip '>'.
3788 }
3789 
3790 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3791   do {
3792     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3793       nextToken();
3794       addUnwrappedLine();
3795       break;
3796     }
3797     if (FormatTok->is(tok::l_brace)) {
3798       parseBlock();
3799       // In ObjC interfaces, nothing should be following the "}".
3800       addUnwrappedLine();
3801     } else if (FormatTok->is(tok::r_brace)) {
3802       // Ignore stray "}". parseStructuralElement doesn't consume them.
3803       nextToken();
3804       addUnwrappedLine();
3805     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3806       nextToken();
3807       parseObjCMethod();
3808     } else {
3809       parseStructuralElement();
3810     }
3811   } while (!eof());
3812 }
3813 
3814 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3815   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3816          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3817   nextToken();
3818   nextToken(); // interface name
3819 
3820   // @interface can be followed by a lightweight generic
3821   // specialization list, then either a base class or a category.
3822   if (FormatTok->is(tok::less))
3823     parseObjCLightweightGenerics();
3824   if (FormatTok->is(tok::colon)) {
3825     nextToken();
3826     nextToken(); // base class name
3827     // The base class can also have lightweight generics applied to it.
3828     if (FormatTok->is(tok::less))
3829       parseObjCLightweightGenerics();
3830   } else if (FormatTok->is(tok::l_paren)) {
3831     // Skip category, if present.
3832     parseParens();
3833   }
3834 
3835   if (FormatTok->is(tok::less))
3836     parseObjCProtocolList();
3837 
3838   if (FormatTok->is(tok::l_brace)) {
3839     if (Style.BraceWrapping.AfterObjCDeclaration)
3840       addUnwrappedLine();
3841     parseBlock(/*MustBeDeclaration=*/true);
3842   }
3843 
3844   // With instance variables, this puts '}' on its own line.  Without instance
3845   // variables, this ends the @interface line.
3846   addUnwrappedLine();
3847 
3848   parseObjCUntilAtEnd();
3849 }
3850 
3851 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3852   assert(FormatTok->is(tok::less));
3853   // Unlike protocol lists, generic parameterizations support
3854   // nested angles:
3855   //
3856   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3857   //     NSObject <NSCopying, NSSecureCoding>
3858   //
3859   // so we need to count how many open angles we have left.
3860   unsigned NumOpenAngles = 1;
3861   do {
3862     nextToken();
3863     // Early exit in case someone forgot a close angle.
3864     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3865         FormatTok->isObjCAtKeyword(tok::objc_end)) {
3866       break;
3867     }
3868     if (FormatTok->is(tok::less)) {
3869       ++NumOpenAngles;
3870     } else if (FormatTok->is(tok::greater)) {
3871       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3872       --NumOpenAngles;
3873     }
3874   } while (!eof() && NumOpenAngles != 0);
3875   nextToken(); // Skip '>'.
3876 }
3877 
3878 // Returns true for the declaration/definition form of @protocol,
3879 // false for the expression form.
3880 bool UnwrappedLineParser::parseObjCProtocol() {
3881   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3882   nextToken();
3883 
3884   if (FormatTok->is(tok::l_paren)) {
3885     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3886     return false;
3887   }
3888 
3889   // The definition/declaration form,
3890   // @protocol Foo
3891   // - (int)someMethod;
3892   // @end
3893 
3894   nextToken(); // protocol name
3895 
3896   if (FormatTok->is(tok::less))
3897     parseObjCProtocolList();
3898 
3899   // Check for protocol declaration.
3900   if (FormatTok->is(tok::semi)) {
3901     nextToken();
3902     addUnwrappedLine();
3903     return true;
3904   }
3905 
3906   addUnwrappedLine();
3907   parseObjCUntilAtEnd();
3908   return true;
3909 }
3910 
3911 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3912   bool IsImport = FormatTok->is(Keywords.kw_import);
3913   assert(IsImport || FormatTok->is(tok::kw_export));
3914   nextToken();
3915 
3916   // Consume the "default" in "export default class/function".
3917   if (FormatTok->is(tok::kw_default))
3918     nextToken();
3919 
3920   // Consume "async function", "function" and "default function", so that these
3921   // get parsed as free-standing JS functions, i.e. do not require a trailing
3922   // semicolon.
3923   if (FormatTok->is(Keywords.kw_async))
3924     nextToken();
3925   if (FormatTok->is(Keywords.kw_function)) {
3926     nextToken();
3927     return;
3928   }
3929 
3930   // For imports, `export *`, `export {...}`, consume the rest of the line up
3931   // to the terminating `;`. For everything else, just return and continue
3932   // parsing the structural element, i.e. the declaration or expression for
3933   // `export default`.
3934   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3935       !FormatTok->isStringLiteral()) {
3936     return;
3937   }
3938 
3939   while (!eof()) {
3940     if (FormatTok->is(tok::semi))
3941       return;
3942     if (Line->Tokens.empty()) {
3943       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3944       // import statement should terminate.
3945       return;
3946     }
3947     if (FormatTok->is(tok::l_brace)) {
3948       FormatTok->setBlockKind(BK_Block);
3949       nextToken();
3950       parseBracedList();
3951     } else {
3952       nextToken();
3953     }
3954   }
3955 }
3956 
3957 void UnwrappedLineParser::parseStatementMacro() {
3958   nextToken();
3959   if (FormatTok->is(tok::l_paren))
3960     parseParens();
3961   if (FormatTok->is(tok::semi))
3962     nextToken();
3963   addUnwrappedLine();
3964 }
3965 
3966 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3967                                                  StringRef Prefix = "") {
3968   llvm::dbgs() << Prefix << "Line(" << Line.Level
3969                << ", FSC=" << Line.FirstStartColumn << ")"
3970                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3971   for (const auto &Node : Line.Tokens) {
3972     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3973                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3974                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3975   }
3976   for (const auto &Node : Line.Tokens)
3977     for (const auto &ChildNode : Node.Children)
3978       printDebugInfo(ChildNode, "\nChild: ");
3979 
3980   llvm::dbgs() << "\n";
3981 }
3982 
3983 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3984   if (Line->Tokens.empty())
3985     return;
3986   LLVM_DEBUG({
3987     if (CurrentLines == &Lines)
3988       printDebugInfo(*Line);
3989   });
3990 
3991   // If this line closes a block when in Whitesmiths mode, remember that
3992   // information so that the level can be decreased after the line is added.
3993   // This has to happen after the addition of the line since the line itself
3994   // needs to be indented.
3995   bool ClosesWhitesmithsBlock =
3996       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3997       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3998 
3999   CurrentLines->push_back(std::move(*Line));
4000   Line->Tokens.clear();
4001   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4002   Line->FirstStartColumn = 0;
4003 
4004   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4005     --Line->Level;
4006   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
4007     CurrentLines->append(
4008         std::make_move_iterator(PreprocessorDirectives.begin()),
4009         std::make_move_iterator(PreprocessorDirectives.end()));
4010     PreprocessorDirectives.clear();
4011   }
4012   // Disconnect the current token from the last token on the previous line.
4013   FormatTok->Previous = nullptr;
4014 }
4015 
4016 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4017 
4018 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4019   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4020          FormatTok.NewlinesBefore > 0;
4021 }
4022 
4023 // Checks if \p FormatTok is a line comment that continues the line comment
4024 // section on \p Line.
4025 static bool
4026 continuesLineCommentSection(const FormatToken &FormatTok,
4027                             const UnwrappedLine &Line,
4028                             const llvm::Regex &CommentPragmasRegex) {
4029   if (Line.Tokens.empty())
4030     return false;
4031 
4032   StringRef IndentContent = FormatTok.TokenText;
4033   if (FormatTok.TokenText.startswith("//") ||
4034       FormatTok.TokenText.startswith("/*")) {
4035     IndentContent = FormatTok.TokenText.substr(2);
4036   }
4037   if (CommentPragmasRegex.match(IndentContent))
4038     return false;
4039 
4040   // If Line starts with a line comment, then FormatTok continues the comment
4041   // section if its original column is greater or equal to the original start
4042   // column of the line.
4043   //
4044   // Define the min column token of a line as follows: if a line ends in '{' or
4045   // contains a '{' followed by a line comment, then the min column token is
4046   // that '{'. Otherwise, the min column token of the line is the first token of
4047   // the line.
4048   //
4049   // If Line starts with a token other than a line comment, then FormatTok
4050   // continues the comment section if its original column is greater than the
4051   // original start column of the min column token of the line.
4052   //
4053   // For example, the second line comment continues the first in these cases:
4054   //
4055   // // first line
4056   // // second line
4057   //
4058   // and:
4059   //
4060   // // first line
4061   //  // second line
4062   //
4063   // and:
4064   //
4065   // int i; // first line
4066   //  // second line
4067   //
4068   // and:
4069   //
4070   // do { // first line
4071   //      // second line
4072   //   int i;
4073   // } while (true);
4074   //
4075   // and:
4076   //
4077   // enum {
4078   //   a, // first line
4079   //    // second line
4080   //   b
4081   // };
4082   //
4083   // The second line comment doesn't continue the first in these cases:
4084   //
4085   //   // first line
4086   //  // second line
4087   //
4088   // and:
4089   //
4090   // int i; // first line
4091   // // second line
4092   //
4093   // and:
4094   //
4095   // do { // first line
4096   //   // second line
4097   //   int i;
4098   // } while (true);
4099   //
4100   // and:
4101   //
4102   // enum {
4103   //   a, // first line
4104   //   // second line
4105   // };
4106   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4107 
4108   // Scan for '{//'. If found, use the column of '{' as a min column for line
4109   // comment section continuation.
4110   const FormatToken *PreviousToken = nullptr;
4111   for (const UnwrappedLineNode &Node : Line.Tokens) {
4112     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4113         isLineComment(*Node.Tok)) {
4114       MinColumnToken = PreviousToken;
4115       break;
4116     }
4117     PreviousToken = Node.Tok;
4118 
4119     // Grab the last newline preceding a token in this unwrapped line.
4120     if (Node.Tok->NewlinesBefore > 0)
4121       MinColumnToken = Node.Tok;
4122   }
4123   if (PreviousToken && PreviousToken->is(tok::l_brace))
4124     MinColumnToken = PreviousToken;
4125 
4126   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4127                               MinColumnToken);
4128 }
4129 
4130 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4131   bool JustComments = Line->Tokens.empty();
4132   for (FormatToken *Tok : CommentsBeforeNextToken) {
4133     // Line comments that belong to the same line comment section are put on the
4134     // same line since later we might want to reflow content between them.
4135     // Additional fine-grained breaking of line comment sections is controlled
4136     // by the class BreakableLineCommentSection in case it is desirable to keep
4137     // several line comment sections in the same unwrapped line.
4138     //
4139     // FIXME: Consider putting separate line comment sections as children to the
4140     // unwrapped line instead.
4141     Tok->ContinuesLineCommentSection =
4142         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4143     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4144       addUnwrappedLine();
4145     pushToken(Tok);
4146   }
4147   if (NewlineBeforeNext && JustComments)
4148     addUnwrappedLine();
4149   CommentsBeforeNextToken.clear();
4150 }
4151 
4152 void UnwrappedLineParser::nextToken(int LevelDifference) {
4153   if (eof())
4154     return;
4155   flushComments(isOnNewLine(*FormatTok));
4156   pushToken(FormatTok);
4157   FormatToken *Previous = FormatTok;
4158   if (!Style.isJavaScript())
4159     readToken(LevelDifference);
4160   else
4161     readTokenWithJavaScriptASI();
4162   FormatTok->Previous = Previous;
4163 }
4164 
4165 void UnwrappedLineParser::distributeComments(
4166     const SmallVectorImpl<FormatToken *> &Comments,
4167     const FormatToken *NextTok) {
4168   // Whether or not a line comment token continues a line is controlled by
4169   // the method continuesLineCommentSection, with the following caveat:
4170   //
4171   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4172   // that each comment line from the trail is aligned with the next token, if
4173   // the next token exists. If a trail exists, the beginning of the maximal
4174   // trail is marked as a start of a new comment section.
4175   //
4176   // For example in this code:
4177   //
4178   // int a; // line about a
4179   //   // line 1 about b
4180   //   // line 2 about b
4181   //   int b;
4182   //
4183   // the two lines about b form a maximal trail, so there are two sections, the
4184   // first one consisting of the single comment "// line about a" and the
4185   // second one consisting of the next two comments.
4186   if (Comments.empty())
4187     return;
4188   bool ShouldPushCommentsInCurrentLine = true;
4189   bool HasTrailAlignedWithNextToken = false;
4190   unsigned StartOfTrailAlignedWithNextToken = 0;
4191   if (NextTok) {
4192     // We are skipping the first element intentionally.
4193     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4194       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4195         HasTrailAlignedWithNextToken = true;
4196         StartOfTrailAlignedWithNextToken = i;
4197       }
4198     }
4199   }
4200   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4201     FormatToken *FormatTok = Comments[i];
4202     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4203       FormatTok->ContinuesLineCommentSection = false;
4204     } else {
4205       FormatTok->ContinuesLineCommentSection =
4206           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4207     }
4208     if (!FormatTok->ContinuesLineCommentSection &&
4209         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4210       ShouldPushCommentsInCurrentLine = false;
4211     }
4212     if (ShouldPushCommentsInCurrentLine)
4213       pushToken(FormatTok);
4214     else
4215       CommentsBeforeNextToken.push_back(FormatTok);
4216   }
4217 }
4218 
/// Reads tokens from the token source until FormatTok holds the next
/// non-comment token (or the end of the file is reached).
///
/// On the way, merge conflict markers are translated into conditional
/// compilation events, preprocessor directives that start a line are parsed
/// through parsePPDirective(), tokens inside unreachable preprocessor branches
/// are skipped, and the comments collected are handed to distributeComments().
///
/// \param LevelDifference added to the line level while a preprocessor
/// directive is parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments seen since the last non-comment token.
  SmallVector<FormatToken *, 1> Comments;
  bool PreviousWasComment = false;
  bool FirstNonCommentOnLine = false;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // Conflict markers are not kept as tokens: each one is reported as the
    // start, alternative or end of a conditional compilation region, and the
    // token read after it must start a new line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart)
        conditionalCompilationStart(/*Unreachable=*/false);
      else if (FormatTok->getType() == TT_ConflictAlternative)
        conditionalCompilationAlternative();
      else if (FormatTok->getType() == TT_ConflictEnd)
        conditionalCompilationEnd();
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Computes whether Tok is the first non-comment token on its line, given
    // the value computed for the previous token and whether that previous
    // token was a comment.
    auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
                                      const FormatToken &Tok,
                                      bool PreviousWasComment) {
      auto IsFirstOnLine = [](const FormatToken &Tok) {
        return Tok.HasUnescapedNewline || Tok.IsFirst;
      };

      // Consider preprocessor directives preceded by block comments as first
      // on line.
      if (PreviousWasComment)
        return FirstNonCommentOnLine || IsFirstOnLine(Tok);
      return IsFirstOnLine(Tok);
    };

    FirstNonCommentOnLine = IsFirstNonCommentOnLine(
        FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    PreviousWasComment = FormatTok->is(tok::comment);

    // A '#' that is the first non-comment token on its line starts a
    // preprocessor directive, unless we are already inside one.
    while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
           FirstNonCommentOnLine) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0) {
        Line->Level += PPBranchLevel;
      }
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
      // Re-evaluate the state for the token that follows the directive.
      PreviousWasComment = FormatTok->is(tok::comment);
      FirstNonCommentOnLine = IsFirstNonCommentOnLine(
          FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
    }

    // Skip tokens inside an unreachable preprocessor branch.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // A non-comment token ends the search; distribute what was collected.
    if (!FormatTok->is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // End of file: there is no next token for the remaining comments.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
4300 
4301 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4302   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4303   if (MustBreakBeforeNextToken) {
4304     Line->Tokens.back().Tok->MustBreakBefore = true;
4305     MustBreakBeforeNextToken = false;
4306   }
4307 }
4308 
4309 } // end namespace format
4310 } // end namespace clang
4311