1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 #include <utility>
24 
25 #define DEBUG_TYPE "format-parser"
26 
27 namespace clang {
28 namespace format {
29 
30 class FormatTokenSource {
31 public:
32   virtual ~FormatTokenSource() {}
33 
34   // Returns the next token in the token stream.
35   virtual FormatToken *getNextToken() = 0;
36 
37   // Returns the token preceding the token returned by the last call to
38   // getNextToken() in the token stream, or nullptr if no such token exists.
39   virtual FormatToken *getPreviousToken() = 0;
40 
41   // Returns the token that would be returned by the next call to
42   // getNextToken().
43   virtual FormatToken *peekNextToken() = 0;
44 
45   // Returns the token that would be returned after the next N calls to
46   // getNextToken(). N needs to be greater than zero, and small enough that
47   // there are still tokens. Check for tok::eof with N-1 before calling it with
48   // N.
49   virtual FormatToken *peekNextToken(int N) = 0;
50 
51   // Returns whether we are at the end of the file.
52   // This can be different from whether getNextToken() returned an eof token
53   // when the FormatTokenSource is a view on a part of the token stream.
54   virtual bool isEOF() = 0;
55 
56   // Gets the current position in the token stream, to be used by setPosition().
57   virtual unsigned getPosition() = 0;
58 
59   // Resets the token stream to the state it was in when getPosition() returned
60   // Position, and return the token at that position in the stream.
61   virtual FormatToken *setPosition(unsigned Position) = 0;
62 };
63 
64 namespace {
65 
66 class ScopedDeclarationState {
67 public:
68   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
69                          bool MustBeDeclaration)
70       : Line(Line), Stack(Stack) {
71     Line.MustBeDeclaration = MustBeDeclaration;
72     Stack.push_back(MustBeDeclaration);
73   }
74   ~ScopedDeclarationState() {
75     Stack.pop_back();
76     if (!Stack.empty())
77       Line.MustBeDeclaration = Stack.back();
78     else
79       Line.MustBeDeclaration = true;
80   }
81 
82 private:
83   UnwrappedLine &Line;
84   llvm::BitVector &Stack;
85 };
86 
87 static bool isLineComment(const FormatToken &FormatTok) {
88   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
89 }
90 
91 // Checks if \p FormatTok is a line comment that continues the line comment
92 // \p Previous. The original column of \p MinColumnToken is used to determine
93 // whether \p FormatTok is indented enough to the right to continue \p Previous.
94 static bool continuesLineComment(const FormatToken &FormatTok,
95                                  const FormatToken *Previous,
96                                  const FormatToken *MinColumnToken) {
97   if (!Previous || !MinColumnToken)
98     return false;
99   unsigned MinContinueColumn =
100       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
101   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
102          isLineComment(*Previous) &&
103          FormatTok.OriginalColumn >= MinContinueColumn;
104 }
105 
106 class ScopedMacroState : public FormatTokenSource {
107 public:
108   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
109                    FormatToken *&ResetToken)
110       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
111         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
112         Token(nullptr), PreviousToken(nullptr) {
113     FakeEOF.Tok.startToken();
114     FakeEOF.Tok.setKind(tok::eof);
115     TokenSource = this;
116     Line.Level = 0;
117     Line.InPPDirective = true;
118   }
119 
120   ~ScopedMacroState() override {
121     TokenSource = PreviousTokenSource;
122     ResetToken = Token;
123     Line.InPPDirective = false;
124     Line.Level = PreviousLineLevel;
125   }
126 
127   FormatToken *getNextToken() override {
128     // The \c UnwrappedLineParser guards against this by never calling
129     // \c getNextToken() after it has encountered the first eof token.
130     assert(!eof());
131     PreviousToken = Token;
132     Token = PreviousTokenSource->getNextToken();
133     if (eof())
134       return &FakeEOF;
135     return Token;
136   }
137 
138   FormatToken *getPreviousToken() override {
139     return PreviousTokenSource->getPreviousToken();
140   }
141 
142   FormatToken *peekNextToken() override {
143     if (eof())
144       return &FakeEOF;
145     return PreviousTokenSource->peekNextToken();
146   }
147 
148   FormatToken *peekNextToken(int N) override {
149     assert(N > 0);
150     if (eof())
151       return &FakeEOF;
152     return PreviousTokenSource->peekNextToken(N);
153   }
154 
155   bool isEOF() override { return PreviousTokenSource->isEOF(); }
156 
157   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
158 
159   FormatToken *setPosition(unsigned Position) override {
160     PreviousToken = nullptr;
161     Token = PreviousTokenSource->setPosition(Position);
162     return Token;
163   }
164 
165 private:
166   bool eof() {
167     return Token && Token->HasUnescapedNewline &&
168            !continuesLineComment(*Token, PreviousToken,
169                                  /*MinColumnToken=*/PreviousToken);
170   }
171 
172   FormatToken FakeEOF;
173   UnwrappedLine &Line;
174   FormatTokenSource *&TokenSource;
175   FormatToken *&ResetToken;
176   unsigned PreviousLineLevel;
177   FormatTokenSource *PreviousTokenSource;
178 
179   FormatToken *Token;
180   FormatToken *PreviousToken;
181 };
182 
183 } // end anonymous namespace
184 
185 class ScopedLineState {
186 public:
187   ScopedLineState(UnwrappedLineParser &Parser,
188                   bool SwitchToPreprocessorLines = false)
189       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
190     if (SwitchToPreprocessorLines)
191       Parser.CurrentLines = &Parser.PreprocessorDirectives;
192     else if (!Parser.Line->Tokens.empty())
193       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
194     PreBlockLine = std::move(Parser.Line);
195     Parser.Line = std::make_unique<UnwrappedLine>();
196     Parser.Line->Level = PreBlockLine->Level;
197     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
198   }
199 
200   ~ScopedLineState() {
201     if (!Parser.Line->Tokens.empty())
202       Parser.addUnwrappedLine();
203     assert(Parser.Line->Tokens.empty());
204     Parser.Line = std::move(PreBlockLine);
205     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
206       Parser.MustBreakBeforeNextToken = true;
207     Parser.CurrentLines = OriginalLines;
208   }
209 
210 private:
211   UnwrappedLineParser &Parser;
212 
213   std::unique_ptr<UnwrappedLine> PreBlockLine;
214   SmallVectorImpl<UnwrappedLine> *OriginalLines;
215 };
216 
217 class CompoundStatementIndenter {
218 public:
219   CompoundStatementIndenter(UnwrappedLineParser *Parser,
220                             const FormatStyle &Style, unsigned &LineLevel)
221       : CompoundStatementIndenter(Parser, LineLevel,
222                                   Style.BraceWrapping.AfterControlStatement,
223                                   Style.BraceWrapping.IndentBraces) {}
224   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
225                             bool WrapBrace, bool IndentBrace)
226       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
227     if (WrapBrace)
228       Parser->addUnwrappedLine();
229     if (IndentBrace)
230       ++LineLevel;
231   }
232   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
233 
234 private:
235   unsigned &LineLevel;
236   unsigned OldLineLevel;
237 };
238 
239 namespace {
240 
241 class IndexedTokenSource : public FormatTokenSource {
242 public:
243   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
244       : Tokens(Tokens), Position(-1) {}
245 
246   FormatToken *getNextToken() override {
247     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
248       LLVM_DEBUG({
249         llvm::dbgs() << "Next ";
250         dbgToken(Position);
251       });
252       return Tokens[Position];
253     }
254     ++Position;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Next ";
257       dbgToken(Position);
258     });
259     return Tokens[Position];
260   }
261 
262   FormatToken *getPreviousToken() override {
263     return Position > 0 ? Tokens[Position - 1] : nullptr;
264   }
265 
266   FormatToken *peekNextToken() override {
267     int Next = Position + 1;
268     LLVM_DEBUG({
269       llvm::dbgs() << "Peeking ";
270       dbgToken(Next);
271     });
272     return Tokens[Next];
273   }
274 
275   FormatToken *peekNextToken(int N) override {
276     assert(N > 0);
277     int Next = Position + N;
278     LLVM_DEBUG({
279       llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
280       dbgToken(Next);
281     });
282     return Tokens[Next];
283   }
284 
285   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
286 
287   unsigned getPosition() override {
288     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
289     assert(Position >= 0);
290     return Position;
291   }
292 
293   FormatToken *setPosition(unsigned P) override {
294     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
295     Position = P;
296     return Tokens[Position];
297   }
298 
299   void reset() { Position = -1; }
300 
301 private:
302   void dbgToken(int Position, llvm::StringRef Indent = "") {
303     FormatToken *Tok = Tokens[Position];
304     llvm::dbgs() << Indent << "[" << Position
305                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
306                  << ", Macro: " << !!Tok->MacroCtx << "\n";
307   }
308 
309   ArrayRef<FormatToken *> Tokens;
310   int Position;
311 };
312 
313 } // end anonymous namespace
314 
315 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
316                                          const AdditionalKeywords &Keywords,
317                                          unsigned FirstStartColumn,
318                                          ArrayRef<FormatToken *> Tokens,
319                                          UnwrappedLineConsumer &Callback)
320     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
321       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
322       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
323       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
324       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
325                        ? IG_Rejected
326                        : IG_Inited),
327       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
328 
329 void UnwrappedLineParser::reset() {
330   PPBranchLevel = -1;
331   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
332                      ? IG_Rejected
333                      : IG_Inited;
334   IncludeGuardToken = nullptr;
335   Line.reset(new UnwrappedLine);
336   CommentsBeforeNextToken.clear();
337   FormatTok = nullptr;
338   MustBreakBeforeNextToken = false;
339   PreprocessorDirectives.clear();
340   CurrentLines = &Lines;
341   DeclarationScopeStack.clear();
342   NestedTooDeep.clear();
343   PPStack.clear();
344   Line->FirstStartColumn = FirstStartColumn;
345 }
346 
347 void UnwrappedLineParser::parse() {
348   IndexedTokenSource TokenSource(AllTokens);
349   Line->FirstStartColumn = FirstStartColumn;
350   do {
351     LLVM_DEBUG(llvm::dbgs() << "----\n");
352     reset();
353     Tokens = &TokenSource;
354     TokenSource.reset();
355 
356     readToken();
357     parseFile();
358 
359     // If we found an include guard then all preprocessor directives (other than
360     // the guard) are over-indented by one.
361     if (IncludeGuard == IG_Found)
362       for (auto &Line : Lines)
363         if (Line.InPPDirective && Line.Level > 0)
364           --Line.Level;
365 
366     // Create line with eof token.
367     pushToken(FormatTok);
368     addUnwrappedLine();
369 
370     for (const UnwrappedLine &Line : Lines)
371       Callback.consumeUnwrappedLine(Line);
372 
373     Callback.finishRun();
374     Lines.clear();
375     while (!PPLevelBranchIndex.empty() &&
376            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
377       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
378       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
379     }
380     if (!PPLevelBranchIndex.empty()) {
381       ++PPLevelBranchIndex.back();
382       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
383       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
384     }
385   } while (!PPLevelBranchIndex.empty());
386 }
387 
388 void UnwrappedLineParser::parseFile() {
389   // The top-level context in a file always has declarations, except for pre-
390   // processor directives and JavaScript files.
391   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
392   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
393                                           MustBeDeclaration);
394   if (Style.Language == FormatStyle::LK_TextProto)
395     parseBracedList();
396   else
397     parseLevel(/*OpeningBrace=*/nullptr, /*CanContainBracedList=*/true);
398   // Make sure to format the remaining tokens.
399   //
400   // LK_TextProto is special since its top-level is parsed as the body of a
401   // braced list, which does not necessarily have natural line separators such
402   // as a semicolon. Comments after the last entry that have been determined to
403   // not belong to that line, as in:
404   //   key: value
405   //   // endfile comment
406   // do not have a chance to be put on a line of their own until this point.
407   // Here we add this newline before end-of-file comments.
408   if (Style.Language == FormatStyle::LK_TextProto &&
409       !CommentsBeforeNextToken.empty())
410     addUnwrappedLine();
411   flushComments(true);
412   addUnwrappedLine();
413 }
414 
415 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
416   do {
417     switch (FormatTok->Tok.getKind()) {
418     case tok::l_brace:
419       return;
420     default:
421       if (FormatTok->is(Keywords.kw_where)) {
422         addUnwrappedLine();
423         nextToken();
424         parseCSharpGenericTypeConstraint();
425         break;
426       }
427       nextToken();
428       break;
429     }
430   } while (!eof());
431 }
432 
433 void UnwrappedLineParser::parseCSharpAttribute() {
434   int UnpairedSquareBrackets = 1;
435   do {
436     switch (FormatTok->Tok.getKind()) {
437     case tok::r_square:
438       nextToken();
439       --UnpairedSquareBrackets;
440       if (UnpairedSquareBrackets == 0) {
441         addUnwrappedLine();
442         return;
443       }
444       break;
445     case tok::l_square:
446       ++UnpairedSquareBrackets;
447       nextToken();
448       break;
449     default:
450       nextToken();
451       break;
452     }
453   } while (!eof());
454 }
455 
456 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
457   if (!Lines.empty() && Lines.back().InPPDirective)
458     return true;
459 
460   const FormatToken *Previous = Tokens->getPreviousToken();
461   return Previous && Previous->is(tok::comment) &&
462          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
463 }
464 
/// \brief Parses one nesting level: the sequence of comments, blocks, labels
/// and other structural elements starting at the current token. Parsing stops
/// at eof or, when \p OpeningBrace is given, at the closing brace of the level
/// (which is left unconsumed for the caller).
/// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param IfKind Forwarded unchanged to parseStructuralElement() for elements
/// handled by the default path.
/// \param NextLBracesType The type for left brace found in this level.
/// \returns true if a simple block of if/else/for/while, or false otherwise.
/// (A simple block has a single statement.)
bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
                                     bool CanContainBracedList,
                                     IfStmtKind *IfKind,
                                     TokenType NextLBracesType) {
  // Braces nested inside a compound requirement are typed as braced lists;
  // otherwise nested levels get no forced brace type.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Evaluated once on entry. It is forced to true when brace removal is off,
  // so the "simple block" result below can then never be true.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  bool HasLabel = false;
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    // Attribute macros are skipped without forming a statement.
    if (FormatTok->getType() == TT_AttributeMacro) {
      nextToken();
      continue;
    }
    // Macro block begin/end tokens are dispatched like '{' and '}'.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Default handling: parse one structural element and count it as a
    // statement. The HasLabel out-pointer is only passed while no label has
    // been recorded yet.
    auto ParseDefault = [this, OpeningBrace, IfKind, NextLevelLBracesType,
                         &HasLabel, &StatementCount] {
      parseStructuralElement(IfKind, !OpeningBrace, NextLevelLBracesType,
                             HasLabel ? nullptr : &HasLabel);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment at this point gets an unwrapped line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setFinalizedType(NextLBracesType);
      else if (FormatTok->Previous &&
               FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*KeepBraces=*/true,
                 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
                 NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (OpeningBrace) {
        // End of this level; the closing brace is not consumed here. The
        // checks below decide whether the level was a "simple block".
        if (!Style.RemoveBracesLLVM ||
            !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace))
          return false;
        // Not simple if this is a macro block end rather than a real '}', if
        // the level does not hold exactly one statement, if it has a label,
        // or if a comment / preprocessor directive precedes either the level
        // (checked on entry) or this closing brace (checked now).
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective())
          return false;
        // A comment following the brace on the same line also disqualifies.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // Closing brace without a matching opening brace at this level:
      // consume it and put it on its own line.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments to the next token, then rewind to the
      // stored position so that nothing has been consumed.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // The level is raised once, at the first switch label of this level,
      // when case labels are indented (or inside a level-1 PP directive).
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      if (handleCppAttributes())
        break;
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());
  return false;
}
589 
/// Scans forward from the current '{' token and assigns a block kind
/// (BK_Block or BK_BracedInit) to the braces it meets, based on the tokens
/// that surround them. The scan ends at eof or once the brace that started it
/// has been closed; the token source is then rewound to where it was on entry.
/// \param ExpectClassBody When true, a ';' directly after the closing brace
/// of the outermost brace pair is not taken as a sign of a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    do {
      NextTok = Tokens->getNextToken();
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only decide here if the matching '{' has not been classified yet.
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // Try to detect a braced list. Note that regardless how we mark inner
          // braces here, we will overwrite the BlockKind later if we parse a
          // braced list (where all blocks inside are by default braced lists),
          // or when we explicitly detect blocks (for example while parsing
          // lambdas).

          // If we already marked the opening brace as braced list, the closing
          // must also be part of it.
          ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);

          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isJavaScript() &&
                                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                                 Keywords.kw_as));
          ProbablyBracedList = ProbablyBracedList ||
                               (Style.isCpp() && NextTok->is(tok::l_paren));

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              ProbablyBracedList ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis);

          // PrevTok is the token just before this '}'.
          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));

          ProbablyBracedList = ProbablyBracedList ||
                               (NextTok->is(tok::semi) &&
                                (!ExpectClassBody || LBraceStack.size() != 1));

          ProbablyBracedList =
              ProbablyBracedList ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);

          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            // Note: this overrides every decision made above and advances
            // NextTok by one more token.
            NextTok = Tokens->getNextToken();
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the verdict on both the closing and the opening brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only statement macros count as evidence for a block.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-unclassified brace marks it as a
      // block.
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (FormatToken *LBrace : LBraceStack)
    if (LBrace->is(BK_Unknown))
      LBrace->setBlockKind(BK_Block);

  // Undo the look-ahead so that normal parsing resumes where it left off.
  FormatTok = Tokens->setPosition(StoredPosition);
}
741 
// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
747 
748 size_t UnwrappedLineParser::computePPHash() const {
749   size_t h = 0;
750   for (const auto &i : PPStack) {
751     hash_combine(h, size_t(i.Kind));
752     hash_combine(h, i.Line);
753   }
754   return h;
755 }
756 
757 // Checks whether \p ParsedLine might fit on a single line. We must clone the
758 // tokens of \p ParsedLine before running the token annotator on it so that we
759 // can restore them afterward.
760 bool UnwrappedLineParser::mightFitOnOneLine(UnwrappedLine &ParsedLine) const {
761   const auto ColumnLimit = Style.ColumnLimit;
762   if (ColumnLimit == 0)
763     return true;
764 
765   auto &Tokens = ParsedLine.Tokens;
766   assert(!Tokens.empty());
767   const auto *LastToken = Tokens.back().Tok;
768   assert(LastToken);
769 
770   SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
771 
772   int Index = 0;
773   for (const auto &Token : Tokens) {
774     assert(Token.Tok);
775     auto &SavedToken = SavedTokens[Index++];
776     SavedToken.Tok = new FormatToken;
777     SavedToken.Tok->copyFrom(*Token.Tok);
778     SavedToken.Children = std::move(Token.Children);
779   }
780 
781   AnnotatedLine Line(ParsedLine);
782   assert(Line.Last == LastToken);
783 
784   TokenAnnotator Annotator(Style, Keywords);
785   Annotator.annotate(Line);
786   Annotator.calculateFormattingInformation(Line);
787 
788   const int Length = LastToken->TotalLength;
789 
790   Index = 0;
791   for (auto &Token : Tokens) {
792     const auto &SavedToken = SavedTokens[Index++];
793     Token.Tok->copyFrom(*SavedToken.Tok);
794     Token.Children = std::move(SavedToken.Children);
795     delete SavedToken.Tok;
796   }
797 
798   return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
799 }
800 
801 UnwrappedLineParser::IfStmtKind UnwrappedLineParser::parseBlock(
802     bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
803     bool UnindentWhitesmithsBraces, bool CanContainBracedList,
804     TokenType NextLBracesType) {
805   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
806          "'{' or macro block token expected");
807   FormatToken *Tok = FormatTok;
808   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
809   FormatTok->setBlockKind(BK_Block);
810 
811   // For Whitesmiths mode, jump to the next level prior to skipping over the
812   // braces.
813   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
814     ++Line->Level;
815 
816   size_t PPStartHash = computePPHash();
817 
818   unsigned InitialLevel = Line->Level;
819   nextToken(/*LevelDifference=*/AddLevels);
820 
821   if (MacroBlock && FormatTok->is(tok::l_paren))
822     parseParens();
823 
824   size_t NbPreprocessorDirectives =
825       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
826   addUnwrappedLine();
827   size_t OpeningLineIndex =
828       CurrentLines->empty()
829           ? (UnwrappedLine::kInvalidIndex)
830           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
831 
832   // Whitesmiths is weird here. The brace needs to be indented for the namespace
833   // block, but the block itself may not be indented depending on the style
834   // settings. This allows the format to back up one level in those cases.
835   if (UnindentWhitesmithsBraces)
836     --Line->Level;
837 
838   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
839                                           MustBeDeclaration);
840   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
841     Line->Level += AddLevels;
842 
843   IfStmtKind IfKind = IfStmtKind::NotIf;
844   const bool SimpleBlock =
845       parseLevel(Tok, CanContainBracedList, &IfKind, NextLBracesType);
846 
847   if (eof())
848     return IfKind;
849 
850   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
851                  : !FormatTok->is(tok::r_brace)) {
852     Line->Level = InitialLevel;
853     FormatTok->setBlockKind(BK_Block);
854     return IfKind;
855   }
856 
857   if (SimpleBlock && !KeepBraces &&
858       Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
859     assert(FormatTok->is(tok::r_brace));
860     const FormatToken *Previous = Tokens->getPreviousToken();
861     assert(Previous);
862     if (Previous->isNot(tok::r_brace) || Previous->Optional) {
863       assert(!CurrentLines->empty());
864       if (mightFitOnOneLine(CurrentLines->back())) {
865         Tok->MatchingParen = FormatTok;
866         FormatTok->MatchingParen = Tok;
867       }
868     }
869   }
870 
871   size_t PPEndHash = computePPHash();
872 
873   // Munch the closing brace.
874   nextToken(/*LevelDifference=*/-AddLevels);
875 
876   if (MacroBlock && FormatTok->is(tok::l_paren))
877     parseParens();
878 
879   if (FormatTok->is(tok::kw_noexcept)) {
880     // A noexcept in a requires expression.
881     nextToken();
882   }
883 
884   if (FormatTok->is(tok::arrow)) {
885     // Following the } or noexcept we can find a trailing return type arrow
886     // as part of an implicit conversion constraint.
887     nextToken();
888     parseStructuralElement();
889   }
890 
891   if (MunchSemi && FormatTok->is(tok::semi))
892     nextToken();
893 
894   Line->Level = InitialLevel;
895 
896   if (PPStartHash == PPEndHash) {
897     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
898     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
899       // Update the opening line to add the forward reference as well
900       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
901           CurrentLines->size() - 1;
902     }
903   }
904 
905   return IfKind;
906 }
907 
908 static bool isGoogScope(const UnwrappedLine &Line) {
909   // FIXME: Closure-library specific stuff should not be hard-coded but be
910   // configurable.
911   if (Line.Tokens.size() < 4)
912     return false;
913   auto I = Line.Tokens.begin();
914   if (I->Tok->TokenText != "goog")
915     return false;
916   ++I;
917   if (I->Tok->isNot(tok::period))
918     return false;
919   ++I;
920   if (I->Tok->TokenText != "scope")
921     return false;
922   ++I;
923   return I->Tok->is(tok::l_paren);
924 }
925 
926 static bool isIIFE(const UnwrappedLine &Line,
927                    const AdditionalKeywords &Keywords) {
928   // Look for the start of an immediately invoked anonymous function.
929   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
930   // This is commonly done in JavaScript to create a new, anonymous scope.
931   // Example: (function() { ... })()
932   if (Line.Tokens.size() < 3)
933     return false;
934   auto I = Line.Tokens.begin();
935   if (I->Tok->isNot(tok::l_paren))
936     return false;
937   ++I;
938   if (I->Tok->isNot(Keywords.kw_function))
939     return false;
940   ++I;
941   return I->Tok->is(tok::l_paren);
942 }
943 
944 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
945                                    const FormatToken &InitialToken) {
946   tok::TokenKind Kind = InitialToken.Tok.getKind();
947   if (InitialToken.is(TT_NamespaceMacro))
948     Kind = tok::kw_namespace;
949 
950   switch (Kind) {
951   case tok::kw_namespace:
952     return Style.BraceWrapping.AfterNamespace;
953   case tok::kw_class:
954     return Style.BraceWrapping.AfterClass;
955   case tok::kw_union:
956     return Style.BraceWrapping.AfterUnion;
957   case tok::kw_struct:
958     return Style.BraceWrapping.AfterStruct;
959   case tok::kw_enum:
960     return Style.BraceWrapping.AfterEnum;
961   default:
962     return false;
963   }
964 }
965 
966 void UnwrappedLineParser::parseChildBlock(
967     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
968   assert(FormatTok->is(tok::l_brace));
969   FormatTok->setBlockKind(BK_Block);
970   const FormatToken *OpeningBrace = FormatTok;
971   nextToken();
972   {
973     bool SkipIndent = (Style.isJavaScript() &&
974                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
975     ScopedLineState LineState(*this);
976     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
977                                             /*MustBeDeclaration=*/false);
978     Line->Level += SkipIndent ? 0 : 1;
979     parseLevel(OpeningBrace, CanContainBracedList, /*IfKind=*/nullptr,
980                NextLBracesType);
981     flushComments(isOnNewLine(*FormatTok));
982     Line->Level -= SkipIndent ? 0 : 1;
983   }
984   nextToken();
985 }
986 
987 void UnwrappedLineParser::parsePPDirective() {
988   assert(FormatTok->is(tok::hash) && "'#' expected");
989   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
990 
991   nextToken();
992 
993   if (!FormatTok->Tok.getIdentifierInfo()) {
994     parsePPUnknown();
995     return;
996   }
997 
998   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
999   case tok::pp_define:
1000     parsePPDefine();
1001     return;
1002   case tok::pp_if:
1003     parsePPIf(/*IfDef=*/false);
1004     break;
1005   case tok::pp_ifdef:
1006   case tok::pp_ifndef:
1007     parsePPIf(/*IfDef=*/true);
1008     break;
1009   case tok::pp_else:
1010     parsePPElse();
1011     break;
1012   case tok::pp_elifdef:
1013   case tok::pp_elifndef:
1014   case tok::pp_elif:
1015     parsePPElIf();
1016     break;
1017   case tok::pp_endif:
1018     parsePPEndIf();
1019     break;
1020   default:
1021     parsePPUnknown();
1022     break;
1023   }
1024 }
1025 
1026 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1027   size_t Line = CurrentLines->size();
1028   if (CurrentLines == &PreprocessorDirectives)
1029     Line += Lines.size();
1030 
1031   if (Unreachable ||
1032       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
1033     PPStack.push_back({PP_Unreachable, Line});
1034   else
1035     PPStack.push_back({PP_Conditional, Line});
1036 }
1037 
1038 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1039   ++PPBranchLevel;
1040   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1041   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1042     PPLevelBranchIndex.push_back(0);
1043     PPLevelBranchCount.push_back(0);
1044   }
1045   PPChainBranchIndex.push(0);
1046   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1047   conditionalCompilationCondition(Unreachable || Skip);
1048 }
1049 
1050 void UnwrappedLineParser::conditionalCompilationAlternative() {
1051   if (!PPStack.empty())
1052     PPStack.pop_back();
1053   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1054   if (!PPChainBranchIndex.empty())
1055     ++PPChainBranchIndex.top();
1056   conditionalCompilationCondition(
1057       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1058       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1059 }
1060 
1061 void UnwrappedLineParser::conditionalCompilationEnd() {
1062   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1063   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1064     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1065       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1066   }
1067   // Guard against #endif's without #if.
1068   if (PPBranchLevel > -1)
1069     --PPBranchLevel;
1070   if (!PPChainBranchIndex.empty())
1071     PPChainBranchIndex.pop();
1072   if (!PPStack.empty())
1073     PPStack.pop_back();
1074 }
1075 
1076 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1077   bool IfNDef = FormatTok->is(tok::pp_ifndef);
1078   nextToken();
1079   bool Unreachable = false;
1080   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1081     Unreachable = true;
1082   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1083     Unreachable = true;
1084   conditionalCompilationStart(Unreachable);
1085   FormatToken *IfCondition = FormatTok;
1086   // If there's a #ifndef on the first line, and the only lines before it are
1087   // comments, it could be an include guard.
1088   bool MaybeIncludeGuard = IfNDef;
1089   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
1090     for (auto &Line : Lines) {
1091       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1092         MaybeIncludeGuard = false;
1093         IncludeGuard = IG_Rejected;
1094         break;
1095       }
1096     }
1097   --PPBranchLevel;
1098   parsePPUnknown();
1099   ++PPBranchLevel;
1100   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1101     IncludeGuard = IG_IfNdefed;
1102     IncludeGuardToken = IfCondition;
1103   }
1104 }
1105 
1106 void UnwrappedLineParser::parsePPElse() {
1107   // If a potential include guard has an #else, it's not an include guard.
1108   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1109     IncludeGuard = IG_Rejected;
1110   conditionalCompilationAlternative();
1111   if (PPBranchLevel > -1)
1112     --PPBranchLevel;
1113   parsePPUnknown();
1114   ++PPBranchLevel;
1115 }
1116 
1117 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1118 
1119 void UnwrappedLineParser::parsePPEndIf() {
1120   conditionalCompilationEnd();
1121   parsePPUnknown();
1122   // If the #endif of a potential include guard is the last thing in the file,
1123   // then we found an include guard.
1124   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1125       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1126     IncludeGuard = IG_Found;
1127 }
1128 
1129 void UnwrappedLineParser::parsePPDefine() {
1130   nextToken();
1131 
1132   if (!FormatTok->Tok.getIdentifierInfo()) {
1133     IncludeGuard = IG_Rejected;
1134     IncludeGuardToken = nullptr;
1135     parsePPUnknown();
1136     return;
1137   }
1138 
1139   if (IncludeGuard == IG_IfNdefed &&
1140       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1141     IncludeGuard = IG_Defined;
1142     IncludeGuardToken = nullptr;
1143     for (auto &Line : Lines) {
1144       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1145         IncludeGuard = IG_Rejected;
1146         break;
1147       }
1148     }
1149   }
1150 
1151   // In the context of a define, even keywords should be treated as normal
1152   // identifiers. Setting the kind to identifier is not enough, because we need
1153   // to treat additional keywords like __except as well, which are already
1154   // identifiers. Setting the identifier info to null interferes with include
1155   // guard processing above, and changes preprocessing nesting.
1156   FormatTok->Tok.setKind(tok::identifier);
1157   FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1158   nextToken();
1159   if (FormatTok->Tok.getKind() == tok::l_paren &&
1160       !FormatTok->hasWhitespaceBefore())
1161     parseParens();
1162   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1163     Line->Level += PPBranchLevel + 1;
1164   addUnwrappedLine();
1165   ++Line->Level;
1166 
1167   // Errors during a preprocessor directive can only affect the layout of the
1168   // preprocessor directive, and thus we ignore them. An alternative approach
1169   // would be to use the same approach we use on the file level (no
1170   // re-indentation if there was a structural error) within the macro
1171   // definition.
1172   parseFile();
1173 }
1174 
1175 void UnwrappedLineParser::parsePPUnknown() {
1176   do {
1177     nextToken();
1178   } while (!eof());
1179   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1180     Line->Level += PPBranchLevel + 1;
1181   addUnwrappedLine();
1182 }
1183 
1184 // Here we exclude certain tokens that are not usually the first token in an
1185 // unwrapped line. This is used in attempt to distinguish macro calls without
1186 // trailing semicolons from other constructs split to several lines.
1187 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1188   // Semicolon can be a null-statement, l_square can be a start of a macro or
1189   // a C++11 attribute, but this doesn't seem to be common.
1190   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1191          Tok.isNot(TT_AttributeSquare) &&
1192          // Tokens that can only be used as binary operators and a part of
1193          // overloaded operator names.
1194          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1195          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1196          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1197          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1198          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1199          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1200          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1201          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1202          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1203          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1204          Tok.isNot(tok::lesslessequal) &&
1205          // Colon is used in labels, base class lists, initializer lists,
1206          // range-based for loops, ternary operator, but should never be the
1207          // first token in an unwrapped line.
1208          Tok.isNot(tok::colon) &&
1209          // 'noexcept' is a trailing annotation.
1210          Tok.isNot(tok::kw_noexcept);
1211 }
1212 
1213 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1214                           const FormatToken *FormatTok) {
1215   // FIXME: This returns true for C/C++ keywords like 'struct'.
1216   return FormatTok->is(tok::identifier) &&
1217          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1218           !FormatTok->isOneOf(
1219               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1220               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1221               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1222               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1223               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1224               Keywords.kw_instanceof, Keywords.kw_interface,
1225               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1226 }
1227 
1228 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1229                                  const FormatToken *FormatTok) {
1230   return FormatTok->Tok.isLiteral() ||
1231          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1232          mustBeJSIdent(Keywords, FormatTok);
1233 }
1234 
1235 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1236 // when encountered after a value (see mustBeJSIdentOrValue).
1237 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1238                            const FormatToken *FormatTok) {
1239   return FormatTok->isOneOf(
1240       tok::kw_return, Keywords.kw_yield,
1241       // conditionals
1242       tok::kw_if, tok::kw_else,
1243       // loops
1244       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1245       // switch/case
1246       tok::kw_switch, tok::kw_case,
1247       // exceptions
1248       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1249       // declaration
1250       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1251       Keywords.kw_async, Keywords.kw_function,
1252       // import/export
1253       Keywords.kw_import, tok::kw_export);
1254 }
1255 
1256 // Checks whether a token is a type in K&R C (aka C78).
1257 static bool isC78Type(const FormatToken &Tok) {
1258   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1259                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1260                      tok::identifier);
1261 }
1262 
1263 // This function checks whether a token starts the first parameter declaration
1264 // in a K&R C (aka C78) function definition, e.g.:
1265 //   int f(a, b)
1266 //   short a, b;
1267 //   {
1268 //      return a + b;
1269 //   }
1270 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1271                                const FormatToken *FuncName) {
1272   assert(Tok);
1273   assert(Next);
1274   assert(FuncName);
1275 
1276   if (FuncName->isNot(tok::identifier))
1277     return false;
1278 
1279   const FormatToken *Prev = FuncName->Previous;
1280   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1281     return false;
1282 
1283   if (!isC78Type(*Tok) &&
1284       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1285     return false;
1286 
1287   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1288     return false;
1289 
1290   Tok = Tok->Previous;
1291   if (!Tok || Tok->isNot(tok::r_paren))
1292     return false;
1293 
1294   Tok = Tok->Previous;
1295   if (!Tok || Tok->isNot(tok::identifier))
1296     return false;
1297 
1298   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1299 }
1300 
1301 void UnwrappedLineParser::parseModuleImport() {
1302   nextToken();
1303   while (!eof()) {
1304     if (FormatTok->is(tok::colon)) {
1305       FormatTok->setFinalizedType(TT_ModulePartitionColon);
1306     }
1307     // Handle import <foo/bar.h> as we would an include statement.
1308     else if (FormatTok->is(tok::less)) {
1309       nextToken();
1310       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1311         // Mark tokens up to the trailing line comments as implicit string
1312         // literals.
1313         if (FormatTok->isNot(tok::comment) &&
1314             !FormatTok->TokenText.startswith("//"))
1315           FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1316         nextToken();
1317       }
1318     }
1319     if (FormatTok->is(tok::semi)) {
1320       nextToken();
1321       break;
1322     }
1323     nextToken();
1324   }
1325 
1326   addUnwrappedLine();
1327 }
1328 
1329 // readTokenWithJavaScriptASI reads the next token and terminates the current
1330 // line if JavaScript Automatic Semicolon Insertion must
1331 // happen between the current token and the next token.
1332 //
1333 // This method is conservative - it cannot cover all edge cases of JavaScript,
1334 // but only aims to correctly handle certain well known cases. It *must not*
1335 // return true in speculative cases.
1336 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1337   FormatToken *Previous = FormatTok;
1338   readToken();
1339   FormatToken *Next = FormatTok;
1340 
1341   bool IsOnSameLine =
1342       CommentsBeforeNextToken.empty()
1343           ? Next->NewlinesBefore == 0
1344           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1345   if (IsOnSameLine)
1346     return;
1347 
1348   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1349   bool PreviousStartsTemplateExpr =
1350       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1351   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1352     // If the line contains an '@' sign, the previous token might be an
1353     // annotation, which can precede another identifier/value.
1354     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1355       return LineNode.Tok->is(tok::at);
1356     });
1357     if (HasAt)
1358       return;
1359   }
1360   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1361     return addUnwrappedLine();
1362   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1363   bool NextEndsTemplateExpr =
1364       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1365   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1366       (PreviousMustBeValue ||
1367        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1368                          tok::minusminus)))
1369     return addUnwrappedLine();
1370   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1371       isJSDeclOrStmt(Keywords, Next))
1372     return addUnwrappedLine();
1373 }
1374 
1375 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
1376                                                  bool IsTopLevel,
1377                                                  TokenType NextLBracesType,
1378                                                  bool *HasLabel) {
1379   if (Style.Language == FormatStyle::LK_TableGen &&
1380       FormatTok->is(tok::pp_include)) {
1381     nextToken();
1382     if (FormatTok->is(tok::string_literal))
1383       nextToken();
1384     addUnwrappedLine();
1385     return;
1386   }
1387   switch (FormatTok->Tok.getKind()) {
1388   case tok::kw_asm:
1389     nextToken();
1390     if (FormatTok->is(tok::l_brace)) {
1391       FormatTok->setFinalizedType(TT_InlineASMBrace);
1392       nextToken();
1393       while (FormatTok && FormatTok->isNot(tok::eof)) {
1394         if (FormatTok->is(tok::r_brace)) {
1395           FormatTok->setFinalizedType(TT_InlineASMBrace);
1396           nextToken();
1397           addUnwrappedLine();
1398           break;
1399         }
1400         FormatTok->Finalized = true;
1401         nextToken();
1402       }
1403     }
1404     break;
1405   case tok::kw_namespace:
1406     parseNamespace();
1407     return;
1408   case tok::kw_public:
1409   case tok::kw_protected:
1410   case tok::kw_private:
1411     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1412         Style.isCSharp())
1413       nextToken();
1414     else
1415       parseAccessSpecifier();
1416     return;
1417   case tok::kw_if:
1418     if (Style.isJavaScript() && Line->MustBeDeclaration)
1419       // field/method declaration.
1420       break;
1421     parseIfThenElse(IfKind);
1422     return;
1423   case tok::kw_for:
1424   case tok::kw_while:
1425     if (Style.isJavaScript() && Line->MustBeDeclaration)
1426       // field/method declaration.
1427       break;
1428     parseForOrWhileLoop();
1429     return;
1430   case tok::kw_do:
1431     if (Style.isJavaScript() && Line->MustBeDeclaration)
1432       // field/method declaration.
1433       break;
1434     parseDoWhile();
1435     return;
1436   case tok::kw_switch:
1437     if (Style.isJavaScript() && Line->MustBeDeclaration)
1438       // 'switch: string' field declaration.
1439       break;
1440     parseSwitch();
1441     return;
1442   case tok::kw_default:
1443     if (Style.isJavaScript() && Line->MustBeDeclaration)
1444       // 'default: string' field declaration.
1445       break;
1446     nextToken();
1447     if (FormatTok->is(tok::colon)) {
1448       parseLabel();
1449       return;
1450     }
1451     // e.g. "default void f() {}" in a Java interface.
1452     break;
1453   case tok::kw_case:
1454     if (Style.isJavaScript() && Line->MustBeDeclaration) {
1455       // 'case: string' field declaration.
1456       nextToken();
1457       break;
1458     }
1459     parseCaseLabel();
1460     return;
1461   case tok::kw_try:
1462   case tok::kw___try:
1463     if (Style.isJavaScript() && Line->MustBeDeclaration)
1464       // field/method declaration.
1465       break;
1466     parseTryCatch();
1467     return;
1468   case tok::kw_extern:
1469     nextToken();
1470     if (FormatTok->is(tok::string_literal)) {
1471       nextToken();
1472       if (FormatTok->is(tok::l_brace)) {
1473         if (Style.BraceWrapping.AfterExternBlock)
1474           addUnwrappedLine();
1475         // Either we indent or for backwards compatibility we follow the
1476         // AfterExternBlock style.
1477         unsigned AddLevels =
1478             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1479                     (Style.BraceWrapping.AfterExternBlock &&
1480                      Style.IndentExternBlock ==
1481                          FormatStyle::IEBS_AfterExternBlock)
1482                 ? 1u
1483                 : 0u;
1484         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1485         addUnwrappedLine();
1486         return;
1487       }
1488     }
1489     break;
1490   case tok::kw_export:
1491     if (Style.isJavaScript()) {
1492       parseJavaScriptEs6ImportExport();
1493       return;
1494     }
1495     if (!Style.isCpp())
1496       break;
1497     // Handle C++ "(inline|export) namespace".
1498     LLVM_FALLTHROUGH;
1499   case tok::kw_inline:
1500     nextToken();
1501     if (FormatTok->is(tok::kw_namespace)) {
1502       parseNamespace();
1503       return;
1504     }
1505     break;
1506   case tok::identifier:
1507     if (FormatTok->is(TT_ForEachMacro)) {
1508       parseForOrWhileLoop();
1509       return;
1510     }
1511     if (FormatTok->is(TT_MacroBlockBegin)) {
1512       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1513                  /*MunchSemi=*/false);
1514       return;
1515     }
1516     if (FormatTok->is(Keywords.kw_import)) {
1517       if (Style.isJavaScript()) {
1518         parseJavaScriptEs6ImportExport();
1519         return;
1520       }
1521       if (Style.Language == FormatStyle::LK_Proto) {
1522         nextToken();
1523         if (FormatTok->is(tok::kw_public))
1524           nextToken();
1525         if (!FormatTok->is(tok::string_literal))
1526           return;
1527         nextToken();
1528         if (FormatTok->is(tok::semi))
1529           nextToken();
1530         addUnwrappedLine();
1531         return;
1532       }
1533       if (Style.isCpp()) {
1534         parseModuleImport();
1535         return;
1536       }
1537     }
1538     if (Style.isCpp() &&
1539         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1540                            Keywords.kw_slots, Keywords.kw_qslots)) {
1541       nextToken();
1542       if (FormatTok->is(tok::colon)) {
1543         nextToken();
1544         addUnwrappedLine();
1545         return;
1546       }
1547     }
1548     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1549       parseStatementMacro();
1550       return;
1551     }
1552     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1553       parseNamespace();
1554       return;
1555     }
1556     // In all other cases, parse the declaration.
1557     break;
1558   default:
1559     break;
1560   }
1561   do {
1562     const FormatToken *Previous = FormatTok->Previous;
1563     switch (FormatTok->Tok.getKind()) {
1564     case tok::at:
1565       nextToken();
1566       if (FormatTok->is(tok::l_brace)) {
1567         nextToken();
1568         parseBracedList();
1569         break;
1570       } else if (Style.Language == FormatStyle::LK_Java &&
1571                  FormatTok->is(Keywords.kw_interface)) {
1572         nextToken();
1573         break;
1574       }
1575       switch (FormatTok->Tok.getObjCKeywordID()) {
1576       case tok::objc_public:
1577       case tok::objc_protected:
1578       case tok::objc_package:
1579       case tok::objc_private:
1580         return parseAccessSpecifier();
1581       case tok::objc_interface:
1582       case tok::objc_implementation:
1583         return parseObjCInterfaceOrImplementation();
1584       case tok::objc_protocol:
1585         if (parseObjCProtocol())
1586           return;
1587         break;
1588       case tok::objc_end:
1589         return; // Handled by the caller.
1590       case tok::objc_optional:
1591       case tok::objc_required:
1592         nextToken();
1593         addUnwrappedLine();
1594         return;
1595       case tok::objc_autoreleasepool:
1596         nextToken();
1597         if (FormatTok->is(tok::l_brace)) {
1598           if (Style.BraceWrapping.AfterControlStatement ==
1599               FormatStyle::BWACS_Always)
1600             addUnwrappedLine();
1601           parseBlock();
1602         }
1603         addUnwrappedLine();
1604         return;
1605       case tok::objc_synchronized:
1606         nextToken();
1607         if (FormatTok->is(tok::l_paren))
1608           // Skip synchronization object
1609           parseParens();
1610         if (FormatTok->is(tok::l_brace)) {
1611           if (Style.BraceWrapping.AfterControlStatement ==
1612               FormatStyle::BWACS_Always)
1613             addUnwrappedLine();
1614           parseBlock();
1615         }
1616         addUnwrappedLine();
1617         return;
1618       case tok::objc_try:
1619         // This branch isn't strictly necessary (the kw_try case below would
1620         // do this too after the tok::at is parsed above).  But be explicit.
1621         parseTryCatch();
1622         return;
1623       default:
1624         break;
1625       }
1626       break;
1627     case tok::kw_concept:
1628       parseConcept();
1629       return;
1630     case tok::kw_requires: {
1631       if (Style.isCpp()) {
1632         bool ParsedClause = parseRequires();
1633         if (ParsedClause)
1634           return;
1635       } else {
1636         nextToken();
1637       }
1638       break;
1639     }
1640     case tok::kw_enum:
1641       // Ignore if this is part of "template <enum ...".
1642       if (Previous && Previous->is(tok::less)) {
1643         nextToken();
1644         break;
1645       }
1646 
1647       // parseEnum falls through and does not yet add an unwrapped line as an
1648       // enum definition can start a structural element.
1649       if (!parseEnum())
1650         break;
1651       // This only applies for C++.
1652       if (!Style.isCpp()) {
1653         addUnwrappedLine();
1654         return;
1655       }
1656       break;
1657     case tok::kw_typedef:
1658       nextToken();
1659       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1660                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1661                              Keywords.kw_CF_CLOSED_ENUM,
1662                              Keywords.kw_NS_CLOSED_ENUM))
1663         parseEnum();
1664       break;
1665     case tok::kw_struct:
1666     case tok::kw_union:
1667     case tok::kw_class:
1668       if (parseStructLike())
1669         return;
1670       break;
1671     case tok::period:
1672       nextToken();
1673       // In Java, classes have an implicit static member "class".
1674       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1675           FormatTok->is(tok::kw_class))
1676         nextToken();
1677       if (Style.isJavaScript() && FormatTok &&
1678           FormatTok->Tok.getIdentifierInfo())
1679         // JavaScript only has pseudo keywords, all keywords are allowed to
1680         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1681         nextToken();
1682       break;
1683     case tok::semi:
1684       nextToken();
1685       addUnwrappedLine();
1686       return;
1687     case tok::r_brace:
1688       addUnwrappedLine();
1689       return;
1690     case tok::l_paren: {
1691       parseParens();
1692       // Break the unwrapped line if a K&R C function definition has a parameter
1693       // declaration.
1694       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1695         break;
1696       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1697         addUnwrappedLine();
1698         return;
1699       }
1700       break;
1701     }
1702     case tok::kw_operator:
1703       nextToken();
1704       if (FormatTok->isBinaryOperator())
1705         nextToken();
1706       break;
1707     case tok::caret:
1708       nextToken();
1709       if (FormatTok->Tok.isAnyIdentifier() ||
1710           FormatTok->isSimpleTypeSpecifier())
1711         nextToken();
1712       if (FormatTok->is(tok::l_paren))
1713         parseParens();
1714       if (FormatTok->is(tok::l_brace))
1715         parseChildBlock();
1716       break;
1717     case tok::l_brace:
1718       if (NextLBracesType != TT_Unknown)
1719         FormatTok->setFinalizedType(NextLBracesType);
1720       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1721         // A block outside of parentheses must be the last part of a
1722         // structural element.
1723         // FIXME: Figure out cases where this is not true, and add projections
1724         // for them (the one we know is missing are lambdas).
1725         if (Style.Language == FormatStyle::LK_Java &&
1726             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1727           // If necessary, we could set the type to something different than
1728           // TT_FunctionLBrace.
1729           if (Style.BraceWrapping.AfterControlStatement ==
1730               FormatStyle::BWACS_Always)
1731             addUnwrappedLine();
1732         } else if (Style.BraceWrapping.AfterFunction) {
1733           addUnwrappedLine();
1734         }
1735         if (!Line->InPPDirective)
1736           FormatTok->setFinalizedType(TT_FunctionLBrace);
1737         parseBlock();
1738         addUnwrappedLine();
1739         return;
1740       }
1741       // Otherwise this was a braced init list, and the structural
1742       // element continues.
1743       break;
1744     case tok::kw_try:
1745       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1746         // field/method declaration.
1747         nextToken();
1748         break;
1749       }
1750       // We arrive here when parsing function-try blocks.
1751       if (Style.BraceWrapping.AfterFunction)
1752         addUnwrappedLine();
1753       parseTryCatch();
1754       return;
1755     case tok::identifier: {
1756       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1757           Line->MustBeDeclaration) {
1758         addUnwrappedLine();
1759         parseCSharpGenericTypeConstraint();
1760         break;
1761       }
1762       if (FormatTok->is(TT_MacroBlockEnd)) {
1763         addUnwrappedLine();
1764         return;
1765       }
1766 
1767       // Function declarations (as opposed to function expressions) are parsed
1768       // on their own unwrapped line by continuing this loop. Function
1769       // expressions (functions that are not on their own line) must not create
1770       // a new unwrapped line, so they are special cased below.
1771       size_t TokenCount = Line->Tokens.size();
1772       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1773           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1774                                                      Keywords.kw_async)))) {
1775         tryToParseJSFunction();
1776         break;
1777       }
1778       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1779           FormatTok->is(Keywords.kw_interface)) {
1780         if (Style.isJavaScript()) {
1781           // In JavaScript/TypeScript, "interface" can be used as a standalone
1782           // identifier, e.g. in `var interface = 1;`. If "interface" is
1783           // followed by another identifier, it is very like to be an actual
1784           // interface declaration.
1785           unsigned StoredPosition = Tokens->getPosition();
1786           FormatToken *Next = Tokens->getNextToken();
1787           FormatTok = Tokens->setPosition(StoredPosition);
1788           if (!mustBeJSIdent(Keywords, Next)) {
1789             nextToken();
1790             break;
1791           }
1792         }
1793         parseRecord();
1794         addUnwrappedLine();
1795         return;
1796       }
1797 
1798       if (FormatTok->is(Keywords.kw_interface)) {
1799         if (parseStructLike())
1800           return;
1801         break;
1802       }
1803 
1804       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1805         parseStatementMacro();
1806         return;
1807       }
1808 
1809       // See if the following token should start a new unwrapped line.
1810       StringRef Text = FormatTok->TokenText;
1811 
1812       FormatToken *PreviousToken = FormatTok;
1813       nextToken();
1814 
1815       // JS doesn't have macros, and within classes colons indicate fields, not
1816       // labels.
1817       if (Style.isJavaScript())
1818         break;
1819 
1820       TokenCount = Line->Tokens.size();
1821       if (TokenCount == 1 ||
1822           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1823         if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1824           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1825           parseLabel(!Style.IndentGotoLabels);
1826           if (HasLabel)
1827             *HasLabel = true;
1828           return;
1829         }
1830         // Recognize function-like macro usages without trailing semicolon as
1831         // well as free-standing macros like Q_OBJECT.
1832         bool FunctionLike = FormatTok->is(tok::l_paren);
1833         if (FunctionLike)
1834           parseParens();
1835 
1836         bool FollowedByNewline =
1837             CommentsBeforeNextToken.empty()
1838                 ? FormatTok->NewlinesBefore > 0
1839                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1840 
1841         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1842             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1843           PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1844           addUnwrappedLine();
1845           return;
1846         }
1847       }
1848       break;
1849     }
1850     case tok::equal:
1851       if ((Style.isJavaScript() || Style.isCSharp()) &&
1852           FormatTok->is(TT_FatArrow)) {
1853         tryToParseChildBlock();
1854         break;
1855       }
1856 
1857       nextToken();
1858       if (FormatTok->is(tok::l_brace)) {
1859         // Block kind should probably be set to BK_BracedInit for any language.
1860         // C# needs this change to ensure that array initialisers and object
1861         // initialisers are indented the same way.
1862         if (Style.isCSharp())
1863           FormatTok->setBlockKind(BK_BracedInit);
1864         nextToken();
1865         parseBracedList();
1866       } else if (Style.Language == FormatStyle::LK_Proto &&
1867                  FormatTok->is(tok::less)) {
1868         nextToken();
1869         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1870                         /*ClosingBraceKind=*/tok::greater);
1871       }
1872       break;
1873     case tok::l_square:
1874       parseSquare();
1875       break;
1876     case tok::kw_new:
1877       parseNew();
1878       break;
1879     case tok::kw_case:
1880       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1881         // 'case: string' field declaration.
1882         nextToken();
1883         break;
1884       }
1885       parseCaseLabel();
1886       break;
1887     default:
1888       nextToken();
1889       break;
1890     }
1891   } while (!eof());
1892 }
1893 
1894 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1895   assert(FormatTok->is(tok::l_brace));
1896   if (!Style.isCSharp())
1897     return false;
1898   // See if it's a property accessor.
1899   if (FormatTok->Previous->isNot(tok::identifier))
1900     return false;
1901 
1902   // See if we are inside a property accessor.
1903   //
1904   // Record the current tokenPosition so that we can advance and
1905   // reset the current token. `Next` is not set yet so we need
1906   // another way to advance along the token stream.
1907   unsigned int StoredPosition = Tokens->getPosition();
1908   FormatToken *Tok = Tokens->getNextToken();
1909 
1910   // A trivial property accessor is of the form:
1911   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
1912   // Track these as they do not require line breaks to be introduced.
1913   bool HasSpecialAccessor = false;
1914   bool IsTrivialPropertyAccessor = true;
1915   while (!eof()) {
1916     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1917                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1918                      Keywords.kw_init, Keywords.kw_set)) {
1919       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
1920         HasSpecialAccessor = true;
1921       Tok = Tokens->getNextToken();
1922       continue;
1923     }
1924     if (Tok->isNot(tok::r_brace))
1925       IsTrivialPropertyAccessor = false;
1926     break;
1927   }
1928 
1929   if (!HasSpecialAccessor) {
1930     Tokens->setPosition(StoredPosition);
1931     return false;
1932   }
1933 
1934   // Try to parse the property accessor:
1935   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1936   Tokens->setPosition(StoredPosition);
1937   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1938     addUnwrappedLine();
1939   nextToken();
1940   do {
1941     switch (FormatTok->Tok.getKind()) {
1942     case tok::r_brace:
1943       nextToken();
1944       if (FormatTok->is(tok::equal)) {
1945         while (!eof() && FormatTok->isNot(tok::semi))
1946           nextToken();
1947         nextToken();
1948       }
1949       addUnwrappedLine();
1950       return true;
1951     case tok::l_brace:
1952       ++Line->Level;
1953       parseBlock(/*MustBeDeclaration=*/true);
1954       addUnwrappedLine();
1955       --Line->Level;
1956       break;
1957     case tok::equal:
1958       if (FormatTok->is(TT_FatArrow)) {
1959         ++Line->Level;
1960         do {
1961           nextToken();
1962         } while (!eof() && FormatTok->isNot(tok::semi));
1963         nextToken();
1964         addUnwrappedLine();
1965         --Line->Level;
1966         break;
1967       }
1968       nextToken();
1969       break;
1970     default:
1971       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
1972                              Keywords.kw_set) &&
1973           !IsTrivialPropertyAccessor) {
1974         // Non-trivial get/set needs to be on its own line.
1975         addUnwrappedLine();
1976       }
1977       nextToken();
1978     }
1979   } while (!eof());
1980 
1981   // Unreachable for well-formed code (paired '{' and '}').
1982   return true;
1983 }
1984 
1985 bool UnwrappedLineParser::tryToParseLambda() {
1986   assert(FormatTok->is(tok::l_square));
1987   if (!Style.isCpp()) {
1988     nextToken();
1989     return false;
1990   }
1991   FormatToken &LSquare = *FormatTok;
1992   if (!tryToParseLambdaIntroducer())
1993     return false;
1994 
1995   bool SeenArrow = false;
1996   bool InTemplateParameterList = false;
1997 
1998   while (FormatTok->isNot(tok::l_brace)) {
1999     if (FormatTok->isSimpleTypeSpecifier()) {
2000       nextToken();
2001       continue;
2002     }
2003     switch (FormatTok->Tok.getKind()) {
2004     case tok::l_brace:
2005       break;
2006     case tok::l_paren:
2007       parseParens();
2008       break;
2009     case tok::l_square:
2010       parseSquare();
2011       break;
2012     case tok::kw_class:
2013     case tok::kw_template:
2014     case tok::kw_typename:
2015       assert(FormatTok->Previous);
2016       if (FormatTok->Previous->is(tok::less))
2017         InTemplateParameterList = true;
2018       nextToken();
2019       break;
2020     case tok::amp:
2021     case tok::star:
2022     case tok::kw_const:
2023     case tok::comma:
2024     case tok::less:
2025     case tok::greater:
2026     case tok::identifier:
2027     case tok::numeric_constant:
2028     case tok::coloncolon:
2029     case tok::kw_mutable:
2030     case tok::kw_noexcept:
2031       nextToken();
2032       break;
2033     // Specialization of a template with an integer parameter can contain
2034     // arithmetic, logical, comparison and ternary operators.
2035     //
2036     // FIXME: This also accepts sequences of operators that are not in the scope
2037     // of a template argument list.
2038     //
2039     // In a C++ lambda a template type can only occur after an arrow. We use
2040     // this as an heuristic to distinguish between Objective-C expressions
2041     // followed by an `a->b` expression, such as:
2042     // ([obj func:arg] + a->b)
2043     // Otherwise the code below would parse as a lambda.
2044     //
2045     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2046     // explicit template lists: []<bool b = true && false>(U &&u){}
2047     case tok::plus:
2048     case tok::minus:
2049     case tok::exclaim:
2050     case tok::tilde:
2051     case tok::slash:
2052     case tok::percent:
2053     case tok::lessless:
2054     case tok::pipe:
2055     case tok::pipepipe:
2056     case tok::ampamp:
2057     case tok::caret:
2058     case tok::equalequal:
2059     case tok::exclaimequal:
2060     case tok::greaterequal:
2061     case tok::lessequal:
2062     case tok::question:
2063     case tok::colon:
2064     case tok::ellipsis:
2065     case tok::kw_true:
2066     case tok::kw_false:
2067       if (SeenArrow || InTemplateParameterList) {
2068         nextToken();
2069         break;
2070       }
2071       return true;
2072     case tok::arrow:
2073       // This might or might not actually be a lambda arrow (this could be an
2074       // ObjC method invocation followed by a dereferencing arrow). We might
2075       // reset this back to TT_Unknown in TokenAnnotator.
2076       FormatTok->setFinalizedType(TT_LambdaArrow);
2077       SeenArrow = true;
2078       nextToken();
2079       break;
2080     default:
2081       return true;
2082     }
2083   }
2084   FormatTok->setFinalizedType(TT_LambdaLBrace);
2085   LSquare.setFinalizedType(TT_LambdaLSquare);
2086   parseChildBlock();
2087   return true;
2088 }
2089 
2090 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2091   const FormatToken *Previous = FormatTok->Previous;
2092   const FormatToken *LeftSquare = FormatTok;
2093   nextToken();
2094   if (Previous &&
2095       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2096                          tok::kw_delete, tok::l_square) ||
2097        LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2098        Previous->isSimpleTypeSpecifier())) {
2099     return false;
2100   }
2101   if (FormatTok->is(tok::l_square))
2102     return false;
2103   if (FormatTok->is(tok::r_square)) {
2104     const FormatToken *Next = Tokens->peekNextToken();
2105     if (Next->is(tok::greater))
2106       return false;
2107   }
2108   parseSquare(/*LambdaIntroducer=*/true);
2109   return true;
2110 }
2111 
2112 void UnwrappedLineParser::tryToParseJSFunction() {
2113   assert(FormatTok->is(Keywords.kw_function) ||
2114          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2115   if (FormatTok->is(Keywords.kw_async))
2116     nextToken();
2117   // Consume "function".
2118   nextToken();
2119 
2120   // Consume * (generator function). Treat it like C++'s overloaded operators.
2121   if (FormatTok->is(tok::star)) {
2122     FormatTok->setFinalizedType(TT_OverloadedOperator);
2123     nextToken();
2124   }
2125 
2126   // Consume function name.
2127   if (FormatTok->is(tok::identifier))
2128     nextToken();
2129 
2130   if (FormatTok->isNot(tok::l_paren))
2131     return;
2132 
2133   // Parse formal parameter list.
2134   parseParens();
2135 
2136   if (FormatTok->is(tok::colon)) {
2137     // Parse a type definition.
2138     nextToken();
2139 
2140     // Eat the type declaration. For braced inline object types, balance braces,
2141     // otherwise just parse until finding an l_brace for the function body.
2142     if (FormatTok->is(tok::l_brace))
2143       tryToParseBracedList();
2144     else
2145       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2146         nextToken();
2147   }
2148 
2149   if (FormatTok->is(tok::semi))
2150     return;
2151 
2152   parseChildBlock();
2153 }
2154 
2155 bool UnwrappedLineParser::tryToParseBracedList() {
2156   if (FormatTok->is(BK_Unknown))
2157     calculateBraceTypes();
2158   assert(FormatTok->isNot(BK_Unknown));
2159   if (FormatTok->is(BK_Block))
2160     return false;
2161   nextToken();
2162   parseBracedList();
2163   return true;
2164 }
2165 
2166 bool UnwrappedLineParser::tryToParseChildBlock() {
2167   assert(Style.isJavaScript() || Style.isCSharp());
2168   assert(FormatTok->is(TT_FatArrow));
2169   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2170   // They always start an expression or a child block if followed by a curly
2171   // brace.
2172   nextToken();
2173   if (FormatTok->isNot(tok::l_brace))
2174     return false;
2175   parseChildBlock();
2176   return true;
2177 }
2178 
2179 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2180                                           bool IsEnum,
2181                                           tok::TokenKind ClosingBraceKind) {
2182   bool HasError = false;
2183 
2184   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2185   // replace this by using parseAssignmentExpression() inside.
2186   do {
2187     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2188         tryToParseChildBlock())
2189       continue;
2190     if (Style.isJavaScript()) {
2191       if (FormatTok->is(Keywords.kw_function) ||
2192           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2193         tryToParseJSFunction();
2194         continue;
2195       }
2196       if (FormatTok->is(tok::l_brace)) {
2197         // Could be a method inside of a braced list `{a() { return 1; }}`.
2198         if (tryToParseBracedList())
2199           continue;
2200         parseChildBlock();
2201       }
2202     }
2203     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2204       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2205         addUnwrappedLine();
2206       nextToken();
2207       return !HasError;
2208     }
2209     switch (FormatTok->Tok.getKind()) {
2210     case tok::l_square:
2211       if (Style.isCSharp())
2212         parseSquare();
2213       else
2214         tryToParseLambda();
2215       break;
2216     case tok::l_paren:
2217       parseParens();
2218       // JavaScript can just have free standing methods and getters/setters in
2219       // object literals. Detect them by a "{" following ")".
2220       if (Style.isJavaScript()) {
2221         if (FormatTok->is(tok::l_brace))
2222           parseChildBlock();
2223         break;
2224       }
2225       break;
2226     case tok::l_brace:
2227       // Assume there are no blocks inside a braced init list apart
2228       // from the ones we explicitly parse out (like lambdas).
2229       FormatTok->setBlockKind(BK_BracedInit);
2230       nextToken();
2231       parseBracedList();
2232       break;
2233     case tok::less:
2234       if (Style.Language == FormatStyle::LK_Proto ||
2235           ClosingBraceKind == tok::greater) {
2236         nextToken();
2237         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2238                         /*ClosingBraceKind=*/tok::greater);
2239       } else {
2240         nextToken();
2241       }
2242       break;
2243     case tok::semi:
2244       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2245       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2246       // used for error recovery if we have otherwise determined that this is
2247       // a braced list.
2248       if (Style.isJavaScript()) {
2249         nextToken();
2250         break;
2251       }
2252       HasError = true;
2253       if (!ContinueOnSemicolons)
2254         return !HasError;
2255       nextToken();
2256       break;
2257     case tok::comma:
2258       nextToken();
2259       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2260         addUnwrappedLine();
2261       break;
2262     default:
2263       nextToken();
2264       break;
2265     }
2266   } while (!eof());
2267   return false;
2268 }
2269 
2270 /// \brief Parses a pair of parentheses (and everything between them).
2271 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2272 /// double ampersands. This only counts for the current parens scope.
2273 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2274   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2275   nextToken();
2276   do {
2277     switch (FormatTok->Tok.getKind()) {
2278     case tok::l_paren:
2279       parseParens();
2280       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2281         parseChildBlock();
2282       break;
2283     case tok::r_paren:
2284       nextToken();
2285       return;
2286     case tok::r_brace:
2287       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2288       return;
2289     case tok::l_square:
2290       tryToParseLambda();
2291       break;
2292     case tok::l_brace:
2293       if (!tryToParseBracedList())
2294         parseChildBlock();
2295       break;
2296     case tok::at:
2297       nextToken();
2298       if (FormatTok->is(tok::l_brace)) {
2299         nextToken();
2300         parseBracedList();
2301       }
2302       break;
2303     case tok::equal:
2304       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2305         tryToParseChildBlock();
2306       else
2307         nextToken();
2308       break;
2309     case tok::kw_class:
2310       if (Style.isJavaScript())
2311         parseRecord(/*ParseAsExpr=*/true);
2312       else
2313         nextToken();
2314       break;
2315     case tok::identifier:
2316       if (Style.isJavaScript() &&
2317           (FormatTok->is(Keywords.kw_function) ||
2318            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2319         tryToParseJSFunction();
2320       else
2321         nextToken();
2322       break;
2323     case tok::kw_requires: {
2324       auto RequiresToken = FormatTok;
2325       nextToken();
2326       parseRequiresExpression(RequiresToken);
2327       break;
2328     }
2329     case tok::ampamp:
2330       if (AmpAmpTokenType != TT_Unknown)
2331         FormatTok->setFinalizedType(AmpAmpTokenType);
2332       LLVM_FALLTHROUGH;
2333     default:
2334       nextToken();
2335       break;
2336     }
2337   } while (!eof());
2338 }
2339 
2340 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2341   if (!LambdaIntroducer) {
2342     assert(FormatTok->is(tok::l_square) && "'[' expected.");
2343     if (tryToParseLambda())
2344       return;
2345   }
2346   do {
2347     switch (FormatTok->Tok.getKind()) {
2348     case tok::l_paren:
2349       parseParens();
2350       break;
2351     case tok::r_square:
2352       nextToken();
2353       return;
2354     case tok::r_brace:
2355       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2356       return;
2357     case tok::l_square:
2358       parseSquare();
2359       break;
2360     case tok::l_brace: {
2361       if (!tryToParseBracedList())
2362         parseChildBlock();
2363       break;
2364     }
2365     case tok::at:
2366       nextToken();
2367       if (FormatTok->is(tok::l_brace)) {
2368         nextToken();
2369         parseBracedList();
2370       }
2371       break;
2372     default:
2373       nextToken();
2374       break;
2375     }
2376   } while (!eof());
2377 }
2378 
2379 void UnwrappedLineParser::keepAncestorBraces() {
2380   if (!Style.RemoveBracesLLVM)
2381     return;
2382 
2383   const int MaxNestingLevels = 2;
2384   const int Size = NestedTooDeep.size();
2385   if (Size >= MaxNestingLevels)
2386     NestedTooDeep[Size - MaxNestingLevels] = true;
2387   NestedTooDeep.push_back(false);
2388 }
2389 
2390 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2391   for (const auto &Token : llvm::reverse(Line.Tokens))
2392     if (Token.Tok->isNot(tok::comment))
2393       return Token.Tok;
2394 
2395   return nullptr;
2396 }
2397 
2398 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2399   FormatToken *Tok = nullptr;
2400 
2401   if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2402       PreprocessorDirectives.empty()) {
2403     Tok = getLastNonComment(*Line);
2404     assert(Tok);
2405     if (Tok->BraceCount < 0) {
2406       assert(Tok->BraceCount == -1);
2407       Tok = nullptr;
2408     } else {
2409       Tok->BraceCount = -1;
2410     }
2411   }
2412 
2413   addUnwrappedLine();
2414   ++Line->Level;
2415   parseStructuralElement();
2416 
2417   if (Tok) {
2418     assert(!Line->InPPDirective);
2419     Tok = nullptr;
2420     for (const auto &L : llvm::reverse(*CurrentLines)) {
2421       if (!L.InPPDirective && getLastNonComment(L)) {
2422         Tok = L.Tokens.back().Tok;
2423         break;
2424       }
2425     }
2426     assert(Tok);
2427     ++Tok->BraceCount;
2428   }
2429 
2430   if (CheckEOF && FormatTok->is(tok::eof))
2431     addUnwrappedLine();
2432 
2433   --Line->Level;
2434 }
2435 
2436 static void markOptionalBraces(FormatToken *LeftBrace) {
2437   if (!LeftBrace)
2438     return;
2439 
2440   assert(LeftBrace->is(tok::l_brace));
2441 
2442   FormatToken *RightBrace = LeftBrace->MatchingParen;
2443   if (!RightBrace) {
2444     assert(!LeftBrace->Optional);
2445     return;
2446   }
2447 
2448   assert(RightBrace->is(tok::r_brace));
2449   assert(RightBrace->MatchingParen == LeftBrace);
2450   assert(LeftBrace->Optional == RightBrace->Optional);
2451 
2452   LeftBrace->Optional = true;
2453   RightBrace->Optional = true;
2454 }
2455 
2456 void UnwrappedLineParser::handleAttributes() {
2457   // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2458   if (FormatTok->is(TT_AttributeMacro))
2459     nextToken();
2460   handleCppAttributes();
2461 }
2462 
2463 bool UnwrappedLineParser::handleCppAttributes() {
2464   // Handle [[likely]] / [[unlikely]] attributes.
2465   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2466     parseSquare();
2467     return true;
2468   }
2469   return false;
2470 }
2471 
2472 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2473                                                   bool KeepBraces) {
2474   assert(FormatTok->is(tok::kw_if) && "'if' expected");
2475   nextToken();
2476   if (FormatTok->is(tok::exclaim))
2477     nextToken();
2478 
2479   bool KeepIfBraces = true;
2480   if (FormatTok->is(tok::kw_consteval)) {
2481     nextToken();
2482   } else {
2483     if (Style.RemoveBracesLLVM)
2484       KeepIfBraces = KeepBraces;
2485     if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
2486       nextToken();
2487     if (FormatTok->is(tok::l_paren))
2488       parseParens();
2489   }
2490   handleAttributes();
2491 
2492   bool NeedsUnwrappedLine = false;
2493   keepAncestorBraces();
2494 
2495   FormatToken *IfLeftBrace = nullptr;
2496   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2497 
2498   if (FormatTok->is(tok::l_brace)) {
2499     FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2500     IfLeftBrace = FormatTok;
2501     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2502     IfBlockKind = parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2503                              /*MunchSemi=*/true, KeepIfBraces);
2504     if (Style.BraceWrapping.BeforeElse)
2505       addUnwrappedLine();
2506     else
2507       NeedsUnwrappedLine = true;
2508   } else {
2509     parseUnbracedBody();
2510   }
2511 
2512   if (Style.RemoveBracesLLVM) {
2513     assert(!NestedTooDeep.empty());
2514     KeepIfBraces = KeepIfBraces ||
2515                    (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2516                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2517                    IfBlockKind == IfStmtKind::IfElseIf;
2518   }
2519 
2520   bool KeepElseBraces = KeepIfBraces;
2521   FormatToken *ElseLeftBrace = nullptr;
2522   IfStmtKind Kind = IfStmtKind::IfOnly;
2523 
2524   if (FormatTok->is(tok::kw_else)) {
2525     if (Style.RemoveBracesLLVM) {
2526       NestedTooDeep.back() = false;
2527       Kind = IfStmtKind::IfElse;
2528     }
2529     nextToken();
2530     handleAttributes();
2531     if (FormatTok->is(tok::l_brace)) {
2532       FormatTok->setFinalizedType(TT_ElseLBrace);
2533       ElseLeftBrace = FormatTok;
2534       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2535       if (parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2536                      /*MunchSemi=*/true, KeepElseBraces) == IfStmtKind::IfOnly)
2537         Kind = IfStmtKind::IfElseIf;
2538       addUnwrappedLine();
2539     } else if (FormatTok->is(tok::kw_if)) {
2540       const FormatToken *Previous = Tokens->getPreviousToken();
2541       assert(Previous);
2542       const bool IsPrecededByComment = Previous->is(tok::comment);
2543       if (IsPrecededByComment) {
2544         addUnwrappedLine();
2545         ++Line->Level;
2546       }
2547       bool TooDeep = true;
2548       if (Style.RemoveBracesLLVM) {
2549         Kind = IfStmtKind::IfElseIf;
2550         TooDeep = NestedTooDeep.pop_back_val();
2551       }
2552       ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
2553       if (Style.RemoveBracesLLVM)
2554         NestedTooDeep.push_back(TooDeep);
2555       if (IsPrecededByComment)
2556         --Line->Level;
2557     } else {
2558       parseUnbracedBody(/*CheckEOF=*/true);
2559     }
2560   } else {
2561     if (Style.RemoveBracesLLVM)
2562       KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2563     if (NeedsUnwrappedLine)
2564       addUnwrappedLine();
2565   }
2566 
2567   if (!Style.RemoveBracesLLVM)
2568     return nullptr;
2569 
2570   assert(!NestedTooDeep.empty());
2571   KeepElseBraces = KeepElseBraces ||
2572                    (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
2573                    NestedTooDeep.back();
2574 
2575   NestedTooDeep.pop_back();
2576 
2577   if (!KeepIfBraces && !KeepElseBraces) {
2578     markOptionalBraces(IfLeftBrace);
2579     markOptionalBraces(ElseLeftBrace);
2580   } else if (IfLeftBrace) {
2581     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2582     if (IfRightBrace) {
2583       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2584       assert(!IfLeftBrace->Optional);
2585       assert(!IfRightBrace->Optional);
2586       IfLeftBrace->MatchingParen = nullptr;
2587       IfRightBrace->MatchingParen = nullptr;
2588     }
2589   }
2590 
2591   if (IfKind)
2592     *IfKind = Kind;
2593 
2594   return IfLeftBrace;
2595 }
2596 
2597 void UnwrappedLineParser::parseTryCatch() {
2598   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2599   nextToken();
2600   bool NeedsUnwrappedLine = false;
2601   if (FormatTok->is(tok::colon)) {
2602     // We are in a function try block, what comes is an initializer list.
2603     nextToken();
2604 
2605     // In case identifiers were removed by clang-tidy, what might follow is
2606     // multiple commas in sequence - before the first identifier.
2607     while (FormatTok->is(tok::comma))
2608       nextToken();
2609 
2610     while (FormatTok->is(tok::identifier)) {
2611       nextToken();
2612       if (FormatTok->is(tok::l_paren))
2613         parseParens();
2614       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2615           FormatTok->is(tok::l_brace)) {
2616         do {
2617           nextToken();
2618         } while (!FormatTok->is(tok::r_brace));
2619         nextToken();
2620       }
2621 
2622       // In case identifiers were removed by clang-tidy, what might follow is
2623       // multiple commas in sequence - after the first identifier.
2624       while (FormatTok->is(tok::comma))
2625         nextToken();
2626     }
2627   }
2628   // Parse try with resource.
2629   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2630     parseParens();
2631 
2632   keepAncestorBraces();
2633 
2634   if (FormatTok->is(tok::l_brace)) {
2635     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2636     parseBlock();
2637     if (Style.BraceWrapping.BeforeCatch)
2638       addUnwrappedLine();
2639     else
2640       NeedsUnwrappedLine = true;
2641   } else if (!FormatTok->is(tok::kw_catch)) {
2642     // The C++ standard requires a compound-statement after a try.
2643     // If there's none, we try to assume there's a structuralElement
2644     // and try to continue.
2645     addUnwrappedLine();
2646     ++Line->Level;
2647     parseStructuralElement();
2648     --Line->Level;
2649   }
2650   while (true) {
2651     if (FormatTok->is(tok::at))
2652       nextToken();
2653     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2654                              tok::kw___finally) ||
2655           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2656            FormatTok->is(Keywords.kw_finally)) ||
2657           (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2658            FormatTok->isObjCAtKeyword(tok::objc_finally))))
2659       break;
2660     nextToken();
2661     while (FormatTok->isNot(tok::l_brace)) {
2662       if (FormatTok->is(tok::l_paren)) {
2663         parseParens();
2664         continue;
2665       }
2666       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2667         if (Style.RemoveBracesLLVM)
2668           NestedTooDeep.pop_back();
2669         return;
2670       }
2671       nextToken();
2672     }
2673     NeedsUnwrappedLine = false;
2674     Line->MustBeDeclaration = false;
2675     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2676     parseBlock();
2677     if (Style.BraceWrapping.BeforeCatch)
2678       addUnwrappedLine();
2679     else
2680       NeedsUnwrappedLine = true;
2681   }
2682 
2683   if (Style.RemoveBracesLLVM)
2684     NestedTooDeep.pop_back();
2685 
2686   if (NeedsUnwrappedLine)
2687     addUnwrappedLine();
2688 }
2689 
2690 void UnwrappedLineParser::parseNamespace() {
2691   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2692          "'namespace' expected");
2693 
2694   const FormatToken &InitialToken = *FormatTok;
2695   nextToken();
2696   if (InitialToken.is(TT_NamespaceMacro)) {
2697     parseParens();
2698   } else {
2699     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2700                               tok::l_square, tok::period, tok::l_paren) ||
2701            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2702       if (FormatTok->is(tok::l_square))
2703         parseSquare();
2704       else if (FormatTok->is(tok::l_paren))
2705         parseParens();
2706       else
2707         nextToken();
2708   }
2709   if (FormatTok->is(tok::l_brace)) {
2710     if (ShouldBreakBeforeBrace(Style, InitialToken))
2711       addUnwrappedLine();
2712 
2713     unsigned AddLevels =
2714         Style.NamespaceIndentation == FormatStyle::NI_All ||
2715                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2716                  DeclarationScopeStack.size() > 1)
2717             ? 1u
2718             : 0u;
2719     bool ManageWhitesmithsBraces =
2720         AddLevels == 0u &&
2721         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2722 
2723     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2724     // the whole block.
2725     if (ManageWhitesmithsBraces)
2726       ++Line->Level;
2727 
2728     parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2729                /*KeepBraces=*/true, ManageWhitesmithsBraces);
2730 
2731     // Munch the semicolon after a namespace. This is more common than one would
2732     // think. Putting the semicolon into its own line is very ugly.
2733     if (FormatTok->is(tok::semi))
2734       nextToken();
2735 
2736     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2737 
2738     if (ManageWhitesmithsBraces)
2739       --Line->Level;
2740   }
2741   // FIXME: Add error handling.
2742 }
2743 
2744 void UnwrappedLineParser::parseNew() {
2745   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2746   nextToken();
2747 
2748   if (Style.isCSharp()) {
2749     do {
2750       if (FormatTok->is(tok::l_brace))
2751         parseBracedList();
2752 
2753       if (FormatTok->isOneOf(tok::semi, tok::comma))
2754         return;
2755 
2756       nextToken();
2757     } while (!eof());
2758   }
2759 
2760   if (Style.Language != FormatStyle::LK_Java)
2761     return;
2762 
2763   // In Java, we can parse everything up to the parens, which aren't optional.
2764   do {
2765     // There should not be a ;, { or } before the new's open paren.
2766     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2767       return;
2768 
2769     // Consume the parens.
2770     if (FormatTok->is(tok::l_paren)) {
2771       parseParens();
2772 
2773       // If there is a class body of an anonymous class, consume that as child.
2774       if (FormatTok->is(tok::l_brace))
2775         parseChildBlock();
2776       return;
2777     }
2778     nextToken();
2779   } while (!eof());
2780 }
2781 
2782 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
2783   keepAncestorBraces();
2784 
2785   if (FormatTok->is(tok::l_brace)) {
2786     if (!KeepBraces)
2787       FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2788     FormatToken *LeftBrace = FormatTok;
2789     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2790     parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2791                /*MunchSemi=*/true, KeepBraces);
2792     if (!KeepBraces) {
2793       assert(!NestedTooDeep.empty());
2794       if (!NestedTooDeep.back())
2795         markOptionalBraces(LeftBrace);
2796     }
2797     if (WrapRightBrace)
2798       addUnwrappedLine();
2799   } else {
2800     parseUnbracedBody();
2801   }
2802 
2803   if (!KeepBraces)
2804     NestedTooDeep.pop_back();
2805 }
2806 
2807 void UnwrappedLineParser::parseForOrWhileLoop() {
2808   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2809          "'for', 'while' or foreach macro expected");
2810   const bool KeepBraces = !Style.RemoveBracesLLVM ||
2811                           !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
2812 
2813   nextToken();
2814   // JS' for await ( ...
2815   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2816     nextToken();
2817   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2818     nextToken();
2819   if (FormatTok->is(tok::l_paren))
2820     parseParens();
2821 
2822   parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
2823 }
2824 
2825 void UnwrappedLineParser::parseDoWhile() {
2826   assert(FormatTok->is(tok::kw_do) && "'do' expected");
2827   nextToken();
2828 
2829   parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
2830 
2831   // FIXME: Add error handling.
2832   if (!FormatTok->is(tok::kw_while)) {
2833     addUnwrappedLine();
2834     return;
2835   }
2836 
2837   // If in Whitesmiths mode, the line with the while() needs to be indented
2838   // to the same level as the block.
2839   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2840     ++Line->Level;
2841 
2842   nextToken();
2843   parseStructuralElement();
2844 }
2845 
2846 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2847   nextToken();
2848   unsigned OldLineLevel = Line->Level;
2849   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2850     --Line->Level;
2851   if (LeftAlignLabel)
2852     Line->Level = 0;
2853 
2854   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2855       FormatTok->is(tok::l_brace)) {
2856 
2857     CompoundStatementIndenter Indenter(this, Line->Level,
2858                                        Style.BraceWrapping.AfterCaseLabel,
2859                                        Style.BraceWrapping.IndentBraces);
2860     parseBlock();
2861     if (FormatTok->is(tok::kw_break)) {
2862       if (Style.BraceWrapping.AfterControlStatement ==
2863           FormatStyle::BWACS_Always) {
2864         addUnwrappedLine();
2865         if (!Style.IndentCaseBlocks &&
2866             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2867           ++Line->Level;
2868       }
2869       parseStructuralElement();
2870     }
2871     addUnwrappedLine();
2872   } else {
2873     if (FormatTok->is(tok::semi))
2874       nextToken();
2875     addUnwrappedLine();
2876   }
2877   Line->Level = OldLineLevel;
2878   if (FormatTok->isNot(tok::l_brace)) {
2879     parseStructuralElement();
2880     addUnwrappedLine();
2881   }
2882 }
2883 
2884 void UnwrappedLineParser::parseCaseLabel() {
2885   assert(FormatTok->is(tok::kw_case) && "'case' expected");
2886 
2887   // FIXME: fix handling of complex expressions here.
2888   do {
2889     nextToken();
2890   } while (!eof() && !FormatTok->is(tok::colon));
2891   parseLabel();
2892 }
2893 
2894 void UnwrappedLineParser::parseSwitch() {
2895   assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
2896   nextToken();
2897   if (FormatTok->is(tok::l_paren))
2898     parseParens();
2899 
2900   keepAncestorBraces();
2901 
2902   if (FormatTok->is(tok::l_brace)) {
2903     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2904     parseBlock();
2905     addUnwrappedLine();
2906   } else {
2907     addUnwrappedLine();
2908     ++Line->Level;
2909     parseStructuralElement();
2910     --Line->Level;
2911   }
2912 
2913   if (Style.RemoveBracesLLVM)
2914     NestedTooDeep.pop_back();
2915 }
2916 
2917 // Operators that can follow a C variable.
2918 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
2919   switch (kind) {
2920   case tok::ampamp:
2921   case tok::ampequal:
2922   case tok::arrow:
2923   case tok::caret:
2924   case tok::caretequal:
2925   case tok::comma:
2926   case tok::ellipsis:
2927   case tok::equal:
2928   case tok::equalequal:
2929   case tok::exclaim:
2930   case tok::exclaimequal:
2931   case tok::greater:
2932   case tok::greaterequal:
2933   case tok::greatergreater:
2934   case tok::greatergreaterequal:
2935   case tok::l_paren:
2936   case tok::l_square:
2937   case tok::less:
2938   case tok::lessequal:
2939   case tok::lessless:
2940   case tok::lesslessequal:
2941   case tok::minus:
2942   case tok::minusequal:
2943   case tok::minusminus:
2944   case tok::percent:
2945   case tok::percentequal:
2946   case tok::period:
2947   case tok::pipe:
2948   case tok::pipeequal:
2949   case tok::pipepipe:
2950   case tok::plus:
2951   case tok::plusequal:
2952   case tok::plusplus:
2953   case tok::question:
2954   case tok::r_brace:
2955   case tok::r_paren:
2956   case tok::r_square:
2957   case tok::semi:
2958   case tok::slash:
2959   case tok::slashequal:
2960   case tok::star:
2961   case tok::starequal:
2962     return true;
2963   default:
2964     return false;
2965   }
2966 }
2967 
2968 void UnwrappedLineParser::parseAccessSpecifier() {
2969   FormatToken *AccessSpecifierCandidate = FormatTok;
2970   nextToken();
2971   // Understand Qt's slots.
2972   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2973     nextToken();
2974   // Otherwise, we don't know what it is, and we'd better keep the next token.
2975   if (FormatTok->is(tok::colon)) {
2976     nextToken();
2977     addUnwrappedLine();
2978   } else if (!FormatTok->is(tok::coloncolon) &&
2979              !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
2980     // Not a variable name nor namespace name.
2981     addUnwrappedLine();
2982   } else if (AccessSpecifierCandidate) {
2983     // Consider the access specifier to be a C identifier.
2984     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2985   }
2986 }
2987 
2988 /// \brief Parses a concept definition.
2989 /// \pre The current token has to be the concept keyword.
2990 ///
2991 /// Returns if either the concept has been completely parsed, or if it detects
2992 /// that the concept definition is incorrect.
2993 void UnwrappedLineParser::parseConcept() {
2994   assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
2995   nextToken();
2996   if (!FormatTok->is(tok::identifier))
2997     return;
2998   nextToken();
2999   if (!FormatTok->is(tok::equal))
3000     return;
3001   nextToken();
3002   parseConstraintExpression();
3003   if (FormatTok->is(tok::semi))
3004     nextToken();
3005   addUnwrappedLine();
3006 }
3007 
3008 /// \brief Parses a requires, decides if it is a clause or an expression.
3009 /// \pre The current token has to be the requires keyword.
3010 /// \returns true if it parsed a clause.
3011 bool clang::format::UnwrappedLineParser::parseRequires() {
3012   assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3013   auto RequiresToken = FormatTok;
3014 
3015   // We try to guess if it is a requires clause, or a requires expression. For
3016   // that we first consume the keyword and check the next token.
3017   nextToken();
3018 
3019   switch (FormatTok->Tok.getKind()) {
3020   case tok::l_brace:
3021     // This can only be an expression, never a clause.
3022     parseRequiresExpression(RequiresToken);
3023     return false;
3024   case tok::l_paren:
3025     // Clauses and expression can start with a paren, it's unclear what we have.
3026     break;
3027   default:
3028     // All other tokens can only be a clause.
3029     parseRequiresClause(RequiresToken);
3030     return true;
3031   }
3032 
3033   // Looking forward we would have to decide if there are function declaration
3034   // like arguments to the requires expression:
3035   // requires (T t) {
3036   // Or there is a constraint expression for the requires clause:
3037   // requires (C<T> && ...
3038 
3039   // But first let's look behind.
3040   auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3041 
3042   if (!PreviousNonComment ||
3043       PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3044     // If there is no token, or an expression left brace, we are a requires
3045     // clause within a requires expression.
3046     parseRequiresClause(RequiresToken);
3047     return true;
3048   }
3049 
3050   switch (PreviousNonComment->Tok.getKind()) {
3051   case tok::greater:
3052   case tok::r_paren:
3053   case tok::kw_noexcept:
3054   case tok::kw_const:
3055     // This is a requires clause.
3056     parseRequiresClause(RequiresToken);
3057     return true;
3058   case tok::amp:
3059   case tok::ampamp: {
3060     // This can be either:
3061     // if (... && requires (T t) ...)
3062     // Or
3063     // void member(...) && requires (C<T> ...
3064     // We check the one token before that for a const:
3065     // void member(...) const && requires (C<T> ...
3066     auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3067     if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3068       parseRequiresClause(RequiresToken);
3069       return true;
3070     }
3071     break;
3072   }
3073   default:
3074     // It's an expression.
3075     parseRequiresExpression(RequiresToken);
3076     return false;
3077   }
3078 
3079   // Now we look forward and try to check if the paren content is a parameter
3080   // list. The parameters can be cv-qualified and contain references or
3081   // pointers.
3082   // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3083   // of stuff: typename, const, *, &, &&, ::, identifiers.
3084 
3085   int NextTokenOffset = 1;
3086   auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3087   auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3088     ++NextTokenOffset;
3089     NextToken = Tokens->peekNextToken(NextTokenOffset);
3090   };
3091 
3092   bool FoundType = false;
3093   bool LastWasColonColon = false;
3094   int OpenAngles = 0;
3095 
3096   for (; NextTokenOffset < 50; PeekNext()) {
3097     switch (NextToken->Tok.getKind()) {
3098     case tok::kw_volatile:
3099     case tok::kw_const:
3100     case tok::comma:
3101       parseRequiresExpression(RequiresToken);
3102       return false;
3103     case tok::r_paren:
3104     case tok::pipepipe:
3105       parseRequiresClause(RequiresToken);
3106       return true;
3107     case tok::eof:
3108       // Break out of the loop.
3109       NextTokenOffset = 50;
3110       break;
3111     case tok::coloncolon:
3112       LastWasColonColon = true;
3113       break;
3114     case tok::identifier:
3115       if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3116         parseRequiresExpression(RequiresToken);
3117         return false;
3118       }
3119       FoundType = true;
3120       LastWasColonColon = false;
3121       break;
3122     case tok::less:
3123       ++OpenAngles;
3124       break;
3125     case tok::greater:
3126       --OpenAngles;
3127       break;
3128     default:
3129       if (NextToken->isSimpleTypeSpecifier()) {
3130         parseRequiresExpression(RequiresToken);
3131         return false;
3132       }
3133       break;
3134     }
3135   }
3136 
3137   // This seems to be a complicated expression, just assume it's a clause.
3138   parseRequiresClause(RequiresToken);
3139   return true;
3140 }
3141 
3142 /// \brief Parses a requires clause.
3143 /// \param RequiresToken The requires keyword token, which starts this clause.
3144 /// \pre We need to be on the next token after the requires keyword.
3145 /// \sa parseRequiresExpression
3146 ///
3147 /// Returns if it either has finished parsing the clause, or it detects, that
3148 /// the clause is incorrect.
3149 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3150   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3151   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3152 
3153   // If there is no previous token, we are within a requires expression,
3154   // otherwise we will always have the template or function declaration in front
3155   // of it.
3156   bool InRequiresExpression =
3157       !RequiresToken->Previous ||
3158       RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3159 
3160   RequiresToken->setFinalizedType(InRequiresExpression
3161                                       ? TT_RequiresClauseInARequiresExpression
3162                                       : TT_RequiresClause);
3163 
3164   parseConstraintExpression();
3165 
3166   if (!InRequiresExpression)
3167     FormatTok->Previous->ClosesRequiresClause = true;
3168 }
3169 
3170 /// \brief Parses a requires expression.
3171 /// \param RequiresToken The requires keyword token, which starts this clause.
3172 /// \pre We need to be on the next token after the requires keyword.
3173 /// \sa parseRequiresClause
3174 ///
3175 /// Returns if it either has finished parsing the expression, or it detects,
3176 /// that the expression is incorrect.
3177 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3178   assert(FormatTok->getPreviousNonComment() == RequiresToken);
3179   assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3180 
3181   RequiresToken->setFinalizedType(TT_RequiresExpression);
3182 
3183   if (FormatTok->is(tok::l_paren)) {
3184     FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3185     parseParens();
3186   }
3187 
3188   if (FormatTok->is(tok::l_brace)) {
3189     FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3190     parseChildBlock(/*CanContainBracedList=*/false,
3191                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3192   }
3193 }
3194 
3195 /// \brief Parses a constraint expression.
3196 ///
3197 /// This is either the definition of a concept, or the body of a requires
3198 /// clause. It returns, when the parsing is complete, or the expression is
3199 /// incorrect.
3200 void UnwrappedLineParser::parseConstraintExpression() {
3201   // The special handling for lambdas is needed since tryToParseLambda() eats a
3202   // token and if a requires expression is the last part of a requires clause
3203   // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3204   // not set on the correct token. Thus we need to be aware if we even expect a
3205   // lambda to be possible.
3206   // template <typename T> requires requires { ... } [[nodiscard]] ...;
3207   bool LambdaNextTimeAllowed = true;
3208   do {
3209     bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3210 
3211     switch (FormatTok->Tok.getKind()) {
3212     case tok::kw_requires: {
3213       auto RequiresToken = FormatTok;
3214       nextToken();
3215       parseRequiresExpression(RequiresToken);
3216       break;
3217     }
3218 
3219     case tok::l_paren:
3220       parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3221       break;
3222 
3223     case tok::l_square:
3224       if (!LambdaThisTimeAllowed || !tryToParseLambda())
3225         return;
3226       break;
3227 
3228     case tok::kw_const:
3229     case tok::semi:
3230     case tok::kw_class:
3231     case tok::kw_struct:
3232     case tok::kw_union:
3233       return;
3234 
3235     case tok::l_brace:
3236       // Potential function body.
3237       return;
3238 
3239     case tok::ampamp:
3240     case tok::pipepipe:
3241       FormatTok->setFinalizedType(TT_BinaryOperator);
3242       nextToken();
3243       LambdaNextTimeAllowed = true;
3244       break;
3245 
3246     case tok::comma:
3247     case tok::comment:
3248       LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3249       nextToken();
3250       break;
3251 
3252     case tok::kw_sizeof:
3253     case tok::greater:
3254     case tok::greaterequal:
3255     case tok::greatergreater:
3256     case tok::less:
3257     case tok::lessequal:
3258     case tok::lessless:
3259     case tok::equalequal:
3260     case tok::exclaim:
3261     case tok::exclaimequal:
3262     case tok::plus:
3263     case tok::minus:
3264     case tok::star:
3265     case tok::slash:
3266     case tok::kw_decltype:
3267       LambdaNextTimeAllowed = true;
3268       // Just eat them.
3269       nextToken();
3270       break;
3271 
3272     case tok::numeric_constant:
3273     case tok::coloncolon:
3274     case tok::kw_true:
3275     case tok::kw_false:
3276       // Just eat them.
3277       nextToken();
3278       break;
3279 
3280     case tok::kw_static_cast:
3281     case tok::kw_const_cast:
3282     case tok::kw_reinterpret_cast:
3283     case tok::kw_dynamic_cast:
3284       nextToken();
3285       if (!FormatTok->is(tok::less))
3286         return;
3287 
3288       nextToken();
3289       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3290                       /*ClosingBraceKind=*/tok::greater);
3291       break;
3292 
3293     case tok::kw_bool:
3294       // bool is only allowed if it is directly followed by a paren for a cast:
3295       // concept C = bool(...);
3296       // and bool is the only type, all other types as cast must be inside a
3297       // cast to bool an thus are handled by the other cases.
3298       nextToken();
3299       if (FormatTok->isNot(tok::l_paren))
3300         return;
3301       parseParens();
3302       break;
3303 
3304     default:
3305       if (!FormatTok->Tok.getIdentifierInfo()) {
3306         // Identifiers are part of the default case, we check for more then
3307         // tok::identifier to handle builtin type traits.
3308         return;
3309       }
3310 
3311       // We need to differentiate identifiers for a template deduction guide,
3312       // variables, or function return types (the constraint expression has
3313       // ended before that), and basically all other cases. But it's easier to
3314       // check the other way around.
3315       assert(FormatTok->Previous);
3316       switch (FormatTok->Previous->Tok.getKind()) {
3317       case tok::coloncolon:  // Nested identifier.
3318       case tok::ampamp:      // Start of a function or variable for the
3319       case tok::pipepipe:    // constraint expression.
3320       case tok::kw_requires: // Initial identifier of a requires clause.
3321       case tok::equal:       // Initial identifier of a concept declaration.
3322         break;
3323       default:
3324         return;
3325       }
3326 
3327       // Read identifier with optional template declaration.
3328       nextToken();
3329       if (FormatTok->is(tok::less)) {
3330         nextToken();
3331         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3332                         /*ClosingBraceKind=*/tok::greater);
3333       }
3334       break;
3335     }
3336   } while (!eof());
3337 }
3338 
3339 bool UnwrappedLineParser::parseEnum() {
3340   const FormatToken &InitialToken = *FormatTok;
3341 
3342   // Won't be 'enum' for NS_ENUMs.
3343   if (FormatTok->is(tok::kw_enum))
3344     nextToken();
3345 
3346   // In TypeScript, "enum" can also be used as property name, e.g. in interface
3347   // declarations. An "enum" keyword followed by a colon would be a syntax
3348   // error and thus assume it is just an identifier.
3349   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3350     return false;
3351 
3352   // In protobuf, "enum" can be used as a field name.
3353   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3354     return false;
3355 
3356   // Eat up enum class ...
3357   if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3358     nextToken();
3359 
3360   while (FormatTok->Tok.getIdentifierInfo() ||
3361          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3362                             tok::greater, tok::comma, tok::question)) {
3363     nextToken();
3364     // We can have macros or attributes in between 'enum' and the enum name.
3365     if (FormatTok->is(tok::l_paren))
3366       parseParens();
3367     if (FormatTok->is(tok::identifier)) {
3368       nextToken();
3369       // If there are two identifiers in a row, this is likely an elaborate
3370       // return type. In Java, this can be "implements", etc.
3371       if (Style.isCpp() && FormatTok->is(tok::identifier))
3372         return false;
3373     }
3374   }
3375 
3376   // Just a declaration or something is wrong.
3377   if (FormatTok->isNot(tok::l_brace))
3378     return true;
3379   FormatTok->setFinalizedType(TT_EnumLBrace);
3380   FormatTok->setBlockKind(BK_Block);
3381 
3382   if (Style.Language == FormatStyle::LK_Java) {
3383     // Java enums are different.
3384     parseJavaEnumBody();
3385     return true;
3386   }
3387   if (Style.Language == FormatStyle::LK_Proto) {
3388     parseBlock(/*MustBeDeclaration=*/true);
3389     return true;
3390   }
3391 
3392   if (!Style.AllowShortEnumsOnASingleLine &&
3393       ShouldBreakBeforeBrace(Style, InitialToken))
3394     addUnwrappedLine();
3395   // Parse enum body.
3396   nextToken();
3397   if (!Style.AllowShortEnumsOnASingleLine) {
3398     addUnwrappedLine();
3399     Line->Level += 1;
3400   }
3401   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3402                                    /*IsEnum=*/true);
3403   if (!Style.AllowShortEnumsOnASingleLine)
3404     Line->Level -= 1;
3405   if (HasError) {
3406     if (FormatTok->is(tok::semi))
3407       nextToken();
3408     addUnwrappedLine();
3409   }
3410   return true;
3411 
3412   // There is no addUnwrappedLine() here so that we fall through to parsing a
3413   // structural element afterwards. Thus, in "enum A {} n, m;",
3414   // "} n, m;" will end up in one unwrapped line.
3415 }
3416 
3417 bool UnwrappedLineParser::parseStructLike() {
3418   // parseRecord falls through and does not yet add an unwrapped line as a
3419   // record declaration or definition can start a structural element.
3420   parseRecord();
3421   // This does not apply to Java, JavaScript and C#.
3422   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3423       Style.isCSharp()) {
3424     if (FormatTok->is(tok::semi))
3425       nextToken();
3426     addUnwrappedLine();
3427     return true;
3428   }
3429   return false;
3430 }
3431 
3432 namespace {
3433 // A class used to set and restore the Token position when peeking
3434 // ahead in the token source.
3435 class ScopedTokenPosition {
3436   unsigned StoredPosition;
3437   FormatTokenSource *Tokens;
3438 
3439 public:
3440   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3441     assert(Tokens && "Tokens expected to not be null");
3442     StoredPosition = Tokens->getPosition();
3443   }
3444 
3445   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3446 };
3447 } // namespace
3448 
3449 // Look to see if we have [[ by looking ahead, if
3450 // its not then rewind to the original position.
3451 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3452   ScopedTokenPosition AutoPosition(Tokens);
3453   FormatToken *Tok = Tokens->getNextToken();
3454   // We already read the first [ check for the second.
3455   if (!Tok->is(tok::l_square))
3456     return false;
3457   // Double check that the attribute is just something
3458   // fairly simple.
3459   while (Tok->isNot(tok::eof)) {
3460     if (Tok->is(tok::r_square))
3461       break;
3462     Tok = Tokens->getNextToken();
3463   }
3464   if (Tok->is(tok::eof))
3465     return false;
3466   Tok = Tokens->getNextToken();
3467   if (!Tok->is(tok::r_square))
3468     return false;
3469   Tok = Tokens->getNextToken();
3470   if (Tok->is(tok::semi))
3471     return false;
3472   return true;
3473 }
3474 
3475 void UnwrappedLineParser::parseJavaEnumBody() {
3476   assert(FormatTok->is(tok::l_brace));
3477   const FormatToken *OpeningBrace = FormatTok;
3478 
3479   // Determine whether the enum is simple, i.e. does not have a semicolon or
3480   // constants with class bodies. Simple enums can be formatted like braced
3481   // lists, contracted to a single line, etc.
3482   unsigned StoredPosition = Tokens->getPosition();
3483   bool IsSimple = true;
3484   FormatToken *Tok = Tokens->getNextToken();
3485   while (!Tok->is(tok::eof)) {
3486     if (Tok->is(tok::r_brace))
3487       break;
3488     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3489       IsSimple = false;
3490       break;
3491     }
3492     // FIXME: This will also mark enums with braces in the arguments to enum
3493     // constants as "not simple". This is probably fine in practice, though.
3494     Tok = Tokens->getNextToken();
3495   }
3496   FormatTok = Tokens->setPosition(StoredPosition);
3497 
3498   if (IsSimple) {
3499     nextToken();
3500     parseBracedList();
3501     addUnwrappedLine();
3502     return;
3503   }
3504 
3505   // Parse the body of a more complex enum.
3506   // First add a line for everything up to the "{".
3507   nextToken();
3508   addUnwrappedLine();
3509   ++Line->Level;
3510 
3511   // Parse the enum constants.
3512   while (FormatTok) {
3513     if (FormatTok->is(tok::l_brace)) {
3514       // Parse the constant's class body.
3515       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3516                  /*MunchSemi=*/false);
3517     } else if (FormatTok->is(tok::l_paren)) {
3518       parseParens();
3519     } else if (FormatTok->is(tok::comma)) {
3520       nextToken();
3521       addUnwrappedLine();
3522     } else if (FormatTok->is(tok::semi)) {
3523       nextToken();
3524       addUnwrappedLine();
3525       break;
3526     } else if (FormatTok->is(tok::r_brace)) {
3527       addUnwrappedLine();
3528       break;
3529     } else {
3530       nextToken();
3531     }
3532   }
3533 
3534   // Parse the class body after the enum's ";" if any.
3535   parseLevel(OpeningBrace, /*CanContainBracedList=*/true);
3536   nextToken();
3537   --Line->Level;
3538   addUnwrappedLine();
3539 }
3540 
3541 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3542   const FormatToken &InitialToken = *FormatTok;
3543   nextToken();
3544 
3545   // The actual identifier can be a nested name specifier, and in macros
3546   // it is often token-pasted.
3547   // An [[attribute]] can be before the identifier.
3548   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3549                             tok::kw___attribute, tok::kw___declspec,
3550                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3551          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3552           FormatTok->isOneOf(tok::period, tok::comma))) {
3553     if (Style.isJavaScript() &&
3554         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3555       // JavaScript/TypeScript supports inline object types in
3556       // extends/implements positions:
3557       //     class Foo implements {bar: number} { }
3558       nextToken();
3559       if (FormatTok->is(tok::l_brace)) {
3560         tryToParseBracedList();
3561         continue;
3562       }
3563     }
3564     bool IsNonMacroIdentifier =
3565         FormatTok->is(tok::identifier) &&
3566         FormatTok->TokenText != FormatTok->TokenText.upper();
3567     nextToken();
3568     // We can have macros or attributes in between 'class' and the class name.
3569     if (!IsNonMacroIdentifier) {
3570       if (FormatTok->is(tok::l_paren)) {
3571         parseParens();
3572       } else if (FormatTok->is(TT_AttributeSquare)) {
3573         parseSquare();
3574         // Consume the closing TT_AttributeSquare.
3575         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3576           nextToken();
3577       }
3578     }
3579   }
3580 
3581   // Note that parsing away template declarations here leads to incorrectly
3582   // accepting function declarations as record declarations.
3583   // In general, we cannot solve this problem. Consider:
3584   // class A<int> B() {}
3585   // which can be a function definition or a class definition when B() is a
3586   // macro. If we find enough real-world cases where this is a problem, we
3587   // can parse for the 'template' keyword in the beginning of the statement,
3588   // and thus rule out the record production in case there is no template
3589   // (this would still leave us with an ambiguity between template function
3590   // and class declarations).
3591   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3592     do {
3593       if (FormatTok->is(tok::l_brace)) {
3594         calculateBraceTypes(/*ExpectClassBody=*/true);
3595         if (!tryToParseBracedList())
3596           break;
3597       }
3598       if (FormatTok->is(tok::l_square)) {
3599         FormatToken *Previous = FormatTok->Previous;
3600         if (!Previous ||
3601             !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3602           // Don't try parsing a lambda if we had a closing parenthesis before,
3603           // it was probably a pointer to an array: int (*)[].
3604           if (!tryToParseLambda())
3605             break;
3606         } else {
3607           parseSquare();
3608           continue;
3609         }
3610       }
3611       if (FormatTok->is(tok::semi))
3612         return;
3613       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3614         addUnwrappedLine();
3615         nextToken();
3616         parseCSharpGenericTypeConstraint();
3617         break;
3618       }
3619       nextToken();
3620     } while (!eof());
3621   }
3622 
3623   auto GetBraceType = [](const FormatToken &RecordTok) {
3624     switch (RecordTok.Tok.getKind()) {
3625     case tok::kw_class:
3626       return TT_ClassLBrace;
3627     case tok::kw_struct:
3628       return TT_StructLBrace;
3629     case tok::kw_union:
3630       return TT_UnionLBrace;
3631     default:
3632       // Useful for e.g. interface.
3633       return TT_RecordLBrace;
3634     }
3635   };
3636   if (FormatTok->is(tok::l_brace)) {
3637     FormatTok->setFinalizedType(GetBraceType(InitialToken));
3638     if (ParseAsExpr) {
3639       parseChildBlock();
3640     } else {
3641       if (ShouldBreakBeforeBrace(Style, InitialToken))
3642         addUnwrappedLine();
3643 
3644       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3645       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3646     }
3647   }
3648   // There is no addUnwrappedLine() here so that we fall through to parsing a
3649   // structural element afterwards. Thus, in "class A {} n, m;",
3650   // "} n, m;" will end up in one unwrapped line.
3651 }
3652 
3653 void UnwrappedLineParser::parseObjCMethod() {
3654   assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3655          "'(' or identifier expected.");
3656   do {
3657     if (FormatTok->is(tok::semi)) {
3658       nextToken();
3659       addUnwrappedLine();
3660       return;
3661     } else if (FormatTok->is(tok::l_brace)) {
3662       if (Style.BraceWrapping.AfterFunction)
3663         addUnwrappedLine();
3664       parseBlock();
3665       addUnwrappedLine();
3666       return;
3667     } else {
3668       nextToken();
3669     }
3670   } while (!eof());
3671 }
3672 
3673 void UnwrappedLineParser::parseObjCProtocolList() {
3674   assert(FormatTok->is(tok::less) && "'<' expected.");
3675   do {
3676     nextToken();
3677     // Early exit in case someone forgot a close angle.
3678     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3679         FormatTok->isObjCAtKeyword(tok::objc_end))
3680       return;
3681   } while (!eof() && FormatTok->isNot(tok::greater));
3682   nextToken(); // Skip '>'.
3683 }
3684 
3685 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3686   do {
3687     if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3688       nextToken();
3689       addUnwrappedLine();
3690       break;
3691     }
3692     if (FormatTok->is(tok::l_brace)) {
3693       parseBlock();
3694       // In ObjC interfaces, nothing should be following the "}".
3695       addUnwrappedLine();
3696     } else if (FormatTok->is(tok::r_brace)) {
3697       // Ignore stray "}". parseStructuralElement doesn't consume them.
3698       nextToken();
3699       addUnwrappedLine();
3700     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3701       nextToken();
3702       parseObjCMethod();
3703     } else {
3704       parseStructuralElement();
3705     }
3706   } while (!eof());
3707 }
3708 
3709 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3710   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3711          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3712   nextToken();
3713   nextToken(); // interface name
3714 
3715   // @interface can be followed by a lightweight generic
3716   // specialization list, then either a base class or a category.
3717   if (FormatTok->is(tok::less))
3718     parseObjCLightweightGenerics();
3719   if (FormatTok->is(tok::colon)) {
3720     nextToken();
3721     nextToken(); // base class name
3722     // The base class can also have lightweight generics applied to it.
3723     if (FormatTok->is(tok::less))
3724       parseObjCLightweightGenerics();
3725   } else if (FormatTok->is(tok::l_paren))
3726     // Skip category, if present.
3727     parseParens();
3728 
3729   if (FormatTok->is(tok::less))
3730     parseObjCProtocolList();
3731 
3732   if (FormatTok->is(tok::l_brace)) {
3733     if (Style.BraceWrapping.AfterObjCDeclaration)
3734       addUnwrappedLine();
3735     parseBlock(/*MustBeDeclaration=*/true);
3736   }
3737 
3738   // With instance variables, this puts '}' on its own line.  Without instance
3739   // variables, this ends the @interface line.
3740   addUnwrappedLine();
3741 
3742   parseObjCUntilAtEnd();
3743 }
3744 
3745 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3746   assert(FormatTok->is(tok::less));
3747   // Unlike protocol lists, generic parameterizations support
3748   // nested angles:
3749   //
3750   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3751   //     NSObject <NSCopying, NSSecureCoding>
3752   //
3753   // so we need to count how many open angles we have left.
3754   unsigned NumOpenAngles = 1;
3755   do {
3756     nextToken();
3757     // Early exit in case someone forgot a close angle.
3758     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3759         FormatTok->isObjCAtKeyword(tok::objc_end))
3760       break;
3761     if (FormatTok->is(tok::less))
3762       ++NumOpenAngles;
3763     else if (FormatTok->is(tok::greater)) {
3764       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3765       --NumOpenAngles;
3766     }
3767   } while (!eof() && NumOpenAngles != 0);
3768   nextToken(); // Skip '>'.
3769 }
3770 
3771 // Returns true for the declaration/definition form of @protocol,
3772 // false for the expression form.
3773 bool UnwrappedLineParser::parseObjCProtocol() {
3774   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3775   nextToken();
3776 
3777   if (FormatTok->is(tok::l_paren))
3778     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3779     return false;
3780 
3781   // The definition/declaration form,
3782   // @protocol Foo
3783   // - (int)someMethod;
3784   // @end
3785 
3786   nextToken(); // protocol name
3787 
3788   if (FormatTok->is(tok::less))
3789     parseObjCProtocolList();
3790 
3791   // Check for protocol declaration.
3792   if (FormatTok->is(tok::semi)) {
3793     nextToken();
3794     addUnwrappedLine();
3795     return true;
3796   }
3797 
3798   addUnwrappedLine();
3799   parseObjCUntilAtEnd();
3800   return true;
3801 }
3802 
3803 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3804   bool IsImport = FormatTok->is(Keywords.kw_import);
3805   assert(IsImport || FormatTok->is(tok::kw_export));
3806   nextToken();
3807 
3808   // Consume the "default" in "export default class/function".
3809   if (FormatTok->is(tok::kw_default))
3810     nextToken();
3811 
3812   // Consume "async function", "function" and "default function", so that these
3813   // get parsed as free-standing JS functions, i.e. do not require a trailing
3814   // semicolon.
3815   if (FormatTok->is(Keywords.kw_async))
3816     nextToken();
3817   if (FormatTok->is(Keywords.kw_function)) {
3818     nextToken();
3819     return;
3820   }
3821 
3822   // For imports, `export *`, `export {...}`, consume the rest of the line up
3823   // to the terminating `;`. For everything else, just return and continue
3824   // parsing the structural element, i.e. the declaration or expression for
3825   // `export default`.
3826   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3827       !FormatTok->isStringLiteral())
3828     return;
3829 
3830   while (!eof()) {
3831     if (FormatTok->is(tok::semi))
3832       return;
3833     if (Line->Tokens.empty()) {
3834       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3835       // import statement should terminate.
3836       return;
3837     }
3838     if (FormatTok->is(tok::l_brace)) {
3839       FormatTok->setBlockKind(BK_Block);
3840       nextToken();
3841       parseBracedList();
3842     } else {
3843       nextToken();
3844     }
3845   }
3846 }
3847 
3848 void UnwrappedLineParser::parseStatementMacro() {
3849   nextToken();
3850   if (FormatTok->is(tok::l_paren))
3851     parseParens();
3852   if (FormatTok->is(tok::semi))
3853     nextToken();
3854   addUnwrappedLine();
3855 }
3856 
3857 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3858                                                  StringRef Prefix = "") {
3859   llvm::dbgs() << Prefix << "Line(" << Line.Level
3860                << ", FSC=" << Line.FirstStartColumn << ")"
3861                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3862   for (const auto &Node : Line.Tokens) {
3863     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3864                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3865                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3866   }
3867   for (const auto &Node : Line.Tokens)
3868     for (const auto &ChildNode : Node.Children)
3869       printDebugInfo(ChildNode, "\nChild: ");
3870 
3871   llvm::dbgs() << "\n";
3872 }
3873 
3874 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3875   if (Line->Tokens.empty())
3876     return;
3877   LLVM_DEBUG({
3878     if (CurrentLines == &Lines)
3879       printDebugInfo(*Line);
3880   });
3881 
3882   // If this line closes a block when in Whitesmiths mode, remember that
3883   // information so that the level can be decreased after the line is added.
3884   // This has to happen after the addition of the line since the line itself
3885   // needs to be indented.
3886   bool ClosesWhitesmithsBlock =
3887       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3888       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3889 
3890   CurrentLines->push_back(std::move(*Line));
3891   Line->Tokens.clear();
3892   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3893   Line->FirstStartColumn = 0;
3894 
3895   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3896     --Line->Level;
3897   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3898     CurrentLines->append(
3899         std::make_move_iterator(PreprocessorDirectives.begin()),
3900         std::make_move_iterator(PreprocessorDirectives.end()));
3901     PreprocessorDirectives.clear();
3902   }
3903   // Disconnect the current token from the last token on the previous line.
3904   FormatTok->Previous = nullptr;
3905 }
3906 
3907 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
3908 
3909 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3910   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3911          FormatTok.NewlinesBefore > 0;
3912 }
3913 
3914 // Checks if \p FormatTok is a line comment that continues the line comment
3915 // section on \p Line.
3916 static bool
3917 continuesLineCommentSection(const FormatToken &FormatTok,
3918                             const UnwrappedLine &Line,
3919                             const llvm::Regex &CommentPragmasRegex) {
3920   if (Line.Tokens.empty())
3921     return false;
3922 
3923   StringRef IndentContent = FormatTok.TokenText;
3924   if (FormatTok.TokenText.startswith("//") ||
3925       FormatTok.TokenText.startswith("/*"))
3926     IndentContent = FormatTok.TokenText.substr(2);
3927   if (CommentPragmasRegex.match(IndentContent))
3928     return false;
3929 
3930   // If Line starts with a line comment, then FormatTok continues the comment
3931   // section if its original column is greater or equal to the original start
3932   // column of the line.
3933   //
3934   // Define the min column token of a line as follows: if a line ends in '{' or
3935   // contains a '{' followed by a line comment, then the min column token is
3936   // that '{'. Otherwise, the min column token of the line is the first token of
3937   // the line.
3938   //
3939   // If Line starts with a token other than a line comment, then FormatTok
3940   // continues the comment section if its original column is greater than the
3941   // original start column of the min column token of the line.
3942   //
3943   // For example, the second line comment continues the first in these cases:
3944   //
3945   // // first line
3946   // // second line
3947   //
3948   // and:
3949   //
3950   // // first line
3951   //  // second line
3952   //
3953   // and:
3954   //
3955   // int i; // first line
3956   //  // second line
3957   //
3958   // and:
3959   //
3960   // do { // first line
3961   //      // second line
3962   //   int i;
3963   // } while (true);
3964   //
3965   // and:
3966   //
3967   // enum {
3968   //   a, // first line
3969   //    // second line
3970   //   b
3971   // };
3972   //
3973   // The second line comment doesn't continue the first in these cases:
3974   //
3975   //   // first line
3976   //  // second line
3977   //
3978   // and:
3979   //
3980   // int i; // first line
3981   // // second line
3982   //
3983   // and:
3984   //
3985   // do { // first line
3986   //   // second line
3987   //   int i;
3988   // } while (true);
3989   //
3990   // and:
3991   //
3992   // enum {
3993   //   a, // first line
3994   //   // second line
3995   // };
3996   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3997 
3998   // Scan for '{//'. If found, use the column of '{' as a min column for line
3999   // comment section continuation.
4000   const FormatToken *PreviousToken = nullptr;
4001   for (const UnwrappedLineNode &Node : Line.Tokens) {
4002     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4003         isLineComment(*Node.Tok)) {
4004       MinColumnToken = PreviousToken;
4005       break;
4006     }
4007     PreviousToken = Node.Tok;
4008 
4009     // Grab the last newline preceding a token in this unwrapped line.
4010     if (Node.Tok->NewlinesBefore > 0)
4011       MinColumnToken = Node.Tok;
4012   }
4013   if (PreviousToken && PreviousToken->is(tok::l_brace))
4014     MinColumnToken = PreviousToken;
4015 
4016   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4017                               MinColumnToken);
4018 }
4019 
4020 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4021   bool JustComments = Line->Tokens.empty();
4022   for (FormatToken *Tok : CommentsBeforeNextToken) {
4023     // Line comments that belong to the same line comment section are put on the
4024     // same line since later we might want to reflow content between them.
4025     // Additional fine-grained breaking of line comment sections is controlled
4026     // by the class BreakableLineCommentSection in case it is desirable to keep
4027     // several line comment sections in the same unwrapped line.
4028     //
4029     // FIXME: Consider putting separate line comment sections as children to the
4030     // unwrapped line instead.
4031     Tok->ContinuesLineCommentSection =
4032         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4033     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4034       addUnwrappedLine();
4035     pushToken(Tok);
4036   }
4037   if (NewlineBeforeNext && JustComments)
4038     addUnwrappedLine();
4039   CommentsBeforeNextToken.clear();
4040 }
4041 
4042 void UnwrappedLineParser::nextToken(int LevelDifference) {
4043   if (eof())
4044     return;
4045   flushComments(isOnNewLine(*FormatTok));
4046   pushToken(FormatTok);
4047   FormatToken *Previous = FormatTok;
4048   if (!Style.isJavaScript())
4049     readToken(LevelDifference);
4050   else
4051     readTokenWithJavaScriptASI();
4052   FormatTok->Previous = Previous;
4053 }
4054 
4055 void UnwrappedLineParser::distributeComments(
4056     const SmallVectorImpl<FormatToken *> &Comments,
4057     const FormatToken *NextTok) {
4058   // Whether or not a line comment token continues a line is controlled by
4059   // the method continuesLineCommentSection, with the following caveat:
4060   //
4061   // Define a trail of Comments to be a nonempty proper postfix of Comments such
4062   // that each comment line from the trail is aligned with the next token, if
4063   // the next token exists. If a trail exists, the beginning of the maximal
4064   // trail is marked as a start of a new comment section.
4065   //
4066   // For example in this code:
4067   //
4068   // int a; // line about a
4069   //   // line 1 about b
4070   //   // line 2 about b
4071   //   int b;
4072   //
4073   // the two lines about b form a maximal trail, so there are two sections, the
4074   // first one consisting of the single comment "// line about a" and the
4075   // second one consisting of the next two comments.
4076   if (Comments.empty())
4077     return;
4078   bool ShouldPushCommentsInCurrentLine = true;
4079   bool HasTrailAlignedWithNextToken = false;
4080   unsigned StartOfTrailAlignedWithNextToken = 0;
4081   if (NextTok) {
4082     // We are skipping the first element intentionally.
4083     for (unsigned i = Comments.size() - 1; i > 0; --i) {
4084       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4085         HasTrailAlignedWithNextToken = true;
4086         StartOfTrailAlignedWithNextToken = i;
4087       }
4088     }
4089   }
4090   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4091     FormatToken *FormatTok = Comments[i];
4092     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4093       FormatTok->ContinuesLineCommentSection = false;
4094     } else {
4095       FormatTok->ContinuesLineCommentSection =
4096           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4097     }
4098     if (!FormatTok->ContinuesLineCommentSection &&
4099         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
4100       ShouldPushCommentsInCurrentLine = false;
4101     if (ShouldPushCommentsInCurrentLine)
4102       pushToken(FormatTok);
4103     else
4104       CommentsBeforeNextToken.push_back(FormatTok);
4105   }
4106 }
4107 
4108 void UnwrappedLineParser::readToken(int LevelDifference) {
4109   SmallVector<FormatToken *, 1> Comments;
4110   bool PreviousWasComment = false;
4111   bool FirstNonCommentOnLine = false;
4112   do {
4113     FormatTok = Tokens->getNextToken();
4114     assert(FormatTok);
4115     while (FormatTok->getType() == TT_ConflictStart ||
4116            FormatTok->getType() == TT_ConflictEnd ||
4117            FormatTok->getType() == TT_ConflictAlternative) {
4118       if (FormatTok->getType() == TT_ConflictStart)
4119         conditionalCompilationStart(/*Unreachable=*/false);
4120       else if (FormatTok->getType() == TT_ConflictAlternative)
4121         conditionalCompilationAlternative();
4122       else if (FormatTok->getType() == TT_ConflictEnd)
4123         conditionalCompilationEnd();
4124       FormatTok = Tokens->getNextToken();
4125       FormatTok->MustBreakBefore = true;
4126     }
4127 
4128     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4129                                       const FormatToken &Tok,
4130                                       bool PreviousWasComment) {
4131       auto IsFirstOnLine = [](const FormatToken &Tok) {
4132         return Tok.HasUnescapedNewline || Tok.IsFirst;
4133       };
4134 
4135       // Consider preprocessor directives preceded by block comments as first
4136       // on line.
4137       if (PreviousWasComment)
4138         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4139       return IsFirstOnLine(Tok);
4140     };
4141 
4142     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4143         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4144     PreviousWasComment = FormatTok->is(tok::comment);
4145 
4146     while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4147            FirstNonCommentOnLine) {
4148       distributeComments(Comments, FormatTok);
4149       Comments.clear();
4150       // If there is an unfinished unwrapped line, we flush the preprocessor
4151       // directives only after that unwrapped line was finished later.
4152       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4153       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4154       assert((LevelDifference >= 0 ||
4155               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4156              "LevelDifference makes Line->Level negative");
4157       Line->Level += LevelDifference;
4158       // Comments stored before the preprocessor directive need to be output
4159       // before the preprocessor directive, at the same level as the
4160       // preprocessor directive, as we consider them to apply to the directive.
4161       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4162           PPBranchLevel > 0)
4163         Line->Level += PPBranchLevel;
4164       flushComments(isOnNewLine(*FormatTok));
4165       parsePPDirective();
4166       PreviousWasComment = FormatTok->is(tok::comment);
4167       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4168           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4169     }
4170 
4171     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4172         !Line->InPPDirective)
4173       continue;
4174 
4175     if (!FormatTok->is(tok::comment)) {
4176       distributeComments(Comments, FormatTok);
4177       Comments.clear();
4178       return;
4179     }
4180 
4181     Comments.push_back(FormatTok);
4182   } while (!eof());
4183 
4184   distributeComments(Comments, nullptr);
4185   Comments.clear();
4186 }
4187 
4188 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4189   Line->Tokens.push_back(UnwrappedLineNode(Tok));
4190   if (MustBreakBeforeNextToken) {
4191     Line->Tokens.back().Tok->MustBreakBefore = true;
4192     MustBreakBeforeNextToken = false;
4193   }
4194 }
4195 
4196 } // end namespace format
4197 } // end namespace clang
4198