1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31 
32   // Returns the next token in the token stream.
33   virtual FormatToken *getNextToken() = 0;
34 
35   // Returns the token preceding the token returned by the last call to
36   // getNextToken() in the token stream, or nullptr if no such token exists.
37   virtual FormatToken *getPreviousToken() = 0;
38 
39   // Returns the token that would be returned by the next call to
40   // getNextToken().
41   virtual FormatToken *peekNextToken() = 0;
42 
43   // Returns whether we are at the end of the file.
44   // This can be different from whether getNextToken() returned an eof token
45   // when the FormatTokenSource is a view on a part of the token stream.
46   virtual bool isEOF() = 0;
47 
48   // Gets the current position in the token stream, to be used by setPosition().
49   virtual unsigned getPosition() = 0;
50 
51   // Resets the token stream to the state it was in when getPosition() returned
52   // Position, and return the token at that position in the stream.
53   virtual FormatToken *setPosition(unsigned Position) = 0;
54 };
55 
56 namespace {
57 
58 class ScopedDeclarationState {
59 public:
60   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
61                          bool MustBeDeclaration)
62       : Line(Line), Stack(Stack) {
63     Line.MustBeDeclaration = MustBeDeclaration;
64     Stack.push_back(MustBeDeclaration);
65   }
66   ~ScopedDeclarationState() {
67     Stack.pop_back();
68     if (!Stack.empty())
69       Line.MustBeDeclaration = Stack.back();
70     else
71       Line.MustBeDeclaration = true;
72   }
73 
74 private:
75   UnwrappedLine &Line;
76   llvm::BitVector &Stack;
77 };
78 
79 static bool isLineComment(const FormatToken &FormatTok) {
80   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
81 }
82 
83 // Checks if \p FormatTok is a line comment that continues the line comment
84 // \p Previous. The original column of \p MinColumnToken is used to determine
85 // whether \p FormatTok is indented enough to the right to continue \p Previous.
86 static bool continuesLineComment(const FormatToken &FormatTok,
87                                  const FormatToken *Previous,
88                                  const FormatToken *MinColumnToken) {
89   if (!Previous || !MinColumnToken)
90     return false;
91   unsigned MinContinueColumn =
92       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
93   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
94          isLineComment(*Previous) &&
95          FormatTok.OriginalColumn >= MinContinueColumn;
96 }
97 
98 class ScopedMacroState : public FormatTokenSource {
99 public:
100   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
101                    FormatToken *&ResetToken)
102       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
103         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
104         Token(nullptr), PreviousToken(nullptr) {
105     FakeEOF.Tok.startToken();
106     FakeEOF.Tok.setKind(tok::eof);
107     TokenSource = this;
108     Line.Level = 0;
109     Line.InPPDirective = true;
110   }
111 
112   ~ScopedMacroState() override {
113     TokenSource = PreviousTokenSource;
114     ResetToken = Token;
115     Line.InPPDirective = false;
116     Line.Level = PreviousLineLevel;
117   }
118 
119   FormatToken *getNextToken() override {
120     // The \c UnwrappedLineParser guards against this by never calling
121     // \c getNextToken() after it has encountered the first eof token.
122     assert(!eof());
123     PreviousToken = Token;
124     Token = PreviousTokenSource->getNextToken();
125     if (eof())
126       return &FakeEOF;
127     return Token;
128   }
129 
130   FormatToken *getPreviousToken() override {
131     return PreviousTokenSource->getPreviousToken();
132   }
133 
134   FormatToken *peekNextToken() override {
135     if (eof())
136       return &FakeEOF;
137     return PreviousTokenSource->peekNextToken();
138   }
139 
140   bool isEOF() override { return PreviousTokenSource->isEOF(); }
141 
142   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
143 
144   FormatToken *setPosition(unsigned Position) override {
145     PreviousToken = nullptr;
146     Token = PreviousTokenSource->setPosition(Position);
147     return Token;
148   }
149 
150 private:
151   bool eof() {
152     return Token && Token->HasUnescapedNewline &&
153            !continuesLineComment(*Token, PreviousToken,
154                                  /*MinColumnToken=*/PreviousToken);
155   }
156 
157   FormatToken FakeEOF;
158   UnwrappedLine &Line;
159   FormatTokenSource *&TokenSource;
160   FormatToken *&ResetToken;
161   unsigned PreviousLineLevel;
162   FormatTokenSource *PreviousTokenSource;
163 
164   FormatToken *Token;
165   FormatToken *PreviousToken;
166 };
167 
168 } // end anonymous namespace
169 
170 class ScopedLineState {
171 public:
172   ScopedLineState(UnwrappedLineParser &Parser,
173                   bool SwitchToPreprocessorLines = false)
174       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
175     if (SwitchToPreprocessorLines)
176       Parser.CurrentLines = &Parser.PreprocessorDirectives;
177     else if (!Parser.Line->Tokens.empty())
178       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
179     PreBlockLine = std::move(Parser.Line);
180     Parser.Line = std::make_unique<UnwrappedLine>();
181     Parser.Line->Level = PreBlockLine->Level;
182     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
183   }
184 
185   ~ScopedLineState() {
186     if (!Parser.Line->Tokens.empty())
187       Parser.addUnwrappedLine();
188     assert(Parser.Line->Tokens.empty());
189     Parser.Line = std::move(PreBlockLine);
190     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
191       Parser.MustBreakBeforeNextToken = true;
192     Parser.CurrentLines = OriginalLines;
193   }
194 
195 private:
196   UnwrappedLineParser &Parser;
197 
198   std::unique_ptr<UnwrappedLine> PreBlockLine;
199   SmallVectorImpl<UnwrappedLine> *OriginalLines;
200 };
201 
202 class CompoundStatementIndenter {
203 public:
204   CompoundStatementIndenter(UnwrappedLineParser *Parser,
205                             const FormatStyle &Style, unsigned &LineLevel)
206       : CompoundStatementIndenter(Parser, LineLevel,
207                                   Style.BraceWrapping.AfterControlStatement,
208                                   Style.BraceWrapping.IndentBraces) {}
209   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
210                             bool WrapBrace, bool IndentBrace)
211       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
212     if (WrapBrace)
213       Parser->addUnwrappedLine();
214     if (IndentBrace)
215       ++LineLevel;
216   }
217   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
218 
219 private:
220   unsigned &LineLevel;
221   unsigned OldLineLevel;
222 };
223 
224 namespace {
225 
226 class IndexedTokenSource : public FormatTokenSource {
227 public:
228   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
229       : Tokens(Tokens), Position(-1) {}
230 
231   FormatToken *getNextToken() override {
232     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
233       LLVM_DEBUG({
234         llvm::dbgs() << "Next ";
235         dbgToken(Position);
236       });
237       return Tokens[Position];
238     }
239     ++Position;
240     LLVM_DEBUG({
241       llvm::dbgs() << "Next ";
242       dbgToken(Position);
243     });
244     return Tokens[Position];
245   }
246 
247   FormatToken *getPreviousToken() override {
248     return Position > 0 ? Tokens[Position - 1] : nullptr;
249   }
250 
251   FormatToken *peekNextToken() override {
252     int Next = Position + 1;
253     LLVM_DEBUG({
254       llvm::dbgs() << "Peeking ";
255       dbgToken(Next);
256     });
257     return Tokens[Next];
258   }
259 
260   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
261 
262   unsigned getPosition() override {
263     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
264     assert(Position >= 0);
265     return Position;
266   }
267 
268   FormatToken *setPosition(unsigned P) override {
269     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
270     Position = P;
271     return Tokens[Position];
272   }
273 
274   void reset() { Position = -1; }
275 
276 private:
277   void dbgToken(int Position, llvm::StringRef Indent = "") {
278     FormatToken *Tok = Tokens[Position];
279     llvm::dbgs() << Indent << "[" << Position
280                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
281                  << ", Macro: " << !!Tok->MacroCtx << "\n";
282   }
283 
284   ArrayRef<FormatToken *> Tokens;
285   int Position;
286 };
287 
288 } // end anonymous namespace
289 
/// Sets up a parser over \p Tokens that reports finished lines to \p Callback.
///
/// The parser starts with a fresh, empty UnwrappedLine, collects lines into
/// its own \c Lines container, has no active token source yet (\c Tokens
/// member is null until parse() installs one) and a preprocessor branch level
/// of -1. Include-guard detection starts in \c IG_Rejected when
/// \c Style.IndentPPDirectives is \c PPDIS_None and in \c IG_Inited otherwise.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
303 
304 void UnwrappedLineParser::reset() {
305   PPBranchLevel = -1;
306   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
307                      ? IG_Rejected
308                      : IG_Inited;
309   IncludeGuardToken = nullptr;
310   Line.reset(new UnwrappedLine);
311   CommentsBeforeNextToken.clear();
312   FormatTok = nullptr;
313   MustBreakBeforeNextToken = false;
314   PreprocessorDirectives.clear();
315   CurrentLines = &Lines;
316   DeclarationScopeStack.clear();
317   NestedTooDeep.clear();
318   PPStack.clear();
319   Line->FirstStartColumn = FirstStartColumn;
320 }
321 
322 void UnwrappedLineParser::parse() {
323   IndexedTokenSource TokenSource(AllTokens);
324   Line->FirstStartColumn = FirstStartColumn;
325   do {
326     LLVM_DEBUG(llvm::dbgs() << "----\n");
327     reset();
328     Tokens = &TokenSource;
329     TokenSource.reset();
330 
331     readToken();
332     parseFile();
333 
334     // If we found an include guard then all preprocessor directives (other than
335     // the guard) are over-indented by one.
336     if (IncludeGuard == IG_Found)
337       for (auto &Line : Lines)
338         if (Line.InPPDirective && Line.Level > 0)
339           --Line.Level;
340 
341     // Create line with eof token.
342     pushToken(FormatTok);
343     addUnwrappedLine();
344 
345     for (const UnwrappedLine &Line : Lines)
346       Callback.consumeUnwrappedLine(Line);
347 
348     Callback.finishRun();
349     Lines.clear();
350     while (!PPLevelBranchIndex.empty() &&
351            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
352       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
353       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
354     }
355     if (!PPLevelBranchIndex.empty()) {
356       ++PPLevelBranchIndex.back();
357       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
358       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
359     }
360   } while (!PPLevelBranchIndex.empty());
361 }
362 
363 void UnwrappedLineParser::parseFile() {
364   // The top-level context in a file always has declarations, except for pre-
365   // processor directives and JavaScript files.
366   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
367   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
368                                           MustBeDeclaration);
369   if (Style.Language == FormatStyle::LK_TextProto)
370     parseBracedList();
371   else
372     parseLevel(/*HasOpeningBrace=*/false, /*CanContainBracedList=*/true);
373   // Make sure to format the remaining tokens.
374   //
375   // LK_TextProto is special since its top-level is parsed as the body of a
376   // braced list, which does not necessarily have natural line separators such
377   // as a semicolon. Comments after the last entry that have been determined to
378   // not belong to that line, as in:
379   //   key: value
380   //   // endfile comment
381   // do not have a chance to be put on a line of their own until this point.
382   // Here we add this newline before end-of-file comments.
383   if (Style.Language == FormatStyle::LK_TextProto &&
384       !CommentsBeforeNextToken.empty())
385     addUnwrappedLine();
386   flushComments(true);
387   addUnwrappedLine();
388 }
389 
390 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
391   do {
392     switch (FormatTok->Tok.getKind()) {
393     case tok::l_brace:
394       return;
395     default:
396       if (FormatTok->is(Keywords.kw_where)) {
397         addUnwrappedLine();
398         nextToken();
399         parseCSharpGenericTypeConstraint();
400         break;
401       }
402       nextToken();
403       break;
404     }
405   } while (!eof());
406 }
407 
408 void UnwrappedLineParser::parseCSharpAttribute() {
409   int UnpairedSquareBrackets = 1;
410   do {
411     switch (FormatTok->Tok.getKind()) {
412     case tok::r_square:
413       nextToken();
414       --UnpairedSquareBrackets;
415       if (UnpairedSquareBrackets == 0) {
416         addUnwrappedLine();
417         return;
418       }
419       break;
420     case tok::l_square:
421       ++UnpairedSquareBrackets;
422       nextToken();
423       break;
424     default:
425       nextToken();
426       break;
427     }
428   } while (!eof());
429 }
430 
431 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
432   if (!Lines.empty() && Lines.back().InPPDirective)
433     return true;
434 
435   const FormatToken *Previous = Tokens->getPreviousToken();
436   return Previous && Previous->is(tok::comment) &&
437          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
438 }
/// \brief Parses one nesting level of the token stream: a run of comments,
/// nested blocks, case labels and other structural elements. Parsing continues
/// until end of file or, when \p HasOpeningBrace is set, until the closing
/// brace of this level is reached; that brace is left for the caller to
/// consume.
/// \param HasOpeningBrace If that level is started by an opening brace.
/// \param CanContainBracedList If the content can contain (at any level) a
/// braced list.
/// \param IfKind Passed on to parseStructuralElement() for elements that take
/// the default path.
/// \param NextLBracesType The type for left brace found in this level.
/// \returns true if a simple block, or false otherwise. (A simple block has a
/// single statement.) true is only ever returned when the closing brace of a
/// braced level is reached with \c Style.RemoveBracesLLVM enabled, exactly one
/// statement was counted, neither the level nor the closing brace is preceded
/// by a comment or preprocessor directive, and the token after the brace is
/// not a comment on the same line.
bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace,
                                     bool CanContainBracedList,
                                     IfStmtKind *IfKind,
                                     TokenType NextLBracesType) {
  // Braces nested inside a compound requirement are treated as braced lists;
  // any other level passes no particular brace type down.
  auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
                                  ? TT_BracedListLBrace
                                  : TT_Unknown;
  // Evaluated once on entry. Forced to true when brace removal is disabled, in
  // which case the result of this function is false anyway.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  // Counts nested blocks and elements parsed via ParseDefault; comments and
  // case labels are not counted.
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    // Shared by the default case and the requires-clause special case below.
    auto ParseDefault = [this, HasOpeningBrace, IfKind, NextLevelLBracesType,
                         &StatementCount] {
      parseStructuralElement(IfKind, /*IsTopLevel=*/!HasOpeningBrace,
                             /*NextLBracesType=*/NextLevelLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
    };

    switch (kind) {
    case tok::comment:
      // A comment at this level becomes a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      if (NextLBracesType != TT_Unknown)
        FormatTok->setType(NextLBracesType);
      else if (FormatTok->Previous &&
               FormatTok->Previous->ClosesRequiresClause) {
        // We need the 'default' case here to correctly parse a function
        // l_brace.
        ParseDefault();
        continue;
      }
      if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
          tryToParseBracedList())
        continue;
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/true, /*UnindentWhitesmithsBraces=*/false,
                 CanContainBracedList,
                 /*NextLBracesType=*/NextLBracesType);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace) {
        // End of this level. The brace itself is not consumed here.
        if (!Style.RemoveBracesLLVM)
          return false;
        // A TT_MacroBlockEnd token also reaches this case but is not a real
        // r_brace, hence the explicit kind check.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective())
          return false;
        // A comment trailing the brace on the same line rules out a simple
        // block.
        const FormatToken *Next = Tokens->peekNextToken();
        return Next->isNot(tok::comment) || Next->NewlinesBefore > 0;
      }
      // Closing brace without a matching opening brace at this level: emit it
      // as its own line and carry on.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look past any comments to the token after 'default', then rewind.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
        assert(Next);
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Only the first label of the level adds indentation.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      LLVM_FALLTHROUGH;
    default:
      ParseDefault();
      break;
    }
  } while (!eof());
  return false;
}
555 
556 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
557   // We'll parse forward through the tokens until we hit
558   // a closing brace or eof - note that getNextToken() will
559   // parse macros, so this will magically work inside macro
560   // definitions, too.
561   unsigned StoredPosition = Tokens->getPosition();
562   FormatToken *Tok = FormatTok;
563   const FormatToken *PrevTok = Tok->Previous;
564   // Keep a stack of positions of lbrace tokens. We will
565   // update information about whether an lbrace starts a
566   // braced init list or a different block during the loop.
567   SmallVector<FormatToken *, 8> LBraceStack;
568   assert(Tok->Tok.is(tok::l_brace));
569   do {
570     // Get next non-comment token.
571     FormatToken *NextTok;
572     unsigned ReadTokens = 0;
573     do {
574       NextTok = Tokens->getNextToken();
575       ++ReadTokens;
576     } while (NextTok->is(tok::comment));
577 
578     switch (Tok->Tok.getKind()) {
579     case tok::l_brace:
580       if (Style.isJavaScript() && PrevTok) {
581         if (PrevTok->isOneOf(tok::colon, tok::less))
582           // A ':' indicates this code is in a type, or a braced list
583           // following a label in an object literal ({a: {b: 1}}).
584           // A '<' could be an object used in a comparison, but that is nonsense
585           // code (can never return true), so more likely it is a generic type
586           // argument (`X<{a: string; b: number}>`).
587           // The code below could be confused by semicolons between the
588           // individual members in a type member list, which would normally
589           // trigger BK_Block. In both cases, this must be parsed as an inline
590           // braced init.
591           Tok->setBlockKind(BK_BracedInit);
592         else if (PrevTok->is(tok::r_paren))
593           // `) { }` can only occur in function or method declarations in JS.
594           Tok->setBlockKind(BK_Block);
595       } else {
596         Tok->setBlockKind(BK_Unknown);
597       }
598       LBraceStack.push_back(Tok);
599       break;
600     case tok::r_brace:
601       if (LBraceStack.empty())
602         break;
603       if (LBraceStack.back()->is(BK_Unknown)) {
604         bool ProbablyBracedList = false;
605         if (Style.Language == FormatStyle::LK_Proto) {
606           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
607         } else {
608           // Skip NextTok over preprocessor lines, otherwise we may not
609           // properly diagnose the block as a braced intializer
610           // if the comma separator appears after the pp directive.
611           while (NextTok->is(tok::hash)) {
612             ScopedMacroState MacroState(*Line, Tokens, NextTok);
613             do {
614               NextTok = Tokens->getNextToken();
615               ++ReadTokens;
616             } while (NextTok->isNot(tok::eof));
617           }
618 
619           // Using OriginalColumn to distinguish between ObjC methods and
620           // binary operators is a bit hacky.
621           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
622                                   NextTok->OriginalColumn == 0;
623 
624           // Try to detect a braced list. Note that regardless how we mark inner
625           // braces here, we will overwrite the BlockKind later if we parse a
626           // braced list (where all blocks inside are by default braced lists),
627           // or when we explicitly detect blocks (for example while parsing
628           // lambdas).
629 
630           // If we already marked the opening brace as braced list, the closing
631           // must also be part of it.
632           ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);
633 
634           ProbablyBracedList = ProbablyBracedList ||
635                                (Style.isJavaScript() &&
636                                 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
637                                                  Keywords.kw_as));
638           ProbablyBracedList = ProbablyBracedList ||
639                                (Style.isCpp() && NextTok->is(tok::l_paren));
640 
641           // If there is a comma, semicolon or right paren after the closing
642           // brace, we assume this is a braced initializer list.
643           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
644           // braced list in JS.
645           ProbablyBracedList =
646               ProbablyBracedList ||
647               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
648                                tok::r_paren, tok::r_square, tok::l_brace,
649                                tok::ellipsis);
650 
651           ProbablyBracedList =
652               ProbablyBracedList ||
653               (NextTok->is(tok::identifier) &&
654                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
655 
656           ProbablyBracedList = ProbablyBracedList ||
657                                (NextTok->is(tok::semi) &&
658                                 (!ExpectClassBody || LBraceStack.size() != 1));
659 
660           ProbablyBracedList =
661               ProbablyBracedList ||
662               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
663 
664           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
665             // We can have an array subscript after a braced init
666             // list, but C++11 attributes are expected after blocks.
667             NextTok = Tokens->getNextToken();
668             ++ReadTokens;
669             ProbablyBracedList = NextTok->isNot(tok::l_square);
670           }
671         }
672         if (ProbablyBracedList) {
673           Tok->setBlockKind(BK_BracedInit);
674           LBraceStack.back()->setBlockKind(BK_BracedInit);
675         } else {
676           Tok->setBlockKind(BK_Block);
677           LBraceStack.back()->setBlockKind(BK_Block);
678         }
679       }
680       LBraceStack.pop_back();
681       break;
682     case tok::identifier:
683       if (!Tok->is(TT_StatementMacro))
684         break;
685       LLVM_FALLTHROUGH;
686     case tok::at:
687     case tok::semi:
688     case tok::kw_if:
689     case tok::kw_while:
690     case tok::kw_for:
691     case tok::kw_switch:
692     case tok::kw_try:
693     case tok::kw___try:
694       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
695         LBraceStack.back()->setBlockKind(BK_Block);
696       break;
697     default:
698       break;
699     }
700     PrevTok = Tok;
701     Tok = NextTok;
702   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
703 
704   // Assume other blocks for all unclosed opening braces.
705   for (FormatToken *LBrace : LBraceStack)
706     if (LBrace->is(BK_Unknown))
707       LBrace->setBlockKind(BK_Block);
708 
709   FormatTok = Tokens->setPosition(StoredPosition);
710 }
711 
// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  const std::size_t Mix = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mix;
}
717 
718 size_t UnwrappedLineParser::computePPHash() const {
719   size_t h = 0;
720   for (const auto &i : PPStack) {
721     hash_combine(h, size_t(i.Kind));
722     hash_combine(h, i.Line);
723   }
724   return h;
725 }
726 
/// Parses a block that starts at the current token, which must be a '{' or a
/// macro-block-begin token, through its closing token.
/// \param MustBeDeclaration Declaration state installed for the block body.
/// \param AddLevels Number of indentation levels added to the body.
/// \param MunchSemi If set, a ';' directly after the block is consumed too.
/// \param UnindentWhitesmithsBraces If set, the line level is decremented once
/// after the opening line has been emitted.
/// \param CanContainBracedList Forwarded to parseLevel().
/// \param NextLBracesType Forwarded to parseLevel().
/// \returns The IfStmtKind reported by parseLevel() for the body
/// (\c IfStmtKind::NotIf unless parseLevel() changes it).
UnwrappedLineParser::IfStmtKind
UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                bool MunchSemi, bool UnindentWhitesmithsBraces,
                                bool CanContainBracedList,
                                TokenType NextLBracesType) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Remember the opening token; FormatTok moves on below.
  FormatToken *Tok = FormatTok;
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Preprocessor state at the opening brace; compared with the state at the
  // closing brace further down.
  size_t PPStartHash = computePPHash();

  // The level that is restored once the block has been parsed.
  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Index of the line that holds the opening brace, or kInvalidIndex if no
  // line exists after the opening line has been emitted.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  // In Whitesmiths mode the level was already raised above.
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock = parseLevel(
      /*HasOpeningBrace=*/true, CanContainBracedList, &IfKind, NextLBracesType);

  if (eof())
    return IfKind;

  // The body did not end on the expected closing token: restore the level and
  // stop here.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // For a simple block, link the two braces to each other unless the body
  // ends in a non-optional '}'.
  if (SimpleBlock && Tok->is(tok::l_brace)) {
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      Tok->MatchingParen = FormatTok;
      FormatTok->MatchingParen = Tok;
    }
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  // NOTE(review): AddLevels is unsigned, so the negation wraps around; this
  // presumably relies on conversion to a signed parameter of nextToken() --
  // confirm against its declaration.
  nextToken(/*LevelDifference=*/-AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::kw_noexcept)) {
    // A noexcept in a requires expression.
    nextToken();
  }

  if (FormatTok->is(tok::arrow)) {
    // Following the } or noexcept we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Cross-link the opening and closing lines only if the preprocessor stack
  // hashes to the same value at both braces.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}
830 
831 static bool isGoogScope(const UnwrappedLine &Line) {
832   // FIXME: Closure-library specific stuff should not be hard-coded but be
833   // configurable.
834   if (Line.Tokens.size() < 4)
835     return false;
836   auto I = Line.Tokens.begin();
837   if (I->Tok->TokenText != "goog")
838     return false;
839   ++I;
840   if (I->Tok->isNot(tok::period))
841     return false;
842   ++I;
843   if (I->Tok->TokenText != "scope")
844     return false;
845   ++I;
846   return I->Tok->is(tok::l_paren);
847 }
848 
849 static bool isIIFE(const UnwrappedLine &Line,
850                    const AdditionalKeywords &Keywords) {
851   // Look for the start of an immediately invoked anonymous function.
852   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
853   // This is commonly done in JavaScript to create a new, anonymous scope.
854   // Example: (function() { ... })()
855   if (Line.Tokens.size() < 3)
856     return false;
857   auto I = Line.Tokens.begin();
858   if (I->Tok->isNot(tok::l_paren))
859     return false;
860   ++I;
861   if (I->Tok->isNot(Keywords.kw_function))
862     return false;
863   ++I;
864   return I->Tok->is(tok::l_paren);
865 }
866 
867 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
868                                    const FormatToken &InitialToken) {
869   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
870     return Style.BraceWrapping.AfterNamespace;
871   if (InitialToken.is(tok::kw_class))
872     return Style.BraceWrapping.AfterClass;
873   if (InitialToken.is(tok::kw_union))
874     return Style.BraceWrapping.AfterUnion;
875   if (InitialToken.is(tok::kw_struct))
876     return Style.BraceWrapping.AfterStruct;
877   if (InitialToken.is(tok::kw_enum))
878     return Style.BraceWrapping.AfterEnum;
879   return false;
880 }
881 
882 void UnwrappedLineParser::parseChildBlock(
883     bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
884   FormatTok->setBlockKind(BK_Block);
885   nextToken();
886   {
887     bool SkipIndent = (Style.isJavaScript() &&
888                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
889     ScopedLineState LineState(*this);
890     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
891                                             /*MustBeDeclaration=*/false);
892     Line->Level += SkipIndent ? 0 : 1;
893     parseLevel(/*HasOpeningBrace=*/true, CanContainBracedList,
894                /*IfKind=*/nullptr, NextLBracesType);
895     flushComments(isOnNewLine(*FormatTok));
896     Line->Level -= SkipIndent ? 0 : 1;
897   }
898   nextToken();
899 }
900 
901 void UnwrappedLineParser::parsePPDirective() {
902   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
903   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
904 
905   nextToken();
906 
907   if (!FormatTok->Tok.getIdentifierInfo()) {
908     parsePPUnknown();
909     return;
910   }
911 
912   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
913   case tok::pp_define:
914     parsePPDefine();
915     return;
916   case tok::pp_if:
917     parsePPIf(/*IfDef=*/false);
918     break;
919   case tok::pp_ifdef:
920   case tok::pp_ifndef:
921     parsePPIf(/*IfDef=*/true);
922     break;
923   case tok::pp_else:
924     parsePPElse();
925     break;
926   case tok::pp_elifdef:
927   case tok::pp_elifndef:
928   case tok::pp_elif:
929     parsePPElIf();
930     break;
931   case tok::pp_endif:
932     parsePPEndIf();
933     break;
934   default:
935     parsePPUnknown();
936     break;
937   }
938 }
939 
940 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
941   size_t Line = CurrentLines->size();
942   if (CurrentLines == &PreprocessorDirectives)
943     Line += Lines.size();
944 
945   if (Unreachable ||
946       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
947     PPStack.push_back({PP_Unreachable, Line});
948   else
949     PPStack.push_back({PP_Conditional, Line});
950 }
951 
952 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
953   ++PPBranchLevel;
954   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
955   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
956     PPLevelBranchIndex.push_back(0);
957     PPLevelBranchCount.push_back(0);
958   }
959   PPChainBranchIndex.push(0);
960   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
961   conditionalCompilationCondition(Unreachable || Skip);
962 }
963 
964 void UnwrappedLineParser::conditionalCompilationAlternative() {
965   if (!PPStack.empty())
966     PPStack.pop_back();
967   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
968   if (!PPChainBranchIndex.empty())
969     ++PPChainBranchIndex.top();
970   conditionalCompilationCondition(
971       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
972       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
973 }
974 
975 void UnwrappedLineParser::conditionalCompilationEnd() {
976   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
977   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
978     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
979       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
980   }
981   // Guard against #endif's without #if.
982   if (PPBranchLevel > -1)
983     --PPBranchLevel;
984   if (!PPChainBranchIndex.empty())
985     PPChainBranchIndex.pop();
986   if (!PPStack.empty())
987     PPStack.pop_back();
988 }
989 
990 void UnwrappedLineParser::parsePPIf(bool IfDef) {
991   bool IfNDef = FormatTok->is(tok::pp_ifndef);
992   nextToken();
993   bool Unreachable = false;
994   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
995     Unreachable = true;
996   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
997     Unreachable = true;
998   conditionalCompilationStart(Unreachable);
999   FormatToken *IfCondition = FormatTok;
1000   // If there's a #ifndef on the first line, and the only lines before it are
1001   // comments, it could be an include guard.
1002   bool MaybeIncludeGuard = IfNDef;
1003   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
1004     for (auto &Line : Lines) {
1005       if (!Line.Tokens.front().Tok->is(tok::comment)) {
1006         MaybeIncludeGuard = false;
1007         IncludeGuard = IG_Rejected;
1008         break;
1009       }
1010     }
1011   --PPBranchLevel;
1012   parsePPUnknown();
1013   ++PPBranchLevel;
1014   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1015     IncludeGuard = IG_IfNdefed;
1016     IncludeGuardToken = IfCondition;
1017   }
1018 }
1019 
1020 void UnwrappedLineParser::parsePPElse() {
1021   // If a potential include guard has an #else, it's not an include guard.
1022   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1023     IncludeGuard = IG_Rejected;
1024   conditionalCompilationAlternative();
1025   if (PPBranchLevel > -1)
1026     --PPBranchLevel;
1027   parsePPUnknown();
1028   ++PPBranchLevel;
1029 }
1030 
1031 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1032 
1033 void UnwrappedLineParser::parsePPEndIf() {
1034   conditionalCompilationEnd();
1035   parsePPUnknown();
1036   // If the #endif of a potential include guard is the last thing in the file,
1037   // then we found an include guard.
1038   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1039       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1040     IncludeGuard = IG_Found;
1041 }
1042 
1043 void UnwrappedLineParser::parsePPDefine() {
1044   nextToken();
1045 
1046   if (!FormatTok->Tok.getIdentifierInfo()) {
1047     IncludeGuard = IG_Rejected;
1048     IncludeGuardToken = nullptr;
1049     parsePPUnknown();
1050     return;
1051   }
1052 
1053   if (IncludeGuard == IG_IfNdefed &&
1054       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1055     IncludeGuard = IG_Defined;
1056     IncludeGuardToken = nullptr;
1057     for (auto &Line : Lines) {
1058       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1059         IncludeGuard = IG_Rejected;
1060         break;
1061       }
1062     }
1063   }
1064 
1065   // In the context of a define, even keywords should be treated as normal
1066   // identifiers. Setting the kind to identifier is not enough, because we need
1067   // to treat additional keywords like __except as well, which are already
1068   // identifiers.
1069   FormatTok->Tok.setKind(tok::identifier);
1070   FormatTok->Tok.setIdentifierInfo(nullptr);
1071   nextToken();
1072   if (FormatTok->Tok.getKind() == tok::l_paren &&
1073       !FormatTok->hasWhitespaceBefore())
1074     parseParens();
1075   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1076     Line->Level += PPBranchLevel + 1;
1077   addUnwrappedLine();
1078   ++Line->Level;
1079 
1080   // Errors during a preprocessor directive can only affect the layout of the
1081   // preprocessor directive, and thus we ignore them. An alternative approach
1082   // would be to use the same approach we use on the file level (no
1083   // re-indentation if there was a structural error) within the macro
1084   // definition.
1085   parseFile();
1086 }
1087 
1088 void UnwrappedLineParser::parsePPUnknown() {
1089   do {
1090     nextToken();
1091   } while (!eof());
1092   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1093     Line->Level += PPBranchLevel + 1;
1094   addUnwrappedLine();
1095 }
1096 
1097 // Here we exclude certain tokens that are not usually the first token in an
1098 // unwrapped line. This is used in attempt to distinguish macro calls without
1099 // trailing semicolons from other constructs split to several lines.
1100 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1101   // Semicolon can be a null-statement, l_square can be a start of a macro or
1102   // a C++11 attribute, but this doesn't seem to be common.
1103   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1104          Tok.isNot(TT_AttributeSquare) &&
1105          // Tokens that can only be used as binary operators and a part of
1106          // overloaded operator names.
1107          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1108          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1109          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1110          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1111          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1112          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1113          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1114          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1115          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1116          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1117          Tok.isNot(tok::lesslessequal) &&
1118          // Colon is used in labels, base class lists, initializer lists,
1119          // range-based for loops, ternary operator, but should never be the
1120          // first token in an unwrapped line.
1121          Tok.isNot(tok::colon) &&
1122          // 'noexcept' is a trailing annotation.
1123          Tok.isNot(tok::kw_noexcept);
1124 }
1125 
1126 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1127                           const FormatToken *FormatTok) {
1128   // FIXME: This returns true for C/C++ keywords like 'struct'.
1129   return FormatTok->is(tok::identifier) &&
1130          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1131           !FormatTok->isOneOf(
1132               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1133               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1134               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1135               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1136               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1137               Keywords.kw_instanceof, Keywords.kw_interface,
1138               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1139 }
1140 
1141 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1142                                  const FormatToken *FormatTok) {
1143   return FormatTok->Tok.isLiteral() ||
1144          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1145          mustBeJSIdent(Keywords, FormatTok);
1146 }
1147 
1148 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1149 // when encountered after a value (see mustBeJSIdentOrValue).
1150 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1151                            const FormatToken *FormatTok) {
1152   return FormatTok->isOneOf(
1153       tok::kw_return, Keywords.kw_yield,
1154       // conditionals
1155       tok::kw_if, tok::kw_else,
1156       // loops
1157       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1158       // switch/case
1159       tok::kw_switch, tok::kw_case,
1160       // exceptions
1161       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1162       // declaration
1163       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1164       Keywords.kw_async, Keywords.kw_function,
1165       // import/export
1166       Keywords.kw_import, tok::kw_export);
1167 }
1168 
1169 // Checks whether a token is a type in K&R C (aka C78).
1170 static bool isC78Type(const FormatToken &Tok) {
1171   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1172                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1173                      tok::identifier);
1174 }
1175 
1176 // This function checks whether a token starts the first parameter declaration
1177 // in a K&R C (aka C78) function definition, e.g.:
1178 //   int f(a, b)
1179 //   short a, b;
1180 //   {
1181 //      return a + b;
1182 //   }
1183 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1184                                const FormatToken *FuncName) {
1185   assert(Tok);
1186   assert(Next);
1187   assert(FuncName);
1188 
1189   if (FuncName->isNot(tok::identifier))
1190     return false;
1191 
1192   const FormatToken *Prev = FuncName->Previous;
1193   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1194     return false;
1195 
1196   if (!isC78Type(*Tok) &&
1197       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1198     return false;
1199 
1200   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1201     return false;
1202 
1203   Tok = Tok->Previous;
1204   if (!Tok || Tok->isNot(tok::r_paren))
1205     return false;
1206 
1207   Tok = Tok->Previous;
1208   if (!Tok || Tok->isNot(tok::identifier))
1209     return false;
1210 
1211   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1212 }
1213 
1214 void UnwrappedLineParser::parseModuleImport() {
1215   nextToken();
1216   while (!eof()) {
1217     if (FormatTok->is(tok::colon)) {
1218       FormatTok->setType(TT_ModulePartitionColon);
1219     }
1220     // Handle import <foo/bar.h> as we would an include statement.
1221     else if (FormatTok->is(tok::less)) {
1222       nextToken();
1223       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1224         // Mark tokens up to the trailing line comments as implicit string
1225         // literals.
1226         if (FormatTok->isNot(tok::comment) &&
1227             !FormatTok->TokenText.startswith("//"))
1228           FormatTok->setType(TT_ImplicitStringLiteral);
1229         nextToken();
1230       }
1231     }
1232     if (FormatTok->is(tok::semi)) {
1233       nextToken();
1234       break;
1235     }
1236     nextToken();
1237   }
1238 
1239   addUnwrappedLine();
1240 }
1241 
1242 // readTokenWithJavaScriptASI reads the next token and terminates the current
1243 // line if JavaScript Automatic Semicolon Insertion must
1244 // happen between the current token and the next token.
1245 //
1246 // This method is conservative - it cannot cover all edge cases of JavaScript,
1247 // but only aims to correctly handle certain well known cases. It *must not*
1248 // return true in speculative cases.
1249 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1250   FormatToken *Previous = FormatTok;
1251   readToken();
1252   FormatToken *Next = FormatTok;
1253 
1254   bool IsOnSameLine =
1255       CommentsBeforeNextToken.empty()
1256           ? Next->NewlinesBefore == 0
1257           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1258   if (IsOnSameLine)
1259     return;
1260 
1261   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1262   bool PreviousStartsTemplateExpr =
1263       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1264   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1265     // If the line contains an '@' sign, the previous token might be an
1266     // annotation, which can precede another identifier/value.
1267     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1268       return LineNode.Tok->is(tok::at);
1269     });
1270     if (HasAt)
1271       return;
1272   }
1273   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1274     return addUnwrappedLine();
1275   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1276   bool NextEndsTemplateExpr =
1277       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1278   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1279       (PreviousMustBeValue ||
1280        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1281                          tok::minusminus)))
1282     return addUnwrappedLine();
1283   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1284       isJSDeclOrStmt(Keywords, Next))
1285     return addUnwrappedLine();
1286 }
1287 
1288 void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
1289                                                  bool IsTopLevel,
1290                                                  TokenType NextLBracesType) {
1291   if (Style.Language == FormatStyle::LK_TableGen &&
1292       FormatTok->is(tok::pp_include)) {
1293     nextToken();
1294     if (FormatTok->is(tok::string_literal))
1295       nextToken();
1296     addUnwrappedLine();
1297     return;
1298   }
1299   switch (FormatTok->Tok.getKind()) {
1300   case tok::kw_asm:
1301     nextToken();
1302     if (FormatTok->is(tok::l_brace)) {
1303       FormatTok->setType(TT_InlineASMBrace);
1304       nextToken();
1305       while (FormatTok && FormatTok->isNot(tok::eof)) {
1306         if (FormatTok->is(tok::r_brace)) {
1307           FormatTok->setType(TT_InlineASMBrace);
1308           nextToken();
1309           addUnwrappedLine();
1310           break;
1311         }
1312         FormatTok->Finalized = true;
1313         nextToken();
1314       }
1315     }
1316     break;
1317   case tok::kw_namespace:
1318     parseNamespace();
1319     return;
1320   case tok::kw_public:
1321   case tok::kw_protected:
1322   case tok::kw_private:
1323     if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1324         Style.isCSharp())
1325       nextToken();
1326     else
1327       parseAccessSpecifier();
1328     return;
1329   case tok::kw_if:
1330     if (Style.isJavaScript() && Line->MustBeDeclaration)
1331       // field/method declaration.
1332       break;
1333     parseIfThenElse(IfKind);
1334     return;
1335   case tok::kw_for:
1336   case tok::kw_while:
1337     if (Style.isJavaScript() && Line->MustBeDeclaration)
1338       // field/method declaration.
1339       break;
1340     parseForOrWhileLoop();
1341     return;
1342   case tok::kw_do:
1343     if (Style.isJavaScript() && Line->MustBeDeclaration)
1344       // field/method declaration.
1345       break;
1346     parseDoWhile();
1347     return;
1348   case tok::kw_switch:
1349     if (Style.isJavaScript() && Line->MustBeDeclaration)
1350       // 'switch: string' field declaration.
1351       break;
1352     parseSwitch();
1353     return;
1354   case tok::kw_default:
1355     if (Style.isJavaScript() && Line->MustBeDeclaration)
1356       // 'default: string' field declaration.
1357       break;
1358     nextToken();
1359     if (FormatTok->is(tok::colon)) {
1360       parseLabel();
1361       return;
1362     }
1363     // e.g. "default void f() {}" in a Java interface.
1364     break;
1365   case tok::kw_case:
1366     if (Style.isJavaScript() && Line->MustBeDeclaration)
1367       // 'case: string' field declaration.
1368       break;
1369     parseCaseLabel();
1370     return;
1371   case tok::kw_try:
1372   case tok::kw___try:
1373     if (Style.isJavaScript() && Line->MustBeDeclaration)
1374       // field/method declaration.
1375       break;
1376     parseTryCatch();
1377     return;
1378   case tok::kw_extern:
1379     nextToken();
1380     if (FormatTok->Tok.is(tok::string_literal)) {
1381       nextToken();
1382       if (FormatTok->Tok.is(tok::l_brace)) {
1383         if (Style.BraceWrapping.AfterExternBlock)
1384           addUnwrappedLine();
1385         // Either we indent or for backwards compatibility we follow the
1386         // AfterExternBlock style.
1387         unsigned AddLevels =
1388             (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1389                     (Style.BraceWrapping.AfterExternBlock &&
1390                      Style.IndentExternBlock ==
1391                          FormatStyle::IEBS_AfterExternBlock)
1392                 ? 1u
1393                 : 0u;
1394         parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1395         addUnwrappedLine();
1396         return;
1397       }
1398     }
1399     break;
1400   case tok::kw_export:
1401     if (Style.isJavaScript()) {
1402       parseJavaScriptEs6ImportExport();
1403       return;
1404     }
1405     if (!Style.isCpp())
1406       break;
1407     // Handle C++ "(inline|export) namespace".
1408     LLVM_FALLTHROUGH;
1409   case tok::kw_inline:
1410     nextToken();
1411     if (FormatTok->Tok.is(tok::kw_namespace)) {
1412       parseNamespace();
1413       return;
1414     }
1415     break;
1416   case tok::identifier:
1417     if (FormatTok->is(TT_ForEachMacro)) {
1418       parseForOrWhileLoop();
1419       return;
1420     }
1421     if (FormatTok->is(TT_MacroBlockBegin)) {
1422       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1423                  /*MunchSemi=*/false);
1424       return;
1425     }
1426     if (FormatTok->is(Keywords.kw_import)) {
1427       if (Style.isJavaScript()) {
1428         parseJavaScriptEs6ImportExport();
1429         return;
1430       }
1431       if (Style.Language == FormatStyle::LK_Proto) {
1432         nextToken();
1433         if (FormatTok->is(tok::kw_public))
1434           nextToken();
1435         if (!FormatTok->is(tok::string_literal))
1436           return;
1437         nextToken();
1438         if (FormatTok->is(tok::semi))
1439           nextToken();
1440         addUnwrappedLine();
1441         return;
1442       }
1443       if (Style.isCpp()) {
1444         parseModuleImport();
1445         return;
1446       }
1447     }
1448     if (Style.isCpp() &&
1449         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1450                            Keywords.kw_slots, Keywords.kw_qslots)) {
1451       nextToken();
1452       if (FormatTok->is(tok::colon)) {
1453         nextToken();
1454         addUnwrappedLine();
1455         return;
1456       }
1457     }
1458     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1459       parseStatementMacro();
1460       return;
1461     }
1462     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1463       parseNamespace();
1464       return;
1465     }
1466     // In all other cases, parse the declaration.
1467     break;
1468   default:
1469     break;
1470   }
1471   do {
1472     const FormatToken *Previous = FormatTok->Previous;
1473     switch (FormatTok->Tok.getKind()) {
1474     case tok::at:
1475       nextToken();
1476       if (FormatTok->Tok.is(tok::l_brace)) {
1477         nextToken();
1478         parseBracedList();
1479         break;
1480       } else if (Style.Language == FormatStyle::LK_Java &&
1481                  FormatTok->is(Keywords.kw_interface)) {
1482         nextToken();
1483         break;
1484       }
1485       switch (FormatTok->Tok.getObjCKeywordID()) {
1486       case tok::objc_public:
1487       case tok::objc_protected:
1488       case tok::objc_package:
1489       case tok::objc_private:
1490         return parseAccessSpecifier();
1491       case tok::objc_interface:
1492       case tok::objc_implementation:
1493         return parseObjCInterfaceOrImplementation();
1494       case tok::objc_protocol:
1495         if (parseObjCProtocol())
1496           return;
1497         break;
1498       case tok::objc_end:
1499         return; // Handled by the caller.
1500       case tok::objc_optional:
1501       case tok::objc_required:
1502         nextToken();
1503         addUnwrappedLine();
1504         return;
1505       case tok::objc_autoreleasepool:
1506         nextToken();
1507         if (FormatTok->Tok.is(tok::l_brace)) {
1508           if (Style.BraceWrapping.AfterControlStatement ==
1509               FormatStyle::BWACS_Always)
1510             addUnwrappedLine();
1511           parseBlock();
1512         }
1513         addUnwrappedLine();
1514         return;
1515       case tok::objc_synchronized:
1516         nextToken();
1517         if (FormatTok->Tok.is(tok::l_paren))
1518           // Skip synchronization object
1519           parseParens();
1520         if (FormatTok->Tok.is(tok::l_brace)) {
1521           if (Style.BraceWrapping.AfterControlStatement ==
1522               FormatStyle::BWACS_Always)
1523             addUnwrappedLine();
1524           parseBlock();
1525         }
1526         addUnwrappedLine();
1527         return;
1528       case tok::objc_try:
1529         // This branch isn't strictly necessary (the kw_try case below would
1530         // do this too after the tok::at is parsed above).  But be explicit.
1531         parseTryCatch();
1532         return;
1533       default:
1534         break;
1535       }
1536       break;
1537     case tok::kw_concept:
1538       parseConcept();
1539       return;
1540     case tok::kw_requires:
1541       parseRequiresClause();
1542       return;
1543     case tok::kw_enum:
1544       // Ignore if this is part of "template <enum ...".
1545       if (Previous && Previous->is(tok::less)) {
1546         nextToken();
1547         break;
1548       }
1549 
1550       // parseEnum falls through and does not yet add an unwrapped line as an
1551       // enum definition can start a structural element.
1552       if (!parseEnum())
1553         break;
1554       // This only applies for C++.
1555       if (!Style.isCpp()) {
1556         addUnwrappedLine();
1557         return;
1558       }
1559       break;
1560     case tok::kw_typedef:
1561       nextToken();
1562       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1563                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1564                              Keywords.kw_CF_CLOSED_ENUM,
1565                              Keywords.kw_NS_CLOSED_ENUM))
1566         parseEnum();
1567       break;
1568     case tok::kw_struct:
1569     case tok::kw_union:
1570     case tok::kw_class:
1571       if (parseStructLike())
1572         return;
1573       break;
1574     case tok::period:
1575       nextToken();
1576       // In Java, classes have an implicit static member "class".
1577       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1578           FormatTok->is(tok::kw_class))
1579         nextToken();
1580       if (Style.isJavaScript() && FormatTok &&
1581           FormatTok->Tok.getIdentifierInfo())
1582         // JavaScript only has pseudo keywords, all keywords are allowed to
1583         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1584         nextToken();
1585       break;
1586     case tok::semi:
1587       nextToken();
1588       addUnwrappedLine();
1589       return;
1590     case tok::r_brace:
1591       addUnwrappedLine();
1592       return;
1593     case tok::l_paren: {
1594       parseParens();
1595       // Break the unwrapped line if a K&R C function definition has a parameter
1596       // declaration.
1597       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1598         break;
1599       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1600         addUnwrappedLine();
1601         return;
1602       }
1603       break;
1604     }
1605     case tok::kw_operator:
1606       nextToken();
1607       if (FormatTok->isBinaryOperator())
1608         nextToken();
1609       break;
1610     case tok::caret:
1611       nextToken();
1612       if (FormatTok->Tok.isAnyIdentifier() ||
1613           FormatTok->isSimpleTypeSpecifier())
1614         nextToken();
1615       if (FormatTok->is(tok::l_paren))
1616         parseParens();
1617       if (FormatTok->is(tok::l_brace))
1618         parseChildBlock();
1619       break;
1620     case tok::l_brace:
1621       if (NextLBracesType != TT_Unknown)
1622         FormatTok->setType(NextLBracesType);
1623       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1624         // A block outside of parentheses must be the last part of a
1625         // structural element.
1626         // FIXME: Figure out cases where this is not true, and add projections
1627         // for them (the one we know is missing are lambdas).
1628         if (Style.Language == FormatStyle::LK_Java &&
1629             Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1630           // If necessary, we could set the type to something different than
1631           // TT_FunctionLBrace.
1632           if (Style.BraceWrapping.AfterControlStatement ==
1633               FormatStyle::BWACS_Always)
1634             addUnwrappedLine();
1635         } else if (Style.BraceWrapping.AfterFunction) {
1636           addUnwrappedLine();
1637         }
1638         if (!Line->InPPDirective)
1639           FormatTok->setType(TT_FunctionLBrace);
1640         parseBlock();
1641         addUnwrappedLine();
1642         return;
1643       }
1644       // Otherwise this was a braced init list, and the structural
1645       // element continues.
1646       break;
1647     case tok::kw_try:
1648       if (Style.isJavaScript() && Line->MustBeDeclaration) {
1649         // field/method declaration.
1650         nextToken();
1651         break;
1652       }
1653       // We arrive here when parsing function-try blocks.
1654       if (Style.BraceWrapping.AfterFunction)
1655         addUnwrappedLine();
1656       parseTryCatch();
1657       return;
1658     case tok::identifier: {
1659       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1660           Line->MustBeDeclaration) {
1661         addUnwrappedLine();
1662         parseCSharpGenericTypeConstraint();
1663         break;
1664       }
1665       if (FormatTok->is(TT_MacroBlockEnd)) {
1666         addUnwrappedLine();
1667         return;
1668       }
1669 
1670       // Function declarations (as opposed to function expressions) are parsed
1671       // on their own unwrapped line by continuing this loop. Function
1672       // expressions (functions that are not on their own line) must not create
1673       // a new unwrapped line, so they are special cased below.
1674       size_t TokenCount = Line->Tokens.size();
1675       if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1676           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1677                                                      Keywords.kw_async)))) {
1678         tryToParseJSFunction();
1679         break;
1680       }
1681       if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1682           FormatTok->is(Keywords.kw_interface)) {
1683         if (Style.isJavaScript()) {
1684           // In JavaScript/TypeScript, "interface" can be used as a standalone
1685           // identifier, e.g. in `var interface = 1;`. If "interface" is
1686           // followed by another identifier, it is very like to be an actual
1687           // interface declaration.
1688           unsigned StoredPosition = Tokens->getPosition();
1689           FormatToken *Next = Tokens->getNextToken();
1690           FormatTok = Tokens->setPosition(StoredPosition);
1691           if (!mustBeJSIdent(Keywords, Next)) {
1692             nextToken();
1693             break;
1694           }
1695         }
1696         parseRecord();
1697         addUnwrappedLine();
1698         return;
1699       }
1700 
1701       if (FormatTok->is(Keywords.kw_interface)) {
1702         if (parseStructLike())
1703           return;
1704         break;
1705       }
1706 
1707       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1708         parseStatementMacro();
1709         return;
1710       }
1711 
1712       // See if the following token should start a new unwrapped line.
1713       StringRef Text = FormatTok->TokenText;
1714 
1715       FormatToken *PreviousToken = FormatTok;
1716       nextToken();
1717 
1718       // JS doesn't have macros, and within classes colons indicate fields, not
1719       // labels.
1720       if (Style.isJavaScript())
1721         break;
1722 
1723       TokenCount = Line->Tokens.size();
1724       if (TokenCount == 1 ||
1725           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1726         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1727           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1728           parseLabel(!Style.IndentGotoLabels);
1729           return;
1730         }
1731         // Recognize function-like macro usages without trailing semicolon as
1732         // well as free-standing macros like Q_OBJECT.
1733         bool FunctionLike = FormatTok->is(tok::l_paren);
1734         if (FunctionLike)
1735           parseParens();
1736 
1737         bool FollowedByNewline =
1738             CommentsBeforeNextToken.empty()
1739                 ? FormatTok->NewlinesBefore > 0
1740                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1741 
1742         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1743             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1744           PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
1745           addUnwrappedLine();
1746           return;
1747         }
1748       }
1749       break;
1750     }
1751     case tok::equal:
1752       if ((Style.isJavaScript() || Style.isCSharp()) &&
1753           FormatTok->is(TT_FatArrow)) {
1754         tryToParseChildBlock();
1755         break;
1756       }
1757 
1758       nextToken();
1759       if (FormatTok->Tok.is(tok::l_brace)) {
1760         // Block kind should probably be set to BK_BracedInit for any language.
1761         // C# needs this change to ensure that array initialisers and object
1762         // initialisers are indented the same way.
1763         if (Style.isCSharp())
1764           FormatTok->setBlockKind(BK_BracedInit);
1765         nextToken();
1766         parseBracedList();
1767       } else if (Style.Language == FormatStyle::LK_Proto &&
1768                  FormatTok->Tok.is(tok::less)) {
1769         nextToken();
1770         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1771                         /*ClosingBraceKind=*/tok::greater);
1772       }
1773       break;
1774     case tok::l_square:
1775       parseSquare();
1776       break;
1777     case tok::kw_new:
1778       parseNew();
1779       break;
1780     default:
1781       nextToken();
1782       break;
1783     }
1784   } while (!eof());
1785 }
1786 
1787 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1788   assert(FormatTok->is(tok::l_brace));
1789   if (!Style.isCSharp())
1790     return false;
1791   // See if it's a property accessor.
1792   if (FormatTok->Previous->isNot(tok::identifier))
1793     return false;
1794 
1795   // See if we are inside a property accessor.
1796   //
1797   // Record the current tokenPosition so that we can advance and
1798   // reset the current token. `Next` is not set yet so we need
1799   // another way to advance along the token stream.
1800   unsigned int StoredPosition = Tokens->getPosition();
1801   FormatToken *Tok = Tokens->getNextToken();
1802 
1803   // A trivial property accessor is of the form:
1804   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1805   // Track these as they do not require line breaks to be introduced.
1806   bool HasGetOrSet = false;
1807   bool IsTrivialPropertyAccessor = true;
1808   while (!eof()) {
1809     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1810                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1811                      Keywords.kw_set)) {
1812       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1813         HasGetOrSet = true;
1814       Tok = Tokens->getNextToken();
1815       continue;
1816     }
1817     if (Tok->isNot(tok::r_brace))
1818       IsTrivialPropertyAccessor = false;
1819     break;
1820   }
1821 
1822   if (!HasGetOrSet) {
1823     Tokens->setPosition(StoredPosition);
1824     return false;
1825   }
1826 
1827   // Try to parse the property accessor:
1828   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1829   Tokens->setPosition(StoredPosition);
1830   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1831     addUnwrappedLine();
1832   nextToken();
1833   do {
1834     switch (FormatTok->Tok.getKind()) {
1835     case tok::r_brace:
1836       nextToken();
1837       if (FormatTok->is(tok::equal)) {
1838         while (!eof() && FormatTok->isNot(tok::semi))
1839           nextToken();
1840         nextToken();
1841       }
1842       addUnwrappedLine();
1843       return true;
1844     case tok::l_brace:
1845       ++Line->Level;
1846       parseBlock(/*MustBeDeclaration=*/true);
1847       addUnwrappedLine();
1848       --Line->Level;
1849       break;
1850     case tok::equal:
1851       if (FormatTok->is(TT_FatArrow)) {
1852         ++Line->Level;
1853         do {
1854           nextToken();
1855         } while (!eof() && FormatTok->isNot(tok::semi));
1856         nextToken();
1857         addUnwrappedLine();
1858         --Line->Level;
1859         break;
1860       }
1861       nextToken();
1862       break;
1863     default:
1864       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1865           !IsTrivialPropertyAccessor) {
1866         // Non-trivial get/set needs to be on its own line.
1867         addUnwrappedLine();
1868       }
1869       nextToken();
1870     }
1871   } while (!eof());
1872 
1873   // Unreachable for well-formed code (paired '{' and '}').
1874   return true;
1875 }
1876 
/// Tries to parse a C++ lambda starting at the current '[' token.
///
/// After the introducer has been parsed, tokens are skipped until a '{' is
/// found. If that happens, the '[' is typed TT_LambdaLSquare, the '{' is typed
/// TT_LambdaLBrace and the body is parsed as a child block.
///
/// \returns false if the style is not C++ (after advancing by one token) or if
/// tryToParseLambdaIntroducer() returns false. Returns true in every other
/// case, including when the scan stops at an unexpected token and nothing is
/// marked as a lambda.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  // Remember the '[' so it can be typed once we know this really is a lambda.
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  // Both flags relax the operator check below: operators are only accepted
  // after a trailing-return arrow or inside a template parameter list.
  bool SeenArrow = false;
  bool InTemplateParameterList = false;

  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      // Not reached in practice: the loop condition already excludes '{'.
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_class:
    case tok::kw_template:
    case tok::kw_typename:
      assert(FormatTok->Previous);
      // One of these keywords directly after '<' marks a template parameter
      // list.
      if (FormatTok->Previous->is(tok::less))
        InTemplateParameterList = true;
      nextToken();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_mutable:
    case tok::kw_noexcept:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      if (SeenArrow || InTemplateParameterList) {
        nextToken();
        break;
      }
      // Operator outside of a return type / template list: stop scanning
      // without marking anything as a lambda.
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      // Any other token ends the scan; nothing is marked as a lambda.
      return true;
    }
  }
  // Reached the '{' of the lambda body: annotate both ends and parse the body.
  FormatTok->setType(TT_LambdaLBrace);
  LSquare.setType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}
1981 
1982 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1983   const FormatToken *Previous = FormatTok->Previous;
1984   if (Previous &&
1985       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1986                          tok::kw_delete, tok::l_square) ||
1987        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1988        Previous->isSimpleTypeSpecifier())) {
1989     nextToken();
1990     return false;
1991   }
1992   nextToken();
1993   if (FormatTok->is(tok::l_square))
1994     return false;
1995   parseSquare(/*LambdaIntroducer=*/true);
1996   return true;
1997 }
1998 
1999 void UnwrappedLineParser::tryToParseJSFunction() {
2000   assert(FormatTok->is(Keywords.kw_function) ||
2001          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2002   if (FormatTok->is(Keywords.kw_async))
2003     nextToken();
2004   // Consume "function".
2005   nextToken();
2006 
2007   // Consume * (generator function). Treat it like C++'s overloaded operators.
2008   if (FormatTok->is(tok::star)) {
2009     FormatTok->setType(TT_OverloadedOperator);
2010     nextToken();
2011   }
2012 
2013   // Consume function name.
2014   if (FormatTok->is(tok::identifier))
2015     nextToken();
2016 
2017   if (FormatTok->isNot(tok::l_paren))
2018     return;
2019 
2020   // Parse formal parameter list.
2021   parseParens();
2022 
2023   if (FormatTok->is(tok::colon)) {
2024     // Parse a type definition.
2025     nextToken();
2026 
2027     // Eat the type declaration. For braced inline object types, balance braces,
2028     // otherwise just parse until finding an l_brace for the function body.
2029     if (FormatTok->is(tok::l_brace))
2030       tryToParseBracedList();
2031     else
2032       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2033         nextToken();
2034   }
2035 
2036   if (FormatTok->is(tok::semi))
2037     return;
2038 
2039   parseChildBlock();
2040 }
2041 
2042 bool UnwrappedLineParser::tryToParseBracedList() {
2043   if (FormatTok->is(BK_Unknown))
2044     calculateBraceTypes();
2045   assert(FormatTok->isNot(BK_Unknown));
2046   if (FormatTok->is(BK_Block))
2047     return false;
2048   nextToken();
2049   parseBracedList();
2050   return true;
2051 }
2052 
2053 bool UnwrappedLineParser::tryToParseChildBlock() {
2054   assert(Style.isJavaScript() || Style.isCSharp());
2055   assert(FormatTok->is(TT_FatArrow));
2056   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2057   // They always start an expression or a child block if followed by a curly
2058   // brace.
2059   nextToken();
2060   if (FormatTok->isNot(tok::l_brace))
2061     return false;
2062   parseChildBlock();
2063   return true;
2064 }
2065 
/// Parses the contents of a braced list up to and including the token of kind
/// \p ClosingBraceKind.
///
/// Most callers in this file consume the opening token before calling.
///
/// \param ContinueOnSemicolons If false, a ';' (outside JavaScript) stops
/// parsing immediately; if true, the ';' is recorded as an error and skipped.
/// \param IsEnum If true and Style.AllowShortEnumsOnASingleLine is off, an
/// unwrapped line is added after each ',' and before the closing token.
/// \param ClosingBraceKind Token kind that terminates the list.
/// \returns true if the closing token was found and no stray ';' was seen;
/// false if a ';' was encountered or the end of the file was reached first.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          bool IsEnum,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    // C# lambda with a block body inside the list.
    if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
        tryToParseChildBlock())
      continue;
    if (Style.isJavaScript()) {
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    // The closing-token check runs before the switch so that it also works for
    // a non-brace ClosingBraceKind such as tok::greater.
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_square:
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.isJavaScript()) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->setBlockKind(BK_BracedInit);
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // In proto files '<' ... '>' delimits a nested list.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.isJavaScript()) {
        nextToken();
        break;
      }
      HasError = true;
      // Without ContinueOnSemicolons, stop here and leave the ';' unconsumed;
      // this returns false because HasError was just set.
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // Ran out of tokens before finding the closing token.
  return false;
}
2155 
/// \brief Parses a pair of parentheses (and everything between them).
///
/// Expects the current token to be '('. Returns after consuming the matching
/// ')', or without consuming anything further when an unmatched '}' or the end
/// of the file is reached.
///
/// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
/// double ampersands. This only counts for the current parens scope.
void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      // Nested parentheses are parsed without forwarding AmpAmpTokenType.
      parseParens();
      // In Java, a '{' directly after nested parentheses is parsed as a child
      // block.
      if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::r_paren:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return;
    case tok::l_square:
      tryToParseLambda();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList())
        parseChildBlock();
      break;
    case tok::at:
      // '@' followed by '{' is parsed as a braced list.
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
      }
      break;
    case tok::equal:
      if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
        tryToParseChildBlock();
      else
        nextToken();
      break;
    case tok::kw_class:
      // In JavaScript, `class` inside parentheses is parsed as an expression.
      if (Style.isJavaScript())
        parseRecord(/*ParseAsExpr=*/true);
      else
        nextToken();
      break;
    case tok::identifier:
      if (Style.isJavaScript() &&
          (FormatTok->is(Keywords.kw_function) ||
           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
        tryToParseJSFunction();
      else
        nextToken();
      break;
    case tok::kw_requires:
      parseRequiresExpression();
      break;
    case tok::ampamp:
      if (AmpAmpTokenType != TT_Unknown)
        FormatTok->setType(AmpAmpTokenType);
      // The '&&' itself is consumed by the default case below.
      LLVM_FALLTHROUGH;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
2222 
2223 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2224   if (!LambdaIntroducer) {
2225     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2226     if (tryToParseLambda())
2227       return;
2228   }
2229   do {
2230     switch (FormatTok->Tok.getKind()) {
2231     case tok::l_paren:
2232       parseParens();
2233       break;
2234     case tok::r_square:
2235       nextToken();
2236       return;
2237     case tok::r_brace:
2238       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2239       return;
2240     case tok::l_square:
2241       parseSquare();
2242       break;
2243     case tok::l_brace: {
2244       if (!tryToParseBracedList())
2245         parseChildBlock();
2246       break;
2247     }
2248     case tok::at:
2249       nextToken();
2250       if (FormatTok->Tok.is(tok::l_brace)) {
2251         nextToken();
2252         parseBracedList();
2253       }
2254       break;
2255     default:
2256       nextToken();
2257       break;
2258     }
2259   } while (!eof());
2260 }
2261 
2262 void UnwrappedLineParser::keepAncestorBraces() {
2263   if (!Style.RemoveBracesLLVM)
2264     return;
2265 
2266   const int MaxNestingLevels = 2;
2267   const int Size = NestedTooDeep.size();
2268   if (Size >= MaxNestingLevels)
2269     NestedTooDeep[Size - MaxNestingLevels] = true;
2270   NestedTooDeep.push_back(false);
2271 }
2272 
2273 static void markOptionalBraces(FormatToken *LeftBrace) {
2274   if (!LeftBrace)
2275     return;
2276 
2277   assert(LeftBrace->is(tok::l_brace));
2278 
2279   FormatToken *RightBrace = LeftBrace->MatchingParen;
2280   if (!RightBrace) {
2281     assert(!LeftBrace->Optional);
2282     return;
2283   }
2284 
2285   assert(RightBrace->is(tok::r_brace));
2286   assert(RightBrace->MatchingParen == LeftBrace);
2287   assert(LeftBrace->Optional == RightBrace->Optional);
2288 
2289   LeftBrace->Optional = true;
2290   RightBrace->Optional = true;
2291 }
2292 
/// Parses an `if` statement, including any `else` / `else if` chain, starting
/// at the `if` keyword.
///
/// When Style.RemoveBracesLLVM is enabled, this also decides whether the
/// braces of the `if` and `else` bodies may be marked optional.
///
/// \param IfKind If non-null and Style.RemoveBracesLLVM is enabled, receives
/// the kind of statement parsed (IfOnly, IfElse or IfElseIf).
/// \param KeepBraces If true, braces are never marked optional.
/// \returns nullptr when Style.RemoveBracesLLVM is disabled; otherwise the
/// left brace of the `if` body, or nullptr if that body was not braced.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces) {
  auto HandleAttributes = [this]() {
    // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
    if (FormatTok->is(TT_AttributeMacro))
      nextToken();
    // Handle [[likely]] / [[unlikely]] attributes.
    if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
      parseSquare();
  };

  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  nextToken();
  // Skip `constexpr` or a single identifier between `if` and the condition.
  if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
    nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  HandleAttributes();

  // Set when the line holding the closing '}' of the if-body is still open
  // because an `else` may follow on the same line.
  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (FormatTok->Tok.is(tok::l_brace)) {
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    IfBlockKind = parseBlock();
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    // Unbraced body: parse a single statement one level deeper.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }

  bool KeepIfBraces = false;
  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the braces if they are unmatched, if nesting is too deep, or if the
    // body itself is an `if` without a final `else`.
    KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->Tok.is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // The else-branch gets its own nesting verdict.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    HandleAttributes();
    if (FormatTok->Tok.is(tok::l_brace)) {
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      if (parseBlock() == IfStmtKind::IfOnly)
        Kind = IfStmtKind::IfElseIf;
      addUnwrappedLine();
    } else if (FormatTok->Tok.is(tok::kw_if)) {
      FormatToken *Previous = Tokens->getPreviousToken();
      const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
      // A comment between `else` and `if` forces the `if` onto its own,
      // indented line.
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Temporarily remove this level's entry; the recursive call pushes and
        // pops its own entry, after which this one is restored.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace =
          parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      // Unbraced else-body.
      addUnwrappedLine();
      ++Line->Level;
      parseStructuralElement();
      if (FormatTok->is(tok::eof))
        addUnwrappedLine();
      --Line->Level;
    }
  } else {
    if (Style.RemoveBracesLLVM)
      KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is brace-removal bookkeeping only.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  const bool KeepElseBraces =
      (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();

  NestedTooDeep.pop_back();

  if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: unlink the pair so that it is treated as unmatched
    // (see the MatchingParen checks above).
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2418 
2419 void UnwrappedLineParser::parseTryCatch() {
2420   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2421   nextToken();
2422   bool NeedsUnwrappedLine = false;
2423   if (FormatTok->is(tok::colon)) {
2424     // We are in a function try block, what comes is an initializer list.
2425     nextToken();
2426 
2427     // In case identifiers were removed by clang-tidy, what might follow is
2428     // multiple commas in sequence - before the first identifier.
2429     while (FormatTok->is(tok::comma))
2430       nextToken();
2431 
2432     while (FormatTok->is(tok::identifier)) {
2433       nextToken();
2434       if (FormatTok->is(tok::l_paren))
2435         parseParens();
2436       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2437           FormatTok->is(tok::l_brace)) {
2438         do {
2439           nextToken();
2440         } while (!FormatTok->is(tok::r_brace));
2441         nextToken();
2442       }
2443 
2444       // In case identifiers were removed by clang-tidy, what might follow is
2445       // multiple commas in sequence - after the first identifier.
2446       while (FormatTok->is(tok::comma))
2447         nextToken();
2448     }
2449   }
2450   // Parse try with resource.
2451   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2452     parseParens();
2453 
2454   keepAncestorBraces();
2455 
2456   if (FormatTok->is(tok::l_brace)) {
2457     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2458     parseBlock();
2459     if (Style.BraceWrapping.BeforeCatch)
2460       addUnwrappedLine();
2461     else
2462       NeedsUnwrappedLine = true;
2463   } else if (!FormatTok->is(tok::kw_catch)) {
2464     // The C++ standard requires a compound-statement after a try.
2465     // If there's none, we try to assume there's a structuralElement
2466     // and try to continue.
2467     addUnwrappedLine();
2468     ++Line->Level;
2469     parseStructuralElement();
2470     --Line->Level;
2471   }
2472   while (true) {
2473     if (FormatTok->is(tok::at))
2474       nextToken();
2475     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2476                              tok::kw___finally) ||
2477           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2478            FormatTok->is(Keywords.kw_finally)) ||
2479           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2480            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2481       break;
2482     nextToken();
2483     while (FormatTok->isNot(tok::l_brace)) {
2484       if (FormatTok->is(tok::l_paren)) {
2485         parseParens();
2486         continue;
2487       }
2488       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2489         if (Style.RemoveBracesLLVM)
2490           NestedTooDeep.pop_back();
2491         return;
2492       }
2493       nextToken();
2494     }
2495     NeedsUnwrappedLine = false;
2496     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2497     parseBlock();
2498     if (Style.BraceWrapping.BeforeCatch)
2499       addUnwrappedLine();
2500     else
2501       NeedsUnwrappedLine = true;
2502   }
2503 
2504   if (Style.RemoveBracesLLVM)
2505     NestedTooDeep.pop_back();
2506 
2507   if (NeedsUnwrappedLine)
2508     addUnwrappedLine();
2509 }
2510 
2511 void UnwrappedLineParser::parseNamespace() {
2512   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2513          "'namespace' expected");
2514 
2515   const FormatToken &InitialToken = *FormatTok;
2516   nextToken();
2517   if (InitialToken.is(TT_NamespaceMacro)) {
2518     parseParens();
2519   } else {
2520     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2521                               tok::l_square, tok::period) ||
2522            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2523       if (FormatTok->is(tok::l_square))
2524         parseSquare();
2525       else
2526         nextToken();
2527   }
2528   if (FormatTok->Tok.is(tok::l_brace)) {
2529     if (ShouldBreakBeforeBrace(Style, InitialToken))
2530       addUnwrappedLine();
2531 
2532     unsigned AddLevels =
2533         Style.NamespaceIndentation == FormatStyle::NI_All ||
2534                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2535                  DeclarationScopeStack.size() > 1)
2536             ? 1u
2537             : 0u;
2538     bool ManageWhitesmithsBraces =
2539         AddLevels == 0u &&
2540         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2541 
2542     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2543     // the whole block.
2544     if (ManageWhitesmithsBraces)
2545       ++Line->Level;
2546 
2547     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2548                /*MunchSemi=*/true,
2549                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2550 
2551     // Munch the semicolon after a namespace. This is more common than one would
2552     // think. Putting the semicolon into its own line is very ugly.
2553     if (FormatTok->Tok.is(tok::semi))
2554       nextToken();
2555 
2556     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2557 
2558     if (ManageWhitesmithsBraces)
2559       --Line->Level;
2560   }
2561   // FIXME: Add error handling.
2562 }
2563 
2564 void UnwrappedLineParser::parseNew() {
2565   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2566   nextToken();
2567 
2568   if (Style.isCSharp()) {
2569     do {
2570       if (FormatTok->is(tok::l_brace))
2571         parseBracedList();
2572 
2573       if (FormatTok->isOneOf(tok::semi, tok::comma))
2574         return;
2575 
2576       nextToken();
2577     } while (!eof());
2578   }
2579 
2580   if (Style.Language != FormatStyle::LK_Java)
2581     return;
2582 
2583   // In Java, we can parse everything up to the parens, which aren't optional.
2584   do {
2585     // There should not be a ;, { or } before the new's open paren.
2586     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2587       return;
2588 
2589     // Consume the parens.
2590     if (FormatTok->is(tok::l_paren)) {
2591       parseParens();
2592 
2593       // If there is a class body of an anonymous class, consume that as child.
2594       if (FormatTok->is(tok::l_brace))
2595         parseChildBlock();
2596       return;
2597     }
2598     nextToken();
2599   } while (!eof());
2600 }
2601 
2602 void UnwrappedLineParser::parseForOrWhileLoop() {
2603   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2604          "'for', 'while' or foreach macro expected");
2605   nextToken();
2606   // JS' for await ( ...
2607   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2608     nextToken();
2609   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2610     nextToken();
2611   if (FormatTok->Tok.is(tok::l_paren))
2612     parseParens();
2613 
2614   keepAncestorBraces();
2615 
2616   if (FormatTok->Tok.is(tok::l_brace)) {
2617     FormatToken *LeftBrace = FormatTok;
2618     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2619     parseBlock();
2620     if (Style.RemoveBracesLLVM) {
2621       assert(!NestedTooDeep.empty());
2622       if (!NestedTooDeep.back())
2623         markOptionalBraces(LeftBrace);
2624     }
2625     addUnwrappedLine();
2626   } else {
2627     addUnwrappedLine();
2628     ++Line->Level;
2629     parseStructuralElement();
2630     --Line->Level;
2631   }
2632 
2633   if (Style.RemoveBracesLLVM)
2634     NestedTooDeep.pop_back();
2635 }
2636 
2637 void UnwrappedLineParser::parseDoWhile() {
2638   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2639   nextToken();
2640 
2641   keepAncestorBraces();
2642 
2643   if (FormatTok->Tok.is(tok::l_brace)) {
2644     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2645     parseBlock();
2646     if (Style.BraceWrapping.BeforeWhile)
2647       addUnwrappedLine();
2648   } else {
2649     addUnwrappedLine();
2650     ++Line->Level;
2651     parseStructuralElement();
2652     --Line->Level;
2653   }
2654 
2655   if (Style.RemoveBracesLLVM)
2656     NestedTooDeep.pop_back();
2657 
2658   // FIXME: Add error handling.
2659   if (!FormatTok->Tok.is(tok::kw_while)) {
2660     addUnwrappedLine();
2661     return;
2662   }
2663 
2664   // If in Whitesmiths mode, the line with the while() needs to be indented
2665   // to the same level as the block.
2666   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2667     ++Line->Level;
2668 
2669   nextToken();
2670   parseStructuralElement();
2671 }
2672 
2673 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2674   nextToken();
2675   unsigned OldLineLevel = Line->Level;
2676   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2677     --Line->Level;
2678   if (LeftAlignLabel)
2679     Line->Level = 0;
2680 
2681   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2682       FormatTok->Tok.is(tok::l_brace)) {
2683 
2684     CompoundStatementIndenter Indenter(this, Line->Level,
2685                                        Style.BraceWrapping.AfterCaseLabel,
2686                                        Style.BraceWrapping.IndentBraces);
2687     parseBlock();
2688     if (FormatTok->Tok.is(tok::kw_break)) {
2689       if (Style.BraceWrapping.AfterControlStatement ==
2690           FormatStyle::BWACS_Always) {
2691         addUnwrappedLine();
2692         if (!Style.IndentCaseBlocks &&
2693             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2694           ++Line->Level;
2695       }
2696       parseStructuralElement();
2697     }
2698     addUnwrappedLine();
2699   } else {
2700     if (FormatTok->is(tok::semi))
2701       nextToken();
2702     addUnwrappedLine();
2703   }
2704   Line->Level = OldLineLevel;
2705   if (FormatTok->isNot(tok::l_brace)) {
2706     parseStructuralElement();
2707     addUnwrappedLine();
2708   }
2709 }
2710 
2711 void UnwrappedLineParser::parseCaseLabel() {
2712   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2713 
2714   // FIXME: fix handling of complex expressions here.
2715   do {
2716     nextToken();
2717   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2718   parseLabel();
2719 }
2720 
2721 void UnwrappedLineParser::parseSwitch() {
2722   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2723   nextToken();
2724   if (FormatTok->Tok.is(tok::l_paren))
2725     parseParens();
2726 
2727   keepAncestorBraces();
2728 
2729   if (FormatTok->Tok.is(tok::l_brace)) {
2730     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2731     parseBlock();
2732     addUnwrappedLine();
2733   } else {
2734     addUnwrappedLine();
2735     ++Line->Level;
2736     parseStructuralElement();
2737     --Line->Level;
2738   }
2739 
2740   if (Style.RemoveBracesLLVM)
2741     NestedTooDeep.pop_back();
2742 }
2743 
2744 void UnwrappedLineParser::parseAccessSpecifier() {
2745   FormatToken *AccessSpecifierCandidate = FormatTok;
2746   nextToken();
2747   // Understand Qt's slots.
2748   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2749     nextToken();
2750   // Otherwise, we don't know what it is, and we'd better keep the next token.
2751   if (FormatTok->Tok.is(tok::colon)) {
2752     nextToken();
2753     addUnwrappedLine();
2754   } else if (!FormatTok->Tok.is(tok::coloncolon) &&
2755              !std::binary_search(COperatorsFollowingVar.begin(),
2756                                  COperatorsFollowingVar.end(),
2757                                  FormatTok->Tok.getKind())) {
2758     // Not a variable name nor namespace name.
2759     addUnwrappedLine();
2760   } else if (AccessSpecifierCandidate) {
2761     // Consider the access specifier to be a C identifier.
2762     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2763   }
2764 }
2765 
2766 /// \brief Parses a concept definition.
2767 /// \pre The current token has to be the concept keyword.
2768 ///
2769 /// Returns if either the concept has been completely parsed, or if it detects
2770 /// that the concept definition is incorrect.
2771 void UnwrappedLineParser::parseConcept() {
2772   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2773   nextToken();
2774   if (!FormatTok->Tok.is(tok::identifier))
2775     return;
2776   nextToken();
2777   if (!FormatTok->Tok.is(tok::equal))
2778     return;
2779   nextToken();
2780   parseConstraintExpression();
2781   if (FormatTok->Tok.is(tok::semi))
2782     nextToken();
2783   addUnwrappedLine();
2784 }
2785 
2786 /// \brief Parses a requires clause.
2787 /// \pre The current token needs to be the requires keyword.
2788 /// \sa parseRequiresExpression
2789 ///
2790 /// Returns if it either has finished parsing the clause, or it detects, that
2791 /// the clause is incorrect.
2792 void UnwrappedLineParser::parseRequiresClause() {
2793   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2794   assert(FormatTok->getType() == TT_Unknown);
2795 
2796   // If there is no previous token, we are within a requires expression,
2797   // otherwise we will always have the template or function declaration in front
2798   // of it.
2799   bool InRequiresExpression =
2800       !FormatTok->Previous ||
2801       FormatTok->Previous->is(TT_RequiresExpressionLBrace);
2802 
2803   FormatTok->setType(InRequiresExpression
2804                          ? TT_RequiresClauseInARequiresExpression
2805                          : TT_RequiresClause);
2806 
2807   nextToken();
2808   parseConstraintExpression();
2809 
2810   if (!InRequiresExpression)
2811     FormatTok->Previous->ClosesRequiresClause = true;
2812 }
2813 
2814 /// \brief Parses a requires expression.
2815 /// \pre The current token needs to be the requires keyword.
2816 /// \sa parseRequiresClause
2817 ///
2818 /// Returns if it either has finished parsing the expression, or it detects,
2819 /// that the expression is incorrect.
2820 void UnwrappedLineParser::parseRequiresExpression() {
2821   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2822   assert(FormatTok->getType() == TT_Unknown);
2823 
2824   FormatTok->setType(TT_RequiresExpression);
2825   nextToken();
2826 
2827   if (FormatTok->is(tok::l_paren)) {
2828     FormatTok->setType(TT_RequiresExpressionLParen);
2829     parseParens();
2830   }
2831 
2832   if (FormatTok->is(tok::l_brace)) {
2833     FormatTok->setType(TT_RequiresExpressionLBrace);
2834     parseChildBlock(/*CanContainBracedList=*/false,
2835                     /*NextLBracesType=*/TT_CompoundRequirementLBrace);
2836   }
2837 }
2838 
/// \brief Parses a constraint expression.
///
/// This is either the definition of a concept, or the body of a requires
/// clause. It returns, when the parsing is complete, or the expression is
/// incorrect.
///
/// The function consumes tokens one construct at a time, dispatching on the
/// kind of the current token. It stops, without consuming the current token,
/// at the first token that cannot continue a constraint expression, or when
/// the end of input is reached.
void UnwrappedLineParser::parseConstraintExpression() {
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::kw_requires:
      // A nested requires expression.
      parseRequiresExpression();
      break;

    case tok::l_paren:
      // Inside the parentheses '&&' is to be treated as a binary operator.
      parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
      break;

    case tok::l_square:
      // A '[' is only accepted here if it starts a lambda.
      if (!tryToParseLambda())
        return;
      break;

    case tok::identifier:
      // We need to differentiate identifiers for a template deduction guide,
      // variables, or function return types (the constraint expression has
      // ended before that), and basically all other cases. But it's easier to
      // check the other way around.
      assert(FormatTok->Previous);
      switch (FormatTok->Previous->Tok.getKind()) {
      case tok::coloncolon:  // Nested identifier.
      case tok::ampamp:      // Start of a function or variable for the
      case tok::pipepipe:    // constraint expression.
      case tok::kw_requires: // Initial identifier of a requires clause.
      case tok::equal:       // Initial identifier of a concept declaration.
        break;
      default:
        // Any other predecessor means the expression ended before this
        // identifier.
        return;
      }

      // Read identifier with optional template declaration.
      nextToken();
      if (FormatTok->Tok.is(tok::less))
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      break;

    // Tokens that end the constraint expression; they are left for the caller.
    case tok::kw_const:
    case tok::semi:
    case tok::kw_class:
    case tok::kw_struct:
    case tok::kw_union:
      return;

    case tok::l_brace:
      // Potential function body.
      return;

    case tok::ampamp:
    case tok::pipepipe:
      // Logical operators joining the parts of the constraint.
      FormatTok->setType(TT_BinaryOperator);
      nextToken();
      break;

    case tok::kw_true:
    case tok::kw_false:
    case tok::kw_sizeof:
    case tok::greater:
    case tok::greaterequal:
    case tok::greatergreater:
    case tok::less:
    case tok::lessequal:
    case tok::lessless:
    case tok::equalequal:
    case tok::exclaim:
    case tok::exclaimequal:
    case tok::plus:
    case tok::minus:
    case tok::star:
    case tok::slash:
    case tok::numeric_constant:
    case tok::kw_decltype:
    case tok::comment:
    case tok::comma:
    case tok::coloncolon:
      // Just eat them.
      nextToken();
      break;

    case tok::kw_static_cast:
    case tok::kw_const_cast:
    case tok::kw_reinterpret_cast:
    case tok::kw_dynamic_cast:
      // A named cast must be followed by its '<...>' target type list.
      nextToken();
      if (!FormatTok->is(tok::less))
        return;

      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
      break;

    case tok::kw_bool:
      // bool is only allowed if it is directly followed by a paren for a cast:
      // concept C = bool(...);
      // and bool is the only type, all other types as cast must be inside a
      // cast to bool and thus are handled by the other cases.
      nextToken();
      if (FormatTok->isNot(tok::l_paren))
        return;
      parseParens();
      break;

    default:
      // Anything else does not belong to a constraint expression.
      return;
    }
  } while (!eof());
}
2954 
2955 bool UnwrappedLineParser::parseEnum() {
2956   const FormatToken &InitialToken = *FormatTok;
2957 
2958   // Won't be 'enum' for NS_ENUMs.
2959   if (FormatTok->Tok.is(tok::kw_enum))
2960     nextToken();
2961 
2962   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2963   // declarations. An "enum" keyword followed by a colon would be a syntax
2964   // error and thus assume it is just an identifier.
2965   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
2966     return false;
2967 
2968   // In protobuf, "enum" can be used as a field name.
2969   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2970     return false;
2971 
2972   // Eat up enum class ...
2973   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2974     nextToken();
2975 
2976   while (FormatTok->Tok.getIdentifierInfo() ||
2977          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2978                             tok::greater, tok::comma, tok::question)) {
2979     nextToken();
2980     // We can have macros or attributes in between 'enum' and the enum name.
2981     if (FormatTok->is(tok::l_paren))
2982       parseParens();
2983     if (FormatTok->is(tok::identifier)) {
2984       nextToken();
2985       // If there are two identifiers in a row, this is likely an elaborate
2986       // return type. In Java, this can be "implements", etc.
2987       if (Style.isCpp() && FormatTok->is(tok::identifier))
2988         return false;
2989     }
2990   }
2991 
2992   // Just a declaration or something is wrong.
2993   if (FormatTok->isNot(tok::l_brace))
2994     return true;
2995   FormatTok->setType(TT_RecordLBrace);
2996   FormatTok->setBlockKind(BK_Block);
2997 
2998   if (Style.Language == FormatStyle::LK_Java) {
2999     // Java enums are different.
3000     parseJavaEnumBody();
3001     return true;
3002   }
3003   if (Style.Language == FormatStyle::LK_Proto) {
3004     parseBlock(/*MustBeDeclaration=*/true);
3005     return true;
3006   }
3007 
3008   if (!Style.AllowShortEnumsOnASingleLine &&
3009       ShouldBreakBeforeBrace(Style, InitialToken))
3010     addUnwrappedLine();
3011   // Parse enum body.
3012   nextToken();
3013   if (!Style.AllowShortEnumsOnASingleLine) {
3014     addUnwrappedLine();
3015     Line->Level += 1;
3016   }
3017   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3018                                    /*IsEnum=*/true);
3019   if (!Style.AllowShortEnumsOnASingleLine)
3020     Line->Level -= 1;
3021   if (HasError) {
3022     if (FormatTok->is(tok::semi))
3023       nextToken();
3024     addUnwrappedLine();
3025   }
3026   return true;
3027 
3028   // There is no addUnwrappedLine() here so that we fall through to parsing a
3029   // structural element afterwards. Thus, in "enum A {} n, m;",
3030   // "} n, m;" will end up in one unwrapped line.
3031 }
3032 
3033 bool UnwrappedLineParser::parseStructLike() {
3034   // parseRecord falls through and does not yet add an unwrapped line as a
3035   // record declaration or definition can start a structural element.
3036   parseRecord();
3037   // This does not apply to Java, JavaScript and C#.
3038   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3039       Style.isCSharp()) {
3040     if (FormatTok->is(tok::semi))
3041       nextToken();
3042     addUnwrappedLine();
3043     return true;
3044   }
3045   return false;
3046 }
3047 
3048 namespace {
3049 // A class used to set and restore the Token position when peeking
3050 // ahead in the token source.
3051 class ScopedTokenPosition {
3052   unsigned StoredPosition;
3053   FormatTokenSource *Tokens;
3054 
3055 public:
3056   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3057     assert(Tokens && "Tokens expected to not be null");
3058     StoredPosition = Tokens->getPosition();
3059   }
3060 
3061   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3062 };
3063 } // namespace
3064 
3065 // Look to see if we have [[ by looking ahead, if
3066 // its not then rewind to the original position.
3067 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3068   ScopedTokenPosition AutoPosition(Tokens);
3069   FormatToken *Tok = Tokens->getNextToken();
3070   // We already read the first [ check for the second.
3071   if (!Tok->is(tok::l_square))
3072     return false;
3073   // Double check that the attribute is just something
3074   // fairly simple.
3075   while (Tok->isNot(tok::eof)) {
3076     if (Tok->is(tok::r_square))
3077       break;
3078     Tok = Tokens->getNextToken();
3079   }
3080   if (Tok->is(tok::eof))
3081     return false;
3082   Tok = Tokens->getNextToken();
3083   if (!Tok->is(tok::r_square))
3084     return false;
3085   Tok = Tokens->getNextToken();
3086   if (Tok->is(tok::semi))
3087     return false;
3088   return true;
3089 }
3090 
3091 void UnwrappedLineParser::parseJavaEnumBody() {
3092   // Determine whether the enum is simple, i.e. does not have a semicolon or
3093   // constants with class bodies. Simple enums can be formatted like braced
3094   // lists, contracted to a single line, etc.
3095   unsigned StoredPosition = Tokens->getPosition();
3096   bool IsSimple = true;
3097   FormatToken *Tok = Tokens->getNextToken();
3098   while (!Tok->is(tok::eof)) {
3099     if (Tok->is(tok::r_brace))
3100       break;
3101     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3102       IsSimple = false;
3103       break;
3104     }
3105     // FIXME: This will also mark enums with braces in the arguments to enum
3106     // constants as "not simple". This is probably fine in practice, though.
3107     Tok = Tokens->getNextToken();
3108   }
3109   FormatTok = Tokens->setPosition(StoredPosition);
3110 
3111   if (IsSimple) {
3112     nextToken();
3113     parseBracedList();
3114     addUnwrappedLine();
3115     return;
3116   }
3117 
3118   // Parse the body of a more complex enum.
3119   // First add a line for everything up to the "{".
3120   nextToken();
3121   addUnwrappedLine();
3122   ++Line->Level;
3123 
3124   // Parse the enum constants.
3125   while (FormatTok) {
3126     if (FormatTok->is(tok::l_brace)) {
3127       // Parse the constant's class body.
3128       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3129                  /*MunchSemi=*/false);
3130     } else if (FormatTok->is(tok::l_paren)) {
3131       parseParens();
3132     } else if (FormatTok->is(tok::comma)) {
3133       nextToken();
3134       addUnwrappedLine();
3135     } else if (FormatTok->is(tok::semi)) {
3136       nextToken();
3137       addUnwrappedLine();
3138       break;
3139     } else if (FormatTok->is(tok::r_brace)) {
3140       addUnwrappedLine();
3141       break;
3142     } else {
3143       nextToken();
3144     }
3145   }
3146 
3147   // Parse the class body after the enum's ";" if any.
3148   parseLevel(/*HasOpeningBrace=*/true, /*CanContainBracedList=*/true);
3149   nextToken();
3150   --Line->Level;
3151   addUnwrappedLine();
3152 }
3153 
/// Parses a record (the caller has positioned the parser on the token that
/// introduces it): the name with any macros/attributes around it, an optional
/// base/template part and, if present, the body.
///
/// \param ParseAsExpr If true, the body is parsed as a child block of the
/// current line instead of as a regular block.
///
/// No unwrapped line is added at the end; see the comment at the bottom of
/// the function.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Remembered for the brace-wrapping decision below.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier that differs from its upper-cased form (i.e. contains a
    // lower-case character) is not treated as a macro.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip over the base/template part until the body, a ';' or the end of
    // input is reached.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A '{' that is not a braced list ends the scan; it is handled as the
        // record body below.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous ||
            !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            break;
        }
      }
      // A ';' means there is no body to parse; leave it for the caller.
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // A C# generic type constraint starts on its own line.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    FormatTok->setType(TT_RecordLBrace);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // Members get two extra levels when access modifiers are indented
      // themselves, one otherwise.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3248 
3249 void UnwrappedLineParser::parseObjCMethod() {
3250   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
3251          "'(' or identifier expected.");
3252   do {
3253     if (FormatTok->Tok.is(tok::semi)) {
3254       nextToken();
3255       addUnwrappedLine();
3256       return;
3257     } else if (FormatTok->Tok.is(tok::l_brace)) {
3258       if (Style.BraceWrapping.AfterFunction)
3259         addUnwrappedLine();
3260       parseBlock();
3261       addUnwrappedLine();
3262       return;
3263     } else {
3264       nextToken();
3265     }
3266   } while (!eof());
3267 }
3268 
3269 void UnwrappedLineParser::parseObjCProtocolList() {
3270   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
3271   do {
3272     nextToken();
3273     // Early exit in case someone forgot a close angle.
3274     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3275         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3276       return;
3277   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
3278   nextToken(); // Skip '>'.
3279 }
3280 
3281 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3282   do {
3283     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
3284       nextToken();
3285       addUnwrappedLine();
3286       break;
3287     }
3288     if (FormatTok->is(tok::l_brace)) {
3289       parseBlock();
3290       // In ObjC interfaces, nothing should be following the "}".
3291       addUnwrappedLine();
3292     } else if (FormatTok->is(tok::r_brace)) {
3293       // Ignore stray "}". parseStructuralElement doesn't consume them.
3294       nextToken();
3295       addUnwrappedLine();
3296     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3297       nextToken();
3298       parseObjCMethod();
3299     } else {
3300       parseStructuralElement();
3301     }
3302   } while (!eof());
3303 }
3304 
3305 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3306   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3307          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3308   nextToken();
3309   nextToken(); // interface name
3310 
3311   // @interface can be followed by a lightweight generic
3312   // specialization list, then either a base class or a category.
3313   if (FormatTok->Tok.is(tok::less))
3314     parseObjCLightweightGenerics();
3315   if (FormatTok->Tok.is(tok::colon)) {
3316     nextToken();
3317     nextToken(); // base class name
3318     // The base class can also have lightweight generics applied to it.
3319     if (FormatTok->Tok.is(tok::less))
3320       parseObjCLightweightGenerics();
3321   } else if (FormatTok->Tok.is(tok::l_paren))
3322     // Skip category, if present.
3323     parseParens();
3324 
3325   if (FormatTok->Tok.is(tok::less))
3326     parseObjCProtocolList();
3327 
3328   if (FormatTok->Tok.is(tok::l_brace)) {
3329     if (Style.BraceWrapping.AfterObjCDeclaration)
3330       addUnwrappedLine();
3331     parseBlock(/*MustBeDeclaration=*/true);
3332   }
3333 
3334   // With instance variables, this puts '}' on its own line.  Without instance
3335   // variables, this ends the @interface line.
3336   addUnwrappedLine();
3337 
3338   parseObjCUntilAtEnd();
3339 }
3340 
3341 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3342   assert(FormatTok->Tok.is(tok::less));
3343   // Unlike protocol lists, generic parameterizations support
3344   // nested angles:
3345   //
3346   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3347   //     NSObject <NSCopying, NSSecureCoding>
3348   //
3349   // so we need to count how many open angles we have left.
3350   unsigned NumOpenAngles = 1;
3351   do {
3352     nextToken();
3353     // Early exit in case someone forgot a close angle.
3354     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3355         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3356       break;
3357     if (FormatTok->Tok.is(tok::less))
3358       ++NumOpenAngles;
3359     else if (FormatTok->Tok.is(tok::greater)) {
3360       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3361       --NumOpenAngles;
3362     }
3363   } while (!eof() && NumOpenAngles != 0);
3364   nextToken(); // Skip '>'.
3365 }
3366 
3367 // Returns true for the declaration/definition form of @protocol,
3368 // false for the expression form.
3369 bool UnwrappedLineParser::parseObjCProtocol() {
3370   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3371   nextToken();
3372 
3373   if (FormatTok->is(tok::l_paren))
3374     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3375     return false;
3376 
3377   // The definition/declaration form,
3378   // @protocol Foo
3379   // - (int)someMethod;
3380   // @end
3381 
3382   nextToken(); // protocol name
3383 
3384   if (FormatTok->Tok.is(tok::less))
3385     parseObjCProtocolList();
3386 
3387   // Check for protocol declaration.
3388   if (FormatTok->Tok.is(tok::semi)) {
3389     nextToken();
3390     addUnwrappedLine();
3391     return true;
3392   }
3393 
3394   addUnwrappedLine();
3395   parseObjCUntilAtEnd();
3396   return true;
3397 }
3398 
3399 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3400   bool IsImport = FormatTok->is(Keywords.kw_import);
3401   assert(IsImport || FormatTok->is(tok::kw_export));
3402   nextToken();
3403 
3404   // Consume the "default" in "export default class/function".
3405   if (FormatTok->is(tok::kw_default))
3406     nextToken();
3407 
3408   // Consume "async function", "function" and "default function", so that these
3409   // get parsed as free-standing JS functions, i.e. do not require a trailing
3410   // semicolon.
3411   if (FormatTok->is(Keywords.kw_async))
3412     nextToken();
3413   if (FormatTok->is(Keywords.kw_function)) {
3414     nextToken();
3415     return;
3416   }
3417 
3418   // For imports, `export *`, `export {...}`, consume the rest of the line up
3419   // to the terminating `;`. For everything else, just return and continue
3420   // parsing the structural element, i.e. the declaration or expression for
3421   // `export default`.
3422   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3423       !FormatTok->isStringLiteral())
3424     return;
3425 
3426   while (!eof()) {
3427     if (FormatTok->is(tok::semi))
3428       return;
3429     if (Line->Tokens.empty()) {
3430       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3431       // import statement should terminate.
3432       return;
3433     }
3434     if (FormatTok->is(tok::l_brace)) {
3435       FormatTok->setBlockKind(BK_Block);
3436       nextToken();
3437       parseBracedList();
3438     } else {
3439       nextToken();
3440     }
3441   }
3442 }
3443 
3444 void UnwrappedLineParser::parseStatementMacro() {
3445   nextToken();
3446   if (FormatTok->is(tok::l_paren))
3447     parseParens();
3448   if (FormatTok->is(tok::semi))
3449     nextToken();
3450   addUnwrappedLine();
3451 }
3452 
3453 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3454                                                  StringRef Prefix = "") {
3455   llvm::dbgs() << Prefix << "Line(" << Line.Level
3456                << ", FSC=" << Line.FirstStartColumn << ")"
3457                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3458   for (const auto &Node : Line.Tokens) {
3459     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3460                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3461                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3462   }
3463   for (const auto &Node : Line.Tokens)
3464     for (const auto &ChildNode : Node.Children)
3465       printDebugInfo(ChildNode, "\nChild: ");
3466 
3467   llvm::dbgs() << "\n";
3468 }
3469 
3470 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3471   if (Line->Tokens.empty())
3472     return;
3473   LLVM_DEBUG({
3474     if (CurrentLines == &Lines)
3475       printDebugInfo(*Line);
3476   });
3477 
3478   // If this line closes a block when in Whitesmiths mode, remember that
3479   // information so that the level can be decreased after the line is added.
3480   // This has to happen after the addition of the line since the line itself
3481   // needs to be indented.
3482   bool ClosesWhitesmithsBlock =
3483       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3484       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3485 
3486   CurrentLines->push_back(std::move(*Line));
3487   Line->Tokens.clear();
3488   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3489   Line->FirstStartColumn = 0;
3490 
3491   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3492     --Line->Level;
3493   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3494     CurrentLines->append(
3495         std::make_move_iterator(PreprocessorDirectives.begin()),
3496         std::make_move_iterator(PreprocessorDirectives.end()));
3497     PreprocessorDirectives.clear();
3498   }
3499   // Disconnect the current token from the last token on the previous line.
3500   FormatTok->Previous = nullptr;
3501 }
3502 
3503 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3504 
3505 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3506   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3507          FormatTok.NewlinesBefore > 0;
3508 }
3509 
3510 // Checks if \p FormatTok is a line comment that continues the line comment
3511 // section on \p Line.
3512 static bool
3513 continuesLineCommentSection(const FormatToken &FormatTok,
3514                             const UnwrappedLine &Line,
3515                             const llvm::Regex &CommentPragmasRegex) {
3516   if (Line.Tokens.empty())
3517     return false;
3518 
3519   StringRef IndentContent = FormatTok.TokenText;
3520   if (FormatTok.TokenText.startswith("//") ||
3521       FormatTok.TokenText.startswith("/*"))
3522     IndentContent = FormatTok.TokenText.substr(2);
3523   if (CommentPragmasRegex.match(IndentContent))
3524     return false;
3525 
3526   // If Line starts with a line comment, then FormatTok continues the comment
3527   // section if its original column is greater or equal to the original start
3528   // column of the line.
3529   //
3530   // Define the min column token of a line as follows: if a line ends in '{' or
3531   // contains a '{' followed by a line comment, then the min column token is
3532   // that '{'. Otherwise, the min column token of the line is the first token of
3533   // the line.
3534   //
3535   // If Line starts with a token other than a line comment, then FormatTok
3536   // continues the comment section if its original column is greater than the
3537   // original start column of the min column token of the line.
3538   //
3539   // For example, the second line comment continues the first in these cases:
3540   //
3541   // // first line
3542   // // second line
3543   //
3544   // and:
3545   //
3546   // // first line
3547   //  // second line
3548   //
3549   // and:
3550   //
3551   // int i; // first line
3552   //  // second line
3553   //
3554   // and:
3555   //
3556   // do { // first line
3557   //      // second line
3558   //   int i;
3559   // } while (true);
3560   //
3561   // and:
3562   //
3563   // enum {
3564   //   a, // first line
3565   //    // second line
3566   //   b
3567   // };
3568   //
3569   // The second line comment doesn't continue the first in these cases:
3570   //
3571   //   // first line
3572   //  // second line
3573   //
3574   // and:
3575   //
3576   // int i; // first line
3577   // // second line
3578   //
3579   // and:
3580   //
3581   // do { // first line
3582   //   // second line
3583   //   int i;
3584   // } while (true);
3585   //
3586   // and:
3587   //
3588   // enum {
3589   //   a, // first line
3590   //   // second line
3591   // };
3592   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3593 
3594   // Scan for '{//'. If found, use the column of '{' as a min column for line
3595   // comment section continuation.
3596   const FormatToken *PreviousToken = nullptr;
3597   for (const UnwrappedLineNode &Node : Line.Tokens) {
3598     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3599         isLineComment(*Node.Tok)) {
3600       MinColumnToken = PreviousToken;
3601       break;
3602     }
3603     PreviousToken = Node.Tok;
3604 
3605     // Grab the last newline preceding a token in this unwrapped line.
3606     if (Node.Tok->NewlinesBefore > 0)
3607       MinColumnToken = Node.Tok;
3608   }
3609   if (PreviousToken && PreviousToken->is(tok::l_brace))
3610     MinColumnToken = PreviousToken;
3611 
3612   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3613                               MinColumnToken);
3614 }
3615 
3616 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3617   bool JustComments = Line->Tokens.empty();
3618   for (FormatToken *Tok : CommentsBeforeNextToken) {
3619     // Line comments that belong to the same line comment section are put on the
3620     // same line since later we might want to reflow content between them.
3621     // Additional fine-grained breaking of line comment sections is controlled
3622     // by the class BreakableLineCommentSection in case it is desirable to keep
3623     // several line comment sections in the same unwrapped line.
3624     //
3625     // FIXME: Consider putting separate line comment sections as children to the
3626     // unwrapped line instead.
3627     Tok->ContinuesLineCommentSection =
3628         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3629     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3630       addUnwrappedLine();
3631     pushToken(Tok);
3632   }
3633   if (NewlineBeforeNext && JustComments)
3634     addUnwrappedLine();
3635   CommentsBeforeNextToken.clear();
3636 }
3637 
3638 void UnwrappedLineParser::nextToken(int LevelDifference) {
3639   if (eof())
3640     return;
3641   flushComments(isOnNewLine(*FormatTok));
3642   pushToken(FormatTok);
3643   FormatToken *Previous = FormatTok;
3644   if (!Style.isJavaScript())
3645     readToken(LevelDifference);
3646   else
3647     readTokenWithJavaScriptASI();
3648   FormatTok->Previous = Previous;
3649 }
3650 
3651 void UnwrappedLineParser::distributeComments(
3652     const SmallVectorImpl<FormatToken *> &Comments,
3653     const FormatToken *NextTok) {
3654   // Whether or not a line comment token continues a line is controlled by
3655   // the method continuesLineCommentSection, with the following caveat:
3656   //
3657   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3658   // that each comment line from the trail is aligned with the next token, if
3659   // the next token exists. If a trail exists, the beginning of the maximal
3660   // trail is marked as a start of a new comment section.
3661   //
3662   // For example in this code:
3663   //
3664   // int a; // line about a
3665   //   // line 1 about b
3666   //   // line 2 about b
3667   //   int b;
3668   //
3669   // the two lines about b form a maximal trail, so there are two sections, the
3670   // first one consisting of the single comment "// line about a" and the
3671   // second one consisting of the next two comments.
3672   if (Comments.empty())
3673     return;
3674   bool ShouldPushCommentsInCurrentLine = true;
3675   bool HasTrailAlignedWithNextToken = false;
3676   unsigned StartOfTrailAlignedWithNextToken = 0;
3677   if (NextTok) {
3678     // We are skipping the first element intentionally.
3679     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3680       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3681         HasTrailAlignedWithNextToken = true;
3682         StartOfTrailAlignedWithNextToken = i;
3683       }
3684     }
3685   }
3686   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3687     FormatToken *FormatTok = Comments[i];
3688     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3689       FormatTok->ContinuesLineCommentSection = false;
3690     } else {
3691       FormatTok->ContinuesLineCommentSection =
3692           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3693     }
3694     if (!FormatTok->ContinuesLineCommentSection &&
3695         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
3696       ShouldPushCommentsInCurrentLine = false;
3697     if (ShouldPushCommentsInCurrentLine)
3698       pushToken(FormatTok);
3699     else
3700       CommentsBeforeNextToken.push_back(FormatTok);
3701   }
3702 }
3703 
3704 void UnwrappedLineParser::readToken(int LevelDifference) {
3705   SmallVector<FormatToken *, 1> Comments;
3706   bool PreviousWasComment = false;
3707   bool FirstNonCommentOnLine = false;
3708   do {
3709     FormatTok = Tokens->getNextToken();
3710     assert(FormatTok);
3711     while (FormatTok->getType() == TT_ConflictStart ||
3712            FormatTok->getType() == TT_ConflictEnd ||
3713            FormatTok->getType() == TT_ConflictAlternative) {
3714       if (FormatTok->getType() == TT_ConflictStart)
3715         conditionalCompilationStart(/*Unreachable=*/false);
3716       else if (FormatTok->getType() == TT_ConflictAlternative)
3717         conditionalCompilationAlternative();
3718       else if (FormatTok->getType() == TT_ConflictEnd)
3719         conditionalCompilationEnd();
3720       FormatTok = Tokens->getNextToken();
3721       FormatTok->MustBreakBefore = true;
3722     }
3723 
3724     auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
3725                                       const FormatToken &Tok,
3726                                       bool PreviousWasComment) {
3727       auto IsFirstOnLine = [](const FormatToken &Tok) {
3728         return Tok.HasUnescapedNewline || Tok.IsFirst;
3729       };
3730 
3731       // Consider preprocessor directives preceded by block comments as first
3732       // on line.
3733       if (PreviousWasComment)
3734         return FirstNonCommentOnLine || IsFirstOnLine(Tok);
3735       return IsFirstOnLine(Tok);
3736     };
3737 
3738     FirstNonCommentOnLine = IsFirstNonCommentOnLine(
3739         FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
3740     PreviousWasComment = FormatTok->Tok.is(tok::comment);
3741 
3742     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3743            FirstNonCommentOnLine) {
3744       distributeComments(Comments, FormatTok);
3745       Comments.clear();
3746       // If there is an unfinished unwrapped line, we flush the preprocessor
3747       // directives only after that unwrapped line was finished later.
3748       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3749       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3750       assert((LevelDifference >= 0 ||
3751               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3752              "LevelDifference makes Line->Level negative");
3753       Line->Level += LevelDifference;
3754       // Comments stored before the preprocessor directive need to be output
3755       // before the preprocessor directive, at the same level as the
3756       // preprocessor directive, as we consider them to apply to the directive.
3757       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3758           PPBranchLevel > 0)
3759         Line->Level += PPBranchLevel;
3760       flushComments(isOnNewLine(*FormatTok));
3761       parsePPDirective();
3762       PreviousWasComment = FormatTok->Tok.is(tok::comment);
3763       FirstNonCommentOnLine = IsFirstNonCommentOnLine(
3764           FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
3765     }
3766 
3767     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3768         !Line->InPPDirective)
3769       continue;
3770 
3771     if (!FormatTok->Tok.is(tok::comment)) {
3772       distributeComments(Comments, FormatTok);
3773       Comments.clear();
3774       return;
3775     }
3776 
3777     Comments.push_back(FormatTok);
3778   } while (!eof());
3779 
3780   distributeComments(Comments, nullptr);
3781   Comments.clear();
3782 }
3783 
3784 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3785   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3786   if (MustBreakBeforeNextToken) {
3787     Line->Tokens.back().Tok->MustBreakBefore = true;
3788     MustBreakBeforeNextToken = false;
3789   }
3790 }
3791 
3792 } // end namespace format
3793 } // end namespace clang
3794