1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 
24 #define DEBUG_TYPE "format-parser"
25 
26 namespace clang {
27 namespace format {
28 
29 class FormatTokenSource {
30 public:
31   virtual ~FormatTokenSource() {}
32 
33   // Returns the next token in the token stream.
34   virtual FormatToken *getNextToken() = 0;
35 
36   // Returns the token preceding the token returned by the last call to
37   // getNextToken() in the token stream, or nullptr if no such token exists.
38   virtual FormatToken *getPreviousToken() = 0;
39 
40   // Returns the token that would be returned by the next call to
41   // getNextToken().
42   virtual FormatToken *peekNextToken() = 0;
43 
44   // Returns whether we are at the end of the file.
45   // This can be different from whether getNextToken() returned an eof token
46   // when the FormatTokenSource is a view on a part of the token stream.
47   virtual bool isEOF() = 0;
48 
49   // Gets the current position in the token stream, to be used by setPosition().
50   virtual unsigned getPosition() = 0;
51 
52   // Resets the token stream to the state it was in when getPosition() returned
53   // Position, and return the token at that position in the stream.
54   virtual FormatToken *setPosition(unsigned Position) = 0;
55 };
56 
57 namespace {
58 
59 class ScopedDeclarationState {
60 public:
61   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
62                          bool MustBeDeclaration)
63       : Line(Line), Stack(Stack) {
64     Line.MustBeDeclaration = MustBeDeclaration;
65     Stack.push_back(MustBeDeclaration);
66   }
67   ~ScopedDeclarationState() {
68     Stack.pop_back();
69     if (!Stack.empty())
70       Line.MustBeDeclaration = Stack.back();
71     else
72       Line.MustBeDeclaration = true;
73   }
74 
75 private:
76   UnwrappedLine &Line;
77   llvm::BitVector &Stack;
78 };
79 
80 static bool isLineComment(const FormatToken &FormatTok) {
81   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
82 }
83 
84 // Checks if \p FormatTok is a line comment that continues the line comment
85 // \p Previous. The original column of \p MinColumnToken is used to determine
86 // whether \p FormatTok is indented enough to the right to continue \p Previous.
87 static bool continuesLineComment(const FormatToken &FormatTok,
88                                  const FormatToken *Previous,
89                                  const FormatToken *MinColumnToken) {
90   if (!Previous || !MinColumnToken)
91     return false;
92   unsigned MinContinueColumn =
93       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
94   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
95          isLineComment(*Previous) &&
96          FormatTok.OriginalColumn >= MinContinueColumn;
97 }
98 
99 class ScopedMacroState : public FormatTokenSource {
100 public:
101   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
102                    FormatToken *&ResetToken)
103       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
104         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
105         Token(nullptr), PreviousToken(nullptr) {
106     FakeEOF.Tok.startToken();
107     FakeEOF.Tok.setKind(tok::eof);
108     TokenSource = this;
109     Line.Level = 0;
110     Line.InPPDirective = true;
111   }
112 
113   ~ScopedMacroState() override {
114     TokenSource = PreviousTokenSource;
115     ResetToken = Token;
116     Line.InPPDirective = false;
117     Line.Level = PreviousLineLevel;
118   }
119 
120   FormatToken *getNextToken() override {
121     // The \c UnwrappedLineParser guards against this by never calling
122     // \c getNextToken() after it has encountered the first eof token.
123     assert(!eof());
124     PreviousToken = Token;
125     Token = PreviousTokenSource->getNextToken();
126     if (eof())
127       return &FakeEOF;
128     return Token;
129   }
130 
131   FormatToken *getPreviousToken() override {
132     return PreviousTokenSource->getPreviousToken();
133   }
134 
135   FormatToken *peekNextToken() override {
136     if (eof())
137       return &FakeEOF;
138     return PreviousTokenSource->peekNextToken();
139   }
140 
141   bool isEOF() override { return PreviousTokenSource->isEOF(); }
142 
143   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
144 
145   FormatToken *setPosition(unsigned Position) override {
146     PreviousToken = nullptr;
147     Token = PreviousTokenSource->setPosition(Position);
148     return Token;
149   }
150 
151 private:
152   bool eof() {
153     return Token && Token->HasUnescapedNewline &&
154            !continuesLineComment(*Token, PreviousToken,
155                                  /*MinColumnToken=*/PreviousToken);
156   }
157 
158   FormatToken FakeEOF;
159   UnwrappedLine &Line;
160   FormatTokenSource *&TokenSource;
161   FormatToken *&ResetToken;
162   unsigned PreviousLineLevel;
163   FormatTokenSource *PreviousTokenSource;
164 
165   FormatToken *Token;
166   FormatToken *PreviousToken;
167 };
168 
169 } // end anonymous namespace
170 
171 class ScopedLineState {
172 public:
173   ScopedLineState(UnwrappedLineParser &Parser,
174                   bool SwitchToPreprocessorLines = false)
175       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
176     if (SwitchToPreprocessorLines)
177       Parser.CurrentLines = &Parser.PreprocessorDirectives;
178     else if (!Parser.Line->Tokens.empty())
179       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
180     PreBlockLine = std::move(Parser.Line);
181     Parser.Line = std::make_unique<UnwrappedLine>();
182     Parser.Line->Level = PreBlockLine->Level;
183     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
184   }
185 
186   ~ScopedLineState() {
187     if (!Parser.Line->Tokens.empty())
188       Parser.addUnwrappedLine();
189     assert(Parser.Line->Tokens.empty());
190     Parser.Line = std::move(PreBlockLine);
191     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
192       Parser.MustBreakBeforeNextToken = true;
193     Parser.CurrentLines = OriginalLines;
194   }
195 
196 private:
197   UnwrappedLineParser &Parser;
198 
199   std::unique_ptr<UnwrappedLine> PreBlockLine;
200   SmallVectorImpl<UnwrappedLine> *OriginalLines;
201 };
202 
203 class CompoundStatementIndenter {
204 public:
205   CompoundStatementIndenter(UnwrappedLineParser *Parser,
206                             const FormatStyle &Style, unsigned &LineLevel)
207       : CompoundStatementIndenter(Parser, LineLevel,
208                                   Style.BraceWrapping.AfterControlStatement,
209                                   Style.BraceWrapping.IndentBraces) {}
210   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
211                             bool WrapBrace, bool IndentBrace)
212       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
213     if (WrapBrace)
214       Parser->addUnwrappedLine();
215     if (IndentBrace)
216       ++LineLevel;
217   }
218   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
219 
220 private:
221   unsigned &LineLevel;
222   unsigned OldLineLevel;
223 };
224 
225 namespace {
226 
227 class IndexedTokenSource : public FormatTokenSource {
228 public:
229   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
230       : Tokens(Tokens), Position(-1) {}
231 
232   FormatToken *getNextToken() override {
233     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
234       LLVM_DEBUG({
235         llvm::dbgs() << "Next ";
236         dbgToken(Position);
237       });
238       return Tokens[Position];
239     }
240     ++Position;
241     LLVM_DEBUG({
242       llvm::dbgs() << "Next ";
243       dbgToken(Position);
244     });
245     return Tokens[Position];
246   }
247 
248   FormatToken *getPreviousToken() override {
249     return Position > 0 ? Tokens[Position - 1] : nullptr;
250   }
251 
252   FormatToken *peekNextToken() override {
253     int Next = Position + 1;
254     LLVM_DEBUG({
255       llvm::dbgs() << "Peeking ";
256       dbgToken(Next);
257     });
258     return Tokens[Next];
259   }
260 
261   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
262 
263   unsigned getPosition() override {
264     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
265     assert(Position >= 0);
266     return Position;
267   }
268 
269   FormatToken *setPosition(unsigned P) override {
270     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
271     Position = P;
272     return Tokens[Position];
273   }
274 
275   void reset() { Position = -1; }
276 
277 private:
278   void dbgToken(int Position, llvm::StringRef Indent = "") {
279     FormatToken *Tok = Tokens[Position];
280     llvm::dbgs() << Indent << "[" << Position
281                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
282                  << ", Macro: " << !!Tok->MacroCtx << "\n";
283   }
284 
285   ArrayRef<FormatToken *> Tokens;
286   int Position;
287 };
288 
289 } // end anonymous namespace
290 
// Sets up a parser over the token array \p Tokens (kept as AllTokens).
//
// The token source pointer (the member named Tokens) starts out null; parse()
// installs a source before anything is read. Lines are initially collected in
// the top-level Lines container. Include-guard tracking starts in IG_Rejected
// when the style does not indent preprocessor directives (PPDIS_None), and in
// IG_Inited otherwise.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
304 
305 void UnwrappedLineParser::reset() {
306   PPBranchLevel = -1;
307   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
308                      ? IG_Rejected
309                      : IG_Inited;
310   IncludeGuardToken = nullptr;
311   Line.reset(new UnwrappedLine);
312   CommentsBeforeNextToken.clear();
313   FormatTok = nullptr;
314   MustBreakBeforeNextToken = false;
315   PreprocessorDirectives.clear();
316   CurrentLines = &Lines;
317   DeclarationScopeStack.clear();
318   NestedTooDeep.clear();
319   PPStack.clear();
320   Line->FirstStartColumn = FirstStartColumn;
321 }
322 
323 void UnwrappedLineParser::parse() {
324   IndexedTokenSource TokenSource(AllTokens);
325   Line->FirstStartColumn = FirstStartColumn;
326   do {
327     LLVM_DEBUG(llvm::dbgs() << "----\n");
328     reset();
329     Tokens = &TokenSource;
330     TokenSource.reset();
331 
332     readToken();
333     parseFile();
334 
335     // If we found an include guard then all preprocessor directives (other than
336     // the guard) are over-indented by one.
337     if (IncludeGuard == IG_Found)
338       for (auto &Line : Lines)
339         if (Line.InPPDirective && Line.Level > 0)
340           --Line.Level;
341 
342     // Create line with eof token.
343     pushToken(FormatTok);
344     addUnwrappedLine();
345 
346     for (const UnwrappedLine &Line : Lines)
347       Callback.consumeUnwrappedLine(Line);
348 
349     Callback.finishRun();
350     Lines.clear();
351     while (!PPLevelBranchIndex.empty() &&
352            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
353       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
354       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
355     }
356     if (!PPLevelBranchIndex.empty()) {
357       ++PPLevelBranchIndex.back();
358       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
359       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
360     }
361   } while (!PPLevelBranchIndex.empty());
362 }
363 
364 void UnwrappedLineParser::parseFile() {
365   // The top-level context in a file always has declarations, except for pre-
366   // processor directives and JavaScript files.
367   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
368   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
369                                           MustBeDeclaration);
370   if (Style.Language == FormatStyle::LK_TextProto)
371     parseBracedList();
372   else
373     parseLevel(/*HasOpeningBrace=*/false);
374   // Make sure to format the remaining tokens.
375   //
376   // LK_TextProto is special since its top-level is parsed as the body of a
377   // braced list, which does not necessarily have natural line separators such
378   // as a semicolon. Comments after the last entry that have been determined to
379   // not belong to that line, as in:
380   //   key: value
381   //   // endfile comment
382   // do not have a chance to be put on a line of their own until this point.
383   // Here we add this newline before end-of-file comments.
384   if (Style.Language == FormatStyle::LK_TextProto &&
385       !CommentsBeforeNextToken.empty())
386     addUnwrappedLine();
387   flushComments(true);
388   addUnwrappedLine();
389 }
390 
391 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
392   do {
393     switch (FormatTok->Tok.getKind()) {
394     case tok::l_brace:
395       return;
396     default:
397       if (FormatTok->is(Keywords.kw_where)) {
398         addUnwrappedLine();
399         nextToken();
400         parseCSharpGenericTypeConstraint();
401         break;
402       }
403       nextToken();
404       break;
405     }
406   } while (!eof());
407 }
408 
409 void UnwrappedLineParser::parseCSharpAttribute() {
410   int UnpairedSquareBrackets = 1;
411   do {
412     switch (FormatTok->Tok.getKind()) {
413     case tok::r_square:
414       nextToken();
415       --UnpairedSquareBrackets;
416       if (UnpairedSquareBrackets == 0) {
417         addUnwrappedLine();
418         return;
419       }
420       break;
421     case tok::l_square:
422       ++UnpairedSquareBrackets;
423       nextToken();
424       break;
425     default:
426       nextToken();
427       break;
428     }
429   } while (!eof());
430 }
431 
432 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
433   if (!Lines.empty() && Lines.back().InPPDirective)
434     return true;
435 
436   const FormatToken *Previous = Tokens->getPreviousToken();
437   return Previous && Previous->is(tok::comment) &&
438          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
439 }
440 
441 bool UnwrappedLineParser::mightFitOnOneLine() const {
442   const auto ColumnLimit = Style.ColumnLimit;
443   if (ColumnLimit == 0)
444     return true;
445 
446   if (Lines.empty())
447     return true;
448 
449   const auto &PreviousLine = Lines.back();
450   const auto &Tokens = PreviousLine.Tokens;
451   assert(!Tokens.empty());
452   const auto *LastToken = Tokens.back().Tok;
453   assert(LastToken);
454   if (!LastToken->isOneOf(tok::semi, tok::comment))
455     return true;
456 
457   AnnotatedLine Line(PreviousLine);
458   assert(Line.Last == LastToken);
459 
460   TokenAnnotator Annotator(Style, Keywords);
461   Annotator.annotate(Line);
462   Annotator.calculateFormattingInformation(Line);
463 
464   return Line.Level * Style.IndentWidth + LastToken->TotalLength <= ColumnLimit;
465 }
466 
// Parses statements at the current nesting level until eof, or (when
// \p HasOpeningBrace is set) until the closing brace of the enclosing block,
// which is left unconsumed for the caller.
//
// Returns true if a simple block, or false otherwise. (A simple block has a
// single statement that fits on a single line.) True can only be returned
// when \p HasOpeningBrace is set and Style.RemoveBracesLLVM is enabled.
//
// \p IfKind is forwarded to parseStructuralElement() for ordinary statements.
bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) {
  // Evaluated once on entry, i.e. relative to the token before the level
  // starts. With RemoveBracesLLVM disabled this is forced to true, which makes
  // the simple-block check below fail.
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  // Counts nested blocks and ordinary structural elements; comments and
  // switch labels are not counted.
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin)
      kind = tok::l_brace;
    else if (FormatTok->getType() == TT_MacroBlockEnd)
      kind = tok::r_brace;

    switch (kind) {
    case tok::comment:
      // A comment at this level goes on a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      // A successfully parsed braced list is not a statement of its own: go
      // straight to the loop condition without counting or ending the line.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace) {
        // End of the enclosing block; the brace itself stays unconsumed.
        if (!Style.RemoveBracesLLVM)
          return false;
        // Not simple if this is a macro block end rather than a real '}', if
        // there is not exactly one statement, or if a comment or preprocessor
        // directive precedes either the level or the closing brace.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective())
          return false;
        // A comment trailing the closing brace on the same line also
        // disqualifies the block.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        return mightFitOnOneLine();
      }
      // Closing brace without an opening brace at this level: consume it and
      // put it on its own line.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments to see what follows 'default', then rewind
      // the token source to where it was.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      // NOTE(review): Next has already been dereferenced by the loop above, so
      // the null check here cannot fail.
      if (Next && Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Only the first label met on this level adds the extra indentation.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        // Consume the '[' here; parseCSharpAttribute() reads up to the
        // matching ']'.
        nextToken();
        parseCSharpAttribute();
        break;
      }
      LLVM_FALLTHROUGH;
    default:
      parseStructuralElement(IfKind, !HasOpeningBrace);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      break;
    }
  } while (!eof());
  // Ran into eof before any closing brace ended the level.
  return false;
}
556 
557 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
558   // We'll parse forward through the tokens until we hit
559   // a closing brace or eof - note that getNextToken() will
560   // parse macros, so this will magically work inside macro
561   // definitions, too.
562   unsigned StoredPosition = Tokens->getPosition();
563   FormatToken *Tok = FormatTok;
564   const FormatToken *PrevTok = Tok->Previous;
565   // Keep a stack of positions of lbrace tokens. We will
566   // update information about whether an lbrace starts a
567   // braced init list or a different block during the loop.
568   SmallVector<FormatToken *, 8> LBraceStack;
569   assert(Tok->Tok.is(tok::l_brace));
570   do {
571     // Get next non-comment token.
572     FormatToken *NextTok;
573     unsigned ReadTokens = 0;
574     do {
575       NextTok = Tokens->getNextToken();
576       ++ReadTokens;
577     } while (NextTok->is(tok::comment));
578 
579     switch (Tok->Tok.getKind()) {
580     case tok::l_brace:
581       if (Style.isJavaScript() && PrevTok) {
582         if (PrevTok->isOneOf(tok::colon, tok::less))
583           // A ':' indicates this code is in a type, or a braced list
584           // following a label in an object literal ({a: {b: 1}}).
585           // A '<' could be an object used in a comparison, but that is nonsense
586           // code (can never return true), so more likely it is a generic type
587           // argument (`X<{a: string; b: number}>`).
588           // The code below could be confused by semicolons between the
589           // individual members in a type member list, which would normally
590           // trigger BK_Block. In both cases, this must be parsed as an inline
591           // braced init.
592           Tok->setBlockKind(BK_BracedInit);
593         else if (PrevTok->is(tok::r_paren))
594           // `) { }` can only occur in function or method declarations in JS.
595           Tok->setBlockKind(BK_Block);
596       } else {
597         Tok->setBlockKind(BK_Unknown);
598       }
599       LBraceStack.push_back(Tok);
600       break;
601     case tok::r_brace:
602       if (LBraceStack.empty())
603         break;
604       if (LBraceStack.back()->is(BK_Unknown)) {
605         bool ProbablyBracedList = false;
606         if (Style.Language == FormatStyle::LK_Proto) {
607           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
608         } else {
609           // Skip NextTok over preprocessor lines, otherwise we may not
610           // properly diagnose the block as a braced intializer
611           // if the comma separator appears after the pp directive.
612           while (NextTok->is(tok::hash)) {
613             ScopedMacroState MacroState(*Line, Tokens, NextTok);
614             do {
615               NextTok = Tokens->getNextToken();
616               ++ReadTokens;
617             } while (NextTok->isNot(tok::eof));
618           }
619 
620           // Using OriginalColumn to distinguish between ObjC methods and
621           // binary operators is a bit hacky.
622           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
623                                   NextTok->OriginalColumn == 0;
624 
625           // If there is a comma, semicolon or right paren after the closing
626           // brace, we assume this is a braced initializer list.  Note that
627           // regardless how we mark inner braces here, we will overwrite the
628           // BlockKind later if we parse a braced list (where all blocks
629           // inside are by default braced lists), or when we explicitly detect
630           // blocks (for example while parsing lambdas).
631           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
632           // braced list in JS.
633           ProbablyBracedList =
634               (Style.isJavaScript() &&
635                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
636                                 Keywords.kw_as)) ||
637               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
638               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
639                                tok::r_paren, tok::r_square, tok::l_brace,
640                                tok::ellipsis) ||
641               (NextTok->is(tok::identifier) &&
642                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
643               (NextTok->is(tok::semi) &&
644                (!ExpectClassBody || LBraceStack.size() != 1)) ||
645               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
646           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
647             // We can have an array subscript after a braced init
648             // list, but C++11 attributes are expected after blocks.
649             NextTok = Tokens->getNextToken();
650             ++ReadTokens;
651             ProbablyBracedList = NextTok->isNot(tok::l_square);
652           }
653         }
654         if (ProbablyBracedList) {
655           Tok->setBlockKind(BK_BracedInit);
656           LBraceStack.back()->setBlockKind(BK_BracedInit);
657         } else {
658           Tok->setBlockKind(BK_Block);
659           LBraceStack.back()->setBlockKind(BK_Block);
660         }
661       }
662       LBraceStack.pop_back();
663       break;
664     case tok::identifier:
665       if (!Tok->is(TT_StatementMacro))
666         break;
667       LLVM_FALLTHROUGH;
668     case tok::at:
669     case tok::semi:
670     case tok::kw_if:
671     case tok::kw_while:
672     case tok::kw_for:
673     case tok::kw_switch:
674     case tok::kw_try:
675     case tok::kw___try:
676       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
677         LBraceStack.back()->setBlockKind(BK_Block);
678       break;
679     default:
680       break;
681     }
682     PrevTok = Tok;
683     Tok = NextTok;
684   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
685 
686   // Assume other blocks for all unclosed opening braces.
687   for (FormatToken *LBrace : LBraceStack)
688     if (LBrace->is(BK_Unknown))
689       LBrace->setBlockKind(BK_Block);
690 
691   FormatTok = Tokens->setPosition(StoredPosition);
692 }
693 
// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t ValueHash = std::hash<T>{}(v);
  const std::size_t Mix = ValueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mix;
}
699 
700 size_t UnwrappedLineParser::computePPHash() const {
701   size_t h = 0;
702   for (const auto &i : PPStack) {
703     hash_combine(h, size_t(i.Kind));
704     hash_combine(h, i.Line);
705   }
706   return h;
707 }
708 
// Parses a block that starts at the current token, which must be '{' or a
// macro block begin token.
//
// \p MustBeDeclaration is the declaration state installed for the block body.
// \p AddLevels is the number of indentation levels added for the body.
// \p MunchSemi: if set, a ';' directly following the block is consumed too.
// \p UnindentWhitesmithsBraces: if set, the line level is reduced by one after
// the opening line has been added.
//
// Returns the IfStmtKind that parseLevel() reported for the block body
// (IfStmtKind::NotIf if it reported nothing).
UnwrappedLineParser::IfStmtKind
UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                bool MunchSemi,
                                bool UnindentWhitesmithsBraces) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Remember the opening token so it can be linked to the closing one later.
  FormatToken *Tok = FormatTok;
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Hash of the preprocessor branch stack at the opening brace; compared with
  // the hash at the closing brace further down.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  // A macro block begin may carry an argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Compute the index of the line holding the opening brace. When lines go to
  // the top-level list, the preprocessor directives still pending at this
  // point are subtracted from the index.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  // In Whitesmiths mode the level was already raised above.
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind);

  // Unterminated block: leave everything as it is.
  if (eof())
    return IfKind;

  // The body did not end at the expected closing token; restore the level and
  // bail out without consuming the token.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // For a simple block, link the two braces to each other, unless the token
  // before the closing brace is itself a non-optional '}'.
  if (SimpleBlock && Tok->is(tok::l_brace)) {
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      Tok->MatchingParen = FormatTok;
      FormatTok->MatchingParen = Tok;
    }
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  // A macro block end may carry an argument list as well.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::arrow)) {
    // Following the } we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Only cross-reference the opening and closing lines when the preprocessor
  // branch stack hashes the same at both braces.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}
805 
806 static bool isGoogScope(const UnwrappedLine &Line) {
807   // FIXME: Closure-library specific stuff should not be hard-coded but be
808   // configurable.
809   if (Line.Tokens.size() < 4)
810     return false;
811   auto I = Line.Tokens.begin();
812   if (I->Tok->TokenText != "goog")
813     return false;
814   ++I;
815   if (I->Tok->isNot(tok::period))
816     return false;
817   ++I;
818   if (I->Tok->TokenText != "scope")
819     return false;
820   ++I;
821   return I->Tok->is(tok::l_paren);
822 }
823 
824 static bool isIIFE(const UnwrappedLine &Line,
825                    const AdditionalKeywords &Keywords) {
826   // Look for the start of an immediately invoked anonymous function.
827   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
828   // This is commonly done in JavaScript to create a new, anonymous scope.
829   // Example: (function() { ... })()
830   if (Line.Tokens.size() < 3)
831     return false;
832   auto I = Line.Tokens.begin();
833   if (I->Tok->isNot(tok::l_paren))
834     return false;
835   ++I;
836   if (I->Tok->isNot(Keywords.kw_function))
837     return false;
838   ++I;
839   return I->Tok->is(tok::l_paren);
840 }
841 
842 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
843                                    const FormatToken &InitialToken) {
844   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
845     return Style.BraceWrapping.AfterNamespace;
846   if (InitialToken.is(tok::kw_class))
847     return Style.BraceWrapping.AfterClass;
848   if (InitialToken.is(tok::kw_union))
849     return Style.BraceWrapping.AfterUnion;
850   if (InitialToken.is(tok::kw_struct))
851     return Style.BraceWrapping.AfterStruct;
852   if (InitialToken.is(tok::kw_enum))
853     return Style.BraceWrapping.AfterEnum;
854   return false;
855 }
856 
857 void UnwrappedLineParser::parseChildBlock() {
858   FormatTok->setBlockKind(BK_Block);
859   nextToken();
860   {
861     bool SkipIndent = (Style.isJavaScript() &&
862                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
863     ScopedLineState LineState(*this);
864     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
865                                             /*MustBeDeclaration=*/false);
866     Line->Level += SkipIndent ? 0 : 1;
867     parseLevel(/*HasOpeningBrace=*/true);
868     flushComments(isOnNewLine(*FormatTok));
869     Line->Level -= SkipIndent ? 0 : 1;
870   }
871   nextToken();
872 }
873 
874 void UnwrappedLineParser::parsePPDirective() {
875   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
876   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
877 
878   nextToken();
879 
880   if (!FormatTok->Tok.getIdentifierInfo()) {
881     parsePPUnknown();
882     return;
883   }
884 
885   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
886   case tok::pp_define:
887     parsePPDefine();
888     return;
889   case tok::pp_if:
890     parsePPIf(/*IfDef=*/false);
891     break;
892   case tok::pp_ifdef:
893   case tok::pp_ifndef:
894     parsePPIf(/*IfDef=*/true);
895     break;
896   case tok::pp_else:
897     parsePPElse();
898     break;
899   case tok::pp_elifdef:
900   case tok::pp_elifndef:
901   case tok::pp_elif:
902     parsePPElIf();
903     break;
904   case tok::pp_endif:
905     parsePPEndIf();
906     break;
907   default:
908     parsePPUnknown();
909     break;
910   }
911 }
912 
913 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
914   size_t Line = CurrentLines->size();
915   if (CurrentLines == &PreprocessorDirectives)
916     Line += Lines.size();
917 
918   if (Unreachable ||
919       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
920     PPStack.push_back({PP_Unreachable, Line});
921   else
922     PPStack.push_back({PP_Conditional, Line});
923 }
924 
925 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
926   ++PPBranchLevel;
927   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
928   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
929     PPLevelBranchIndex.push_back(0);
930     PPLevelBranchCount.push_back(0);
931   }
932   PPChainBranchIndex.push(0);
933   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
934   conditionalCompilationCondition(Unreachable || Skip);
935 }
936 
937 void UnwrappedLineParser::conditionalCompilationAlternative() {
938   if (!PPStack.empty())
939     PPStack.pop_back();
940   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
941   if (!PPChainBranchIndex.empty())
942     ++PPChainBranchIndex.top();
943   conditionalCompilationCondition(
944       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
945       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
946 }
947 
948 void UnwrappedLineParser::conditionalCompilationEnd() {
949   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
950   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
951     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
952       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
953   }
954   // Guard against #endif's without #if.
955   if (PPBranchLevel > -1)
956     --PPBranchLevel;
957   if (!PPChainBranchIndex.empty())
958     PPChainBranchIndex.pop();
959   if (!PPStack.empty())
960     PPStack.pop_back();
961 }
962 
963 void UnwrappedLineParser::parsePPIf(bool IfDef) {
964   bool IfNDef = FormatTok->is(tok::pp_ifndef);
965   nextToken();
966   bool Unreachable = false;
967   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
968     Unreachable = true;
969   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
970     Unreachable = true;
971   conditionalCompilationStart(Unreachable);
972   FormatToken *IfCondition = FormatTok;
973   // If there's a #ifndef on the first line, and the only lines before it are
974   // comments, it could be an include guard.
975   bool MaybeIncludeGuard = IfNDef;
976   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
977     for (auto &Line : Lines) {
978       if (!Line.Tokens.front().Tok->is(tok::comment)) {
979         MaybeIncludeGuard = false;
980         IncludeGuard = IG_Rejected;
981         break;
982       }
983     }
984   --PPBranchLevel;
985   parsePPUnknown();
986   ++PPBranchLevel;
987   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
988     IncludeGuard = IG_IfNdefed;
989     IncludeGuardToken = IfCondition;
990   }
991 }
992 
993 void UnwrappedLineParser::parsePPElse() {
994   // If a potential include guard has an #else, it's not an include guard.
995   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
996     IncludeGuard = IG_Rejected;
997   conditionalCompilationAlternative();
998   if (PPBranchLevel > -1)
999     --PPBranchLevel;
1000   parsePPUnknown();
1001   ++PPBranchLevel;
1002 }
1003 
1004 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1005 
1006 void UnwrappedLineParser::parsePPEndIf() {
1007   conditionalCompilationEnd();
1008   parsePPUnknown();
1009   // If the #endif of a potential include guard is the last thing in the file,
1010   // then we found an include guard.
1011   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1012       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1013     IncludeGuard = IG_Found;
1014 }
1015 
1016 void UnwrappedLineParser::parsePPDefine() {
1017   nextToken();
1018 
1019   if (!FormatTok->Tok.getIdentifierInfo()) {
1020     IncludeGuard = IG_Rejected;
1021     IncludeGuardToken = nullptr;
1022     parsePPUnknown();
1023     return;
1024   }
1025 
1026   if (IncludeGuard == IG_IfNdefed &&
1027       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1028     IncludeGuard = IG_Defined;
1029     IncludeGuardToken = nullptr;
1030     for (auto &Line : Lines) {
1031       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1032         IncludeGuard = IG_Rejected;
1033         break;
1034       }
1035     }
1036   }
1037 
1038   nextToken();
1039   if (FormatTok->Tok.getKind() == tok::l_paren &&
1040       !FormatTok->hasWhitespaceBefore())
1041     parseParens();
1042   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1043     Line->Level += PPBranchLevel + 1;
1044   addUnwrappedLine();
1045   ++Line->Level;
1046 
1047   // Errors during a preprocessor directive can only affect the layout of the
1048   // preprocessor directive, and thus we ignore them. An alternative approach
1049   // would be to use the same approach we use on the file level (no
1050   // re-indentation if there was a structural error) within the macro
1051   // definition.
1052   parseFile();
1053 }
1054 
1055 void UnwrappedLineParser::parsePPUnknown() {
1056   do {
1057     nextToken();
1058   } while (!eof());
1059   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1060     Line->Level += PPBranchLevel + 1;
1061   addUnwrappedLine();
1062 }
1063 
1064 // Here we exclude certain tokens that are not usually the first token in an
1065 // unwrapped line. This is used in attempt to distinguish macro calls without
1066 // trailing semicolons from other constructs split to several lines.
1067 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1068   // Semicolon can be a null-statement, l_square can be a start of a macro or
1069   // a C++11 attribute, but this doesn't seem to be common.
1070   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1071          Tok.isNot(TT_AttributeSquare) &&
1072          // Tokens that can only be used as binary operators and a part of
1073          // overloaded operator names.
1074          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1075          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1076          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1077          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1078          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1079          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1080          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1081          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1082          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1083          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1084          Tok.isNot(tok::lesslessequal) &&
1085          // Colon is used in labels, base class lists, initializer lists,
1086          // range-based for loops, ternary operator, but should never be the
1087          // first token in an unwrapped line.
1088          Tok.isNot(tok::colon) &&
1089          // 'noexcept' is a trailing annotation.
1090          Tok.isNot(tok::kw_noexcept);
1091 }
1092 
1093 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1094                           const FormatToken *FormatTok) {
1095   // FIXME: This returns true for C/C++ keywords like 'struct'.
1096   return FormatTok->is(tok::identifier) &&
1097          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1098           !FormatTok->isOneOf(
1099               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1100               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1101               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1102               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1103               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1104               Keywords.kw_instanceof, Keywords.kw_interface,
1105               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1106 }
1107 
1108 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1109                                  const FormatToken *FormatTok) {
1110   return FormatTok->Tok.isLiteral() ||
1111          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1112          mustBeJSIdent(Keywords, FormatTok);
1113 }
1114 
1115 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1116 // when encountered after a value (see mustBeJSIdentOrValue).
1117 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1118                            const FormatToken *FormatTok) {
1119   return FormatTok->isOneOf(
1120       tok::kw_return, Keywords.kw_yield,
1121       // conditionals
1122       tok::kw_if, tok::kw_else,
1123       // loops
1124       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1125       // switch/case
1126       tok::kw_switch, tok::kw_case,
1127       // exceptions
1128       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1129       // declaration
1130       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1131       Keywords.kw_async, Keywords.kw_function,
1132       // import/export
1133       Keywords.kw_import, tok::kw_export);
1134 }
1135 
1136 // Checks whether a token is a type in K&R C (aka C78).
1137 static bool isC78Type(const FormatToken &Tok) {
1138   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1139                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1140                      tok::identifier);
1141 }
1142 
1143 // This function checks whether a token starts the first parameter declaration
1144 // in a K&R C (aka C78) function definition, e.g.:
1145 //   int f(a, b)
1146 //   short a, b;
1147 //   {
1148 //      return a + b;
1149 //   }
1150 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1151                                const FormatToken *FuncName) {
1152   assert(Tok);
1153   assert(Next);
1154   assert(FuncName);
1155 
1156   if (FuncName->isNot(tok::identifier))
1157     return false;
1158 
1159   const FormatToken *Prev = FuncName->Previous;
1160   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1161     return false;
1162 
1163   if (!isC78Type(*Tok) &&
1164       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1165     return false;
1166 
1167   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1168     return false;
1169 
1170   Tok = Tok->Previous;
1171   if (!Tok || Tok->isNot(tok::r_paren))
1172     return false;
1173 
1174   Tok = Tok->Previous;
1175   if (!Tok || Tok->isNot(tok::identifier))
1176     return false;
1177 
1178   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1179 }
1180 
1181 void UnwrappedLineParser::parseModuleImport() {
1182   nextToken();
1183   while (!eof()) {
1184     if (FormatTok->is(tok::colon)) {
1185       FormatTok->setType(TT_ModulePartitionColon);
1186     }
1187     // Handle import <foo/bar.h> as we would an include statement.
1188     else if (FormatTok->is(tok::less)) {
1189       nextToken();
1190       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1191         // Mark tokens up to the trailing line comments as implicit string
1192         // literals.
1193         if (FormatTok->isNot(tok::comment) &&
1194             !FormatTok->TokenText.startswith("//"))
1195           FormatTok->setType(TT_ImplicitStringLiteral);
1196         nextToken();
1197       }
1198     }
1199     if (FormatTok->is(tok::semi)) {
1200       nextToken();
1201       break;
1202     }
1203     nextToken();
1204   }
1205 
1206   addUnwrappedLine();
1207 }
1208 
1209 // readTokenWithJavaScriptASI reads the next token and terminates the current
1210 // line if JavaScript Automatic Semicolon Insertion must
1211 // happen between the current token and the next token.
1212 //
1213 // This method is conservative - it cannot cover all edge cases of JavaScript,
1214 // but only aims to correctly handle certain well known cases. It *must not*
1215 // return true in speculative cases.
1216 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1217   FormatToken *Previous = FormatTok;
1218   readToken();
1219   FormatToken *Next = FormatTok;
1220 
1221   bool IsOnSameLine =
1222       CommentsBeforeNextToken.empty()
1223           ? Next->NewlinesBefore == 0
1224           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1225   if (IsOnSameLine)
1226     return;
1227 
1228   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1229   bool PreviousStartsTemplateExpr =
1230       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1231   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1232     // If the line contains an '@' sign, the previous token might be an
1233     // annotation, which can precede another identifier/value.
1234     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1235       return LineNode.Tok->is(tok::at);
1236     });
1237     if (HasAt)
1238       return;
1239   }
1240   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1241     return addUnwrappedLine();
1242   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1243   bool NextEndsTemplateExpr =
1244       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1245   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1246       (PreviousMustBeValue ||
1247        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1248                          tok::minusminus)))
1249     return addUnwrappedLine();
1250   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1251       isJSDeclOrStmt(Keywords, Next))
1252     return addUnwrappedLine();
1253 }
1254 
// Parses one structural element starting at the current token and emits the
// unwrapped line(s) for it.
//
// The first switch dispatches on the leading token for constructs that are
// recognized up front (inline asm, namespaces, access specifiers, control
// flow statements, labels, extern blocks, imports, and several macro kinds).
// Cases that `return` have fully handled the element; cases that `break`
// fall into the token loop below. That loop consumes tokens one construct at
// a time until some case ends the element and returns, or until eof().
//
// IfKind is only forwarded to parseIfThenElse(). IsTopLevel only enables the
// K&R C parameter declaration check that follows a parenthesized list.
void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
                                                 bool IsTopLevel) {
  // TableGen `include`: consume the keyword and an optional string literal,
  // then end the line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      // Brace-delimited asm block: both braces get TT_InlineASMBrace and
      // every token in between is marked Finalized.
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // For Java, JavaScript and C# the keyword is simply consumed here; for
    // other languages it is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse(IfKind);
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only `extern "<string>" {` is handled as a block here; any other use of
    // `extern` continues in the token loop below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    // Identifiers that were given a special token type or that act as
    // keywords in the current language.
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: `import [public] "<string>" [;]`.
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // `signals:` / `Q_SIGNALS:` / `slots:` / `Q_SLOTS:` followed directly by a
    // colon form a line of their own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic token loop: each iteration handles the construct that starts at
  // the current token. The loop ends when a case returns or eof() is reached.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      return;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // `typedef NS_ENUM(...)` and the related Apple enum macros are parsed
      // as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike())
        return;
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The '}' itself is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type or identifier, a parameter list and
      // a braced body, which is parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very like to be an actual
          // interface declaration.
          // Look one token ahead, then restore the stream position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // `interface` in any remaining language is handled like struct/class.
      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike())
          return;
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // Only consider labels and macros when the identifier is the first
      // token of the line, optionally preceded by a single comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the name contains no lowercase letters, is at least five
        // characters long or takes arguments, is followed by a line break,
        // and the next token may start a line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: `= <` starts a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1749 
1750 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1751   assert(FormatTok->is(tok::l_brace));
1752   if (!Style.isCSharp())
1753     return false;
1754   // See if it's a property accessor.
1755   if (FormatTok->Previous->isNot(tok::identifier))
1756     return false;
1757 
1758   // See if we are inside a property accessor.
1759   //
1760   // Record the current tokenPosition so that we can advance and
1761   // reset the current token. `Next` is not set yet so we need
1762   // another way to advance along the token stream.
1763   unsigned int StoredPosition = Tokens->getPosition();
1764   FormatToken *Tok = Tokens->getNextToken();
1765 
1766   // A trivial property accessor is of the form:
1767   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1768   // Track these as they do not require line breaks to be introduced.
1769   bool HasGetOrSet = false;
1770   bool IsTrivialPropertyAccessor = true;
1771   while (!eof()) {
1772     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1773                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1774                      Keywords.kw_set)) {
1775       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1776         HasGetOrSet = true;
1777       Tok = Tokens->getNextToken();
1778       continue;
1779     }
1780     if (Tok->isNot(tok::r_brace))
1781       IsTrivialPropertyAccessor = false;
1782     break;
1783   }
1784 
1785   if (!HasGetOrSet) {
1786     Tokens->setPosition(StoredPosition);
1787     return false;
1788   }
1789 
1790   // Try to parse the property accessor:
1791   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1792   Tokens->setPosition(StoredPosition);
1793   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1794     addUnwrappedLine();
1795   nextToken();
1796   do {
1797     switch (FormatTok->Tok.getKind()) {
1798     case tok::r_brace:
1799       nextToken();
1800       if (FormatTok->is(tok::equal)) {
1801         while (!eof() && FormatTok->isNot(tok::semi))
1802           nextToken();
1803         nextToken();
1804       }
1805       addUnwrappedLine();
1806       return true;
1807     case tok::l_brace:
1808       ++Line->Level;
1809       parseBlock(/*MustBeDeclaration=*/true);
1810       addUnwrappedLine();
1811       --Line->Level;
1812       break;
1813     case tok::equal:
1814       if (FormatTok->is(TT_FatArrow)) {
1815         ++Line->Level;
1816         do {
1817           nextToken();
1818         } while (!eof() && FormatTok->isNot(tok::semi));
1819         nextToken();
1820         addUnwrappedLine();
1821         --Line->Level;
1822         break;
1823       }
1824       nextToken();
1825       break;
1826     default:
1827       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1828           !IsTrivialPropertyAccessor) {
1829         // Non-trivial get/set needs to be on its own line.
1830         addUnwrappedLine();
1831       }
1832       nextToken();
1833     }
1834   } while (!eof());
1835 
1836   // Unreachable for well-formed code (paired '{' and '}').
1837   return true;
1838 }
1839 
1840 bool UnwrappedLineParser::tryToParseLambda() {
1841   if (!Style.isCpp()) {
1842     nextToken();
1843     return false;
1844   }
1845   assert(FormatTok->is(tok::l_square));
1846   FormatToken &LSquare = *FormatTok;
1847   if (!tryToParseLambdaIntroducer())
1848     return false;
1849 
1850   bool SeenArrow = false;
1851   bool InTemplateParameterList = false;
1852 
1853   while (FormatTok->isNot(tok::l_brace)) {
1854     if (FormatTok->isSimpleTypeSpecifier()) {
1855       nextToken();
1856       continue;
1857     }
1858     switch (FormatTok->Tok.getKind()) {
1859     case tok::l_brace:
1860       break;
1861     case tok::l_paren:
1862       parseParens();
1863       break;
1864     case tok::l_square:
1865       parseSquare();
1866       break;
1867     case tok::kw_class:
1868     case tok::kw_template:
1869     case tok::kw_typename:
1870       assert(FormatTok->Previous);
1871       if (FormatTok->Previous->is(tok::less))
1872         InTemplateParameterList = true;
1873       nextToken();
1874       break;
1875     case tok::amp:
1876     case tok::star:
1877     case tok::kw_const:
1878     case tok::comma:
1879     case tok::less:
1880     case tok::greater:
1881     case tok::identifier:
1882     case tok::numeric_constant:
1883     case tok::coloncolon:
1884     case tok::kw_mutable:
1885     case tok::kw_noexcept:
1886       nextToken();
1887       break;
1888     // Specialization of a template with an integer parameter can contain
1889     // arithmetic, logical, comparison and ternary operators.
1890     //
1891     // FIXME: This also accepts sequences of operators that are not in the scope
1892     // of a template argument list.
1893     //
1894     // In a C++ lambda a template type can only occur after an arrow. We use
1895     // this as an heuristic to distinguish between Objective-C expressions
1896     // followed by an `a->b` expression, such as:
1897     // ([obj func:arg] + a->b)
1898     // Otherwise the code below would parse as a lambda.
1899     //
1900     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1901     // explicit template lists: []<bool b = true && false>(U &&u){}
1902     case tok::plus:
1903     case tok::minus:
1904     case tok::exclaim:
1905     case tok::tilde:
1906     case tok::slash:
1907     case tok::percent:
1908     case tok::lessless:
1909     case tok::pipe:
1910     case tok::pipepipe:
1911     case tok::ampamp:
1912     case tok::caret:
1913     case tok::equalequal:
1914     case tok::exclaimequal:
1915     case tok::greaterequal:
1916     case tok::lessequal:
1917     case tok::question:
1918     case tok::colon:
1919     case tok::ellipsis:
1920     case tok::kw_true:
1921     case tok::kw_false:
1922       if (SeenArrow || InTemplateParameterList) {
1923         nextToken();
1924         break;
1925       }
1926       return true;
1927     case tok::arrow:
1928       // This might or might not actually be a lambda arrow (this could be an
1929       // ObjC method invocation followed by a dereferencing arrow). We might
1930       // reset this back to TT_Unknown in TokenAnnotator.
1931       FormatTok->setType(TT_LambdaArrow);
1932       SeenArrow = true;
1933       nextToken();
1934       break;
1935     default:
1936       return true;
1937     }
1938   }
1939   FormatTok->setType(TT_LambdaLBrace);
1940   LSquare.setType(TT_LambdaLSquare);
1941   parseChildBlock();
1942   return true;
1943 }
1944 
1945 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1946   const FormatToken *Previous = FormatTok->Previous;
1947   if (Previous &&
1948       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1949                          tok::kw_delete, tok::l_square) ||
1950        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1951        Previous->isSimpleTypeSpecifier())) {
1952     nextToken();
1953     return false;
1954   }
1955   nextToken();
1956   if (FormatTok->is(tok::l_square))
1957     return false;
1958   parseSquare(/*LambdaIntroducer=*/true);
1959   return true;
1960 }
1961 
1962 void UnwrappedLineParser::tryToParseJSFunction() {
1963   assert(FormatTok->is(Keywords.kw_function) ||
1964          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1965   if (FormatTok->is(Keywords.kw_async))
1966     nextToken();
1967   // Consume "function".
1968   nextToken();
1969 
1970   // Consume * (generator function). Treat it like C++'s overloaded operators.
1971   if (FormatTok->is(tok::star)) {
1972     FormatTok->setType(TT_OverloadedOperator);
1973     nextToken();
1974   }
1975 
1976   // Consume function name.
1977   if (FormatTok->is(tok::identifier))
1978     nextToken();
1979 
1980   if (FormatTok->isNot(tok::l_paren))
1981     return;
1982 
1983   // Parse formal parameter list.
1984   parseParens();
1985 
1986   if (FormatTok->is(tok::colon)) {
1987     // Parse a type definition.
1988     nextToken();
1989 
1990     // Eat the type declaration. For braced inline object types, balance braces,
1991     // otherwise just parse until finding an l_brace for the function body.
1992     if (FormatTok->is(tok::l_brace))
1993       tryToParseBracedList();
1994     else
1995       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1996         nextToken();
1997   }
1998 
1999   if (FormatTok->is(tok::semi))
2000     return;
2001 
2002   parseChildBlock();
2003 }
2004 
2005 bool UnwrappedLineParser::tryToParseBracedList() {
2006   if (FormatTok->is(BK_Unknown))
2007     calculateBraceTypes();
2008   assert(FormatTok->isNot(BK_Unknown));
2009   if (FormatTok->is(BK_Block))
2010     return false;
2011   nextToken();
2012   parseBracedList();
2013   return true;
2014 }
2015 
2016 bool UnwrappedLineParser::tryToParseChildBlock() {
2017   assert(Style.isJavaScript() || Style.isCSharp());
2018   assert(FormatTok->is(TT_FatArrow));
2019   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2020   // They always start an expression or a child block if followed by a curly
2021   // brace.
2022   nextToken();
2023   if (FormatTok->isNot(tok::l_brace))
2024     return false;
2025   parseChildBlock();
2026   return true;
2027 }
2028 
2029 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2030                                           bool IsEnum,
2031                                           tok::TokenKind ClosingBraceKind) {
2032   bool HasError = false;
2033 
2034   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2035   // replace this by using parseAssignmentExpression() inside.
2036   do {
2037     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2038         tryToParseChildBlock())
2039       continue;
2040     if (Style.isJavaScript()) {
2041       if (FormatTok->is(Keywords.kw_function) ||
2042           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2043         tryToParseJSFunction();
2044         continue;
2045       }
2046       if (FormatTok->is(tok::l_brace)) {
2047         // Could be a method inside of a braced list `{a() { return 1; }}`.
2048         if (tryToParseBracedList())
2049           continue;
2050         parseChildBlock();
2051       }
2052     }
2053     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2054       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2055         addUnwrappedLine();
2056       nextToken();
2057       return !HasError;
2058     }
2059     switch (FormatTok->Tok.getKind()) {
2060     case tok::l_square:
2061       if (Style.isCSharp())
2062         parseSquare();
2063       else
2064         tryToParseLambda();
2065       break;
2066     case tok::l_paren:
2067       parseParens();
2068       // JavaScript can just have free standing methods and getters/setters in
2069       // object literals. Detect them by a "{" following ")".
2070       if (Style.isJavaScript()) {
2071         if (FormatTok->is(tok::l_brace))
2072           parseChildBlock();
2073         break;
2074       }
2075       break;
2076     case tok::l_brace:
2077       // Assume there are no blocks inside a braced init list apart
2078       // from the ones we explicitly parse out (like lambdas).
2079       FormatTok->setBlockKind(BK_BracedInit);
2080       nextToken();
2081       parseBracedList();
2082       break;
2083     case tok::less:
2084       if (Style.Language == FormatStyle::LK_Proto) {
2085         nextToken();
2086         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2087                         /*ClosingBraceKind=*/tok::greater);
2088       } else {
2089         nextToken();
2090       }
2091       break;
2092     case tok::semi:
2093       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2094       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2095       // used for error recovery if we have otherwise determined that this is
2096       // a braced list.
2097       if (Style.isJavaScript()) {
2098         nextToken();
2099         break;
2100       }
2101       HasError = true;
2102       if (!ContinueOnSemicolons)
2103         return !HasError;
2104       nextToken();
2105       break;
2106     case tok::comma:
2107       nextToken();
2108       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2109         addUnwrappedLine();
2110       break;
2111     default:
2112       nextToken();
2113       break;
2114     }
2115   } while (!eof());
2116   return false;
2117 }
2118 
2119 void UnwrappedLineParser::parseParens() {
2120   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2121   nextToken();
2122   do {
2123     switch (FormatTok->Tok.getKind()) {
2124     case tok::l_paren:
2125       parseParens();
2126       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2127         parseChildBlock();
2128       break;
2129     case tok::r_paren:
2130       nextToken();
2131       return;
2132     case tok::r_brace:
2133       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2134       return;
2135     case tok::l_square:
2136       tryToParseLambda();
2137       break;
2138     case tok::l_brace:
2139       if (!tryToParseBracedList())
2140         parseChildBlock();
2141       break;
2142     case tok::at:
2143       nextToken();
2144       if (FormatTok->Tok.is(tok::l_brace)) {
2145         nextToken();
2146         parseBracedList();
2147       }
2148       break;
2149     case tok::equal:
2150       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2151         tryToParseChildBlock();
2152       else
2153         nextToken();
2154       break;
2155     case tok::kw_class:
2156       if (Style.isJavaScript())
2157         parseRecord(/*ParseAsExpr=*/true);
2158       else
2159         nextToken();
2160       break;
2161     case tok::identifier:
2162       if (Style.isJavaScript() &&
2163           (FormatTok->is(Keywords.kw_function) ||
2164            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2165         tryToParseJSFunction();
2166       else
2167         nextToken();
2168       break;
2169     default:
2170       nextToken();
2171       break;
2172     }
2173   } while (!eof());
2174 }
2175 
2176 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2177   if (!LambdaIntroducer) {
2178     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2179     if (tryToParseLambda())
2180       return;
2181   }
2182   do {
2183     switch (FormatTok->Tok.getKind()) {
2184     case tok::l_paren:
2185       parseParens();
2186       break;
2187     case tok::r_square:
2188       nextToken();
2189       return;
2190     case tok::r_brace:
2191       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2192       return;
2193     case tok::l_square:
2194       parseSquare();
2195       break;
2196     case tok::l_brace: {
2197       if (!tryToParseBracedList())
2198         parseChildBlock();
2199       break;
2200     }
2201     case tok::at:
2202       nextToken();
2203       if (FormatTok->Tok.is(tok::l_brace)) {
2204         nextToken();
2205         parseBracedList();
2206       }
2207       break;
2208     default:
2209       nextToken();
2210       break;
2211     }
2212   } while (!eof());
2213 }
2214 
2215 void UnwrappedLineParser::keepAncestorBraces() {
2216   if (!Style.RemoveBracesLLVM)
2217     return;
2218 
2219   const int MaxNestingLevels = 2;
2220   const int Size = NestedTooDeep.size();
2221   if (Size >= MaxNestingLevels)
2222     NestedTooDeep[Size - MaxNestingLevels] = true;
2223   NestedTooDeep.push_back(false);
2224 }
2225 
2226 static void markOptionalBraces(FormatToken *LeftBrace) {
2227   if (!LeftBrace)
2228     return;
2229 
2230   assert(LeftBrace->is(tok::l_brace));
2231 
2232   FormatToken *RightBrace = LeftBrace->MatchingParen;
2233   if (!RightBrace) {
2234     assert(!LeftBrace->Optional);
2235     return;
2236   }
2237 
2238   assert(RightBrace->is(tok::r_brace));
2239   assert(RightBrace->MatchingParen == LeftBrace);
2240   assert(LeftBrace->Optional == RightBrace->Optional);
2241 
2242   LeftBrace->Optional = true;
2243   RightBrace->Optional = true;
2244 }
2245 
2246 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2247                                                   bool KeepBraces) {
2248   auto HandleAttributes = [this]() {
2249     // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2250     if (FormatTok->is(TT_AttributeMacro))
2251       nextToken();
2252     // Handle [[likely]] / [[unlikely]] attributes.
2253     if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2254       parseSquare();
2255   };
2256 
2257   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2258   nextToken();
2259   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2260     nextToken();
2261   if (FormatTok->Tok.is(tok::l_paren))
2262     parseParens();
2263   HandleAttributes();
2264 
2265   bool NeedsUnwrappedLine = false;
2266   keepAncestorBraces();
2267 
2268   FormatToken *IfLeftBrace = nullptr;
2269   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2270 
2271   if (FormatTok->Tok.is(tok::l_brace)) {
2272     IfLeftBrace = FormatTok;
2273     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2274     IfBlockKind = parseBlock();
2275     if (Style.BraceWrapping.BeforeElse)
2276       addUnwrappedLine();
2277     else
2278       NeedsUnwrappedLine = true;
2279   } else {
2280     addUnwrappedLine();
2281     ++Line->Level;
2282     parseStructuralElement();
2283     --Line->Level;
2284   }
2285 
2286   bool KeepIfBraces = false;
2287   if (Style.RemoveBracesLLVM) {
2288     assert(!NestedTooDeep.empty());
2289     KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2290                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2291                    IfBlockKind == IfStmtKind::IfElseIf;
2292   }
2293 
2294   FormatToken *ElseLeftBrace = nullptr;
2295   IfStmtKind Kind = IfStmtKind::IfOnly;
2296 
2297   if (FormatTok->Tok.is(tok::kw_else)) {
2298     if (Style.RemoveBracesLLVM) {
2299       NestedTooDeep.back() = false;
2300       Kind = IfStmtKind::IfElse;
2301     }
2302     nextToken();
2303     HandleAttributes();
2304     if (FormatTok->Tok.is(tok::l_brace)) {
2305       ElseLeftBrace = FormatTok;
2306       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2307       if (parseBlock() == IfStmtKind::IfOnly)
2308         Kind = IfStmtKind::IfElseIf;
2309       addUnwrappedLine();
2310     } else if (FormatTok->Tok.is(tok::kw_if)) {
2311       FormatToken *Previous = Tokens->getPreviousToken();
2312       const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
2313       if (IsPrecededByComment) {
2314         addUnwrappedLine();
2315         ++Line->Level;
2316       }
2317       bool TooDeep = true;
2318       if (Style.RemoveBracesLLVM) {
2319         Kind = IfStmtKind::IfElseIf;
2320         TooDeep = NestedTooDeep.pop_back_val();
2321       }
2322       ElseLeftBrace =
2323           parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
2324       if (Style.RemoveBracesLLVM)
2325         NestedTooDeep.push_back(TooDeep);
2326       if (IsPrecededByComment)
2327         --Line->Level;
2328     } else {
2329       addUnwrappedLine();
2330       ++Line->Level;
2331       parseStructuralElement();
2332       if (FormatTok->is(tok::eof))
2333         addUnwrappedLine();
2334       --Line->Level;
2335     }
2336   } else {
2337     if (Style.RemoveBracesLLVM)
2338       KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2339     if (NeedsUnwrappedLine)
2340       addUnwrappedLine();
2341   }
2342 
2343   if (!Style.RemoveBracesLLVM)
2344     return nullptr;
2345 
2346   assert(!NestedTooDeep.empty());
2347   const bool KeepElseBraces =
2348       (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();
2349 
2350   NestedTooDeep.pop_back();
2351 
2352   if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
2353     markOptionalBraces(IfLeftBrace);
2354     markOptionalBraces(ElseLeftBrace);
2355   } else if (IfLeftBrace) {
2356     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2357     if (IfRightBrace) {
2358       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2359       assert(!IfLeftBrace->Optional);
2360       assert(!IfRightBrace->Optional);
2361       IfLeftBrace->MatchingParen = nullptr;
2362       IfRightBrace->MatchingParen = nullptr;
2363     }
2364   }
2365 
2366   if (IfKind)
2367     *IfKind = Kind;
2368 
2369   return IfLeftBrace;
2370 }
2371 
2372 void UnwrappedLineParser::parseTryCatch() {
2373   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2374   nextToken();
2375   bool NeedsUnwrappedLine = false;
2376   if (FormatTok->is(tok::colon)) {
2377     // We are in a function try block, what comes is an initializer list.
2378     nextToken();
2379 
2380     // In case identifiers were removed by clang-tidy, what might follow is
2381     // multiple commas in sequence - before the first identifier.
2382     while (FormatTok->is(tok::comma))
2383       nextToken();
2384 
2385     while (FormatTok->is(tok::identifier)) {
2386       nextToken();
2387       if (FormatTok->is(tok::l_paren))
2388         parseParens();
2389       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2390           FormatTok->is(tok::l_brace)) {
2391         do {
2392           nextToken();
2393         } while (!FormatTok->is(tok::r_brace));
2394         nextToken();
2395       }
2396 
2397       // In case identifiers were removed by clang-tidy, what might follow is
2398       // multiple commas in sequence - after the first identifier.
2399       while (FormatTok->is(tok::comma))
2400         nextToken();
2401     }
2402   }
2403   // Parse try with resource.
2404   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2405     parseParens();
2406 
2407   keepAncestorBraces();
2408 
2409   if (FormatTok->is(tok::l_brace)) {
2410     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2411     parseBlock();
2412     if (Style.BraceWrapping.BeforeCatch)
2413       addUnwrappedLine();
2414     else
2415       NeedsUnwrappedLine = true;
2416   } else if (!FormatTok->is(tok::kw_catch)) {
2417     // The C++ standard requires a compound-statement after a try.
2418     // If there's none, we try to assume there's a structuralElement
2419     // and try to continue.
2420     addUnwrappedLine();
2421     ++Line->Level;
2422     parseStructuralElement();
2423     --Line->Level;
2424   }
2425   while (true) {
2426     if (FormatTok->is(tok::at))
2427       nextToken();
2428     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2429                              tok::kw___finally) ||
2430           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2431            FormatTok->is(Keywords.kw_finally)) ||
2432           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2433            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2434       break;
2435     nextToken();
2436     while (FormatTok->isNot(tok::l_brace)) {
2437       if (FormatTok->is(tok::l_paren)) {
2438         parseParens();
2439         continue;
2440       }
2441       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2442         if (Style.RemoveBracesLLVM)
2443           NestedTooDeep.pop_back();
2444         return;
2445       }
2446       nextToken();
2447     }
2448     NeedsUnwrappedLine = false;
2449     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2450     parseBlock();
2451     if (Style.BraceWrapping.BeforeCatch)
2452       addUnwrappedLine();
2453     else
2454       NeedsUnwrappedLine = true;
2455   }
2456 
2457   if (Style.RemoveBracesLLVM)
2458     NestedTooDeep.pop_back();
2459 
2460   if (NeedsUnwrappedLine)
2461     addUnwrappedLine();
2462 }
2463 
2464 void UnwrappedLineParser::parseNamespace() {
2465   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2466          "'namespace' expected");
2467 
2468   const FormatToken &InitialToken = *FormatTok;
2469   nextToken();
2470   if (InitialToken.is(TT_NamespaceMacro)) {
2471     parseParens();
2472   } else {
2473     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2474                               tok::l_square, tok::period) ||
2475            (Style.isCSharp() && FormatTok->is(tok::kw_union)))
2476       if (FormatTok->is(tok::l_square))
2477         parseSquare();
2478       else
2479         nextToken();
2480   }
2481   if (FormatTok->Tok.is(tok::l_brace)) {
2482     if (ShouldBreakBeforeBrace(Style, InitialToken))
2483       addUnwrappedLine();
2484 
2485     unsigned AddLevels =
2486         Style.NamespaceIndentation == FormatStyle::NI_All ||
2487                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2488                  DeclarationScopeStack.size() > 1)
2489             ? 1u
2490             : 0u;
2491     bool ManageWhitesmithsBraces =
2492         AddLevels == 0u &&
2493         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2494 
2495     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2496     // the whole block.
2497     if (ManageWhitesmithsBraces)
2498       ++Line->Level;
2499 
2500     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2501                /*MunchSemi=*/true,
2502                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2503 
2504     // Munch the semicolon after a namespace. This is more common than one would
2505     // think. Putting the semicolon into its own line is very ugly.
2506     if (FormatTok->Tok.is(tok::semi))
2507       nextToken();
2508 
2509     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2510 
2511     if (ManageWhitesmithsBraces)
2512       --Line->Level;
2513   }
2514   // FIXME: Add error handling.
2515 }
2516 
2517 void UnwrappedLineParser::parseNew() {
2518   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2519   nextToken();
2520 
2521   if (Style.isCSharp()) {
2522     do {
2523       if (FormatTok->is(tok::l_brace))
2524         parseBracedList();
2525 
2526       if (FormatTok->isOneOf(tok::semi, tok::comma))
2527         return;
2528 
2529       nextToken();
2530     } while (!eof());
2531   }
2532 
2533   if (Style.Language != FormatStyle::LK_Java)
2534     return;
2535 
2536   // In Java, we can parse everything up to the parens, which aren't optional.
2537   do {
2538     // There should not be a ;, { or } before the new's open paren.
2539     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2540       return;
2541 
2542     // Consume the parens.
2543     if (FormatTok->is(tok::l_paren)) {
2544       parseParens();
2545 
2546       // If there is a class body of an anonymous class, consume that as child.
2547       if (FormatTok->is(tok::l_brace))
2548         parseChildBlock();
2549       return;
2550     }
2551     nextToken();
2552   } while (!eof());
2553 }
2554 
2555 void UnwrappedLineParser::parseForOrWhileLoop() {
2556   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2557          "'for', 'while' or foreach macro expected");
2558   nextToken();
2559   // JS' for await ( ...
2560   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2561     nextToken();
2562   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2563     nextToken();
2564   if (FormatTok->Tok.is(tok::l_paren))
2565     parseParens();
2566 
2567   keepAncestorBraces();
2568 
2569   if (FormatTok->Tok.is(tok::l_brace)) {
2570     FormatToken *LeftBrace = FormatTok;
2571     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2572     parseBlock();
2573     if (Style.RemoveBracesLLVM) {
2574       assert(!NestedTooDeep.empty());
2575       if (!NestedTooDeep.back())
2576         markOptionalBraces(LeftBrace);
2577     }
2578     addUnwrappedLine();
2579   } else {
2580     addUnwrappedLine();
2581     ++Line->Level;
2582     parseStructuralElement();
2583     --Line->Level;
2584   }
2585 
2586   if (Style.RemoveBracesLLVM)
2587     NestedTooDeep.pop_back();
2588 }
2589 
2590 void UnwrappedLineParser::parseDoWhile() {
2591   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2592   nextToken();
2593 
2594   keepAncestorBraces();
2595 
2596   if (FormatTok->Tok.is(tok::l_brace)) {
2597     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2598     parseBlock();
2599     if (Style.BraceWrapping.BeforeWhile)
2600       addUnwrappedLine();
2601   } else {
2602     addUnwrappedLine();
2603     ++Line->Level;
2604     parseStructuralElement();
2605     --Line->Level;
2606   }
2607 
2608   if (Style.RemoveBracesLLVM)
2609     NestedTooDeep.pop_back();
2610 
2611   // FIXME: Add error handling.
2612   if (!FormatTok->Tok.is(tok::kw_while)) {
2613     addUnwrappedLine();
2614     return;
2615   }
2616 
2617   // If in Whitesmiths mode, the line with the while() needs to be indented
2618   // to the same level as the block.
2619   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2620     ++Line->Level;
2621 
2622   nextToken();
2623   parseStructuralElement();
2624 }
2625 
2626 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2627   nextToken();
2628   unsigned OldLineLevel = Line->Level;
2629   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2630     --Line->Level;
2631   if (LeftAlignLabel)
2632     Line->Level = 0;
2633 
2634   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2635       FormatTok->Tok.is(tok::l_brace)) {
2636 
2637     CompoundStatementIndenter Indenter(this, Line->Level,
2638                                        Style.BraceWrapping.AfterCaseLabel,
2639                                        Style.BraceWrapping.IndentBraces);
2640     parseBlock();
2641     if (FormatTok->Tok.is(tok::kw_break)) {
2642       if (Style.BraceWrapping.AfterControlStatement ==
2643           FormatStyle::BWACS_Always) {
2644         addUnwrappedLine();
2645         if (!Style.IndentCaseBlocks &&
2646             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2647           ++Line->Level;
2648       }
2649       parseStructuralElement();
2650     }
2651     addUnwrappedLine();
2652   } else {
2653     if (FormatTok->is(tok::semi))
2654       nextToken();
2655     addUnwrappedLine();
2656   }
2657   Line->Level = OldLineLevel;
2658   if (FormatTok->isNot(tok::l_brace)) {
2659     parseStructuralElement();
2660     addUnwrappedLine();
2661   }
2662 }
2663 
2664 void UnwrappedLineParser::parseCaseLabel() {
2665   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2666 
2667   // FIXME: fix handling of complex expressions here.
2668   do {
2669     nextToken();
2670   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2671   parseLabel();
2672 }
2673 
2674 void UnwrappedLineParser::parseSwitch() {
2675   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2676   nextToken();
2677   if (FormatTok->Tok.is(tok::l_paren))
2678     parseParens();
2679 
2680   keepAncestorBraces();
2681 
2682   if (FormatTok->Tok.is(tok::l_brace)) {
2683     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2684     parseBlock();
2685     addUnwrappedLine();
2686   } else {
2687     addUnwrappedLine();
2688     ++Line->Level;
2689     parseStructuralElement();
2690     --Line->Level;
2691   }
2692 
2693   if (Style.RemoveBracesLLVM)
2694     NestedTooDeep.pop_back();
2695 }
2696 
2697 void UnwrappedLineParser::parseAccessSpecifier() {
2698   FormatToken *AccessSpecifierCandidate = FormatTok;
2699   nextToken();
2700   // Understand Qt's slots.
2701   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2702     nextToken();
2703   // Otherwise, we don't know what it is, and we'd better keep the next token.
2704   if (FormatTok->Tok.is(tok::colon)) {
2705     nextToken();
2706     addUnwrappedLine();
2707   } else if (!FormatTok->Tok.is(tok::coloncolon) &&
2708              !std::binary_search(COperatorsFollowingVar.begin(),
2709                                  COperatorsFollowingVar.end(),
2710                                  FormatTok->Tok.getKind())) {
2711     // Not a variable name nor namespace name.
2712     addUnwrappedLine();
2713   } else if (AccessSpecifierCandidate) {
2714     // Consider the access specifier to be a C identifier.
2715     AccessSpecifierCandidate->Tok.setKind(tok::identifier);
2716   }
2717 }
2718 
2719 void UnwrappedLineParser::parseConcept() {
2720   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2721   nextToken();
2722   if (!FormatTok->Tok.is(tok::identifier))
2723     return;
2724   nextToken();
2725   if (!FormatTok->Tok.is(tok::equal))
2726     return;
2727   nextToken();
2728   if (FormatTok->Tok.is(tok::kw_requires)) {
2729     nextToken();
2730     parseRequiresExpression(Line->Level);
2731   } else {
2732     parseConstraintExpression(Line->Level);
2733   }
2734 }
2735 
2736 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2737   // requires (R range)
2738   if (FormatTok->Tok.is(tok::l_paren)) {
2739     parseParens();
2740     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2741       addUnwrappedLine();
2742       --Line->Level;
2743     }
2744   }
2745 
2746   if (FormatTok->Tok.is(tok::l_brace)) {
2747     if (Style.BraceWrapping.AfterFunction)
2748       addUnwrappedLine();
2749     FormatTok->setType(TT_FunctionLBrace);
2750     parseBlock();
2751     addUnwrappedLine();
2752   } else {
2753     parseConstraintExpression(OriginalLevel);
2754   }
2755 }
2756 
// Parses a constraint expression: a sequence of (possibly qualified and
// templated) names, nested requires-expressions, parenthesized groups and
// braced bodies, joined by '&&' / '||'.
//
// \p OriginalLevel is the line level recorded by the caller; when
// Style.IndentRequires is set and the current level differs from it, the level
// is decremented once the expression ends.
void UnwrappedLineParser::parseConstraintExpression(
    unsigned int OriginalLevel) {
  // requires Id<T> && Id<T> || Id<T>
  while (
      FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
    nextToken();
    // Consume the rest of a qualified name, including template argument
    // lists, which are parsed as '<'...'>' delimited lists.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
                              tok::greater, tok::comma, tok::ellipsis)) {
      if (FormatTok->Tok.is(tok::less)) {
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
        continue;
      }
      nextToken();
    }
    // A nested 'requires' starts another requires-expression.
    if (FormatTok->Tok.is(tok::kw_requires))
      parseRequiresExpression(OriginalLevel);
    if (FormatTok->Tok.is(tok::less)) {
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
    }

    if (FormatTok->Tok.is(tok::l_paren))
      parseParens();
    if (FormatTok->Tok.is(tok::l_brace)) {
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      FormatTok->setType(TT_FunctionLBrace);
      parseBlock();
    }
    if (FormatTok->Tok.is(tok::semi)) {
      // Eat any trailing semi.
      nextToken();
      addUnwrappedLine();
    }
    // Stop at a ':' without consuming it or ending the line.
    // NOTE(review): presumably the start of a constructor initializer list --
    // confirm.
    if (FormatTok->Tok.is(tok::colon))
      return;
    if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
      // No junction follows, so the expression is complete. The line is ended
      // unless the previous token is one that could start or continue a
      // constraint name.
      if (FormatTok->Previous &&
          !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
                                        tok::coloncolon))
        addUnwrappedLine();
      if (Style.IndentRequires && OriginalLevel != Line->Level)
        --Line->Level;
      break;
    } else {
      // '&&' / '||' joining two constraints.
      FormatTok->setType(TT_ConstraintJunctions);
    }

    nextToken();
  }
}
2809 
2810 void UnwrappedLineParser::parseRequires() {
2811   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2812 
2813   unsigned OriginalLevel = Line->Level;
2814   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2815     addUnwrappedLine();
2816     if (Style.IndentRequires)
2817       ++Line->Level;
2818   }
2819   nextToken();
2820 
2821   parseRequiresExpression(OriginalLevel);
2822 }
2823 
// Parses an enum declaration or definition starting at the current token.
//
// Returns false when the tokens turn out not to be an enum: a JavaScript
// property named "enum", a protobuf field named "enum", or (in C++) two
// identifiers in a row. Returns true in all other cases, including a
// declaration without a body.
bool UnwrappedLineParser::parseEnum() {
  const FormatToken &InitialToken = *FormatTok;

  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->Tok.is(tok::kw_enum))
    nextToken();

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
    nextToken();

  // Skip over the enum name, any nested-name qualifiers, and the tokens of an
  // underlying-type clause.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setType(TT_RecordLBrace);
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken))
    addUnwrappedLine();
  // Parse enum body.
  nextToken();
  // When short enums are not allowed on one line, the '{' ends its line and
  // the enumerators are parsed one level deeper.
  if (!Style.AllowShortEnumsOnASingleLine) {
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                   /*IsEnum=*/true);
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  // Only on a parse error is the line terminated here; see the note below.
  if (HasError) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  return true;

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
2901 
2902 bool UnwrappedLineParser::parseStructLike() {
2903   // parseRecord falls through and does not yet add an unwrapped line as a
2904   // record declaration or definition can start a structural element.
2905   parseRecord();
2906   // This does not apply to Java, JavaScript and C#.
2907   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
2908       Style.isCSharp()) {
2909     if (FormatTok->is(tok::semi))
2910       nextToken();
2911     addUnwrappedLine();
2912     return true;
2913   }
2914   return false;
2915 }
2916 
2917 namespace {
2918 // A class used to set and restore the Token position when peeking
2919 // ahead in the token source.
2920 class ScopedTokenPosition {
2921   unsigned StoredPosition;
2922   FormatTokenSource *Tokens;
2923 
2924 public:
2925   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2926     assert(Tokens && "Tokens expected to not be null");
2927     StoredPosition = Tokens->getPosition();
2928   }
2929 
2930   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2931 };
2932 } // namespace
2933 
2934 // Look to see if we have [[ by looking ahead, if
2935 // its not then rewind to the original position.
2936 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2937   ScopedTokenPosition AutoPosition(Tokens);
2938   FormatToken *Tok = Tokens->getNextToken();
2939   // We already read the first [ check for the second.
2940   if (!Tok->is(tok::l_square))
2941     return false;
2942   // Double check that the attribute is just something
2943   // fairly simple.
2944   while (Tok->isNot(tok::eof)) {
2945     if (Tok->is(tok::r_square))
2946       break;
2947     Tok = Tokens->getNextToken();
2948   }
2949   if (Tok->is(tok::eof))
2950     return false;
2951   Tok = Tokens->getNextToken();
2952   if (!Tok->is(tok::r_square))
2953     return false;
2954   Tok = Tokens->getNextToken();
2955   if (Tok->is(tok::semi))
2956     return false;
2957   return true;
2958 }
2959 
2960 void UnwrappedLineParser::parseJavaEnumBody() {
2961   // Determine whether the enum is simple, i.e. does not have a semicolon or
2962   // constants with class bodies. Simple enums can be formatted like braced
2963   // lists, contracted to a single line, etc.
2964   unsigned StoredPosition = Tokens->getPosition();
2965   bool IsSimple = true;
2966   FormatToken *Tok = Tokens->getNextToken();
2967   while (!Tok->is(tok::eof)) {
2968     if (Tok->is(tok::r_brace))
2969       break;
2970     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2971       IsSimple = false;
2972       break;
2973     }
2974     // FIXME: This will also mark enums with braces in the arguments to enum
2975     // constants as "not simple". This is probably fine in practice, though.
2976     Tok = Tokens->getNextToken();
2977   }
2978   FormatTok = Tokens->setPosition(StoredPosition);
2979 
2980   if (IsSimple) {
2981     nextToken();
2982     parseBracedList();
2983     addUnwrappedLine();
2984     return;
2985   }
2986 
2987   // Parse the body of a more complex enum.
2988   // First add a line for everything up to the "{".
2989   nextToken();
2990   addUnwrappedLine();
2991   ++Line->Level;
2992 
2993   // Parse the enum constants.
2994   while (FormatTok) {
2995     if (FormatTok->is(tok::l_brace)) {
2996       // Parse the constant's class body.
2997       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2998                  /*MunchSemi=*/false);
2999     } else if (FormatTok->is(tok::l_paren)) {
3000       parseParens();
3001     } else if (FormatTok->is(tok::comma)) {
3002       nextToken();
3003       addUnwrappedLine();
3004     } else if (FormatTok->is(tok::semi)) {
3005       nextToken();
3006       addUnwrappedLine();
3007       break;
3008     } else if (FormatTok->is(tok::r_brace)) {
3009       addUnwrappedLine();
3010       break;
3011     } else {
3012       nextToken();
3013     }
3014   }
3015 
3016   // Parse the class body after the enum's ";" if any.
3017   parseLevel(/*HasOpeningBrace=*/true);
3018   nextToken();
3019   --Line->Level;
3020   addUnwrappedLine();
3021 }
3022 
// Parses a record (class/struct-like) declaration or definition starting at
// its introducing keyword: the name with any attributes or macros, an optional
// base-clause / template-argument section, and the body if there is one.
//
// When \p ParseAsExpr is true the body is parsed with parseChildBlock()
// instead of parseBlock(). No unwrapped line is added after the body (see the
// note at the end of the function).
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier whose text is not entirely upper case is treated as a
    // real name rather than a macro, so no macro arguments or attribute are
    // looked for after it.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip the base clause / template arguments up to the record body, a ';'
    // or the end of the input.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A '{' that is not a braced list is taken to be the record body.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square)) {
        FormatToken *Previous = FormatTok->Previous;
        if (!Previous || Previous->isNot(tok::r_paren)) {
          // Don't try parsing a lambda if we had a closing parenthesis before,
          // it was probably a pointer to an array: int (*)[].
          if (!tryToParseLambda())
            break;
        }
      }
      // A ';' means this was only a declaration; leave it for the caller.
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    FormatTok->setType(TT_RecordLBrace);
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With IndentAccessModifiers the body is parsed two levels deeper
      // instead of one.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3116 
3117 void UnwrappedLineParser::parseObjCMethod() {
3118   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
3119          "'(' or identifier expected.");
3120   do {
3121     if (FormatTok->Tok.is(tok::semi)) {
3122       nextToken();
3123       addUnwrappedLine();
3124       return;
3125     } else if (FormatTok->Tok.is(tok::l_brace)) {
3126       if (Style.BraceWrapping.AfterFunction)
3127         addUnwrappedLine();
3128       parseBlock();
3129       addUnwrappedLine();
3130       return;
3131     } else {
3132       nextToken();
3133     }
3134   } while (!eof());
3135 }
3136 
3137 void UnwrappedLineParser::parseObjCProtocolList() {
3138   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
3139   do {
3140     nextToken();
3141     // Early exit in case someone forgot a close angle.
3142     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3143         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3144       return;
3145   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
3146   nextToken(); // Skip '>'.
3147 }
3148 
3149 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3150   do {
3151     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
3152       nextToken();
3153       addUnwrappedLine();
3154       break;
3155     }
3156     if (FormatTok->is(tok::l_brace)) {
3157       parseBlock();
3158       // In ObjC interfaces, nothing should be following the "}".
3159       addUnwrappedLine();
3160     } else if (FormatTok->is(tok::r_brace)) {
3161       // Ignore stray "}". parseStructuralElement doesn't consume them.
3162       nextToken();
3163       addUnwrappedLine();
3164     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3165       nextToken();
3166       parseObjCMethod();
3167     } else {
3168       parseStructuralElement();
3169     }
3170   } while (!eof());
3171 }
3172 
3173 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3174   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3175          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3176   nextToken();
3177   nextToken(); // interface name
3178 
3179   // @interface can be followed by a lightweight generic
3180   // specialization list, then either a base class or a category.
3181   if (FormatTok->Tok.is(tok::less))
3182     parseObjCLightweightGenerics();
3183   if (FormatTok->Tok.is(tok::colon)) {
3184     nextToken();
3185     nextToken(); // base class name
3186     // The base class can also have lightweight generics applied to it.
3187     if (FormatTok->Tok.is(tok::less))
3188       parseObjCLightweightGenerics();
3189   } else if (FormatTok->Tok.is(tok::l_paren))
3190     // Skip category, if present.
3191     parseParens();
3192 
3193   if (FormatTok->Tok.is(tok::less))
3194     parseObjCProtocolList();
3195 
3196   if (FormatTok->Tok.is(tok::l_brace)) {
3197     if (Style.BraceWrapping.AfterObjCDeclaration)
3198       addUnwrappedLine();
3199     parseBlock(/*MustBeDeclaration=*/true);
3200   }
3201 
3202   // With instance variables, this puts '}' on its own line.  Without instance
3203   // variables, this ends the @interface line.
3204   addUnwrappedLine();
3205 
3206   parseObjCUntilAtEnd();
3207 }
3208 
3209 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3210   assert(FormatTok->Tok.is(tok::less));
3211   // Unlike protocol lists, generic parameterizations support
3212   // nested angles:
3213   //
3214   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3215   //     NSObject <NSCopying, NSSecureCoding>
3216   //
3217   // so we need to count how many open angles we have left.
3218   unsigned NumOpenAngles = 1;
3219   do {
3220     nextToken();
3221     // Early exit in case someone forgot a close angle.
3222     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3223         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3224       break;
3225     if (FormatTok->Tok.is(tok::less))
3226       ++NumOpenAngles;
3227     else if (FormatTok->Tok.is(tok::greater)) {
3228       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3229       --NumOpenAngles;
3230     }
3231   } while (!eof() && NumOpenAngles != 0);
3232   nextToken(); // Skip '>'.
3233 }
3234 
3235 // Returns true for the declaration/definition form of @protocol,
3236 // false for the expression form.
3237 bool UnwrappedLineParser::parseObjCProtocol() {
3238   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3239   nextToken();
3240 
3241   if (FormatTok->is(tok::l_paren))
3242     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3243     return false;
3244 
3245   // The definition/declaration form,
3246   // @protocol Foo
3247   // - (int)someMethod;
3248   // @end
3249 
3250   nextToken(); // protocol name
3251 
3252   if (FormatTok->Tok.is(tok::less))
3253     parseObjCProtocolList();
3254 
3255   // Check for protocol declaration.
3256   if (FormatTok->Tok.is(tok::semi)) {
3257     nextToken();
3258     addUnwrappedLine();
3259     return true;
3260   }
3261 
3262   addUnwrappedLine();
3263   parseObjCUntilAtEnd();
3264   return true;
3265 }
3266 
3267 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3268   bool IsImport = FormatTok->is(Keywords.kw_import);
3269   assert(IsImport || FormatTok->is(tok::kw_export));
3270   nextToken();
3271 
3272   // Consume the "default" in "export default class/function".
3273   if (FormatTok->is(tok::kw_default))
3274     nextToken();
3275 
3276   // Consume "async function", "function" and "default function", so that these
3277   // get parsed as free-standing JS functions, i.e. do not require a trailing
3278   // semicolon.
3279   if (FormatTok->is(Keywords.kw_async))
3280     nextToken();
3281   if (FormatTok->is(Keywords.kw_function)) {
3282     nextToken();
3283     return;
3284   }
3285 
3286   // For imports, `export *`, `export {...}`, consume the rest of the line up
3287   // to the terminating `;`. For everything else, just return and continue
3288   // parsing the structural element, i.e. the declaration or expression for
3289   // `export default`.
3290   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3291       !FormatTok->isStringLiteral())
3292     return;
3293 
3294   while (!eof()) {
3295     if (FormatTok->is(tok::semi))
3296       return;
3297     if (Line->Tokens.empty()) {
3298       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3299       // import statement should terminate.
3300       return;
3301     }
3302     if (FormatTok->is(tok::l_brace)) {
3303       FormatTok->setBlockKind(BK_Block);
3304       nextToken();
3305       parseBracedList();
3306     } else {
3307       nextToken();
3308     }
3309   }
3310 }
3311 
// Parses a statement macro: the macro name, an optional parenthesized
// argument list and an optional trailing ';', all placed on one unwrapped
// line.
void UnwrappedLineParser::parseStatementMacro() {
  // Consume the macro name.
  nextToken();
  if (FormatTok->is(tok::l_paren))
    parseParens();
  if (FormatTok->is(tok::semi))
    nextToken();
  addUnwrappedLine();
}
3320 
3321 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3322                                                  StringRef Prefix = "") {
3323   llvm::dbgs() << Prefix << "Line(" << Line.Level
3324                << ", FSC=" << Line.FirstStartColumn << ")"
3325                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3326   for (const auto &Node : Line.Tokens) {
3327     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3328                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3329                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3330   }
3331   for (const auto &Node : Line.Tokens)
3332     for (const auto &ChildNode : Node.Children)
3333       printDebugInfo(ChildNode, "\nChild: ");
3334 
3335   llvm::dbgs() << "\n";
3336 }
3337 
// Appends the line built so far to CurrentLines and resets the in-progress
// line so a new one can be started. Does nothing if the line has no tokens.
//
// \p AdjustLevel: when it is LineLevel::Remove and the line closes a block in
// Whitesmiths mode, the level is decremented after the line has been added.
void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
  if (Line->Tokens.empty())
    return;
  LLVM_DEBUG({
    if (CurrentLines == &Lines)
      printDebugInfo(*Line);
  });

  // If this line closes a block when in Whitesmiths mode, remember that
  // information so that the level can be decreased after the line is added.
  // This has to happen after the addition of the line since the line itself
  // needs to be indented.
  bool ClosesWhitesmithsBlock =
      Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
      Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;

  // *Line is moved from and then reused: the fields below are reset
  // explicitly, while Level is left as it is.
  CurrentLines->push_back(std::move(*Line));
  Line->Tokens.clear();
  Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
  Line->FirstStartColumn = 0;

  if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
    --Line->Level;
  // When adding to the top-level Lines, any pending preprocessor directive
  // lines are appended right after the line just added.
  if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
    CurrentLines->append(
        std::make_move_iterator(PreprocessorDirectives.begin()),
        std::make_move_iterator(PreprocessorDirectives.end()));
    PreprocessorDirectives.clear();
  }
  // Disconnect the current token from the last token on the previous line.
  FormatTok->Previous = nullptr;
}
3370 
3371 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3372 
3373 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3374   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3375          FormatTok.NewlinesBefore > 0;
3376 }
3377 
// Checks if \p FormatTok is a line comment that continues the line comment
// section on \p Line.
//
// Returns false if \p Line has no tokens, or if the comment text (without a
// leading "//" or "/*") matches \p CommentPragmasRegex. Otherwise the decision
// is delegated to continuesLineComment() with the last token of \p Line and
// the "min column token" described below.
static bool
continuesLineCommentSection(const FormatToken &FormatTok,
                            const UnwrappedLine &Line,
                            const llvm::Regex &CommentPragmasRegex) {
  if (Line.Tokens.empty())
    return false;

  // Strip the two-character comment introducer before matching against the
  // pragma regex.
  StringRef IndentContent = FormatTok.TokenText;
  if (FormatTok.TokenText.startswith("//") ||
      FormatTok.TokenText.startswith("/*"))
    IndentContent = FormatTok.TokenText.substr(2);
  if (CommentPragmasRegex.match(IndentContent))
    return false;

  // If Line starts with a line comment, then FormatTok continues the comment
  // section if its original column is greater or equal to the original start
  // column of the line.
  //
  // Define the min column token of a line as follows: if a line ends in '{' or
  // contains a '{' followed by a line comment, then the min column token is
  // that '{'. Otherwise, the min column token of the line is the first token of
  // the line.
  //
  // If Line starts with a token other than a line comment, then FormatTok
  // continues the comment section if its original column is greater than the
  // original start column of the min column token of the line.
  //
  // For example, the second line comment continues the first in these cases:
  //
  // // first line
  // // second line
  //
  // and:
  //
  // // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  //  // second line
  //
  // and:
  //
  // do { // first line
  //      // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //    // second line
  //   b
  // };
  //
  // The second line comment doesn't continue the first in these cases:
  //
  //   // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  // // second line
  //
  // and:
  //
  // do { // first line
  //   // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //   // second line
  // };
  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;

  // Scan for '{//'. If found, use the column of '{' as a min column for line
  // comment section continuation.
  const FormatToken *PreviousToken = nullptr;
  for (const UnwrappedLineNode &Node : Line.Tokens) {
    if (PreviousToken && PreviousToken->is(tok::l_brace) &&
        isLineComment(*Node.Tok)) {
      MinColumnToken = PreviousToken;
      break;
    }
    PreviousToken = Node.Tok;

    // Grab the last newline preceding a token in this unwrapped line.
    if (Node.Tok->NewlinesBefore > 0)
      MinColumnToken = Node.Tok;
  }
  // The line ends in '{' (if the loop ended early, PreviousToken is the '{'
  // that was already chosen above).
  if (PreviousToken && PreviousToken->is(tok::l_brace))
    MinColumnToken = PreviousToken;

  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
                              MinColumnToken);
}
3483 
3484 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3485   bool JustComments = Line->Tokens.empty();
3486   for (FormatToken *Tok : CommentsBeforeNextToken) {
3487     // Line comments that belong to the same line comment section are put on the
3488     // same line since later we might want to reflow content between them.
3489     // Additional fine-grained breaking of line comment sections is controlled
3490     // by the class BreakableLineCommentSection in case it is desirable to keep
3491     // several line comment sections in the same unwrapped line.
3492     //
3493     // FIXME: Consider putting separate line comment sections as children to the
3494     // unwrapped line instead.
3495     Tok->ContinuesLineCommentSection =
3496         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3497     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3498       addUnwrappedLine();
3499     pushToken(Tok);
3500   }
3501   if (NewlineBeforeNext && JustComments)
3502     addUnwrappedLine();
3503   CommentsBeforeNextToken.clear();
3504 }
3505 
3506 void UnwrappedLineParser::nextToken(int LevelDifference) {
3507   if (eof())
3508     return;
3509   flushComments(isOnNewLine(*FormatTok));
3510   pushToken(FormatTok);
3511   FormatToken *Previous = FormatTok;
3512   if (!Style.isJavaScript())
3513     readToken(LevelDifference);
3514   else
3515     readTokenWithJavaScriptASI();
3516   FormatTok->Previous = Previous;
3517 }
3518 
3519 void UnwrappedLineParser::distributeComments(
3520     const SmallVectorImpl<FormatToken *> &Comments,
3521     const FormatToken *NextTok) {
3522   // Whether or not a line comment token continues a line is controlled by
3523   // the method continuesLineCommentSection, with the following caveat:
3524   //
3525   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3526   // that each comment line from the trail is aligned with the next token, if
3527   // the next token exists. If a trail exists, the beginning of the maximal
3528   // trail is marked as a start of a new comment section.
3529   //
3530   // For example in this code:
3531   //
3532   // int a; // line about a
3533   //   // line 1 about b
3534   //   // line 2 about b
3535   //   int b;
3536   //
3537   // the two lines about b form a maximal trail, so there are two sections, the
3538   // first one consisting of the single comment "// line about a" and the
3539   // second one consisting of the next two comments.
3540   if (Comments.empty())
3541     return;
3542   bool ShouldPushCommentsInCurrentLine = true;
3543   bool HasTrailAlignedWithNextToken = false;
3544   unsigned StartOfTrailAlignedWithNextToken = 0;
3545   if (NextTok) {
3546     // We are skipping the first element intentionally.
3547     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3548       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3549         HasTrailAlignedWithNextToken = true;
3550         StartOfTrailAlignedWithNextToken = i;
3551       }
3552     }
3553   }
3554   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3555     FormatToken *FormatTok = Comments[i];
3556     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3557       FormatTok->ContinuesLineCommentSection = false;
3558     } else {
3559       FormatTok->ContinuesLineCommentSection =
3560           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3561     }
3562     if (!FormatTok->ContinuesLineCommentSection &&
3563         (isOnNewLine(*FormatTok) || FormatTok->IsFirst))
3564       ShouldPushCommentsInCurrentLine = false;
3565     if (ShouldPushCommentsInCurrentLine)
3566       pushToken(FormatTok);
3567     else
3568       CommentsBeforeNextToken.push_back(FormatTok);
3569   }
3570 }
3571 
3572 void UnwrappedLineParser::readToken(int LevelDifference) {
3573   SmallVector<FormatToken *, 1> Comments;
3574   do {
3575     FormatTok = Tokens->getNextToken();
3576     assert(FormatTok);
3577     while (FormatTok->getType() == TT_ConflictStart ||
3578            FormatTok->getType() == TT_ConflictEnd ||
3579            FormatTok->getType() == TT_ConflictAlternative) {
3580       if (FormatTok->getType() == TT_ConflictStart)
3581         conditionalCompilationStart(/*Unreachable=*/false);
3582       else if (FormatTok->getType() == TT_ConflictAlternative)
3583         conditionalCompilationAlternative();
3584       else if (FormatTok->getType() == TT_ConflictEnd)
3585         conditionalCompilationEnd();
3586       FormatTok = Tokens->getNextToken();
3587       FormatTok->MustBreakBefore = true;
3588     }
3589 
3590     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3591            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3592       distributeComments(Comments, FormatTok);
3593       Comments.clear();
3594       // If there is an unfinished unwrapped line, we flush the preprocessor
3595       // directives only after that unwrapped line was finished later.
3596       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3597       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3598       assert((LevelDifference >= 0 ||
3599               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3600              "LevelDifference makes Line->Level negative");
3601       Line->Level += LevelDifference;
3602       // Comments stored before the preprocessor directive need to be output
3603       // before the preprocessor directive, at the same level as the
3604       // preprocessor directive, as we consider them to apply to the directive.
3605       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3606           PPBranchLevel > 0)
3607         Line->Level += PPBranchLevel;
3608       flushComments(isOnNewLine(*FormatTok));
3609       parsePPDirective();
3610     }
3611 
3612     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3613         !Line->InPPDirective)
3614       continue;
3615 
3616     if (!FormatTok->Tok.is(tok::comment)) {
3617       distributeComments(Comments, FormatTok);
3618       Comments.clear();
3619       return;
3620     }
3621 
3622     Comments.push_back(FormatTok);
3623   } while (!eof());
3624 
3625   distributeComments(Comments, nullptr);
3626   Comments.clear();
3627 }
3628 
3629 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3630   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3631   if (MustBreakBeforeNextToken) {
3632     Line->Tokens.back().Tok->MustBreakBefore = true;
3633     MustBreakBeforeNextToken = false;
3634   }
3635 }
3636 
3637 } // end namespace format
3638 } // end namespace clang
3639