1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 
24 #define DEBUG_TYPE "format-parser"
25 
26 namespace clang {
27 namespace format {
28 
29 class FormatTokenSource {
30 public:
31   virtual ~FormatTokenSource() {}
32 
33   // Returns the next token in the token stream.
34   virtual FormatToken *getNextToken() = 0;
35 
36   // Returns the token preceding the token returned by the last call to
37   // getNextToken() in the token stream, or nullptr if no such token exists.
38   virtual FormatToken *getPreviousToken() = 0;
39 
40   // Returns the token that would be returned by the next call to
41   // getNextToken().
42   virtual FormatToken *peekNextToken() = 0;
43 
44   // Returns whether we are at the end of the file.
45   // This can be different from whether getNextToken() returned an eof token
46   // when the FormatTokenSource is a view on a part of the token stream.
47   virtual bool isEOF() = 0;
48 
49   // Gets the current position in the token stream, to be used by setPosition().
50   virtual unsigned getPosition() = 0;
51 
52   // Resets the token stream to the state it was in when getPosition() returned
53   // Position, and return the token at that position in the stream.
54   virtual FormatToken *setPosition(unsigned Position) = 0;
55 };
56 
57 namespace {
58 
59 class ScopedDeclarationState {
60 public:
61   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
62                          bool MustBeDeclaration)
63       : Line(Line), Stack(Stack) {
64     Line.MustBeDeclaration = MustBeDeclaration;
65     Stack.push_back(MustBeDeclaration);
66   }
67   ~ScopedDeclarationState() {
68     Stack.pop_back();
69     if (!Stack.empty())
70       Line.MustBeDeclaration = Stack.back();
71     else
72       Line.MustBeDeclaration = true;
73   }
74 
75 private:
76   UnwrappedLine &Line;
77   llvm::BitVector &Stack;
78 };
79 
80 static bool isLineComment(const FormatToken &FormatTok) {
81   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
82 }
83 
84 // Checks if \p FormatTok is a line comment that continues the line comment
85 // \p Previous. The original column of \p MinColumnToken is used to determine
86 // whether \p FormatTok is indented enough to the right to continue \p Previous.
87 static bool continuesLineComment(const FormatToken &FormatTok,
88                                  const FormatToken *Previous,
89                                  const FormatToken *MinColumnToken) {
90   if (!Previous || !MinColumnToken)
91     return false;
92   unsigned MinContinueColumn =
93       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
94   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
95          isLineComment(*Previous) &&
96          FormatTok.OriginalColumn >= MinContinueColumn;
97 }
98 
99 class ScopedMacroState : public FormatTokenSource {
100 public:
101   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
102                    FormatToken *&ResetToken)
103       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
104         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
105         Token(nullptr), PreviousToken(nullptr) {
106     FakeEOF.Tok.startToken();
107     FakeEOF.Tok.setKind(tok::eof);
108     TokenSource = this;
109     Line.Level = 0;
110     Line.InPPDirective = true;
111   }
112 
113   ~ScopedMacroState() override {
114     TokenSource = PreviousTokenSource;
115     ResetToken = Token;
116     Line.InPPDirective = false;
117     Line.Level = PreviousLineLevel;
118   }
119 
120   FormatToken *getNextToken() override {
121     // The \c UnwrappedLineParser guards against this by never calling
122     // \c getNextToken() after it has encountered the first eof token.
123     assert(!eof());
124     PreviousToken = Token;
125     Token = PreviousTokenSource->getNextToken();
126     if (eof())
127       return &FakeEOF;
128     return Token;
129   }
130 
131   FormatToken *getPreviousToken() override {
132     return PreviousTokenSource->getPreviousToken();
133   }
134 
135   FormatToken *peekNextToken() override {
136     if (eof())
137       return &FakeEOF;
138     return PreviousTokenSource->peekNextToken();
139   }
140 
141   bool isEOF() override { return PreviousTokenSource->isEOF(); }
142 
143   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
144 
145   FormatToken *setPosition(unsigned Position) override {
146     PreviousToken = nullptr;
147     Token = PreviousTokenSource->setPosition(Position);
148     return Token;
149   }
150 
151 private:
152   bool eof() {
153     return Token && Token->HasUnescapedNewline &&
154            !continuesLineComment(*Token, PreviousToken,
155                                  /*MinColumnToken=*/PreviousToken);
156   }
157 
158   FormatToken FakeEOF;
159   UnwrappedLine &Line;
160   FormatTokenSource *&TokenSource;
161   FormatToken *&ResetToken;
162   unsigned PreviousLineLevel;
163   FormatTokenSource *PreviousTokenSource;
164 
165   FormatToken *Token;
166   FormatToken *PreviousToken;
167 };
168 
169 } // end anonymous namespace
170 
171 class ScopedLineState {
172 public:
173   ScopedLineState(UnwrappedLineParser &Parser,
174                   bool SwitchToPreprocessorLines = false)
175       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
176     if (SwitchToPreprocessorLines)
177       Parser.CurrentLines = &Parser.PreprocessorDirectives;
178     else if (!Parser.Line->Tokens.empty())
179       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
180     PreBlockLine = std::move(Parser.Line);
181     Parser.Line = std::make_unique<UnwrappedLine>();
182     Parser.Line->Level = PreBlockLine->Level;
183     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
184   }
185 
186   ~ScopedLineState() {
187     if (!Parser.Line->Tokens.empty()) {
188       Parser.addUnwrappedLine();
189     }
190     assert(Parser.Line->Tokens.empty());
191     Parser.Line = std::move(PreBlockLine);
192     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
193       Parser.MustBreakBeforeNextToken = true;
194     Parser.CurrentLines = OriginalLines;
195   }
196 
197 private:
198   UnwrappedLineParser &Parser;
199 
200   std::unique_ptr<UnwrappedLine> PreBlockLine;
201   SmallVectorImpl<UnwrappedLine> *OriginalLines;
202 };
203 
204 class CompoundStatementIndenter {
205 public:
206   CompoundStatementIndenter(UnwrappedLineParser *Parser,
207                             const FormatStyle &Style, unsigned &LineLevel)
208       : CompoundStatementIndenter(Parser, LineLevel,
209                                   Style.BraceWrapping.AfterControlStatement,
210                                   Style.BraceWrapping.IndentBraces) {}
211   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
212                             bool WrapBrace, bool IndentBrace)
213       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
214     if (WrapBrace)
215       Parser->addUnwrappedLine();
216     if (IndentBrace)
217       ++LineLevel;
218   }
219   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
220 
221 private:
222   unsigned &LineLevel;
223   unsigned OldLineLevel;
224 };
225 
226 namespace {
227 
228 class IndexedTokenSource : public FormatTokenSource {
229 public:
230   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
231       : Tokens(Tokens), Position(-1) {}
232 
233   FormatToken *getNextToken() override {
234     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
235       LLVM_DEBUG({
236         llvm::dbgs() << "Next ";
237         dbgToken(Position);
238       });
239       return Tokens[Position];
240     }
241     ++Position;
242     LLVM_DEBUG({
243       llvm::dbgs() << "Next ";
244       dbgToken(Position);
245     });
246     return Tokens[Position];
247   }
248 
249   FormatToken *getPreviousToken() override {
250     return Position > 0 ? Tokens[Position - 1] : nullptr;
251   }
252 
253   FormatToken *peekNextToken() override {
254     int Next = Position + 1;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Peeking ";
257       dbgToken(Next);
258     });
259     return Tokens[Next];
260   }
261 
262   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
263 
264   unsigned getPosition() override {
265     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
266     assert(Position >= 0);
267     return Position;
268   }
269 
270   FormatToken *setPosition(unsigned P) override {
271     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
272     Position = P;
273     return Tokens[Position];
274   }
275 
276   void reset() { Position = -1; }
277 
278 private:
279   void dbgToken(int Position, llvm::StringRef Indent = "") {
280     FormatToken *Tok = Tokens[Position];
281     llvm::dbgs() << Indent << "[" << Position
282                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
283                  << ", Macro: " << !!Tok->MacroCtx << "\n";
284   }
285 
286   ArrayRef<FormatToken *> Tokens;
287   int Position;
288 };
289 
290 } // end anonymous namespace
291 
// Sets up a parser over \p Tokens that reports finished lines to \p Callback.
// Starts with an empty current line collecting into Lines, no active token
// source (Tokens is nullptr until parse() installs one) and PPBranchLevel -1.
// Include-guard detection starts out rejected when the style does not indent
// preprocessor directives (PPDIS_None) and armed (IG_Inited) otherwise.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
305 
306 void UnwrappedLineParser::reset() {
307   PPBranchLevel = -1;
308   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
309                      ? IG_Rejected
310                      : IG_Inited;
311   IncludeGuardToken = nullptr;
312   Line.reset(new UnwrappedLine);
313   CommentsBeforeNextToken.clear();
314   FormatTok = nullptr;
315   MustBreakBeforeNextToken = false;
316   PreprocessorDirectives.clear();
317   CurrentLines = &Lines;
318   DeclarationScopeStack.clear();
319   NestedTooDeep.clear();
320   PPStack.clear();
321   Line->FirstStartColumn = FirstStartColumn;
322 }
323 
324 void UnwrappedLineParser::parse() {
325   IndexedTokenSource TokenSource(AllTokens);
326   Line->FirstStartColumn = FirstStartColumn;
327   do {
328     LLVM_DEBUG(llvm::dbgs() << "----\n");
329     reset();
330     Tokens = &TokenSource;
331     TokenSource.reset();
332 
333     readToken();
334     parseFile();
335 
336     // If we found an include guard then all preprocessor directives (other than
337     // the guard) are over-indented by one.
338     if (IncludeGuard == IG_Found)
339       for (auto &Line : Lines)
340         if (Line.InPPDirective && Line.Level > 0)
341           --Line.Level;
342 
343     // Create line with eof token.
344     pushToken(FormatTok);
345     addUnwrappedLine();
346 
347     for (const UnwrappedLine &Line : Lines)
348       Callback.consumeUnwrappedLine(Line);
349 
350     Callback.finishRun();
351     Lines.clear();
352     while (!PPLevelBranchIndex.empty() &&
353            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
354       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
355       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
356     }
357     if (!PPLevelBranchIndex.empty()) {
358       ++PPLevelBranchIndex.back();
359       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
360       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
361     }
362   } while (!PPLevelBranchIndex.empty());
363 }
364 
365 void UnwrappedLineParser::parseFile() {
366   // The top-level context in a file always has declarations, except for pre-
367   // processor directives and JavaScript files.
368   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
369   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
370                                           MustBeDeclaration);
371   if (Style.Language == FormatStyle::LK_TextProto)
372     parseBracedList();
373   else
374     parseLevel(/*HasOpeningBrace=*/false);
375   // Make sure to format the remaining tokens.
376   //
377   // LK_TextProto is special since its top-level is parsed as the body of a
378   // braced list, which does not necessarily have natural line separators such
379   // as a semicolon. Comments after the last entry that have been determined to
380   // not belong to that line, as in:
381   //   key: value
382   //   // endfile comment
383   // do not have a chance to be put on a line of their own until this point.
384   // Here we add this newline before end-of-file comments.
385   if (Style.Language == FormatStyle::LK_TextProto &&
386       !CommentsBeforeNextToken.empty())
387     addUnwrappedLine();
388   flushComments(true);
389   addUnwrappedLine();
390 }
391 
392 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
393   do {
394     switch (FormatTok->Tok.getKind()) {
395     case tok::l_brace:
396       return;
397     default:
398       if (FormatTok->is(Keywords.kw_where)) {
399         addUnwrappedLine();
400         nextToken();
401         parseCSharpGenericTypeConstraint();
402         break;
403       }
404       nextToken();
405       break;
406     }
407   } while (!eof());
408 }
409 
410 void UnwrappedLineParser::parseCSharpAttribute() {
411   int UnpairedSquareBrackets = 1;
412   do {
413     switch (FormatTok->Tok.getKind()) {
414     case tok::r_square:
415       nextToken();
416       --UnpairedSquareBrackets;
417       if (UnpairedSquareBrackets == 0) {
418         addUnwrappedLine();
419         return;
420       }
421       break;
422     case tok::l_square:
423       ++UnpairedSquareBrackets;
424       nextToken();
425       break;
426     default:
427       nextToken();
428       break;
429     }
430   } while (!eof());
431 }
432 
433 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
434   if (!Lines.empty() && Lines.back().InPPDirective)
435     return true;
436 
437   const FormatToken *Previous = Tokens->getPreviousToken();
438   return Previous && Previous->is(tok::comment) &&
439          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
440 }
441 
442 bool UnwrappedLineParser::mightFitOnOneLine() const {
443   const auto ColumnLimit = Style.ColumnLimit;
444   if (ColumnLimit == 0)
445     return true;
446 
447   if (Lines.empty())
448     return true;
449 
450   const auto &PreviousLine = Lines.back();
451   const auto &Tokens = PreviousLine.Tokens;
452   assert(!Tokens.empty());
453   const auto *LastToken = Tokens.back().Tok;
454   assert(LastToken);
455   if (!LastToken->isOneOf(tok::semi, tok::comment))
456     return true;
457 
458   AnnotatedLine Line(PreviousLine);
459   assert(Line.Last == LastToken);
460 
461   TokenAnnotator Annotator(Style, Keywords);
462   Annotator.annotate(Line);
463   Annotator.calculateFormattingInformation(Line);
464 
465   return Line.Level * Style.IndentWidth + LastToken->TotalLength <= ColumnLimit;
466 }
467 
// Parses the statements of one nesting level, looping until eof or, when
// \p HasOpeningBrace is set, until the closing brace of the level is reached
// (that brace is left unconsumed).
//
// Returns true if a simple block, or false otherwise. (A simple block has a
// single statement that fits on a single line.) True is only ever returned
// when Style.RemoveBracesLLVM is set. \p IfKind is forwarded to
// parseStructuralElement() for ordinary statements.
bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) {
  // With RemoveBracesLLVM off this is always true, which makes the r_brace
  // case below report "not simple".
  const bool IsPrecededByCommentOrPPDirective =
      !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
  unsigned StatementCount = 0;
  bool SwitchLabelEncountered = false;
  do {
    // Macro block begin/end tokens are dispatched like real braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin) {
      kind = tok::l_brace;
    } else if (FormatTok->getType() == TT_MacroBlockEnd) {
      kind = tok::r_brace;
    }

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      // Note: 'continue' jumps to the loop condition, skipping the statement
      // count and the line break below.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock();
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      addUnwrappedLine();
      break;
    case tok::r_brace:
      if (HasOpeningBrace) {
        // End of this level; decide whether it qualifies as a simple block.
        if (!Style.RemoveBracesLLVM)
          return false;
        // Not simple if closed by a macro block end, if it does not hold
        // exactly one statement, or if a comment / preprocessor directive
        // precedes the block or its closing brace.
        if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 ||
            IsPrecededByCommentOrPPDirective ||
            precededByCommentOrPPDirective()) {
          return false;
        }
        // A comment on the same line as the closing brace also disqualifies.
        const FormatToken *Next = Tokens->peekNextToken();
        if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
          return false;
        return mightFitOnOneLine();
      }
      // Closing brace without a matching opener at this level: consume it.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments to see what follows 'default', then rewind.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      // NOTE(review): Next has already been dereferenced by the loop condition
      // above, so the null check here cannot guard anything.
      if (Next && Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Only the first label seen at this level adds an indentation level.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      LLVM_FALLTHROUGH;
    default:
      parseStructuralElement(IfKind, !HasOpeningBrace);
      ++StatementCount;
      assert(StatementCount > 0 && "StatementCount overflow!");
      break;
    }
  } while (!eof());
  return false;
}
559 
// Classifies the brace at FormatTok, and every brace nested inside it, as
// either a block (BK_Block) or a braced init list (BK_BracedInit) by scanning
// ahead. The token stream position and FormatTok are restored before
// returning, so this is a pure look-ahead apart from the block kinds it sets.
// \p ExpectClassBody affects how a ';' after the outermost closing brace is
// interpreted. FormatTok must be an l_brace on entry.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->Tok.is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    // NOTE(review): ReadTokens is incremented below but never read within
    // this function.
    unsigned ReadTokens = 0;
    do {
      NextTok = Tokens->getNextToken();
      ++ReadTokens;
    } while (NextTok->is(tok::comment));

    // Tok is the token being classified; NextTok is its non-comment successor.
    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      if (LBraceStack.empty())
        break;
      // Only decide for braces that have not been classified yet.
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
              ++ReadTokens;
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.  Note that
          // regardless how we mark inner braces here, we will overwrite the
          // BlockKind later if we parse a braced list (where all blocks
          // inside are by default braced lists), or when we explicitly detect
          // blocks (for example while parsing lambdas).
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              (Style.isJavaScript() &&
               NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                Keywords.kw_as)) ||
              (Style.isCpp() && NextTok->is(tok::l_paren)) ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis) ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
              (NextTok->is(tok::semi) &&
               (!ExpectClassBody || LBraceStack.size() != 1)) ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);
          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ++ReadTokens;
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the verdict on both the closing and the opening brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only statement macros count as evidence of a block.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these inside a still-unclassified brace marks it as a block.
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
    if (LBraceStack[i]->is(BK_Unknown))
      LBraceStack[i]->setBlockKind(BK_Block);
  }

  // Rewind so that normal parsing continues from the original brace.
  FormatTok = Tokens->setPosition(StoredPosition);
}
697 
// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
703 
704 size_t UnwrappedLineParser::computePPHash() const {
705   size_t h = 0;
706   for (const auto &i : PPStack) {
707     hash_combine(h, size_t(i.Kind));
708     hash_combine(h, i.Line);
709   }
710   return h;
711 }
712 
// Parses a block that starts at FormatTok, which must be '{' or a macro block
// begin token, up to and including its closing token.
//
// \p MustBeDeclaration sets the declaration state used inside the block.
// \p AddLevels is the number of indentation levels added for the block body.
// \p MunchSemi makes a ';' directly after the block part of it.
// \p UnindentWhitesmithsBraces lowers the level by one after the opening line
// has been emitted.
//
// Returns the IfStmtKind reported by parseLevel() for the block body
// (NotIf unless parseLevel() changes it).
UnwrappedLineParser::IfStmtKind
UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                bool MunchSemi,
                                bool UnindentWhitesmithsBraces) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  // Remember the opening token; FormatTok advances below.
  FormatToken *Tok = FormatTok;
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Hash of the preprocessor branch stack at the opening brace; compared with
  // the hash at the closing brace before the two lines are linked.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Index of the line holding the opening brace within CurrentLines, adjusted
  // by the number of preprocessor directive lines when collecting into Lines.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;

  IfStmtKind IfKind = IfStmtKind::NotIf;
  const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind);

  if (eof())
    return IfKind;

  // The body did not end at the expected closing token: restore the level and
  // give up without consuming anything further.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return IfKind;
  }

  // For a simple block, link the two braces to each other via MatchingParen,
  // unless the token before the closing brace is a non-optional '}'.
  if (SimpleBlock && Tok->is(tok::l_brace)) {
    assert(FormatTok->is(tok::r_brace));
    const FormatToken *Previous = Tokens->getPreviousToken();
    assert(Previous);
    if (Previous->isNot(tok::r_brace) || Previous->Optional) {
      Tok->MatchingParen = FormatTok;
      FormatTok->MatchingParen = Tok;
    }
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  // NOTE(review): AddLevels is unsigned, so -AddLevels is computed in unsigned
  // arithmetic; presumably nextToken() takes a signed level difference -
  // confirm against its declaration.
  nextToken(/*LevelDifference=*/-AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::arrow)) {
    // Following the } we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Cross-link the opening and closing lines only when both braces were seen
  // with the same preprocessor branch stack.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }

  return IfKind;
}
809 
810 static bool isGoogScope(const UnwrappedLine &Line) {
811   // FIXME: Closure-library specific stuff should not be hard-coded but be
812   // configurable.
813   if (Line.Tokens.size() < 4)
814     return false;
815   auto I = Line.Tokens.begin();
816   if (I->Tok->TokenText != "goog")
817     return false;
818   ++I;
819   if (I->Tok->isNot(tok::period))
820     return false;
821   ++I;
822   if (I->Tok->TokenText != "scope")
823     return false;
824   ++I;
825   return I->Tok->is(tok::l_paren);
826 }
827 
828 static bool isIIFE(const UnwrappedLine &Line,
829                    const AdditionalKeywords &Keywords) {
830   // Look for the start of an immediately invoked anonymous function.
831   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
832   // This is commonly done in JavaScript to create a new, anonymous scope.
833   // Example: (function() { ... })()
834   if (Line.Tokens.size() < 3)
835     return false;
836   auto I = Line.Tokens.begin();
837   if (I->Tok->isNot(tok::l_paren))
838     return false;
839   ++I;
840   if (I->Tok->isNot(Keywords.kw_function))
841     return false;
842   ++I;
843   return I->Tok->is(tok::l_paren);
844 }
845 
846 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
847                                    const FormatToken &InitialToken) {
848   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
849     return Style.BraceWrapping.AfterNamespace;
850   if (InitialToken.is(tok::kw_class))
851     return Style.BraceWrapping.AfterClass;
852   if (InitialToken.is(tok::kw_union))
853     return Style.BraceWrapping.AfterUnion;
854   if (InitialToken.is(tok::kw_struct))
855     return Style.BraceWrapping.AfterStruct;
856   if (InitialToken.is(tok::kw_enum))
857     return Style.BraceWrapping.AfterEnum;
858   return false;
859 }
860 
861 void UnwrappedLineParser::parseChildBlock() {
862   FormatTok->setBlockKind(BK_Block);
863   nextToken();
864   {
865     bool SkipIndent = (Style.isJavaScript() &&
866                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
867     ScopedLineState LineState(*this);
868     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
869                                             /*MustBeDeclaration=*/false);
870     Line->Level += SkipIndent ? 0 : 1;
871     parseLevel(/*HasOpeningBrace=*/true);
872     flushComments(isOnNewLine(*FormatTok));
873     Line->Level -= SkipIndent ? 0 : 1;
874   }
875   nextToken();
876 }
877 
878 void UnwrappedLineParser::parsePPDirective() {
879   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
880   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
881 
882   nextToken();
883 
884   if (!FormatTok->Tok.getIdentifierInfo()) {
885     parsePPUnknown();
886     return;
887   }
888 
889   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
890   case tok::pp_define:
891     parsePPDefine();
892     return;
893   case tok::pp_if:
894     parsePPIf(/*IfDef=*/false);
895     break;
896   case tok::pp_ifdef:
897   case tok::pp_ifndef:
898     parsePPIf(/*IfDef=*/true);
899     break;
900   case tok::pp_else:
901     parsePPElse();
902     break;
903   case tok::pp_elifdef:
904   case tok::pp_elifndef:
905   case tok::pp_elif:
906     parsePPElIf();
907     break;
908   case tok::pp_endif:
909     parsePPEndIf();
910     break;
911   default:
912     parsePPUnknown();
913     break;
914   }
915 }
916 
917 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
918   size_t Line = CurrentLines->size();
919   if (CurrentLines == &PreprocessorDirectives)
920     Line += Lines.size();
921 
922   if (Unreachable ||
923       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
924     PPStack.push_back({PP_Unreachable, Line});
925   else
926     PPStack.push_back({PP_Conditional, Line});
927 }
928 
929 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
930   ++PPBranchLevel;
931   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
932   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
933     PPLevelBranchIndex.push_back(0);
934     PPLevelBranchCount.push_back(0);
935   }
936   PPChainBranchIndex.push(0);
937   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
938   conditionalCompilationCondition(Unreachable || Skip);
939 }
940 
941 void UnwrappedLineParser::conditionalCompilationAlternative() {
942   if (!PPStack.empty())
943     PPStack.pop_back();
944   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
945   if (!PPChainBranchIndex.empty())
946     ++PPChainBranchIndex.top();
947   conditionalCompilationCondition(
948       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
949       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
950 }
951 
952 void UnwrappedLineParser::conditionalCompilationEnd() {
953   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
954   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
955     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
956       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
957     }
958   }
959   // Guard against #endif's without #if.
960   if (PPBranchLevel > -1)
961     --PPBranchLevel;
962   if (!PPChainBranchIndex.empty())
963     PPChainBranchIndex.pop();
964   if (!PPStack.empty())
965     PPStack.pop_back();
966 }
967 
968 void UnwrappedLineParser::parsePPIf(bool IfDef) {
969   bool IfNDef = FormatTok->is(tok::pp_ifndef);
970   nextToken();
971   bool Unreachable = false;
972   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
973     Unreachable = true;
974   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
975     Unreachable = true;
976   conditionalCompilationStart(Unreachable);
977   FormatToken *IfCondition = FormatTok;
978   // If there's a #ifndef on the first line, and the only lines before it are
979   // comments, it could be an include guard.
980   bool MaybeIncludeGuard = IfNDef;
981   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
982     for (auto &Line : Lines) {
983       if (!Line.Tokens.front().Tok->is(tok::comment)) {
984         MaybeIncludeGuard = false;
985         IncludeGuard = IG_Rejected;
986         break;
987       }
988     }
989   --PPBranchLevel;
990   parsePPUnknown();
991   ++PPBranchLevel;
992   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
993     IncludeGuard = IG_IfNdefed;
994     IncludeGuardToken = IfCondition;
995   }
996 }
997 
998 void UnwrappedLineParser::parsePPElse() {
999   // If a potential include guard has an #else, it's not an include guard.
1000   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1001     IncludeGuard = IG_Rejected;
1002   conditionalCompilationAlternative();
1003   if (PPBranchLevel > -1)
1004     --PPBranchLevel;
1005   parsePPUnknown();
1006   ++PPBranchLevel;
1007 }
1008 
1009 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1010 
1011 void UnwrappedLineParser::parsePPEndIf() {
1012   conditionalCompilationEnd();
1013   parsePPUnknown();
1014   // If the #endif of a potential include guard is the last thing in the file,
1015   // then we found an include guard.
1016   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1017       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1018     IncludeGuard = IG_Found;
1019 }
1020 
1021 void UnwrappedLineParser::parsePPDefine() {
1022   nextToken();
1023 
1024   if (!FormatTok->Tok.getIdentifierInfo()) {
1025     IncludeGuard = IG_Rejected;
1026     IncludeGuardToken = nullptr;
1027     parsePPUnknown();
1028     return;
1029   }
1030 
1031   if (IncludeGuard == IG_IfNdefed &&
1032       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1033     IncludeGuard = IG_Defined;
1034     IncludeGuardToken = nullptr;
1035     for (auto &Line : Lines) {
1036       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1037         IncludeGuard = IG_Rejected;
1038         break;
1039       }
1040     }
1041   }
1042 
1043   nextToken();
1044   if (FormatTok->Tok.getKind() == tok::l_paren &&
1045       !FormatTok->hasWhitespaceBefore()) {
1046     parseParens();
1047   }
1048   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1049     Line->Level += PPBranchLevel + 1;
1050   addUnwrappedLine();
1051   ++Line->Level;
1052 
1053   // Errors during a preprocessor directive can only affect the layout of the
1054   // preprocessor directive, and thus we ignore them. An alternative approach
1055   // would be to use the same approach we use on the file level (no
1056   // re-indentation if there was a structural error) within the macro
1057   // definition.
1058   parseFile();
1059 }
1060 
1061 void UnwrappedLineParser::parsePPUnknown() {
1062   do {
1063     nextToken();
1064   } while (!eof());
1065   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1066     Line->Level += PPBranchLevel + 1;
1067   addUnwrappedLine();
1068 }
1069 
1070 // Here we exclude certain tokens that are not usually the first token in an
1071 // unwrapped line. This is used in attempt to distinguish macro calls without
1072 // trailing semicolons from other constructs split to several lines.
1073 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1074   // Semicolon can be a null-statement, l_square can be a start of a macro or
1075   // a C++11 attribute, but this doesn't seem to be common.
1076   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1077          Tok.isNot(TT_AttributeSquare) &&
1078          // Tokens that can only be used as binary operators and a part of
1079          // overloaded operator names.
1080          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1081          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1082          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1083          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1084          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1085          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1086          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1087          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1088          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1089          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1090          Tok.isNot(tok::lesslessequal) &&
1091          // Colon is used in labels, base class lists, initializer lists,
1092          // range-based for loops, ternary operator, but should never be the
1093          // first token in an unwrapped line.
1094          Tok.isNot(tok::colon) &&
1095          // 'noexcept' is a trailing annotation.
1096          Tok.isNot(tok::kw_noexcept);
1097 }
1098 
1099 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1100                           const FormatToken *FormatTok) {
1101   // FIXME: This returns true for C/C++ keywords like 'struct'.
1102   return FormatTok->is(tok::identifier) &&
1103          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1104           !FormatTok->isOneOf(
1105               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1106               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1107               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1108               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1109               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1110               Keywords.kw_instanceof, Keywords.kw_interface,
1111               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1112 }
1113 
1114 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1115                                  const FormatToken *FormatTok) {
1116   return FormatTok->Tok.isLiteral() ||
1117          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1118          mustBeJSIdent(Keywords, FormatTok);
1119 }
1120 
1121 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1122 // when encountered after a value (see mustBeJSIdentOrValue).
1123 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1124                            const FormatToken *FormatTok) {
1125   return FormatTok->isOneOf(
1126       tok::kw_return, Keywords.kw_yield,
1127       // conditionals
1128       tok::kw_if, tok::kw_else,
1129       // loops
1130       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1131       // switch/case
1132       tok::kw_switch, tok::kw_case,
1133       // exceptions
1134       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1135       // declaration
1136       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1137       Keywords.kw_async, Keywords.kw_function,
1138       // import/export
1139       Keywords.kw_import, tok::kw_export);
1140 }
1141 
1142 // Checks whether a token is a type in K&R C (aka C78).
1143 static bool isC78Type(const FormatToken &Tok) {
1144   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1145                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1146                      tok::identifier);
1147 }
1148 
1149 // This function checks whether a token starts the first parameter declaration
1150 // in a K&R C (aka C78) function definition, e.g.:
1151 //   int f(a, b)
1152 //   short a, b;
1153 //   {
1154 //      return a + b;
1155 //   }
1156 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1157                                const FormatToken *FuncName) {
1158   assert(Tok);
1159   assert(Next);
1160   assert(FuncName);
1161 
1162   if (FuncName->isNot(tok::identifier))
1163     return false;
1164 
1165   const FormatToken *Prev = FuncName->Previous;
1166   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1167     return false;
1168 
1169   if (!isC78Type(*Tok) &&
1170       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1171     return false;
1172 
1173   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1174     return false;
1175 
1176   Tok = Tok->Previous;
1177   if (!Tok || Tok->isNot(tok::r_paren))
1178     return false;
1179 
1180   Tok = Tok->Previous;
1181   if (!Tok || Tok->isNot(tok::identifier))
1182     return false;
1183 
1184   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1185 }
1186 
1187 void UnwrappedLineParser::parseModuleImport() {
1188   nextToken();
1189   while (!eof()) {
1190     if (FormatTok->is(tok::colon)) {
1191       FormatTok->setType(TT_ModulePartitionColon);
1192     }
1193     // Handle import <foo/bar.h> as we would an include statement.
1194     else if (FormatTok->is(tok::less)) {
1195       nextToken();
1196       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1197         // Mark tokens up to the trailing line comments as implicit string
1198         // literals.
1199         if (FormatTok->isNot(tok::comment) &&
1200             !FormatTok->TokenText.startswith("//"))
1201           FormatTok->setType(TT_ImplicitStringLiteral);
1202         nextToken();
1203       }
1204     }
1205     if (FormatTok->is(tok::semi)) {
1206       nextToken();
1207       break;
1208     }
1209     nextToken();
1210   }
1211 
1212   addUnwrappedLine();
1213 }
1214 
1215 // readTokenWithJavaScriptASI reads the next token and terminates the current
1216 // line if JavaScript Automatic Semicolon Insertion must
1217 // happen between the current token and the next token.
1218 //
1219 // This method is conservative - it cannot cover all edge cases of JavaScript,
1220 // but only aims to correctly handle certain well known cases. It *must not*
1221 // return true in speculative cases.
1222 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1223   FormatToken *Previous = FormatTok;
1224   readToken();
1225   FormatToken *Next = FormatTok;
1226 
1227   bool IsOnSameLine =
1228       CommentsBeforeNextToken.empty()
1229           ? Next->NewlinesBefore == 0
1230           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1231   if (IsOnSameLine)
1232     return;
1233 
1234   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1235   bool PreviousStartsTemplateExpr =
1236       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1237   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1238     // If the line contains an '@' sign, the previous token might be an
1239     // annotation, which can precede another identifier/value.
1240     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1241       return LineNode.Tok->is(tok::at);
1242     });
1243     if (HasAt)
1244       return;
1245   }
1246   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1247     return addUnwrappedLine();
1248   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1249   bool NextEndsTemplateExpr =
1250       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1251   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1252       (PreviousMustBeValue ||
1253        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1254                          tok::minusminus)))
1255     return addUnwrappedLine();
1256   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1257       isJSDeclOrStmt(Keywords, Next))
1258     return addUnwrappedLine();
1259 }
1260 
// Parses one structural element starting at FormatTok and emits the
// unwrapped line(s) for it.
//
// The function works in two phases:
//  1. A switch on the kind of the first token dispatches to the dedicated
//     parsers for constructs that are identified by their leading token. A
//     case that returns has handled the whole element; a case that breaks
//     falls through to phase 2.
//  2. A loop consumes tokens one construct at a time until one of the cases
//     ends the element (by returning) or eof() is reached.
//
// IfKind is only forwarded to parseIfThenElse(). IsTopLevel is only read in
// the l_paren case of the loop, where it gates the K&R C function definition
// check.
void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
                                                 bool IsTopLevel) {
  // TableGen: `include "file"` forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the first token of the element.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Every token up to the closing brace is marked as Finalized; the
      // braces themselves are typed TT_InlineASMBrace.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are just consumed; otherwise they are
    // parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse(IfKind);
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only `extern "..." {` is handled here; any other use of extern
    // continues in the generic loop below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    // `import` is handled per language: ES6 import/export in JavaScript, an
    // import statement in Proto and a module import in C++.
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        // NOTE(review): this early return does not call addUnwrappedLine();
        // the consumed tokens stay on the current line for the caller.
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // `signals:`, `Q_SIGNALS:`, `slots:` and `Q_SLOTS:` style labels form a
    // line of their own when followed by a colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume the rest of the element token by token.
  do {
    // Captured before the current token is consumed; used by the enum and
    // l_paren cases below.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Objective-C '@' keywords.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      return;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // typedef NS_ENUM(...) and the related macros are parsed as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      // Previous (the token before the '(') is passed as the function name.
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by an identifier or simple type, a
      // parenthesized list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very like to be an actual
          // interface declaration.
          // Look one token ahead, then restore the stream position.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // `interface` in any other language is parsed like struct/class.
      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;

      FormatToken *PreviousToken = FormatTok;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // The checks below only apply when the identifier just consumed is the
      // first token on the line (optionally preceded by one comment).
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier is all upper case, is either at least
        // five characters long or followed by parentheses, is followed by a
        // line break, and the next token is one that can start a line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // A fat arrow in JavaScript/C# may be followed by a child block.
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: `= <` opens a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1757 
1758 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1759   assert(FormatTok->is(tok::l_brace));
1760   if (!Style.isCSharp())
1761     return false;
1762   // See if it's a property accessor.
1763   if (FormatTok->Previous->isNot(tok::identifier))
1764     return false;
1765 
1766   // See if we are inside a property accessor.
1767   //
1768   // Record the current tokenPosition so that we can advance and
1769   // reset the current token. `Next` is not set yet so we need
1770   // another way to advance along the token stream.
1771   unsigned int StoredPosition = Tokens->getPosition();
1772   FormatToken *Tok = Tokens->getNextToken();
1773 
1774   // A trivial property accessor is of the form:
1775   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1776   // Track these as they do not require line breaks to be introduced.
1777   bool HasGetOrSet = false;
1778   bool IsTrivialPropertyAccessor = true;
1779   while (!eof()) {
1780     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1781                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1782                      Keywords.kw_set)) {
1783       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1784         HasGetOrSet = true;
1785       Tok = Tokens->getNextToken();
1786       continue;
1787     }
1788     if (Tok->isNot(tok::r_brace))
1789       IsTrivialPropertyAccessor = false;
1790     break;
1791   }
1792 
1793   if (!HasGetOrSet) {
1794     Tokens->setPosition(StoredPosition);
1795     return false;
1796   }
1797 
1798   // Try to parse the property accessor:
1799   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1800   Tokens->setPosition(StoredPosition);
1801   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1802     addUnwrappedLine();
1803   nextToken();
1804   do {
1805     switch (FormatTok->Tok.getKind()) {
1806     case tok::r_brace:
1807       nextToken();
1808       if (FormatTok->is(tok::equal)) {
1809         while (!eof() && FormatTok->isNot(tok::semi))
1810           nextToken();
1811         nextToken();
1812       }
1813       addUnwrappedLine();
1814       return true;
1815     case tok::l_brace:
1816       ++Line->Level;
1817       parseBlock(/*MustBeDeclaration=*/true);
1818       addUnwrappedLine();
1819       --Line->Level;
1820       break;
1821     case tok::equal:
1822       if (FormatTok->is(TT_FatArrow)) {
1823         ++Line->Level;
1824         do {
1825           nextToken();
1826         } while (!eof() && FormatTok->isNot(tok::semi));
1827         nextToken();
1828         addUnwrappedLine();
1829         --Line->Level;
1830         break;
1831       }
1832       nextToken();
1833       break;
1834     default:
1835       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1836           !IsTrivialPropertyAccessor) {
1837         // Non-trivial get/set needs to be on its own line.
1838         addUnwrappedLine();
1839       }
1840       nextToken();
1841     }
1842   } while (!eof());
1843 
1844   // Unreachable for well-formed code (paired '{' and '}').
1845   return true;
1846 }
1847 
1848 bool UnwrappedLineParser::tryToParseLambda() {
1849   if (!Style.isCpp()) {
1850     nextToken();
1851     return false;
1852   }
1853   assert(FormatTok->is(tok::l_square));
1854   FormatToken &LSquare = *FormatTok;
1855   if (!tryToParseLambdaIntroducer())
1856     return false;
1857 
1858   bool SeenArrow = false;
1859 
1860   while (FormatTok->isNot(tok::l_brace)) {
1861     if (FormatTok->isSimpleTypeSpecifier()) {
1862       nextToken();
1863       continue;
1864     }
1865     switch (FormatTok->Tok.getKind()) {
1866     case tok::l_brace:
1867       break;
1868     case tok::l_paren:
1869       parseParens();
1870       break;
1871     case tok::l_square:
1872       parseSquare();
1873       break;
1874     case tok::amp:
1875     case tok::star:
1876     case tok::kw_const:
1877     case tok::comma:
1878     case tok::less:
1879     case tok::greater:
1880     case tok::identifier:
1881     case tok::numeric_constant:
1882     case tok::coloncolon:
1883     case tok::kw_class:
1884     case tok::kw_mutable:
1885     case tok::kw_noexcept:
1886     case tok::kw_template:
1887     case tok::kw_typename:
1888       nextToken();
1889       break;
1890     // Specialization of a template with an integer parameter can contain
1891     // arithmetic, logical, comparison and ternary operators.
1892     //
1893     // FIXME: This also accepts sequences of operators that are not in the scope
1894     // of a template argument list.
1895     //
1896     // In a C++ lambda a template type can only occur after an arrow. We use
1897     // this as an heuristic to distinguish between Objective-C expressions
1898     // followed by an `a->b` expression, such as:
1899     // ([obj func:arg] + a->b)
1900     // Otherwise the code below would parse as a lambda.
1901     //
1902     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1903     // explicit template lists: []<bool b = true && false>(U &&u){}
1904     case tok::plus:
1905     case tok::minus:
1906     case tok::exclaim:
1907     case tok::tilde:
1908     case tok::slash:
1909     case tok::percent:
1910     case tok::lessless:
1911     case tok::pipe:
1912     case tok::pipepipe:
1913     case tok::ampamp:
1914     case tok::caret:
1915     case tok::equalequal:
1916     case tok::exclaimequal:
1917     case tok::greaterequal:
1918     case tok::lessequal:
1919     case tok::question:
1920     case tok::colon:
1921     case tok::ellipsis:
1922     case tok::kw_true:
1923     case tok::kw_false:
1924       if (SeenArrow) {
1925         nextToken();
1926         break;
1927       }
1928       return true;
1929     case tok::arrow:
1930       // This might or might not actually be a lambda arrow (this could be an
1931       // ObjC method invocation followed by a dereferencing arrow). We might
1932       // reset this back to TT_Unknown in TokenAnnotator.
1933       FormatTok->setType(TT_LambdaArrow);
1934       SeenArrow = true;
1935       nextToken();
1936       break;
1937     default:
1938       return true;
1939     }
1940   }
1941   FormatTok->setType(TT_LambdaLBrace);
1942   LSquare.setType(TT_LambdaLSquare);
1943   parseChildBlock();
1944   return true;
1945 }
1946 
1947 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1948   const FormatToken *Previous = FormatTok->Previous;
1949   if (Previous &&
1950       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1951                          tok::kw_delete, tok::l_square) ||
1952        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1953        Previous->isSimpleTypeSpecifier())) {
1954     nextToken();
1955     return false;
1956   }
1957   nextToken();
1958   if (FormatTok->is(tok::l_square)) {
1959     return false;
1960   }
1961   parseSquare(/*LambdaIntroducer=*/true);
1962   return true;
1963 }
1964 
1965 void UnwrappedLineParser::tryToParseJSFunction() {
1966   assert(FormatTok->is(Keywords.kw_function) ||
1967          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1968   if (FormatTok->is(Keywords.kw_async))
1969     nextToken();
1970   // Consume "function".
1971   nextToken();
1972 
1973   // Consume * (generator function). Treat it like C++'s overloaded operators.
1974   if (FormatTok->is(tok::star)) {
1975     FormatTok->setType(TT_OverloadedOperator);
1976     nextToken();
1977   }
1978 
1979   // Consume function name.
1980   if (FormatTok->is(tok::identifier))
1981     nextToken();
1982 
1983   if (FormatTok->isNot(tok::l_paren))
1984     return;
1985 
1986   // Parse formal parameter list.
1987   parseParens();
1988 
1989   if (FormatTok->is(tok::colon)) {
1990     // Parse a type definition.
1991     nextToken();
1992 
1993     // Eat the type declaration. For braced inline object types, balance braces,
1994     // otherwise just parse until finding an l_brace for the function body.
1995     if (FormatTok->is(tok::l_brace))
1996       tryToParseBracedList();
1997     else
1998       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1999         nextToken();
2000   }
2001 
2002   if (FormatTok->is(tok::semi))
2003     return;
2004 
2005   parseChildBlock();
2006 }
2007 
2008 bool UnwrappedLineParser::tryToParseBracedList() {
2009   if (FormatTok->is(BK_Unknown))
2010     calculateBraceTypes();
2011   assert(FormatTok->isNot(BK_Unknown));
2012   if (FormatTok->is(BK_Block))
2013     return false;
2014   nextToken();
2015   parseBracedList();
2016   return true;
2017 }
2018 
2019 bool UnwrappedLineParser::tryToParseChildBlock() {
2020   assert(Style.isJavaScript() || Style.isCSharp());
2021   assert(FormatTok->is(TT_FatArrow));
2022   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2023   // They always start an expression or a child block if followed by a curly
2024   // brace.
2025   nextToken();
2026   if (FormatTok->isNot(tok::l_brace))
2027     return false;
2028   parseChildBlock();
2029   return true;
2030 }
2031 
2032 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2033                                           bool IsEnum,
2034                                           tok::TokenKind ClosingBraceKind) {
2035   bool HasError = false;
2036 
2037   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2038   // replace this by using parseAssignmentExpression() inside.
2039   do {
2040     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2041         tryToParseChildBlock())
2042       continue;
2043     if (Style.isJavaScript()) {
2044       if (FormatTok->is(Keywords.kw_function) ||
2045           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2046         tryToParseJSFunction();
2047         continue;
2048       }
2049       if (FormatTok->is(tok::l_brace)) {
2050         // Could be a method inside of a braced list `{a() { return 1; }}`.
2051         if (tryToParseBracedList())
2052           continue;
2053         parseChildBlock();
2054       }
2055     }
2056     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2057       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2058         addUnwrappedLine();
2059       nextToken();
2060       return !HasError;
2061     }
2062     switch (FormatTok->Tok.getKind()) {
2063     case tok::l_square:
2064       if (Style.isCSharp())
2065         parseSquare();
2066       else
2067         tryToParseLambda();
2068       break;
2069     case tok::l_paren:
2070       parseParens();
2071       // JavaScript can just have free standing methods and getters/setters in
2072       // object literals. Detect them by a "{" following ")".
2073       if (Style.isJavaScript()) {
2074         if (FormatTok->is(tok::l_brace))
2075           parseChildBlock();
2076         break;
2077       }
2078       break;
2079     case tok::l_brace:
2080       // Assume there are no blocks inside a braced init list apart
2081       // from the ones we explicitly parse out (like lambdas).
2082       FormatTok->setBlockKind(BK_BracedInit);
2083       nextToken();
2084       parseBracedList();
2085       break;
2086     case tok::less:
2087       if (Style.Language == FormatStyle::LK_Proto) {
2088         nextToken();
2089         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2090                         /*ClosingBraceKind=*/tok::greater);
2091       } else {
2092         nextToken();
2093       }
2094       break;
2095     case tok::semi:
2096       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2097       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2098       // used for error recovery if we have otherwise determined that this is
2099       // a braced list.
2100       if (Style.isJavaScript()) {
2101         nextToken();
2102         break;
2103       }
2104       HasError = true;
2105       if (!ContinueOnSemicolons)
2106         return !HasError;
2107       nextToken();
2108       break;
2109     case tok::comma:
2110       nextToken();
2111       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2112         addUnwrappedLine();
2113       break;
2114     default:
2115       nextToken();
2116       break;
2117     }
2118   } while (!eof());
2119   return false;
2120 }
2121 
2122 void UnwrappedLineParser::parseParens() {
2123   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2124   nextToken();
2125   do {
2126     switch (FormatTok->Tok.getKind()) {
2127     case tok::l_paren:
2128       parseParens();
2129       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2130         parseChildBlock();
2131       break;
2132     case tok::r_paren:
2133       nextToken();
2134       return;
2135     case tok::r_brace:
2136       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2137       return;
2138     case tok::l_square:
2139       tryToParseLambda();
2140       break;
2141     case tok::l_brace:
2142       if (!tryToParseBracedList())
2143         parseChildBlock();
2144       break;
2145     case tok::at:
2146       nextToken();
2147       if (FormatTok->Tok.is(tok::l_brace)) {
2148         nextToken();
2149         parseBracedList();
2150       }
2151       break;
2152     case tok::equal:
2153       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2154         tryToParseChildBlock();
2155       else
2156         nextToken();
2157       break;
2158     case tok::kw_class:
2159       if (Style.isJavaScript())
2160         parseRecord(/*ParseAsExpr=*/true);
2161       else
2162         nextToken();
2163       break;
2164     case tok::identifier:
2165       if (Style.isJavaScript() &&
2166           (FormatTok->is(Keywords.kw_function) ||
2167            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2168         tryToParseJSFunction();
2169       else
2170         nextToken();
2171       break;
2172     default:
2173       nextToken();
2174       break;
2175     }
2176   } while (!eof());
2177 }
2178 
2179 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2180   if (!LambdaIntroducer) {
2181     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2182     if (tryToParseLambda())
2183       return;
2184   }
2185   do {
2186     switch (FormatTok->Tok.getKind()) {
2187     case tok::l_paren:
2188       parseParens();
2189       break;
2190     case tok::r_square:
2191       nextToken();
2192       return;
2193     case tok::r_brace:
2194       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2195       return;
2196     case tok::l_square:
2197       parseSquare();
2198       break;
2199     case tok::l_brace: {
2200       if (!tryToParseBracedList())
2201         parseChildBlock();
2202       break;
2203     }
2204     case tok::at:
2205       nextToken();
2206       if (FormatTok->Tok.is(tok::l_brace)) {
2207         nextToken();
2208         parseBracedList();
2209       }
2210       break;
2211     default:
2212       nextToken();
2213       break;
2214     }
2215   } while (!eof());
2216 }
2217 
2218 void UnwrappedLineParser::keepAncestorBraces() {
2219   if (!Style.RemoveBracesLLVM)
2220     return;
2221 
2222   const int MaxNestingLevels = 2;
2223   const int Size = NestedTooDeep.size();
2224   if (Size >= MaxNestingLevels)
2225     NestedTooDeep[Size - MaxNestingLevels] = true;
2226   NestedTooDeep.push_back(false);
2227 }
2228 
2229 static void markOptionalBraces(FormatToken *LeftBrace) {
2230   if (!LeftBrace)
2231     return;
2232 
2233   assert(LeftBrace->is(tok::l_brace));
2234 
2235   FormatToken *RightBrace = LeftBrace->MatchingParen;
2236   if (!RightBrace) {
2237     assert(!LeftBrace->Optional);
2238     return;
2239   }
2240 
2241   assert(RightBrace->is(tok::r_brace));
2242   assert(RightBrace->MatchingParen == LeftBrace);
2243   assert(LeftBrace->Optional == RightBrace->Optional);
2244 
2245   LeftBrace->Optional = true;
2246   RightBrace->Optional = true;
2247 }
2248 
// Parses an `if` statement starting at the `if` keyword, including an
// optional `else` branch and any chained `else if`.
//
// IfKind: optional out-parameter. It is only written when
//   Style.RemoveBracesLLVM is set (the function returns before the write
//   otherwise) and then receives IfOnly, IfElse or IfElseIf.
// KeepBraces: when true, the braces of this statement are never marked
//   optional.
//
// Returns nullptr when Style.RemoveBracesLLVM is off; otherwise the '{' that
// opens the if-body, or nullptr if the body is not braced.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces) {
  // Skips attributes that may follow the condition or the `else` keyword.
  auto HandleAttributes = [this]() {
    // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
    if (FormatTok->is(TT_AttributeMacro))
      nextToken();
    // Handle [[likely]] / [[unlikely]] attributes.
    if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
      parseSquare();
  };

  assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
  nextToken();
  // Skip one `constexpr` keyword or identifier directly after `if`.
  if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
    nextToken();
  if (FormatTok->Tok.is(tok::l_paren))
    parseParens();
  HandleAttributes();

  // Set when the line break after a braced if-body is deferred until it is
  // known whether an `else` follows.
  bool NeedsUnwrappedLine = false;
  // Pushes this statement's NestedTooDeep entry (only if RemoveBracesLLVM).
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (FormatTok->Tok.is(tok::l_brace)) {
    // Braced body.
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    IfBlockKind = parseBlock();
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    // Unbraced body: a single structural element, one level deeper.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }

  // The if-braces are kept when the '{' has no matching '}', when this
  // statement was flagged as nested too deep, or when parseBlock() reported
  // IfOnly or IfElseIf for the body.
  bool KeepIfBraces = false;
  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->Tok.is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // Start the else branch with a fresh too-deep flag.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    HandleAttributes();
    if (FormatTok->Tok.is(tok::l_brace)) {
      // `else { ... }`
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      if (parseBlock() == IfStmtKind::IfOnly)
        Kind = IfStmtKind::IfElseIf;
      addUnwrappedLine();
    } else if (FormatTok->Tok.is(tok::kw_if)) {
      // `else if`: if a comment precedes the `if`, the nested statement is
      // put on its own, indented line.
      FormatToken *Previous = Tokens->getPreviousToken();
      const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take this statement's entry off the stack while the chained `if`
        // is parsed; it is pushed back afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace =
          parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      // Unbraced else body.
      addUnwrappedLine();
      ++Line->Level;
      parseStructuralElement();
      if (FormatTok->is(tok::eof))
        addUnwrappedLine();
      --Line->Level;
    }
  } else {
    // No `else`: a body that parseBlock() reported as IfElse keeps the outer
    // braces as well.
    if (Style.RemoveBracesLLVM)
      KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below only matters for brace removal.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  const bool KeepElseBraces =
      (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();

  // Pops the entry pushed by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces stay: unlink the if-brace pair from each other.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2374 
2375 void UnwrappedLineParser::parseTryCatch() {
2376   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2377   nextToken();
2378   bool NeedsUnwrappedLine = false;
2379   if (FormatTok->is(tok::colon)) {
2380     // We are in a function try block, what comes is an initializer list.
2381     nextToken();
2382 
2383     // In case identifiers were removed by clang-tidy, what might follow is
2384     // multiple commas in sequence - before the first identifier.
2385     while (FormatTok->is(tok::comma))
2386       nextToken();
2387 
2388     while (FormatTok->is(tok::identifier)) {
2389       nextToken();
2390       if (FormatTok->is(tok::l_paren))
2391         parseParens();
2392       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2393           FormatTok->is(tok::l_brace)) {
2394         do {
2395           nextToken();
2396         } while (!FormatTok->is(tok::r_brace));
2397         nextToken();
2398       }
2399 
2400       // In case identifiers were removed by clang-tidy, what might follow is
2401       // multiple commas in sequence - after the first identifier.
2402       while (FormatTok->is(tok::comma))
2403         nextToken();
2404     }
2405   }
2406   // Parse try with resource.
2407   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2408     parseParens();
2409   }
2410 
2411   keepAncestorBraces();
2412 
2413   if (FormatTok->is(tok::l_brace)) {
2414     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2415     parseBlock();
2416     if (Style.BraceWrapping.BeforeCatch) {
2417       addUnwrappedLine();
2418     } else {
2419       NeedsUnwrappedLine = true;
2420     }
2421   } else if (!FormatTok->is(tok::kw_catch)) {
2422     // The C++ standard requires a compound-statement after a try.
2423     // If there's none, we try to assume there's a structuralElement
2424     // and try to continue.
2425     addUnwrappedLine();
2426     ++Line->Level;
2427     parseStructuralElement();
2428     --Line->Level;
2429   }
2430   while (true) {
2431     if (FormatTok->is(tok::at))
2432       nextToken();
2433     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2434                              tok::kw___finally) ||
2435           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2436            FormatTok->is(Keywords.kw_finally)) ||
2437           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2438            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2439       break;
2440     nextToken();
2441     while (FormatTok->isNot(tok::l_brace)) {
2442       if (FormatTok->is(tok::l_paren)) {
2443         parseParens();
2444         continue;
2445       }
2446       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2447         if (Style.RemoveBracesLLVM)
2448           NestedTooDeep.pop_back();
2449         return;
2450       }
2451       nextToken();
2452     }
2453     NeedsUnwrappedLine = false;
2454     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2455     parseBlock();
2456     if (Style.BraceWrapping.BeforeCatch)
2457       addUnwrappedLine();
2458     else
2459       NeedsUnwrappedLine = true;
2460   }
2461 
2462   if (Style.RemoveBracesLLVM)
2463     NestedTooDeep.pop_back();
2464 
2465   if (NeedsUnwrappedLine)
2466     addUnwrappedLine();
2467 }
2468 
2469 void UnwrappedLineParser::parseNamespace() {
2470   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2471          "'namespace' expected");
2472 
2473   const FormatToken &InitialToken = *FormatTok;
2474   nextToken();
2475   if (InitialToken.is(TT_NamespaceMacro)) {
2476     parseParens();
2477   } else {
2478     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2479                               tok::l_square, tok::period)) {
2480       if (FormatTok->is(tok::l_square))
2481         parseSquare();
2482       else
2483         nextToken();
2484     }
2485   }
2486   if (FormatTok->Tok.is(tok::l_brace)) {
2487     if (ShouldBreakBeforeBrace(Style, InitialToken))
2488       addUnwrappedLine();
2489 
2490     unsigned AddLevels =
2491         Style.NamespaceIndentation == FormatStyle::NI_All ||
2492                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2493                  DeclarationScopeStack.size() > 1)
2494             ? 1u
2495             : 0u;
2496     bool ManageWhitesmithsBraces =
2497         AddLevels == 0u &&
2498         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2499 
2500     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2501     // the whole block.
2502     if (ManageWhitesmithsBraces)
2503       ++Line->Level;
2504 
2505     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2506                /*MunchSemi=*/true,
2507                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2508 
2509     // Munch the semicolon after a namespace. This is more common than one would
2510     // think. Putting the semicolon into its own line is very ugly.
2511     if (FormatTok->Tok.is(tok::semi))
2512       nextToken();
2513 
2514     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2515 
2516     if (ManageWhitesmithsBraces)
2517       --Line->Level;
2518   }
2519   // FIXME: Add error handling.
2520 }
2521 
2522 void UnwrappedLineParser::parseNew() {
2523   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2524   nextToken();
2525 
2526   if (Style.isCSharp()) {
2527     do {
2528       if (FormatTok->is(tok::l_brace))
2529         parseBracedList();
2530 
2531       if (FormatTok->isOneOf(tok::semi, tok::comma))
2532         return;
2533 
2534       nextToken();
2535     } while (!eof());
2536   }
2537 
2538   if (Style.Language != FormatStyle::LK_Java)
2539     return;
2540 
2541   // In Java, we can parse everything up to the parens, which aren't optional.
2542   do {
2543     // There should not be a ;, { or } before the new's open paren.
2544     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2545       return;
2546 
2547     // Consume the parens.
2548     if (FormatTok->is(tok::l_paren)) {
2549       parseParens();
2550 
2551       // If there is a class body of an anonymous class, consume that as child.
2552       if (FormatTok->is(tok::l_brace))
2553         parseChildBlock();
2554       return;
2555     }
2556     nextToken();
2557   } while (!eof());
2558 }
2559 
2560 void UnwrappedLineParser::parseForOrWhileLoop() {
2561   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2562          "'for', 'while' or foreach macro expected");
2563   nextToken();
2564   // JS' for await ( ...
2565   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2566     nextToken();
2567   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2568     nextToken();
2569   if (FormatTok->Tok.is(tok::l_paren))
2570     parseParens();
2571 
2572   keepAncestorBraces();
2573 
2574   if (FormatTok->Tok.is(tok::l_brace)) {
2575     FormatToken *LeftBrace = FormatTok;
2576     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2577     parseBlock();
2578     if (Style.RemoveBracesLLVM) {
2579       assert(!NestedTooDeep.empty());
2580       if (!NestedTooDeep.back())
2581         markOptionalBraces(LeftBrace);
2582     }
2583     addUnwrappedLine();
2584   } else {
2585     addUnwrappedLine();
2586     ++Line->Level;
2587     parseStructuralElement();
2588     --Line->Level;
2589   }
2590 
2591   if (Style.RemoveBracesLLVM)
2592     NestedTooDeep.pop_back();
2593 }
2594 
2595 void UnwrappedLineParser::parseDoWhile() {
2596   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2597   nextToken();
2598 
2599   keepAncestorBraces();
2600 
2601   if (FormatTok->Tok.is(tok::l_brace)) {
2602     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2603     parseBlock();
2604     if (Style.BraceWrapping.BeforeWhile)
2605       addUnwrappedLine();
2606   } else {
2607     addUnwrappedLine();
2608     ++Line->Level;
2609     parseStructuralElement();
2610     --Line->Level;
2611   }
2612 
2613   if (Style.RemoveBracesLLVM)
2614     NestedTooDeep.pop_back();
2615 
2616   // FIXME: Add error handling.
2617   if (!FormatTok->Tok.is(tok::kw_while)) {
2618     addUnwrappedLine();
2619     return;
2620   }
2621 
2622   // If in Whitesmiths mode, the line with the while() needs to be indented
2623   // to the same level as the block.
2624   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2625     ++Line->Level;
2626 
2627   nextToken();
2628   parseStructuralElement();
2629 }
2630 
2631 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2632   nextToken();
2633   unsigned OldLineLevel = Line->Level;
2634   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2635     --Line->Level;
2636   if (LeftAlignLabel)
2637     Line->Level = 0;
2638 
2639   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2640       FormatTok->Tok.is(tok::l_brace)) {
2641 
2642     CompoundStatementIndenter Indenter(this, Line->Level,
2643                                        Style.BraceWrapping.AfterCaseLabel,
2644                                        Style.BraceWrapping.IndentBraces);
2645     parseBlock();
2646     if (FormatTok->Tok.is(tok::kw_break)) {
2647       if (Style.BraceWrapping.AfterControlStatement ==
2648           FormatStyle::BWACS_Always) {
2649         addUnwrappedLine();
2650         if (!Style.IndentCaseBlocks &&
2651             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2652           ++Line->Level;
2653         }
2654       }
2655       parseStructuralElement();
2656     }
2657     addUnwrappedLine();
2658   } else {
2659     if (FormatTok->is(tok::semi))
2660       nextToken();
2661     addUnwrappedLine();
2662   }
2663   Line->Level = OldLineLevel;
2664   if (FormatTok->isNot(tok::l_brace)) {
2665     parseStructuralElement();
2666     addUnwrappedLine();
2667   }
2668 }
2669 
2670 void UnwrappedLineParser::parseCaseLabel() {
2671   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2672 
2673   // FIXME: fix handling of complex expressions here.
2674   do {
2675     nextToken();
2676   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2677   parseLabel();
2678 }
2679 
2680 void UnwrappedLineParser::parseSwitch() {
2681   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2682   nextToken();
2683   if (FormatTok->Tok.is(tok::l_paren))
2684     parseParens();
2685 
2686   keepAncestorBraces();
2687 
2688   if (FormatTok->Tok.is(tok::l_brace)) {
2689     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2690     parseBlock();
2691     addUnwrappedLine();
2692   } else {
2693     addUnwrappedLine();
2694     ++Line->Level;
2695     parseStructuralElement();
2696     --Line->Level;
2697   }
2698 
2699   if (Style.RemoveBracesLLVM)
2700     NestedTooDeep.pop_back();
2701 }
2702 
2703 void UnwrappedLineParser::parseAccessSpecifier() {
2704   nextToken();
2705   // Understand Qt's slots.
2706   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2707     nextToken();
2708   // Otherwise, we don't know what it is, and we'd better keep the next token.
2709   if (FormatTok->Tok.is(tok::colon))
2710     nextToken();
2711   addUnwrappedLine();
2712 }
2713 
2714 void UnwrappedLineParser::parseConcept() {
2715   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2716   nextToken();
2717   if (!FormatTok->Tok.is(tok::identifier))
2718     return;
2719   nextToken();
2720   if (!FormatTok->Tok.is(tok::equal))
2721     return;
2722   nextToken();
2723   if (FormatTok->Tok.is(tok::kw_requires)) {
2724     nextToken();
2725     parseRequiresExpression(Line->Level);
2726   } else {
2727     parseConstraintExpression(Line->Level);
2728   }
2729 }
2730 
2731 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2732   // requires (R range)
2733   if (FormatTok->Tok.is(tok::l_paren)) {
2734     parseParens();
2735     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2736       addUnwrappedLine();
2737       --Line->Level;
2738     }
2739   }
2740 
2741   if (FormatTok->Tok.is(tok::l_brace)) {
2742     if (Style.BraceWrapping.AfterFunction)
2743       addUnwrappedLine();
2744     FormatTok->setType(TT_FunctionLBrace);
2745     parseBlock();
2746     addUnwrappedLine();
2747   } else {
2748     parseConstraintExpression(OriginalLevel);
2749   }
2750 }
2751 
// Parses a constraint expression: a sequence of terms joined by '&&' / '||'.
// Each term starts with an identifier, 'requires' or '::' and may be followed
// by template arguments, a nested requires-expression, a parenthesized part
// and a braced body.
//
// \p OriginalLevel is the line level that Line->Level is compared against
// when deciding whether the Style.IndentRequires indentation must be undone.
void UnwrappedLineParser::parseConstraintExpression(
    unsigned int OriginalLevel) {
  // requires Id<T> && Id<T> || Id<T>
  while (
      FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
    nextToken();
    // Consume the (possibly qualified, possibly templated) name of the term.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
                              tok::greater, tok::comma, tok::ellipsis)) {
      if (FormatTok->Tok.is(tok::less)) {
        // Template argument lists are parsed as a braced list closed by '>'.
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
        continue;
      }
      nextToken();
    }
    // Note: the 'requires' token is not consumed before this call.
    if (FormatTok->Tok.is(tok::kw_requires)) {
      parseRequiresExpression(OriginalLevel);
    }
    if (FormatTok->Tok.is(tok::less)) {
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
    }

    if (FormatTok->Tok.is(tok::l_paren)) {
      parseParens();
    }
    if (FormatTok->Tok.is(tok::l_brace)) {
      // A braced body is typed and wrapped like a function body.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      FormatTok->setType(TT_FunctionLBrace);
      parseBlock();
    }
    if (FormatTok->Tok.is(tok::semi)) {
      // Eat any trailing semi.
      nextToken();
      addUnwrappedLine();
    }
    // A ':' ends the expression without adding a line or touching the level.
    if (FormatTok->Tok.is(tok::colon)) {
      return;
    }
    if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
      // No further '&&' / '||': the expression is complete. End the line
      // unless the previous token could still be part of a term's name.
      if (FormatTok->Previous &&
          !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
                                        tok::coloncolon)) {
        addUnwrappedLine();
      }
      if (Style.IndentRequires && OriginalLevel != Line->Level) {
        --Line->Level;
      }
      break;
    } else {
      // Mark the '&&' / '||' joining two terms.
      FormatTok->setType(TT_ConstraintJunctions);
    }

    nextToken();
  }
}
2809 
2810 void UnwrappedLineParser::parseRequires() {
2811   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2812 
2813   unsigned OriginalLevel = Line->Level;
2814   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2815     addUnwrappedLine();
2816     if (Style.IndentRequires) {
2817       ++Line->Level;
2818     }
2819   }
2820   nextToken();
2821 
2822   parseRequiresExpression(OriginalLevel);
2823 }
2824 
2825 bool UnwrappedLineParser::parseEnum() {
2826   const FormatToken &InitialToken = *FormatTok;
2827 
2828   // Won't be 'enum' for NS_ENUMs.
2829   if (FormatTok->Tok.is(tok::kw_enum))
2830     nextToken();
2831 
2832   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2833   // declarations. An "enum" keyword followed by a colon would be a syntax
2834   // error and thus assume it is just an identifier.
2835   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
2836     return false;
2837 
2838   // In protobuf, "enum" can be used as a field name.
2839   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2840     return false;
2841 
2842   // Eat up enum class ...
2843   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2844     nextToken();
2845 
2846   while (FormatTok->Tok.getIdentifierInfo() ||
2847          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2848                             tok::greater, tok::comma, tok::question)) {
2849     nextToken();
2850     // We can have macros or attributes in between 'enum' and the enum name.
2851     if (FormatTok->is(tok::l_paren))
2852       parseParens();
2853     if (FormatTok->is(tok::identifier)) {
2854       nextToken();
2855       // If there are two identifiers in a row, this is likely an elaborate
2856       // return type. In Java, this can be "implements", etc.
2857       if (Style.isCpp() && FormatTok->is(tok::identifier))
2858         return false;
2859     }
2860   }
2861 
2862   // Just a declaration or something is wrong.
2863   if (FormatTok->isNot(tok::l_brace))
2864     return true;
2865   FormatTok->setBlockKind(BK_Block);
2866 
2867   if (Style.Language == FormatStyle::LK_Java) {
2868     // Java enums are different.
2869     parseJavaEnumBody();
2870     return true;
2871   }
2872   if (Style.Language == FormatStyle::LK_Proto) {
2873     parseBlock(/*MustBeDeclaration=*/true);
2874     return true;
2875   }
2876 
2877   if (!Style.AllowShortEnumsOnASingleLine &&
2878       ShouldBreakBeforeBrace(Style, InitialToken))
2879     addUnwrappedLine();
2880   // Parse enum body.
2881   nextToken();
2882   if (!Style.AllowShortEnumsOnASingleLine) {
2883     addUnwrappedLine();
2884     Line->Level += 1;
2885   }
2886   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2887                                    /*IsEnum=*/true);
2888   if (!Style.AllowShortEnumsOnASingleLine)
2889     Line->Level -= 1;
2890   if (HasError) {
2891     if (FormatTok->is(tok::semi))
2892       nextToken();
2893     addUnwrappedLine();
2894   }
2895   return true;
2896 
2897   // There is no addUnwrappedLine() here so that we fall through to parsing a
2898   // structural element afterwards. Thus, in "enum A {} n, m;",
2899   // "} n, m;" will end up in one unwrapped line.
2900 }
2901 
2902 bool UnwrappedLineParser::parseStructLike() {
2903   // parseRecord falls through and does not yet add an unwrapped line as a
2904   // record declaration or definition can start a structural element.
2905   parseRecord();
2906   // This does not apply to Java, JavaScript and C#.
2907   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
2908       Style.isCSharp()) {
2909     if (FormatTok->is(tok::semi))
2910       nextToken();
2911     addUnwrappedLine();
2912     return true;
2913   }
2914   return false;
2915 }
2916 
2917 namespace {
2918 // A class used to set and restore the Token position when peeking
2919 // ahead in the token source.
2920 class ScopedTokenPosition {
2921   unsigned StoredPosition;
2922   FormatTokenSource *Tokens;
2923 
2924 public:
2925   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2926     assert(Tokens && "Tokens expected to not be null");
2927     StoredPosition = Tokens->getPosition();
2928   }
2929 
2930   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2931 };
2932 } // namespace
2933 
2934 // Look to see if we have [[ by looking ahead, if
2935 // its not then rewind to the original position.
2936 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2937   ScopedTokenPosition AutoPosition(Tokens);
2938   FormatToken *Tok = Tokens->getNextToken();
2939   // We already read the first [ check for the second.
2940   if (!Tok->is(tok::l_square)) {
2941     return false;
2942   }
2943   // Double check that the attribute is just something
2944   // fairly simple.
2945   while (Tok->isNot(tok::eof)) {
2946     if (Tok->is(tok::r_square)) {
2947       break;
2948     }
2949     Tok = Tokens->getNextToken();
2950   }
2951   if (Tok->is(tok::eof))
2952     return false;
2953   Tok = Tokens->getNextToken();
2954   if (!Tok->is(tok::r_square)) {
2955     return false;
2956   }
2957   Tok = Tokens->getNextToken();
2958   if (Tok->is(tok::semi)) {
2959     return false;
2960   }
2961   return true;
2962 }
2963 
2964 void UnwrappedLineParser::parseJavaEnumBody() {
2965   // Determine whether the enum is simple, i.e. does not have a semicolon or
2966   // constants with class bodies. Simple enums can be formatted like braced
2967   // lists, contracted to a single line, etc.
2968   unsigned StoredPosition = Tokens->getPosition();
2969   bool IsSimple = true;
2970   FormatToken *Tok = Tokens->getNextToken();
2971   while (!Tok->is(tok::eof)) {
2972     if (Tok->is(tok::r_brace))
2973       break;
2974     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2975       IsSimple = false;
2976       break;
2977     }
2978     // FIXME: This will also mark enums with braces in the arguments to enum
2979     // constants as "not simple". This is probably fine in practice, though.
2980     Tok = Tokens->getNextToken();
2981   }
2982   FormatTok = Tokens->setPosition(StoredPosition);
2983 
2984   if (IsSimple) {
2985     nextToken();
2986     parseBracedList();
2987     addUnwrappedLine();
2988     return;
2989   }
2990 
2991   // Parse the body of a more complex enum.
2992   // First add a line for everything up to the "{".
2993   nextToken();
2994   addUnwrappedLine();
2995   ++Line->Level;
2996 
2997   // Parse the enum constants.
2998   while (FormatTok) {
2999     if (FormatTok->is(tok::l_brace)) {
3000       // Parse the constant's class body.
3001       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3002                  /*MunchSemi=*/false);
3003     } else if (FormatTok->is(tok::l_paren)) {
3004       parseParens();
3005     } else if (FormatTok->is(tok::comma)) {
3006       nextToken();
3007       addUnwrappedLine();
3008     } else if (FormatTok->is(tok::semi)) {
3009       nextToken();
3010       addUnwrappedLine();
3011       break;
3012     } else if (FormatTok->is(tok::r_brace)) {
3013       addUnwrappedLine();
3014       break;
3015     } else {
3016       nextToken();
3017     }
3018   }
3019 
3020   // Parse the class body after the enum's ";" if any.
3021   parseLevel(/*HasOpeningBrace=*/true);
3022   nextToken();
3023   --Line->Level;
3024   addUnwrappedLine();
3025 }
3026 
// Parses a record (class/struct/... introduced by the current token): its
// name, anything between the name and the body (base clause, template
// arguments, ...) and, if present, the braced body.
//
// If \p ParseAsExpr is true, the body is parsed with parseChildBlock();
// otherwise it is parsed as a declaration block via parseBlock().
//
// No unwrapped line is added at the end; see the comment at the bottom.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier whose spelling is not all upper-case is assumed not to be
    // a macro.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip ahead to the '{' that starts the record body (or to a ';' that
    // ends a mere declaration).
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A '{' that is not a braced list is taken to start the body.
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->is(tok::l_square) && !tryToParseLambda())
        break;
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // C# generic type constraints go on their own line.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With IndentAccessModifiers the body is indented by two levels instead
      // of one.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
3112 
3113 void UnwrappedLineParser::parseObjCMethod() {
3114   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
3115          "'(' or identifier expected.");
3116   do {
3117     if (FormatTok->Tok.is(tok::semi)) {
3118       nextToken();
3119       addUnwrappedLine();
3120       return;
3121     } else if (FormatTok->Tok.is(tok::l_brace)) {
3122       if (Style.BraceWrapping.AfterFunction)
3123         addUnwrappedLine();
3124       parseBlock();
3125       addUnwrappedLine();
3126       return;
3127     } else {
3128       nextToken();
3129     }
3130   } while (!eof());
3131 }
3132 
3133 void UnwrappedLineParser::parseObjCProtocolList() {
3134   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
3135   do {
3136     nextToken();
3137     // Early exit in case someone forgot a close angle.
3138     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3139         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3140       return;
3141   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
3142   nextToken(); // Skip '>'.
3143 }
3144 
3145 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3146   do {
3147     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
3148       nextToken();
3149       addUnwrappedLine();
3150       break;
3151     }
3152     if (FormatTok->is(tok::l_brace)) {
3153       parseBlock();
3154       // In ObjC interfaces, nothing should be following the "}".
3155       addUnwrappedLine();
3156     } else if (FormatTok->is(tok::r_brace)) {
3157       // Ignore stray "}". parseStructuralElement doesn't consume them.
3158       nextToken();
3159       addUnwrappedLine();
3160     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3161       nextToken();
3162       parseObjCMethod();
3163     } else {
3164       parseStructuralElement();
3165     }
3166   } while (!eof());
3167 }
3168 
3169 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3170   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3171          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3172   nextToken();
3173   nextToken(); // interface name
3174 
3175   // @interface can be followed by a lightweight generic
3176   // specialization list, then either a base class or a category.
3177   if (FormatTok->Tok.is(tok::less)) {
3178     parseObjCLightweightGenerics();
3179   }
3180   if (FormatTok->Tok.is(tok::colon)) {
3181     nextToken();
3182     nextToken(); // base class name
3183     // The base class can also have lightweight generics applied to it.
3184     if (FormatTok->Tok.is(tok::less)) {
3185       parseObjCLightweightGenerics();
3186     }
3187   } else if (FormatTok->Tok.is(tok::l_paren))
3188     // Skip category, if present.
3189     parseParens();
3190 
3191   if (FormatTok->Tok.is(tok::less))
3192     parseObjCProtocolList();
3193 
3194   if (FormatTok->Tok.is(tok::l_brace)) {
3195     if (Style.BraceWrapping.AfterObjCDeclaration)
3196       addUnwrappedLine();
3197     parseBlock(/*MustBeDeclaration=*/true);
3198   }
3199 
3200   // With instance variables, this puts '}' on its own line.  Without instance
3201   // variables, this ends the @interface line.
3202   addUnwrappedLine();
3203 
3204   parseObjCUntilAtEnd();
3205 }
3206 
3207 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3208   assert(FormatTok->Tok.is(tok::less));
3209   // Unlike protocol lists, generic parameterizations support
3210   // nested angles:
3211   //
3212   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3213   //     NSObject <NSCopying, NSSecureCoding>
3214   //
3215   // so we need to count how many open angles we have left.
3216   unsigned NumOpenAngles = 1;
3217   do {
3218     nextToken();
3219     // Early exit in case someone forgot a close angle.
3220     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3221         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3222       break;
3223     if (FormatTok->Tok.is(tok::less))
3224       ++NumOpenAngles;
3225     else if (FormatTok->Tok.is(tok::greater)) {
3226       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3227       --NumOpenAngles;
3228     }
3229   } while (!eof() && NumOpenAngles != 0);
3230   nextToken(); // Skip '>'.
3231 }
3232 
3233 // Returns true for the declaration/definition form of @protocol,
3234 // false for the expression form.
3235 bool UnwrappedLineParser::parseObjCProtocol() {
3236   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3237   nextToken();
3238 
3239   if (FormatTok->is(tok::l_paren))
3240     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3241     return false;
3242 
3243   // The definition/declaration form,
3244   // @protocol Foo
3245   // - (int)someMethod;
3246   // @end
3247 
3248   nextToken(); // protocol name
3249 
3250   if (FormatTok->Tok.is(tok::less))
3251     parseObjCProtocolList();
3252 
3253   // Check for protocol declaration.
3254   if (FormatTok->Tok.is(tok::semi)) {
3255     nextToken();
3256     addUnwrappedLine();
3257     return true;
3258   }
3259 
3260   addUnwrappedLine();
3261   parseObjCUntilAtEnd();
3262   return true;
3263 }
3264 
3265 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3266   bool IsImport = FormatTok->is(Keywords.kw_import);
3267   assert(IsImport || FormatTok->is(tok::kw_export));
3268   nextToken();
3269 
3270   // Consume the "default" in "export default class/function".
3271   if (FormatTok->is(tok::kw_default))
3272     nextToken();
3273 
3274   // Consume "async function", "function" and "default function", so that these
3275   // get parsed as free-standing JS functions, i.e. do not require a trailing
3276   // semicolon.
3277   if (FormatTok->is(Keywords.kw_async))
3278     nextToken();
3279   if (FormatTok->is(Keywords.kw_function)) {
3280     nextToken();
3281     return;
3282   }
3283 
3284   // For imports, `export *`, `export {...}`, consume the rest of the line up
3285   // to the terminating `;`. For everything else, just return and continue
3286   // parsing the structural element, i.e. the declaration or expression for
3287   // `export default`.
3288   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3289       !FormatTok->isStringLiteral())
3290     return;
3291 
3292   while (!eof()) {
3293     if (FormatTok->is(tok::semi))
3294       return;
3295     if (Line->Tokens.empty()) {
3296       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3297       // import statement should terminate.
3298       return;
3299     }
3300     if (FormatTok->is(tok::l_brace)) {
3301       FormatTok->setBlockKind(BK_Block);
3302       nextToken();
3303       parseBracedList();
3304     } else {
3305       nextToken();
3306     }
3307   }
3308 }
3309 
3310 void UnwrappedLineParser::parseStatementMacro() {
3311   nextToken();
3312   if (FormatTok->is(tok::l_paren))
3313     parseParens();
3314   if (FormatTok->is(tok::semi))
3315     nextToken();
3316   addUnwrappedLine();
3317 }
3318 
3319 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3320                                                  StringRef Prefix = "") {
3321   llvm::dbgs() << Prefix << "Line(" << Line.Level
3322                << ", FSC=" << Line.FirstStartColumn << ")"
3323                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3324   for (const auto &Node : Line.Tokens) {
3325     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3326                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3327                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3328   }
3329   for (const auto &Node : Line.Tokens)
3330     for (const auto &ChildNode : Node.Children)
3331       printDebugInfo(ChildNode, "\nChild: ");
3332 
3333   llvm::dbgs() << "\n";
3334 }
3335 
3336 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3337   if (Line->Tokens.empty())
3338     return;
3339   LLVM_DEBUG({
3340     if (CurrentLines == &Lines)
3341       printDebugInfo(*Line);
3342   });
3343 
3344   // If this line closes a block when in Whitesmiths mode, remember that
3345   // information so that the level can be decreased after the line is added.
3346   // This has to happen after the addition of the line since the line itself
3347   // needs to be indented.
3348   bool ClosesWhitesmithsBlock =
3349       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3350       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3351 
3352   CurrentLines->push_back(std::move(*Line));
3353   Line->Tokens.clear();
3354   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3355   Line->FirstStartColumn = 0;
3356 
3357   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3358     --Line->Level;
3359   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3360     CurrentLines->append(
3361         std::make_move_iterator(PreprocessorDirectives.begin()),
3362         std::make_move_iterator(PreprocessorDirectives.end()));
3363     PreprocessorDirectives.clear();
3364   }
3365   // Disconnect the current token from the last token on the previous line.
3366   FormatTok->Previous = nullptr;
3367 }
3368 
3369 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3370 
3371 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3372   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3373          FormatTok.NewlinesBefore > 0;
3374 }
3375 
// Checks if \p FormatTok is a line comment that continues the line comment
// section on \p Line.
//
// Returns false if \p Line has no tokens yet, or if the comment's content
// (the text after a leading "//" or "/*") matches \p CommentPragmasRegex.
static bool
continuesLineCommentSection(const FormatToken &FormatTok,
                            const UnwrappedLine &Line,
                            const llvm::Regex &CommentPragmasRegex) {
  if (Line.Tokens.empty())
    return false;

  // Strip the comment introducer before matching against the pragma regex.
  StringRef IndentContent = FormatTok.TokenText;
  if (FormatTok.TokenText.startswith("//") ||
      FormatTok.TokenText.startswith("/*"))
    IndentContent = FormatTok.TokenText.substr(2);
  if (CommentPragmasRegex.match(IndentContent))
    return false;

  // If Line starts with a line comment, then FormatTok continues the comment
  // section if its original column is greater or equal to the original start
  // column of the line.
  //
  // Define the min column token of a line as follows: if a line ends in '{' or
  // contains a '{' followed by a line comment, then the min column token is
  // that '{'. Otherwise, the min column token of the line is the first token of
  // the line.
  //
  // If Line starts with a token other than a line comment, then FormatTok
  // continues the comment section if its original column is greater than the
  // original start column of the min column token of the line.
  //
  // For example, the second line comment continues the first in these cases:
  //
  // // first line
  // // second line
  //
  // and:
  //
  // // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  //  // second line
  //
  // and:
  //
  // do { // first line
  //      // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //    // second line
  //   b
  // };
  //
  // The second line comment doesn't continue the first in these cases:
  //
  //   // first line
  //  // second line
  //
  // and:
  //
  // int i; // first line
  // // second line
  //
  // and:
  //
  // do { // first line
  //   // second line
  //   int i;
  // } while (true);
  //
  // and:
  //
  // enum {
  //   a, // first line
  //   // second line
  // };
  const FormatToken *MinColumnToken = Line.Tokens.front().Tok;

  // Scan for '{//'. If found, use the column of '{' as a min column for line
  // comment section continuation.
  const FormatToken *PreviousToken = nullptr;
  for (const UnwrappedLineNode &Node : Line.Tokens) {
    if (PreviousToken && PreviousToken->is(tok::l_brace) &&
        isLineComment(*Node.Tok)) {
      MinColumnToken = PreviousToken;
      break;
    }
    PreviousToken = Node.Tok;

    // Grab the last newline preceding a token in this unwrapped line.
    // Note that this moves the min column token away from the first token of
    // the line whenever a later token is preceded by a newline.
    if (Node.Tok->NewlinesBefore > 0) {
      MinColumnToken = Node.Tok;
    }
  }
  // The line ends in '{' (or the scan above stopped at a '{' followed by a
  // line comment): that '{' is the min column token.
  if (PreviousToken && PreviousToken->is(tok::l_brace)) {
    MinColumnToken = PreviousToken;
  }

  return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
                              MinColumnToken);
}
3483 
3484 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3485   bool JustComments = Line->Tokens.empty();
3486   for (FormatToken *Tok : CommentsBeforeNextToken) {
3487     // Line comments that belong to the same line comment section are put on the
3488     // same line since later we might want to reflow content between them.
3489     // Additional fine-grained breaking of line comment sections is controlled
3490     // by the class BreakableLineCommentSection in case it is desirable to keep
3491     // several line comment sections in the same unwrapped line.
3492     //
3493     // FIXME: Consider putting separate line comment sections as children to the
3494     // unwrapped line instead.
3495     Tok->ContinuesLineCommentSection =
3496         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3497     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3498       addUnwrappedLine();
3499     pushToken(Tok);
3500   }
3501   if (NewlineBeforeNext && JustComments)
3502     addUnwrappedLine();
3503   CommentsBeforeNextToken.clear();
3504 }
3505 
3506 void UnwrappedLineParser::nextToken(int LevelDifference) {
3507   if (eof())
3508     return;
3509   flushComments(isOnNewLine(*FormatTok));
3510   pushToken(FormatTok);
3511   FormatToken *Previous = FormatTok;
3512   if (!Style.isJavaScript())
3513     readToken(LevelDifference);
3514   else
3515     readTokenWithJavaScriptASI();
3516   FormatTok->Previous = Previous;
3517 }
3518 
3519 void UnwrappedLineParser::distributeComments(
3520     const SmallVectorImpl<FormatToken *> &Comments,
3521     const FormatToken *NextTok) {
3522   // Whether or not a line comment token continues a line is controlled by
3523   // the method continuesLineCommentSection, with the following caveat:
3524   //
3525   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3526   // that each comment line from the trail is aligned with the next token, if
3527   // the next token exists. If a trail exists, the beginning of the maximal
3528   // trail is marked as a start of a new comment section.
3529   //
3530   // For example in this code:
3531   //
3532   // int a; // line about a
3533   //   // line 1 about b
3534   //   // line 2 about b
3535   //   int b;
3536   //
3537   // the two lines about b form a maximal trail, so there are two sections, the
3538   // first one consisting of the single comment "// line about a" and the
3539   // second one consisting of the next two comments.
3540   if (Comments.empty())
3541     return;
3542   bool ShouldPushCommentsInCurrentLine = true;
3543   bool HasTrailAlignedWithNextToken = false;
3544   unsigned StartOfTrailAlignedWithNextToken = 0;
3545   if (NextTok) {
3546     // We are skipping the first element intentionally.
3547     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3548       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3549         HasTrailAlignedWithNextToken = true;
3550         StartOfTrailAlignedWithNextToken = i;
3551       }
3552     }
3553   }
3554   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3555     FormatToken *FormatTok = Comments[i];
3556     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3557       FormatTok->ContinuesLineCommentSection = false;
3558     } else {
3559       FormatTok->ContinuesLineCommentSection =
3560           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3561     }
3562     if (!FormatTok->ContinuesLineCommentSection &&
3563         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3564       ShouldPushCommentsInCurrentLine = false;
3565     }
3566     if (ShouldPushCommentsInCurrentLine) {
3567       pushToken(FormatTok);
3568     } else {
3569       CommentsBeforeNextToken.push_back(FormatTok);
3570     }
3571   }
3572 }
3573 
3574 void UnwrappedLineParser::readToken(int LevelDifference) {
3575   SmallVector<FormatToken *, 1> Comments;
3576   do {
3577     FormatTok = Tokens->getNextToken();
3578     assert(FormatTok);
3579     while (FormatTok->getType() == TT_ConflictStart ||
3580            FormatTok->getType() == TT_ConflictEnd ||
3581            FormatTok->getType() == TT_ConflictAlternative) {
3582       if (FormatTok->getType() == TT_ConflictStart) {
3583         conditionalCompilationStart(/*Unreachable=*/false);
3584       } else if (FormatTok->getType() == TT_ConflictAlternative) {
3585         conditionalCompilationAlternative();
3586       } else if (FormatTok->getType() == TT_ConflictEnd) {
3587         conditionalCompilationEnd();
3588       }
3589       FormatTok = Tokens->getNextToken();
3590       FormatTok->MustBreakBefore = true;
3591     }
3592 
3593     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3594            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3595       distributeComments(Comments, FormatTok);
3596       Comments.clear();
3597       // If there is an unfinished unwrapped line, we flush the preprocessor
3598       // directives only after that unwrapped line was finished later.
3599       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3600       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3601       assert((LevelDifference >= 0 ||
3602               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3603              "LevelDifference makes Line->Level negative");
3604       Line->Level += LevelDifference;
3605       // Comments stored before the preprocessor directive need to be output
3606       // before the preprocessor directive, at the same level as the
3607       // preprocessor directive, as we consider them to apply to the directive.
3608       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3609           PPBranchLevel > 0)
3610         Line->Level += PPBranchLevel;
3611       flushComments(isOnNewLine(*FormatTok));
3612       parsePPDirective();
3613     }
3614 
3615     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3616         !Line->InPPDirective) {
3617       continue;
3618     }
3619 
3620     if (!FormatTok->Tok.is(tok::comment)) {
3621       distributeComments(Comments, FormatTok);
3622       Comments.clear();
3623       return;
3624     }
3625 
3626     Comments.push_back(FormatTok);
3627   } while (!eof());
3628 
3629   distributeComments(Comments, nullptr);
3630   Comments.clear();
3631 }
3632 
3633 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3634   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3635   if (MustBreakBeforeNextToken) {
3636     Line->Tokens.back().Tok->MustBreakBefore = true;
3637     MustBreakBeforeNextToken = false;
3638   }
3639 }
3640 
3641 } // end namespace format
3642 } // end namespace clang
3643