1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 
24 #define DEBUG_TYPE "format-parser"
25 
26 namespace clang {
27 namespace format {
28 
29 class FormatTokenSource {
30 public:
31   virtual ~FormatTokenSource() {}
32 
33   // Returns the next token in the token stream.
34   virtual FormatToken *getNextToken() = 0;
35 
36   // Returns the token preceding the token returned by the last call to
37   // getNextToken() in the token stream, or nullptr if no such token exists.
38   virtual FormatToken *getPreviousToken() = 0;
39 
40   // Returns the token that would be returned by the next call to
41   // getNextToken().
42   virtual FormatToken *peekNextToken() = 0;
43 
44   // Returns whether we are at the end of the file.
45   // This can be different from whether getNextToken() returned an eof token
46   // when the FormatTokenSource is a view on a part of the token stream.
47   virtual bool isEOF() = 0;
48 
49   // Gets the current position in the token stream, to be used by setPosition().
50   virtual unsigned getPosition() = 0;
51 
52   // Resets the token stream to the state it was in when getPosition() returned
53   // Position, and return the token at that position in the stream.
54   virtual FormatToken *setPosition(unsigned Position) = 0;
55 };
56 
57 namespace {
58 
59 class ScopedDeclarationState {
60 public:
61   ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
62                          bool MustBeDeclaration)
63       : Line(Line), Stack(Stack) {
64     Line.MustBeDeclaration = MustBeDeclaration;
65     Stack.push_back(MustBeDeclaration);
66   }
67   ~ScopedDeclarationState() {
68     Stack.pop_back();
69     if (!Stack.empty())
70       Line.MustBeDeclaration = Stack.back();
71     else
72       Line.MustBeDeclaration = true;
73   }
74 
75 private:
76   UnwrappedLine &Line;
77   llvm::BitVector &Stack;
78 };
79 
80 static bool isLineComment(const FormatToken &FormatTok) {
81   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
82 }
83 
84 // Checks if \p FormatTok is a line comment that continues the line comment
85 // \p Previous. The original column of \p MinColumnToken is used to determine
86 // whether \p FormatTok is indented enough to the right to continue \p Previous.
87 static bool continuesLineComment(const FormatToken &FormatTok,
88                                  const FormatToken *Previous,
89                                  const FormatToken *MinColumnToken) {
90   if (!Previous || !MinColumnToken)
91     return false;
92   unsigned MinContinueColumn =
93       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
94   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
95          isLineComment(*Previous) &&
96          FormatTok.OriginalColumn >= MinContinueColumn;
97 }
98 
99 class ScopedMacroState : public FormatTokenSource {
100 public:
101   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
102                    FormatToken *&ResetToken)
103       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
104         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
105         Token(nullptr), PreviousToken(nullptr) {
106     FakeEOF.Tok.startToken();
107     FakeEOF.Tok.setKind(tok::eof);
108     TokenSource = this;
109     Line.Level = 0;
110     Line.InPPDirective = true;
111   }
112 
113   ~ScopedMacroState() override {
114     TokenSource = PreviousTokenSource;
115     ResetToken = Token;
116     Line.InPPDirective = false;
117     Line.Level = PreviousLineLevel;
118   }
119 
120   FormatToken *getNextToken() override {
121     // The \c UnwrappedLineParser guards against this by never calling
122     // \c getNextToken() after it has encountered the first eof token.
123     assert(!eof());
124     PreviousToken = Token;
125     Token = PreviousTokenSource->getNextToken();
126     if (eof())
127       return &FakeEOF;
128     return Token;
129   }
130 
131   FormatToken *getPreviousToken() override {
132     return PreviousTokenSource->getPreviousToken();
133   }
134 
135   FormatToken *peekNextToken() override {
136     if (eof())
137       return &FakeEOF;
138     return PreviousTokenSource->peekNextToken();
139   }
140 
141   bool isEOF() override { return PreviousTokenSource->isEOF(); }
142 
143   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
144 
145   FormatToken *setPosition(unsigned Position) override {
146     PreviousToken = nullptr;
147     Token = PreviousTokenSource->setPosition(Position);
148     return Token;
149   }
150 
151 private:
152   bool eof() {
153     return Token && Token->HasUnescapedNewline &&
154            !continuesLineComment(*Token, PreviousToken,
155                                  /*MinColumnToken=*/PreviousToken);
156   }
157 
158   FormatToken FakeEOF;
159   UnwrappedLine &Line;
160   FormatTokenSource *&TokenSource;
161   FormatToken *&ResetToken;
162   unsigned PreviousLineLevel;
163   FormatTokenSource *PreviousTokenSource;
164 
165   FormatToken *Token;
166   FormatToken *PreviousToken;
167 };
168 
169 } // end anonymous namespace
170 
171 class ScopedLineState {
172 public:
173   ScopedLineState(UnwrappedLineParser &Parser,
174                   bool SwitchToPreprocessorLines = false)
175       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
176     if (SwitchToPreprocessorLines)
177       Parser.CurrentLines = &Parser.PreprocessorDirectives;
178     else if (!Parser.Line->Tokens.empty())
179       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
180     PreBlockLine = std::move(Parser.Line);
181     Parser.Line = std::make_unique<UnwrappedLine>();
182     Parser.Line->Level = PreBlockLine->Level;
183     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
184   }
185 
186   ~ScopedLineState() {
187     if (!Parser.Line->Tokens.empty()) {
188       Parser.addUnwrappedLine();
189     }
190     assert(Parser.Line->Tokens.empty());
191     Parser.Line = std::move(PreBlockLine);
192     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
193       Parser.MustBreakBeforeNextToken = true;
194     Parser.CurrentLines = OriginalLines;
195   }
196 
197 private:
198   UnwrappedLineParser &Parser;
199 
200   std::unique_ptr<UnwrappedLine> PreBlockLine;
201   SmallVectorImpl<UnwrappedLine> *OriginalLines;
202 };
203 
204 class CompoundStatementIndenter {
205 public:
206   CompoundStatementIndenter(UnwrappedLineParser *Parser,
207                             const FormatStyle &Style, unsigned &LineLevel)
208       : CompoundStatementIndenter(Parser, LineLevel,
209                                   Style.BraceWrapping.AfterControlStatement,
210                                   Style.BraceWrapping.IndentBraces) {}
211   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
212                             bool WrapBrace, bool IndentBrace)
213       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
214     if (WrapBrace)
215       Parser->addUnwrappedLine();
216     if (IndentBrace)
217       ++LineLevel;
218   }
219   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
220 
221 private:
222   unsigned &LineLevel;
223   unsigned OldLineLevel;
224 };
225 
226 namespace {
227 
228 class IndexedTokenSource : public FormatTokenSource {
229 public:
230   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
231       : Tokens(Tokens), Position(-1) {}
232 
233   FormatToken *getNextToken() override {
234     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
235       LLVM_DEBUG({
236         llvm::dbgs() << "Next ";
237         dbgToken(Position);
238       });
239       return Tokens[Position];
240     }
241     ++Position;
242     LLVM_DEBUG({
243       llvm::dbgs() << "Next ";
244       dbgToken(Position);
245     });
246     return Tokens[Position];
247   }
248 
249   FormatToken *getPreviousToken() override {
250     return Position > 0 ? Tokens[Position - 1] : nullptr;
251   }
252 
253   FormatToken *peekNextToken() override {
254     int Next = Position + 1;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Peeking ";
257       dbgToken(Next);
258     });
259     return Tokens[Next];
260   }
261 
262   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
263 
264   unsigned getPosition() override {
265     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
266     assert(Position >= 0);
267     return Position;
268   }
269 
270   FormatToken *setPosition(unsigned P) override {
271     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
272     Position = P;
273     return Tokens[Position];
274   }
275 
276   void reset() { Position = -1; }
277 
278 private:
279   void dbgToken(int Position, llvm::StringRef Indent = "") {
280     FormatToken *Tok = Tokens[Position];
281     llvm::dbgs() << Indent << "[" << Position
282                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
283                  << ", Macro: " << !!Tok->MacroCtx << "\n";
284   }
285 
286   ArrayRef<FormatToken *> Tokens;
287   int Position;
288 };
289 
290 } // end anonymous namespace
291 
// Sets up a parser over \p Tokens that reports its results to \p Callback.
//
// The parser starts with a fresh, empty UnwrappedLine, collects lines into
// Lines, and has no token source yet (one is installed by parse()). Include
// guard detection starts out rejected when the style does not indent
// preprocessor directives (PPDIS_None) and in its initial state otherwise.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
305 
306 void UnwrappedLineParser::reset() {
307   PPBranchLevel = -1;
308   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
309                      ? IG_Rejected
310                      : IG_Inited;
311   IncludeGuardToken = nullptr;
312   Line.reset(new UnwrappedLine);
313   CommentsBeforeNextToken.clear();
314   FormatTok = nullptr;
315   MustBreakBeforeNextToken = false;
316   PreprocessorDirectives.clear();
317   CurrentLines = &Lines;
318   DeclarationScopeStack.clear();
319   NestedTooDeep.clear();
320   PPStack.clear();
321   Line->FirstStartColumn = FirstStartColumn;
322 }
323 
324 void UnwrappedLineParser::parse() {
325   IndexedTokenSource TokenSource(AllTokens);
326   Line->FirstStartColumn = FirstStartColumn;
327   do {
328     LLVM_DEBUG(llvm::dbgs() << "----\n");
329     reset();
330     Tokens = &TokenSource;
331     TokenSource.reset();
332 
333     readToken();
334     parseFile();
335 
336     // If we found an include guard then all preprocessor directives (other than
337     // the guard) are over-indented by one.
338     if (IncludeGuard == IG_Found)
339       for (auto &Line : Lines)
340         if (Line.InPPDirective && Line.Level > 0)
341           --Line.Level;
342 
343     // Create line with eof token.
344     pushToken(FormatTok);
345     addUnwrappedLine();
346 
347     for (const UnwrappedLine &Line : Lines)
348       Callback.consumeUnwrappedLine(Line);
349 
350     Callback.finishRun();
351     Lines.clear();
352     while (!PPLevelBranchIndex.empty() &&
353            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
354       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
355       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
356     }
357     if (!PPLevelBranchIndex.empty()) {
358       ++PPLevelBranchIndex.back();
359       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
360       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
361     }
362   } while (!PPLevelBranchIndex.empty());
363 }
364 
365 void UnwrappedLineParser::parseFile() {
366   // The top-level context in a file always has declarations, except for pre-
367   // processor directives and JavaScript files.
368   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
369   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
370                                           MustBeDeclaration);
371   if (Style.Language == FormatStyle::LK_TextProto)
372     parseBracedList();
373   else
374     parseLevel(/*HasOpeningBrace=*/false);
375   // Make sure to format the remaining tokens.
376   //
377   // LK_TextProto is special since its top-level is parsed as the body of a
378   // braced list, which does not necessarily have natural line separators such
379   // as a semicolon. Comments after the last entry that have been determined to
380   // not belong to that line, as in:
381   //   key: value
382   //   // endfile comment
383   // do not have a chance to be put on a line of their own until this point.
384   // Here we add this newline before end-of-file comments.
385   if (Style.Language == FormatStyle::LK_TextProto &&
386       !CommentsBeforeNextToken.empty())
387     addUnwrappedLine();
388   flushComments(true);
389   addUnwrappedLine();
390 }
391 
392 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
393   do {
394     switch (FormatTok->Tok.getKind()) {
395     case tok::l_brace:
396       return;
397     default:
398       if (FormatTok->is(Keywords.kw_where)) {
399         addUnwrappedLine();
400         nextToken();
401         parseCSharpGenericTypeConstraint();
402         break;
403       }
404       nextToken();
405       break;
406     }
407   } while (!eof());
408 }
409 
410 void UnwrappedLineParser::parseCSharpAttribute() {
411   int UnpairedSquareBrackets = 1;
412   do {
413     switch (FormatTok->Tok.getKind()) {
414     case tok::r_square:
415       nextToken();
416       --UnpairedSquareBrackets;
417       if (UnpairedSquareBrackets == 0) {
418         addUnwrappedLine();
419         return;
420       }
421       break;
422     case tok::l_square:
423       ++UnpairedSquareBrackets;
424       nextToken();
425       break;
426     default:
427       nextToken();
428       break;
429     }
430   } while (!eof());
431 }
432 
433 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
434   if (!Lines.empty() && Lines.back().InPPDirective)
435     return true;
436 
437   const FormatToken *Previous = Tokens->getPreviousToken();
438   return Previous && Previous->is(tok::comment) &&
439          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
440 }
441 
442 bool UnwrappedLineParser::mightFitOnOneLine() const {
443   const auto ColumnLimit = Style.ColumnLimit;
444   if (ColumnLimit == 0)
445     return true;
446 
447   if (Lines.empty())
448     return true;
449 
450   const auto &PreviousLine = Lines.back();
451   const auto &Tokens = PreviousLine.Tokens;
452   assert(!Tokens.empty());
453   const auto *LastToken = Tokens.back().Tok;
454   assert(LastToken);
455   if (!LastToken->isOneOf(tok::semi, tok::comment))
456     return true;
457 
458   AnnotatedLine Line(PreviousLine);
459   assert(Line.Last == LastToken);
460 
461   TokenAnnotator Annotator(Style, Keywords);
462   Annotator.annotate(Line);
463   Annotator.calculateFormattingInformation(Line);
464 
465   return Line.Level * Style.IndentWidth + LastToken->TotalLength <= ColumnLimit;
466 }
467 
468 // Returns true if a simple block, or false otherwise. (A simple block has a
469 // single statement that fits on a single line.)
470 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) {
471   const bool IsPrecededByCommentOrPPDirective =
472       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
473   unsigned StatementCount = 0;
474   bool SwitchLabelEncountered = false;
475   do {
476     tok::TokenKind kind = FormatTok->Tok.getKind();
477     if (FormatTok->getType() == TT_MacroBlockBegin) {
478       kind = tok::l_brace;
479     } else if (FormatTok->getType() == TT_MacroBlockEnd) {
480       kind = tok::r_brace;
481     }
482 
483     switch (kind) {
484     case tok::comment:
485       nextToken();
486       addUnwrappedLine();
487       break;
488     case tok::l_brace:
489       // FIXME: Add parameter whether this can happen - if this happens, we must
490       // be in a non-declaration context.
491       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
492         continue;
493       parseBlock();
494       ++StatementCount;
495       assert(StatementCount > 0 && "StatementCount overflow!");
496       addUnwrappedLine();
497       break;
498     case tok::r_brace:
499       if (HasOpeningBrace) {
500         if (!Style.RemoveBracesLLVM)
501           return false;
502         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 ||
503             IsPrecededByCommentOrPPDirective ||
504             precededByCommentOrPPDirective()) {
505           return false;
506         }
507         const FormatToken *Next = Tokens->peekNextToken();
508         if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
509           return false;
510         return mightFitOnOneLine();
511       }
512       nextToken();
513       addUnwrappedLine();
514       break;
515     case tok::kw_default: {
516       unsigned StoredPosition = Tokens->getPosition();
517       FormatToken *Next;
518       do {
519         Next = Tokens->getNextToken();
520       } while (Next->is(tok::comment));
521       FormatTok = Tokens->setPosition(StoredPosition);
522       if (Next && Next->isNot(tok::colon)) {
523         // default not followed by ':' is not a case label; treat it like
524         // an identifier.
525         parseStructuralElement();
526         break;
527       }
528       // Else, if it is 'default:', fall through to the case handling.
529       LLVM_FALLTHROUGH;
530     }
531     case tok::kw_case:
532       if (Style.isJavaScript() && Line->MustBeDeclaration) {
533         // A 'case: string' style field declaration.
534         parseStructuralElement();
535         break;
536       }
537       if (!SwitchLabelEncountered &&
538           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
539         ++Line->Level;
540       SwitchLabelEncountered = true;
541       parseStructuralElement();
542       break;
543     case tok::l_square:
544       if (Style.isCSharp()) {
545         nextToken();
546         parseCSharpAttribute();
547         break;
548       }
549       LLVM_FALLTHROUGH;
550     default:
551       parseStructuralElement(IfKind, !HasOpeningBrace);
552       ++StatementCount;
553       assert(StatementCount > 0 && "StatementCount overflow!");
554       break;
555     }
556   } while (!eof());
557   return false;
558 }
559 
560 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
561   // We'll parse forward through the tokens until we hit
562   // a closing brace or eof - note that getNextToken() will
563   // parse macros, so this will magically work inside macro
564   // definitions, too.
565   unsigned StoredPosition = Tokens->getPosition();
566   FormatToken *Tok = FormatTok;
567   const FormatToken *PrevTok = Tok->Previous;
568   // Keep a stack of positions of lbrace tokens. We will
569   // update information about whether an lbrace starts a
570   // braced init list or a different block during the loop.
571   SmallVector<FormatToken *, 8> LBraceStack;
572   assert(Tok->Tok.is(tok::l_brace));
573   do {
574     // Get next non-comment token.
575     FormatToken *NextTok;
576     unsigned ReadTokens = 0;
577     do {
578       NextTok = Tokens->getNextToken();
579       ++ReadTokens;
580     } while (NextTok->is(tok::comment));
581 
582     switch (Tok->Tok.getKind()) {
583     case tok::l_brace:
584       if (Style.isJavaScript() && PrevTok) {
585         if (PrevTok->isOneOf(tok::colon, tok::less))
586           // A ':' indicates this code is in a type, or a braced list
587           // following a label in an object literal ({a: {b: 1}}).
588           // A '<' could be an object used in a comparison, but that is nonsense
589           // code (can never return true), so more likely it is a generic type
590           // argument (`X<{a: string; b: number}>`).
591           // The code below could be confused by semicolons between the
592           // individual members in a type member list, which would normally
593           // trigger BK_Block. In both cases, this must be parsed as an inline
594           // braced init.
595           Tok->setBlockKind(BK_BracedInit);
596         else if (PrevTok->is(tok::r_paren))
597           // `) { }` can only occur in function or method declarations in JS.
598           Tok->setBlockKind(BK_Block);
599       } else {
600         Tok->setBlockKind(BK_Unknown);
601       }
602       LBraceStack.push_back(Tok);
603       break;
604     case tok::r_brace:
605       if (LBraceStack.empty())
606         break;
607       if (LBraceStack.back()->is(BK_Unknown)) {
608         bool ProbablyBracedList = false;
609         if (Style.Language == FormatStyle::LK_Proto) {
610           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
611         } else {
612           // Skip NextTok over preprocessor lines, otherwise we may not
613           // properly diagnose the block as a braced intializer
614           // if the comma separator appears after the pp directive.
615           while (NextTok->is(tok::hash)) {
616             ScopedMacroState MacroState(*Line, Tokens, NextTok);
617             do {
618               NextTok = Tokens->getNextToken();
619               ++ReadTokens;
620             } while (NextTok->isNot(tok::eof));
621           }
622 
623           // Using OriginalColumn to distinguish between ObjC methods and
624           // binary operators is a bit hacky.
625           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
626                                   NextTok->OriginalColumn == 0;
627 
628           // If there is a comma, semicolon or right paren after the closing
629           // brace, we assume this is a braced initializer list.  Note that
630           // regardless how we mark inner braces here, we will overwrite the
631           // BlockKind later if we parse a braced list (where all blocks
632           // inside are by default braced lists), or when we explicitly detect
633           // blocks (for example while parsing lambdas).
634           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
635           // braced list in JS.
636           ProbablyBracedList =
637               (Style.isJavaScript() &&
638                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
639                                 Keywords.kw_as)) ||
640               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
641               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
642                                tok::r_paren, tok::r_square, tok::l_brace,
643                                tok::ellipsis) ||
644               (NextTok->is(tok::identifier) &&
645                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
646               (NextTok->is(tok::semi) &&
647                (!ExpectClassBody || LBraceStack.size() != 1)) ||
648               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
649           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
650             // We can have an array subscript after a braced init
651             // list, but C++11 attributes are expected after blocks.
652             NextTok = Tokens->getNextToken();
653             ++ReadTokens;
654             ProbablyBracedList = NextTok->isNot(tok::l_square);
655           }
656         }
657         if (ProbablyBracedList) {
658           Tok->setBlockKind(BK_BracedInit);
659           LBraceStack.back()->setBlockKind(BK_BracedInit);
660         } else {
661           Tok->setBlockKind(BK_Block);
662           LBraceStack.back()->setBlockKind(BK_Block);
663         }
664       }
665       LBraceStack.pop_back();
666       break;
667     case tok::identifier:
668       if (!Tok->is(TT_StatementMacro))
669         break;
670       LLVM_FALLTHROUGH;
671     case tok::at:
672     case tok::semi:
673     case tok::kw_if:
674     case tok::kw_while:
675     case tok::kw_for:
676     case tok::kw_switch:
677     case tok::kw_try:
678     case tok::kw___try:
679       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
680         LBraceStack.back()->setBlockKind(BK_Block);
681       break;
682     default:
683       break;
684     }
685     PrevTok = Tok;
686     Tok = NextTok;
687   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
688 
689   // Assume other blocks for all unclosed opening braces.
690   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
691     if (LBraceStack[i]->is(BK_Unknown))
692       LBraceStack[i]->setBlockKind(BK_Block);
693   }
694 
695   FormatTok = Tokens->setPosition(StoredPosition);
696 }
697 
// Mixes the std::hash of \p v into \p seed, in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= mixed;
}
703 
704 size_t UnwrappedLineParser::computePPHash() const {
705   size_t h = 0;
706   for (const auto &i : PPStack) {
707     hash_combine(h, size_t(i.Kind));
708     hash_combine(h, i.Line);
709   }
710   return h;
711 }
712 
713 UnwrappedLineParser::IfStmtKind
714 UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
715                                 bool MunchSemi,
716                                 bool UnindentWhitesmithsBraces) {
717   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
718          "'{' or macro block token expected");
719   FormatToken *Tok = FormatTok;
720   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
721   FormatTok->setBlockKind(BK_Block);
722 
723   // For Whitesmiths mode, jump to the next level prior to skipping over the
724   // braces.
725   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
726     ++Line->Level;
727 
728   size_t PPStartHash = computePPHash();
729 
730   unsigned InitialLevel = Line->Level;
731   nextToken(/*LevelDifference=*/AddLevels);
732 
733   if (MacroBlock && FormatTok->is(tok::l_paren))
734     parseParens();
735 
736   size_t NbPreprocessorDirectives =
737       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
738   addUnwrappedLine();
739   size_t OpeningLineIndex =
740       CurrentLines->empty()
741           ? (UnwrappedLine::kInvalidIndex)
742           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
743 
744   // Whitesmiths is weird here. The brace needs to be indented for the namespace
745   // block, but the block itself may not be indented depending on the style
746   // settings. This allows the format to back up one level in those cases.
747   if (UnindentWhitesmithsBraces)
748     --Line->Level;
749 
750   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
751                                           MustBeDeclaration);
752   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
753     Line->Level += AddLevels;
754 
755   IfStmtKind IfKind = IfStmtKind::NotIf;
756   const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind);
757 
758   if (eof())
759     return IfKind;
760 
761   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
762                  : !FormatTok->is(tok::r_brace)) {
763     Line->Level = InitialLevel;
764     FormatTok->setBlockKind(BK_Block);
765     return IfKind;
766   }
767 
768   if (SimpleBlock && Tok->is(tok::l_brace)) {
769     assert(FormatTok->is(tok::r_brace));
770     const FormatToken *Previous = Tokens->getPreviousToken();
771     assert(Previous);
772     if (Previous->isNot(tok::r_brace) || Previous->Optional) {
773       Tok->MatchingParen = FormatTok;
774       FormatTok->MatchingParen = Tok;
775     }
776   }
777 
778   size_t PPEndHash = computePPHash();
779 
780   // Munch the closing brace.
781   nextToken(/*LevelDifference=*/-AddLevels);
782 
783   if (MacroBlock && FormatTok->is(tok::l_paren))
784     parseParens();
785 
786   if (FormatTok->is(tok::arrow)) {
787     // Following the } we can find a trailing return type arrow
788     // as part of an implicit conversion constraint.
789     nextToken();
790     parseStructuralElement();
791   }
792 
793   if (MunchSemi && FormatTok->Tok.is(tok::semi))
794     nextToken();
795 
796   Line->Level = InitialLevel;
797 
798   if (PPStartHash == PPEndHash) {
799     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
800     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
801       // Update the opening line to add the forward reference as well
802       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
803           CurrentLines->size() - 1;
804     }
805   }
806 
807   return IfKind;
808 }
809 
810 static bool isGoogScope(const UnwrappedLine &Line) {
811   // FIXME: Closure-library specific stuff should not be hard-coded but be
812   // configurable.
813   if (Line.Tokens.size() < 4)
814     return false;
815   auto I = Line.Tokens.begin();
816   if (I->Tok->TokenText != "goog")
817     return false;
818   ++I;
819   if (I->Tok->isNot(tok::period))
820     return false;
821   ++I;
822   if (I->Tok->TokenText != "scope")
823     return false;
824   ++I;
825   return I->Tok->is(tok::l_paren);
826 }
827 
828 static bool isIIFE(const UnwrappedLine &Line,
829                    const AdditionalKeywords &Keywords) {
830   // Look for the start of an immediately invoked anonymous function.
831   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
832   // This is commonly done in JavaScript to create a new, anonymous scope.
833   // Example: (function() { ... })()
834   if (Line.Tokens.size() < 3)
835     return false;
836   auto I = Line.Tokens.begin();
837   if (I->Tok->isNot(tok::l_paren))
838     return false;
839   ++I;
840   if (I->Tok->isNot(Keywords.kw_function))
841     return false;
842   ++I;
843   return I->Tok->is(tok::l_paren);
844 }
845 
846 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
847                                    const FormatToken &InitialToken) {
848   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
849     return Style.BraceWrapping.AfterNamespace;
850   if (InitialToken.is(tok::kw_class))
851     return Style.BraceWrapping.AfterClass;
852   if (InitialToken.is(tok::kw_union))
853     return Style.BraceWrapping.AfterUnion;
854   if (InitialToken.is(tok::kw_struct))
855     return Style.BraceWrapping.AfterStruct;
856   if (InitialToken.is(tok::kw_enum))
857     return Style.BraceWrapping.AfterEnum;
858   return false;
859 }
860 
861 void UnwrappedLineParser::parseChildBlock() {
862   FormatTok->setBlockKind(BK_Block);
863   nextToken();
864   {
865     bool SkipIndent = (Style.isJavaScript() &&
866                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
867     ScopedLineState LineState(*this);
868     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
869                                             /*MustBeDeclaration=*/false);
870     Line->Level += SkipIndent ? 0 : 1;
871     parseLevel(/*HasOpeningBrace=*/true);
872     flushComments(isOnNewLine(*FormatTok));
873     Line->Level -= SkipIndent ? 0 : 1;
874   }
875   nextToken();
876 }
877 
878 void UnwrappedLineParser::parsePPDirective() {
879   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
880   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
881 
882   nextToken();
883 
884   if (!FormatTok->Tok.getIdentifierInfo()) {
885     parsePPUnknown();
886     return;
887   }
888 
889   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
890   case tok::pp_define:
891     parsePPDefine();
892     return;
893   case tok::pp_if:
894     parsePPIf(/*IfDef=*/false);
895     break;
896   case tok::pp_ifdef:
897   case tok::pp_ifndef:
898     parsePPIf(/*IfDef=*/true);
899     break;
900   case tok::pp_else:
901     parsePPElse();
902     break;
903   case tok::pp_elifdef:
904   case tok::pp_elifndef:
905   case tok::pp_elif:
906     parsePPElIf();
907     break;
908   case tok::pp_endif:
909     parsePPEndIf();
910     break;
911   default:
912     parsePPUnknown();
913     break;
914   }
915 }
916 
917 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
918   size_t Line = CurrentLines->size();
919   if (CurrentLines == &PreprocessorDirectives)
920     Line += Lines.size();
921 
922   if (Unreachable ||
923       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
924     PPStack.push_back({PP_Unreachable, Line});
925   else
926     PPStack.push_back({PP_Conditional, Line});
927 }
928 
929 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
930   ++PPBranchLevel;
931   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
932   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
933     PPLevelBranchIndex.push_back(0);
934     PPLevelBranchCount.push_back(0);
935   }
936   PPChainBranchIndex.push(0);
937   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
938   conditionalCompilationCondition(Unreachable || Skip);
939 }
940 
941 void UnwrappedLineParser::conditionalCompilationAlternative() {
942   if (!PPStack.empty())
943     PPStack.pop_back();
944   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
945   if (!PPChainBranchIndex.empty())
946     ++PPChainBranchIndex.top();
947   conditionalCompilationCondition(
948       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
949       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
950 }
951 
952 void UnwrappedLineParser::conditionalCompilationEnd() {
953   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
954   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
955     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
956       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
957     }
958   }
959   // Guard against #endif's without #if.
960   if (PPBranchLevel > -1)
961     --PPBranchLevel;
962   if (!PPChainBranchIndex.empty())
963     PPChainBranchIndex.pop();
964   if (!PPStack.empty())
965     PPStack.pop_back();
966 }
967 
968 void UnwrappedLineParser::parsePPIf(bool IfDef) {
969   bool IfNDef = FormatTok->is(tok::pp_ifndef);
970   nextToken();
971   bool Unreachable = false;
972   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
973     Unreachable = true;
974   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
975     Unreachable = true;
976   conditionalCompilationStart(Unreachable);
977   FormatToken *IfCondition = FormatTok;
978   // If there's a #ifndef on the first line, and the only lines before it are
979   // comments, it could be an include guard.
980   bool MaybeIncludeGuard = IfNDef;
981   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
982     for (auto &Line : Lines) {
983       if (!Line.Tokens.front().Tok->is(tok::comment)) {
984         MaybeIncludeGuard = false;
985         IncludeGuard = IG_Rejected;
986         break;
987       }
988     }
989   --PPBranchLevel;
990   parsePPUnknown();
991   ++PPBranchLevel;
992   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
993     IncludeGuard = IG_IfNdefed;
994     IncludeGuardToken = IfCondition;
995   }
996 }
997 
998 void UnwrappedLineParser::parsePPElse() {
999   // If a potential include guard has an #else, it's not an include guard.
1000   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1001     IncludeGuard = IG_Rejected;
1002   conditionalCompilationAlternative();
1003   if (PPBranchLevel > -1)
1004     --PPBranchLevel;
1005   parsePPUnknown();
1006   ++PPBranchLevel;
1007 }
1008 
1009 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1010 
1011 void UnwrappedLineParser::parsePPEndIf() {
1012   conditionalCompilationEnd();
1013   parsePPUnknown();
1014   // If the #endif of a potential include guard is the last thing in the file,
1015   // then we found an include guard.
1016   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1017       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1018     IncludeGuard = IG_Found;
1019 }
1020 
1021 void UnwrappedLineParser::parsePPDefine() {
1022   nextToken();
1023 
1024   if (!FormatTok->Tok.getIdentifierInfo()) {
1025     IncludeGuard = IG_Rejected;
1026     IncludeGuardToken = nullptr;
1027     parsePPUnknown();
1028     return;
1029   }
1030 
1031   if (IncludeGuard == IG_IfNdefed &&
1032       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1033     IncludeGuard = IG_Defined;
1034     IncludeGuardToken = nullptr;
1035     for (auto &Line : Lines) {
1036       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1037         IncludeGuard = IG_Rejected;
1038         break;
1039       }
1040     }
1041   }
1042 
1043   nextToken();
1044   if (FormatTok->Tok.getKind() == tok::l_paren &&
1045       !FormatTok->hasWhitespaceBefore()) {
1046     parseParens();
1047   }
1048   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1049     Line->Level += PPBranchLevel + 1;
1050   addUnwrappedLine();
1051   ++Line->Level;
1052 
1053   // Errors during a preprocessor directive can only affect the layout of the
1054   // preprocessor directive, and thus we ignore them. An alternative approach
1055   // would be to use the same approach we use on the file level (no
1056   // re-indentation if there was a structural error) within the macro
1057   // definition.
1058   parseFile();
1059 }
1060 
1061 void UnwrappedLineParser::parsePPUnknown() {
1062   do {
1063     nextToken();
1064   } while (!eof());
1065   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1066     Line->Level += PPBranchLevel + 1;
1067   addUnwrappedLine();
1068 }
1069 
1070 // Here we exclude certain tokens that are not usually the first token in an
1071 // unwrapped line. This is used in attempt to distinguish macro calls without
1072 // trailing semicolons from other constructs split to several lines.
1073 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1074   // Semicolon can be a null-statement, l_square can be a start of a macro or
1075   // a C++11 attribute, but this doesn't seem to be common.
1076   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1077          Tok.isNot(TT_AttributeSquare) &&
1078          // Tokens that can only be used as binary operators and a part of
1079          // overloaded operator names.
1080          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1081          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1082          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1083          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1084          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1085          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1086          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1087          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1088          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1089          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1090          Tok.isNot(tok::lesslessequal) &&
1091          // Colon is used in labels, base class lists, initializer lists,
1092          // range-based for loops, ternary operator, but should never be the
1093          // first token in an unwrapped line.
1094          Tok.isNot(tok::colon) &&
1095          // 'noexcept' is a trailing annotation.
1096          Tok.isNot(tok::kw_noexcept);
1097 }
1098 
1099 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1100                           const FormatToken *FormatTok) {
1101   // FIXME: This returns true for C/C++ keywords like 'struct'.
1102   return FormatTok->is(tok::identifier) &&
1103          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1104           !FormatTok->isOneOf(
1105               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1106               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1107               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1108               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1109               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1110               Keywords.kw_instanceof, Keywords.kw_interface,
1111               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1112 }
1113 
1114 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1115                                  const FormatToken *FormatTok) {
1116   return FormatTok->Tok.isLiteral() ||
1117          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1118          mustBeJSIdent(Keywords, FormatTok);
1119 }
1120 
1121 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1122 // when encountered after a value (see mustBeJSIdentOrValue).
1123 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1124                            const FormatToken *FormatTok) {
1125   return FormatTok->isOneOf(
1126       tok::kw_return, Keywords.kw_yield,
1127       // conditionals
1128       tok::kw_if, tok::kw_else,
1129       // loops
1130       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1131       // switch/case
1132       tok::kw_switch, tok::kw_case,
1133       // exceptions
1134       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1135       // declaration
1136       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1137       Keywords.kw_async, Keywords.kw_function,
1138       // import/export
1139       Keywords.kw_import, tok::kw_export);
1140 }
1141 
1142 // Checks whether a token is a type in K&R C (aka C78).
1143 static bool isC78Type(const FormatToken &Tok) {
1144   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1145                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1146                      tok::identifier);
1147 }
1148 
1149 // This function checks whether a token starts the first parameter declaration
1150 // in a K&R C (aka C78) function definition, e.g.:
1151 //   int f(a, b)
1152 //   short a, b;
1153 //   {
1154 //      return a + b;
1155 //   }
1156 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1157                                const FormatToken *FuncName) {
1158   assert(Tok);
1159   assert(Next);
1160   assert(FuncName);
1161 
1162   if (FuncName->isNot(tok::identifier))
1163     return false;
1164 
1165   const FormatToken *Prev = FuncName->Previous;
1166   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1167     return false;
1168 
1169   if (!isC78Type(*Tok) &&
1170       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1171     return false;
1172 
1173   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1174     return false;
1175 
1176   Tok = Tok->Previous;
1177   if (!Tok || Tok->isNot(tok::r_paren))
1178     return false;
1179 
1180   Tok = Tok->Previous;
1181   if (!Tok || Tok->isNot(tok::identifier))
1182     return false;
1183 
1184   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1185 }
1186 
1187 void UnwrappedLineParser::parseModuleImport() {
1188   nextToken();
1189   while (!eof()) {
1190     if (FormatTok->is(tok::colon)) {
1191       FormatTok->setType(TT_ModulePartitionColon);
1192     }
1193     // Handle import <foo/bar.h> as we would an include statement.
1194     else if (FormatTok->is(tok::less)) {
1195       nextToken();
1196       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1197         // Mark tokens up to the trailing line comments as implicit string
1198         // literals.
1199         if (FormatTok->isNot(tok::comment) &&
1200             !FormatTok->TokenText.startswith("//"))
1201           FormatTok->setType(TT_ImplicitStringLiteral);
1202         nextToken();
1203       }
1204     }
1205     if (FormatTok->is(tok::semi)) {
1206       nextToken();
1207       break;
1208     }
1209     nextToken();
1210   }
1211 
1212   addUnwrappedLine();
1213 }
1214 
1215 // readTokenWithJavaScriptASI reads the next token and terminates the current
1216 // line if JavaScript Automatic Semicolon Insertion must
1217 // happen between the current token and the next token.
1218 //
1219 // This method is conservative - it cannot cover all edge cases of JavaScript,
1220 // but only aims to correctly handle certain well known cases. It *must not*
1221 // return true in speculative cases.
1222 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1223   FormatToken *Previous = FormatTok;
1224   readToken();
1225   FormatToken *Next = FormatTok;
1226 
1227   bool IsOnSameLine =
1228       CommentsBeforeNextToken.empty()
1229           ? Next->NewlinesBefore == 0
1230           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1231   if (IsOnSameLine)
1232     return;
1233 
1234   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1235   bool PreviousStartsTemplateExpr =
1236       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1237   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1238     // If the line contains an '@' sign, the previous token might be an
1239     // annotation, which can precede another identifier/value.
1240     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1241       return LineNode.Tok->is(tok::at);
1242     });
1243     if (HasAt)
1244       return;
1245   }
1246   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1247     return addUnwrappedLine();
1248   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1249   bool NextEndsTemplateExpr =
1250       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1251   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1252       (PreviousMustBeValue ||
1253        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1254                          tok::minusminus)))
1255     return addUnwrappedLine();
1256   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1257       isJSDeclOrStmt(Keywords, Next))
1258     return addUnwrappedLine();
1259 }
1260 
// Parses one structural element starting at the current token.
//
// The function works in two phases:
//  1. A switch on the kind of the first token hands constructs that are
//     recognizable from their leading token to dedicated parsers and returns.
//     Cases that `break` instead fall through to phase 2.
//  2. A loop consumes tokens one at a time; each iteration either advances
//     the token stream or ends the element by returning. The loop also stops
//     once eof() reports true.
//
// \p IfKind is forwarded unchanged to parseIfThenElse().
// \p IsTopLevel is only consulted by the K&R C parameter declaration check in
// the `tok::l_paren` case of the loop.
void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
                                                 bool IsTopLevel) {
  // TableGen `include "file"` gets its own unwrapped line.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the kind of the first token.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Tokens inside the asm braces are marked Finalized; the closing brace
      // ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# the keyword is simply consumed; otherwise it
    // is parsed as an access specifier.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse(IfKind);
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only `extern "..." {` is handled here; anything else continues below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: `import [public] "file";`. Returns without adding a line if
        // no string literal follows.
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // `signals:` / `slots:` style sections (and their Q_ variants) followed
    // by a colon end the line right after the colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume tokens until a case below ends the structural element by
  // returning, or until eof() is reached.
  do {
    // Token preceding the one handled in this iteration; may be null.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      return;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // `typedef NS_ENUM(...)` and the related macros are parsed as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here; the line is ended before it.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by an identifier or simple type, a
      // parenthesized list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Look one token ahead, then rewind the token stream.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // `interface` in any other language is handled like struct/class.
      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      // Text keeps the identifier's spelling; FormatTok moves on below.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // The checks below only apply when the identifier is the first token of
      // the line, or the second one after a leading comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name (at least five characters long or
        // followed by parentheses) that is followed by a line break and by a
        // token that can start a new line is treated as a complete macro
        // invocation.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // A fat arrow ('=>') in JavaScript/C# may introduce a child block.
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: `= <...>` is parsed as a braced list closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1754 
1755 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1756   assert(FormatTok->is(tok::l_brace));
1757   if (!Style.isCSharp())
1758     return false;
1759   // See if it's a property accessor.
1760   if (FormatTok->Previous->isNot(tok::identifier))
1761     return false;
1762 
1763   // See if we are inside a property accessor.
1764   //
1765   // Record the current tokenPosition so that we can advance and
1766   // reset the current token. `Next` is not set yet so we need
1767   // another way to advance along the token stream.
1768   unsigned int StoredPosition = Tokens->getPosition();
1769   FormatToken *Tok = Tokens->getNextToken();
1770 
1771   // A trivial property accessor is of the form:
1772   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1773   // Track these as they do not require line breaks to be introduced.
1774   bool HasGetOrSet = false;
1775   bool IsTrivialPropertyAccessor = true;
1776   while (!eof()) {
1777     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1778                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1779                      Keywords.kw_set)) {
1780       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1781         HasGetOrSet = true;
1782       Tok = Tokens->getNextToken();
1783       continue;
1784     }
1785     if (Tok->isNot(tok::r_brace))
1786       IsTrivialPropertyAccessor = false;
1787     break;
1788   }
1789 
1790   if (!HasGetOrSet) {
1791     Tokens->setPosition(StoredPosition);
1792     return false;
1793   }
1794 
1795   // Try to parse the property accessor:
1796   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1797   Tokens->setPosition(StoredPosition);
1798   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1799     addUnwrappedLine();
1800   nextToken();
1801   do {
1802     switch (FormatTok->Tok.getKind()) {
1803     case tok::r_brace:
1804       nextToken();
1805       if (FormatTok->is(tok::equal)) {
1806         while (!eof() && FormatTok->isNot(tok::semi))
1807           nextToken();
1808         nextToken();
1809       }
1810       addUnwrappedLine();
1811       return true;
1812     case tok::l_brace:
1813       ++Line->Level;
1814       parseBlock(/*MustBeDeclaration=*/true);
1815       addUnwrappedLine();
1816       --Line->Level;
1817       break;
1818     case tok::equal:
1819       if (FormatTok->is(TT_FatArrow)) {
1820         ++Line->Level;
1821         do {
1822           nextToken();
1823         } while (!eof() && FormatTok->isNot(tok::semi));
1824         nextToken();
1825         addUnwrappedLine();
1826         --Line->Level;
1827         break;
1828       }
1829       nextToken();
1830       break;
1831     default:
1832       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1833           !IsTrivialPropertyAccessor) {
1834         // Non-trivial get/set needs to be on its own line.
1835         addUnwrappedLine();
1836       }
1837       nextToken();
1838     }
1839   } while (!eof());
1840 
1841   // Unreachable for well-formed code (paired '{' and '}').
1842   return true;
1843 }
1844 
1845 bool UnwrappedLineParser::tryToParseLambda() {
1846   if (!Style.isCpp()) {
1847     nextToken();
1848     return false;
1849   }
1850   assert(FormatTok->is(tok::l_square));
1851   FormatToken &LSquare = *FormatTok;
1852   if (!tryToParseLambdaIntroducer())
1853     return false;
1854 
1855   bool SeenArrow = false;
1856 
1857   while (FormatTok->isNot(tok::l_brace)) {
1858     if (FormatTok->isSimpleTypeSpecifier()) {
1859       nextToken();
1860       continue;
1861     }
1862     switch (FormatTok->Tok.getKind()) {
1863     case tok::l_brace:
1864       break;
1865     case tok::l_paren:
1866       parseParens();
1867       break;
1868     case tok::l_square:
1869       parseSquare();
1870       break;
1871     case tok::amp:
1872     case tok::star:
1873     case tok::kw_const:
1874     case tok::comma:
1875     case tok::less:
1876     case tok::greater:
1877     case tok::identifier:
1878     case tok::numeric_constant:
1879     case tok::coloncolon:
1880     case tok::kw_class:
1881     case tok::kw_mutable:
1882     case tok::kw_noexcept:
1883     case tok::kw_template:
1884     case tok::kw_typename:
1885       nextToken();
1886       break;
1887     // Specialization of a template with an integer parameter can contain
1888     // arithmetic, logical, comparison and ternary operators.
1889     //
1890     // FIXME: This also accepts sequences of operators that are not in the scope
1891     // of a template argument list.
1892     //
1893     // In a C++ lambda a template type can only occur after an arrow. We use
1894     // this as an heuristic to distinguish between Objective-C expressions
1895     // followed by an `a->b` expression, such as:
1896     // ([obj func:arg] + a->b)
1897     // Otherwise the code below would parse as a lambda.
1898     //
1899     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1900     // explicit template lists: []<bool b = true && false>(U &&u){}
1901     case tok::plus:
1902     case tok::minus:
1903     case tok::exclaim:
1904     case tok::tilde:
1905     case tok::slash:
1906     case tok::percent:
1907     case tok::lessless:
1908     case tok::pipe:
1909     case tok::pipepipe:
1910     case tok::ampamp:
1911     case tok::caret:
1912     case tok::equalequal:
1913     case tok::exclaimequal:
1914     case tok::greaterequal:
1915     case tok::lessequal:
1916     case tok::question:
1917     case tok::colon:
1918     case tok::ellipsis:
1919     case tok::kw_true:
1920     case tok::kw_false:
1921       if (SeenArrow) {
1922         nextToken();
1923         break;
1924       }
1925       return true;
1926     case tok::arrow:
1927       // This might or might not actually be a lambda arrow (this could be an
1928       // ObjC method invocation followed by a dereferencing arrow). We might
1929       // reset this back to TT_Unknown in TokenAnnotator.
1930       FormatTok->setType(TT_LambdaArrow);
1931       SeenArrow = true;
1932       nextToken();
1933       break;
1934     default:
1935       return true;
1936     }
1937   }
1938   FormatTok->setType(TT_LambdaLBrace);
1939   LSquare.setType(TT_LambdaLSquare);
1940   parseChildBlock();
1941   return true;
1942 }
1943 
1944 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1945   const FormatToken *Previous = FormatTok->Previous;
1946   if (Previous &&
1947       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1948                          tok::kw_delete, tok::l_square) ||
1949        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1950        Previous->isSimpleTypeSpecifier())) {
1951     nextToken();
1952     return false;
1953   }
1954   nextToken();
1955   if (FormatTok->is(tok::l_square)) {
1956     return false;
1957   }
1958   parseSquare(/*LambdaIntroducer=*/true);
1959   return true;
1960 }
1961 
1962 void UnwrappedLineParser::tryToParseJSFunction() {
1963   assert(FormatTok->is(Keywords.kw_function) ||
1964          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1965   if (FormatTok->is(Keywords.kw_async))
1966     nextToken();
1967   // Consume "function".
1968   nextToken();
1969 
1970   // Consume * (generator function). Treat it like C++'s overloaded operators.
1971   if (FormatTok->is(tok::star)) {
1972     FormatTok->setType(TT_OverloadedOperator);
1973     nextToken();
1974   }
1975 
1976   // Consume function name.
1977   if (FormatTok->is(tok::identifier))
1978     nextToken();
1979 
1980   if (FormatTok->isNot(tok::l_paren))
1981     return;
1982 
1983   // Parse formal parameter list.
1984   parseParens();
1985 
1986   if (FormatTok->is(tok::colon)) {
1987     // Parse a type definition.
1988     nextToken();
1989 
1990     // Eat the type declaration. For braced inline object types, balance braces,
1991     // otherwise just parse until finding an l_brace for the function body.
1992     if (FormatTok->is(tok::l_brace))
1993       tryToParseBracedList();
1994     else
1995       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1996         nextToken();
1997   }
1998 
1999   if (FormatTok->is(tok::semi))
2000     return;
2001 
2002   parseChildBlock();
2003 }
2004 
2005 bool UnwrappedLineParser::tryToParseBracedList() {
2006   if (FormatTok->is(BK_Unknown))
2007     calculateBraceTypes();
2008   assert(FormatTok->isNot(BK_Unknown));
2009   if (FormatTok->is(BK_Block))
2010     return false;
2011   nextToken();
2012   parseBracedList();
2013   return true;
2014 }
2015 
2016 bool UnwrappedLineParser::tryToParseChildBlock() {
2017   assert(Style.isJavaScript() || Style.isCSharp());
2018   assert(FormatTok->is(TT_FatArrow));
2019   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2020   // They always start an expression or a child block if followed by a curly
2021   // brace.
2022   nextToken();
2023   if (FormatTok->isNot(tok::l_brace))
2024     return false;
2025   parseChildBlock();
2026   return true;
2027 }
2028 
2029 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2030                                           bool IsEnum,
2031                                           tok::TokenKind ClosingBraceKind) {
2032   bool HasError = false;
2033 
2034   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2035   // replace this by using parseAssignmentExpression() inside.
2036   do {
2037     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2038         tryToParseChildBlock())
2039       continue;
2040     if (Style.isJavaScript()) {
2041       if (FormatTok->is(Keywords.kw_function) ||
2042           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2043         tryToParseJSFunction();
2044         continue;
2045       }
2046       if (FormatTok->is(tok::l_brace)) {
2047         // Could be a method inside of a braced list `{a() { return 1; }}`.
2048         if (tryToParseBracedList())
2049           continue;
2050         parseChildBlock();
2051       }
2052     }
2053     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2054       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2055         addUnwrappedLine();
2056       nextToken();
2057       return !HasError;
2058     }
2059     switch (FormatTok->Tok.getKind()) {
2060     case tok::l_square:
2061       if (Style.isCSharp())
2062         parseSquare();
2063       else
2064         tryToParseLambda();
2065       break;
2066     case tok::l_paren:
2067       parseParens();
2068       // JavaScript can just have free standing methods and getters/setters in
2069       // object literals. Detect them by a "{" following ")".
2070       if (Style.isJavaScript()) {
2071         if (FormatTok->is(tok::l_brace))
2072           parseChildBlock();
2073         break;
2074       }
2075       break;
2076     case tok::l_brace:
2077       // Assume there are no blocks inside a braced init list apart
2078       // from the ones we explicitly parse out (like lambdas).
2079       FormatTok->setBlockKind(BK_BracedInit);
2080       nextToken();
2081       parseBracedList();
2082       break;
2083     case tok::less:
2084       if (Style.Language == FormatStyle::LK_Proto) {
2085         nextToken();
2086         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2087                         /*ClosingBraceKind=*/tok::greater);
2088       } else {
2089         nextToken();
2090       }
2091       break;
2092     case tok::semi:
2093       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2094       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2095       // used for error recovery if we have otherwise determined that this is
2096       // a braced list.
2097       if (Style.isJavaScript()) {
2098         nextToken();
2099         break;
2100       }
2101       HasError = true;
2102       if (!ContinueOnSemicolons)
2103         return !HasError;
2104       nextToken();
2105       break;
2106     case tok::comma:
2107       nextToken();
2108       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2109         addUnwrappedLine();
2110       break;
2111     default:
2112       nextToken();
2113       break;
2114     }
2115   } while (!eof());
2116   return false;
2117 }
2118 
2119 void UnwrappedLineParser::parseParens() {
2120   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2121   nextToken();
2122   do {
2123     switch (FormatTok->Tok.getKind()) {
2124     case tok::l_paren:
2125       parseParens();
2126       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2127         parseChildBlock();
2128       break;
2129     case tok::r_paren:
2130       nextToken();
2131       return;
2132     case tok::r_brace:
2133       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2134       return;
2135     case tok::l_square:
2136       tryToParseLambda();
2137       break;
2138     case tok::l_brace:
2139       if (!tryToParseBracedList())
2140         parseChildBlock();
2141       break;
2142     case tok::at:
2143       nextToken();
2144       if (FormatTok->Tok.is(tok::l_brace)) {
2145         nextToken();
2146         parseBracedList();
2147       }
2148       break;
2149     case tok::equal:
2150       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2151         tryToParseChildBlock();
2152       else
2153         nextToken();
2154       break;
2155     case tok::kw_class:
2156       if (Style.isJavaScript())
2157         parseRecord(/*ParseAsExpr=*/true);
2158       else
2159         nextToken();
2160       break;
2161     case tok::identifier:
2162       if (Style.isJavaScript() &&
2163           (FormatTok->is(Keywords.kw_function) ||
2164            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2165         tryToParseJSFunction();
2166       else
2167         nextToken();
2168       break;
2169     default:
2170       nextToken();
2171       break;
2172     }
2173   } while (!eof());
2174 }
2175 
2176 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2177   if (!LambdaIntroducer) {
2178     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2179     if (tryToParseLambda())
2180       return;
2181   }
2182   do {
2183     switch (FormatTok->Tok.getKind()) {
2184     case tok::l_paren:
2185       parseParens();
2186       break;
2187     case tok::r_square:
2188       nextToken();
2189       return;
2190     case tok::r_brace:
2191       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2192       return;
2193     case tok::l_square:
2194       parseSquare();
2195       break;
2196     case tok::l_brace: {
2197       if (!tryToParseBracedList())
2198         parseChildBlock();
2199       break;
2200     }
2201     case tok::at:
2202       nextToken();
2203       if (FormatTok->Tok.is(tok::l_brace)) {
2204         nextToken();
2205         parseBracedList();
2206       }
2207       break;
2208     default:
2209       nextToken();
2210       break;
2211     }
2212   } while (!eof());
2213 }
2214 
2215 void UnwrappedLineParser::keepAncestorBraces() {
2216   if (!Style.RemoveBracesLLVM)
2217     return;
2218 
2219   const int MaxNestingLevels = 2;
2220   const int Size = NestedTooDeep.size();
2221   if (Size >= MaxNestingLevels)
2222     NestedTooDeep[Size - MaxNestingLevels] = true;
2223   NestedTooDeep.push_back(false);
2224 }
2225 
2226 static void markOptionalBraces(FormatToken *LeftBrace) {
2227   if (!LeftBrace)
2228     return;
2229 
2230   assert(LeftBrace->is(tok::l_brace));
2231 
2232   FormatToken *RightBrace = LeftBrace->MatchingParen;
2233   if (!RightBrace) {
2234     assert(!LeftBrace->Optional);
2235     return;
2236   }
2237 
2238   assert(RightBrace->is(tok::r_brace));
2239   assert(RightBrace->MatchingParen == LeftBrace);
2240   assert(LeftBrace->Optional == RightBrace->Optional);
2241 
2242   LeftBrace->Optional = true;
2243   RightBrace->Optional = true;
2244 }
2245 
2246 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2247                                                   bool KeepBraces) {
2248   auto HandleAttributes = [this]() {
2249     // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2250     if (FormatTok->is(TT_AttributeMacro))
2251       nextToken();
2252     // Handle [[likely]] / [[unlikely]] attributes.
2253     if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2254       parseSquare();
2255   };
2256 
2257   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2258   nextToken();
2259   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2260     nextToken();
2261   if (FormatTok->Tok.is(tok::l_paren))
2262     parseParens();
2263   HandleAttributes();
2264 
2265   bool NeedsUnwrappedLine = false;
2266   keepAncestorBraces();
2267 
2268   FormatToken *IfLeftBrace = nullptr;
2269   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2270 
2271   if (FormatTok->Tok.is(tok::l_brace)) {
2272     IfLeftBrace = FormatTok;
2273     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2274     IfBlockKind = parseBlock();
2275     if (Style.BraceWrapping.BeforeElse)
2276       addUnwrappedLine();
2277     else
2278       NeedsUnwrappedLine = true;
2279   } else {
2280     addUnwrappedLine();
2281     ++Line->Level;
2282     parseStructuralElement();
2283     --Line->Level;
2284   }
2285 
2286   bool KeepIfBraces = false;
2287   if (Style.RemoveBracesLLVM) {
2288     assert(!NestedTooDeep.empty());
2289     KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2290                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2291                    IfBlockKind == IfStmtKind::IfElseIf;
2292   }
2293 
2294   FormatToken *ElseLeftBrace = nullptr;
2295   IfStmtKind Kind = IfStmtKind::IfOnly;
2296 
2297   if (FormatTok->Tok.is(tok::kw_else)) {
2298     if (Style.RemoveBracesLLVM) {
2299       NestedTooDeep.back() = false;
2300       Kind = IfStmtKind::IfElse;
2301     }
2302     nextToken();
2303     HandleAttributes();
2304     if (FormatTok->Tok.is(tok::l_brace)) {
2305       ElseLeftBrace = FormatTok;
2306       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2307       if (parseBlock() == IfStmtKind::IfOnly)
2308         Kind = IfStmtKind::IfElseIf;
2309       addUnwrappedLine();
2310     } else if (FormatTok->Tok.is(tok::kw_if)) {
2311       FormatToken *Previous = Tokens->getPreviousToken();
2312       const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
2313       if (IsPrecededByComment) {
2314         addUnwrappedLine();
2315         ++Line->Level;
2316       }
2317       bool TooDeep = true;
2318       if (Style.RemoveBracesLLVM) {
2319         Kind = IfStmtKind::IfElseIf;
2320         TooDeep = NestedTooDeep.pop_back_val();
2321       }
2322       ElseLeftBrace =
2323           parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
2324       if (Style.RemoveBracesLLVM)
2325         NestedTooDeep.push_back(TooDeep);
2326       if (IsPrecededByComment)
2327         --Line->Level;
2328     } else {
2329       addUnwrappedLine();
2330       ++Line->Level;
2331       parseStructuralElement();
2332       if (FormatTok->is(tok::eof))
2333         addUnwrappedLine();
2334       --Line->Level;
2335     }
2336   } else {
2337     if (Style.RemoveBracesLLVM)
2338       KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2339     if (NeedsUnwrappedLine)
2340       addUnwrappedLine();
2341   }
2342 
2343   if (!Style.RemoveBracesLLVM)
2344     return nullptr;
2345 
2346   assert(!NestedTooDeep.empty());
2347   const bool KeepElseBraces =
2348       (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();
2349 
2350   NestedTooDeep.pop_back();
2351 
2352   if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
2353     markOptionalBraces(IfLeftBrace);
2354     markOptionalBraces(ElseLeftBrace);
2355   } else if (IfLeftBrace) {
2356     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2357     if (IfRightBrace) {
2358       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2359       assert(!IfLeftBrace->Optional);
2360       assert(!IfRightBrace->Optional);
2361       IfLeftBrace->MatchingParen = nullptr;
2362       IfRightBrace->MatchingParen = nullptr;
2363     }
2364   }
2365 
2366   if (IfKind)
2367     *IfKind = Kind;
2368 
2369   return IfLeftBrace;
2370 }
2371 
2372 void UnwrappedLineParser::parseTryCatch() {
2373   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2374   nextToken();
2375   bool NeedsUnwrappedLine = false;
2376   if (FormatTok->is(tok::colon)) {
2377     // We are in a function try block, what comes is an initializer list.
2378     nextToken();
2379 
2380     // In case identifiers were removed by clang-tidy, what might follow is
2381     // multiple commas in sequence - before the first identifier.
2382     while (FormatTok->is(tok::comma))
2383       nextToken();
2384 
2385     while (FormatTok->is(tok::identifier)) {
2386       nextToken();
2387       if (FormatTok->is(tok::l_paren))
2388         parseParens();
2389       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2390           FormatTok->is(tok::l_brace)) {
2391         do {
2392           nextToken();
2393         } while (!FormatTok->is(tok::r_brace));
2394         nextToken();
2395       }
2396 
2397       // In case identifiers were removed by clang-tidy, what might follow is
2398       // multiple commas in sequence - after the first identifier.
2399       while (FormatTok->is(tok::comma))
2400         nextToken();
2401     }
2402   }
2403   // Parse try with resource.
2404   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2405     parseParens();
2406   }
2407 
2408   keepAncestorBraces();
2409 
2410   if (FormatTok->is(tok::l_brace)) {
2411     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2412     parseBlock();
2413     if (Style.BraceWrapping.BeforeCatch) {
2414       addUnwrappedLine();
2415     } else {
2416       NeedsUnwrappedLine = true;
2417     }
2418   } else if (!FormatTok->is(tok::kw_catch)) {
2419     // The C++ standard requires a compound-statement after a try.
2420     // If there's none, we try to assume there's a structuralElement
2421     // and try to continue.
2422     addUnwrappedLine();
2423     ++Line->Level;
2424     parseStructuralElement();
2425     --Line->Level;
2426   }
2427   while (true) {
2428     if (FormatTok->is(tok::at))
2429       nextToken();
2430     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2431                              tok::kw___finally) ||
2432           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2433            FormatTok->is(Keywords.kw_finally)) ||
2434           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2435            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2436       break;
2437     nextToken();
2438     while (FormatTok->isNot(tok::l_brace)) {
2439       if (FormatTok->is(tok::l_paren)) {
2440         parseParens();
2441         continue;
2442       }
2443       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2444         if (Style.RemoveBracesLLVM)
2445           NestedTooDeep.pop_back();
2446         return;
2447       }
2448       nextToken();
2449     }
2450     NeedsUnwrappedLine = false;
2451     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2452     parseBlock();
2453     if (Style.BraceWrapping.BeforeCatch)
2454       addUnwrappedLine();
2455     else
2456       NeedsUnwrappedLine = true;
2457   }
2458 
2459   if (Style.RemoveBracesLLVM)
2460     NestedTooDeep.pop_back();
2461 
2462   if (NeedsUnwrappedLine)
2463     addUnwrappedLine();
2464 }
2465 
2466 void UnwrappedLineParser::parseNamespace() {
2467   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2468          "'namespace' expected");
2469 
2470   const FormatToken &InitialToken = *FormatTok;
2471   nextToken();
2472   if (InitialToken.is(TT_NamespaceMacro)) {
2473     parseParens();
2474   } else {
2475     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2476                               tok::l_square, tok::period)) {
2477       if (FormatTok->is(tok::l_square))
2478         parseSquare();
2479       else
2480         nextToken();
2481     }
2482   }
2483   if (FormatTok->Tok.is(tok::l_brace)) {
2484     if (ShouldBreakBeforeBrace(Style, InitialToken))
2485       addUnwrappedLine();
2486 
2487     unsigned AddLevels =
2488         Style.NamespaceIndentation == FormatStyle::NI_All ||
2489                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2490                  DeclarationScopeStack.size() > 1)
2491             ? 1u
2492             : 0u;
2493     bool ManageWhitesmithsBraces =
2494         AddLevels == 0u &&
2495         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2496 
2497     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2498     // the whole block.
2499     if (ManageWhitesmithsBraces)
2500       ++Line->Level;
2501 
2502     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2503                /*MunchSemi=*/true,
2504                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2505 
2506     // Munch the semicolon after a namespace. This is more common than one would
2507     // think. Putting the semicolon into its own line is very ugly.
2508     if (FormatTok->Tok.is(tok::semi))
2509       nextToken();
2510 
2511     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2512 
2513     if (ManageWhitesmithsBraces)
2514       --Line->Level;
2515   }
2516   // FIXME: Add error handling.
2517 }
2518 
2519 void UnwrappedLineParser::parseNew() {
2520   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2521   nextToken();
2522 
2523   if (Style.isCSharp()) {
2524     do {
2525       if (FormatTok->is(tok::l_brace))
2526         parseBracedList();
2527 
2528       if (FormatTok->isOneOf(tok::semi, tok::comma))
2529         return;
2530 
2531       nextToken();
2532     } while (!eof());
2533   }
2534 
2535   if (Style.Language != FormatStyle::LK_Java)
2536     return;
2537 
2538   // In Java, we can parse everything up to the parens, which aren't optional.
2539   do {
2540     // There should not be a ;, { or } before the new's open paren.
2541     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2542       return;
2543 
2544     // Consume the parens.
2545     if (FormatTok->is(tok::l_paren)) {
2546       parseParens();
2547 
2548       // If there is a class body of an anonymous class, consume that as child.
2549       if (FormatTok->is(tok::l_brace))
2550         parseChildBlock();
2551       return;
2552     }
2553     nextToken();
2554   } while (!eof());
2555 }
2556 
2557 void UnwrappedLineParser::parseForOrWhileLoop() {
2558   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2559          "'for', 'while' or foreach macro expected");
2560   nextToken();
2561   // JS' for await ( ...
2562   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2563     nextToken();
2564   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2565     nextToken();
2566   if (FormatTok->Tok.is(tok::l_paren))
2567     parseParens();
2568 
2569   keepAncestorBraces();
2570 
2571   if (FormatTok->Tok.is(tok::l_brace)) {
2572     FormatToken *LeftBrace = FormatTok;
2573     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2574     parseBlock();
2575     if (Style.RemoveBracesLLVM) {
2576       assert(!NestedTooDeep.empty());
2577       if (!NestedTooDeep.back())
2578         markOptionalBraces(LeftBrace);
2579     }
2580     addUnwrappedLine();
2581   } else {
2582     addUnwrappedLine();
2583     ++Line->Level;
2584     parseStructuralElement();
2585     --Line->Level;
2586   }
2587 
2588   if (Style.RemoveBracesLLVM)
2589     NestedTooDeep.pop_back();
2590 }
2591 
2592 void UnwrappedLineParser::parseDoWhile() {
2593   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2594   nextToken();
2595 
2596   keepAncestorBraces();
2597 
2598   if (FormatTok->Tok.is(tok::l_brace)) {
2599     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2600     parseBlock();
2601     if (Style.BraceWrapping.BeforeWhile)
2602       addUnwrappedLine();
2603   } else {
2604     addUnwrappedLine();
2605     ++Line->Level;
2606     parseStructuralElement();
2607     --Line->Level;
2608   }
2609 
2610   if (Style.RemoveBracesLLVM)
2611     NestedTooDeep.pop_back();
2612 
2613   // FIXME: Add error handling.
2614   if (!FormatTok->Tok.is(tok::kw_while)) {
2615     addUnwrappedLine();
2616     return;
2617   }
2618 
2619   // If in Whitesmiths mode, the line with the while() needs to be indented
2620   // to the same level as the block.
2621   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2622     ++Line->Level;
2623 
2624   nextToken();
2625   parseStructuralElement();
2626 }
2627 
2628 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2629   nextToken();
2630   unsigned OldLineLevel = Line->Level;
2631   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2632     --Line->Level;
2633   if (LeftAlignLabel)
2634     Line->Level = 0;
2635 
2636   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2637       FormatTok->Tok.is(tok::l_brace)) {
2638 
2639     CompoundStatementIndenter Indenter(this, Line->Level,
2640                                        Style.BraceWrapping.AfterCaseLabel,
2641                                        Style.BraceWrapping.IndentBraces);
2642     parseBlock();
2643     if (FormatTok->Tok.is(tok::kw_break)) {
2644       if (Style.BraceWrapping.AfterControlStatement ==
2645           FormatStyle::BWACS_Always) {
2646         addUnwrappedLine();
2647         if (!Style.IndentCaseBlocks &&
2648             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2649           ++Line->Level;
2650         }
2651       }
2652       parseStructuralElement();
2653     }
2654     addUnwrappedLine();
2655   } else {
2656     if (FormatTok->is(tok::semi))
2657       nextToken();
2658     addUnwrappedLine();
2659   }
2660   Line->Level = OldLineLevel;
2661   if (FormatTok->isNot(tok::l_brace)) {
2662     parseStructuralElement();
2663     addUnwrappedLine();
2664   }
2665 }
2666 
2667 void UnwrappedLineParser::parseCaseLabel() {
2668   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2669 
2670   // FIXME: fix handling of complex expressions here.
2671   do {
2672     nextToken();
2673   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2674   parseLabel();
2675 }
2676 
2677 void UnwrappedLineParser::parseSwitch() {
2678   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2679   nextToken();
2680   if (FormatTok->Tok.is(tok::l_paren))
2681     parseParens();
2682 
2683   keepAncestorBraces();
2684 
2685   if (FormatTok->Tok.is(tok::l_brace)) {
2686     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2687     parseBlock();
2688     addUnwrappedLine();
2689   } else {
2690     addUnwrappedLine();
2691     ++Line->Level;
2692     parseStructuralElement();
2693     --Line->Level;
2694   }
2695 
2696   if (Style.RemoveBracesLLVM)
2697     NestedTooDeep.pop_back();
2698 }
2699 
2700 void UnwrappedLineParser::parseAccessSpecifier() {
2701   nextToken();
2702   // Understand Qt's slots.
2703   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2704     nextToken();
2705   // Otherwise, we don't know what it is, and we'd better keep the next token.
2706   if (FormatTok->Tok.is(tok::colon))
2707     nextToken();
2708   addUnwrappedLine();
2709 }
2710 
2711 void UnwrappedLineParser::parseConcept() {
2712   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2713   nextToken();
2714   if (!FormatTok->Tok.is(tok::identifier))
2715     return;
2716   nextToken();
2717   if (!FormatTok->Tok.is(tok::equal))
2718     return;
2719   nextToken();
2720   if (FormatTok->Tok.is(tok::kw_requires)) {
2721     nextToken();
2722     parseRequiresExpression(Line->Level);
2723   } else {
2724     parseConstraintExpression(Line->Level);
2725   }
2726 }
2727 
2728 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2729   // requires (R range)
2730   if (FormatTok->Tok.is(tok::l_paren)) {
2731     parseParens();
2732     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2733       addUnwrappedLine();
2734       --Line->Level;
2735     }
2736   }
2737 
2738   if (FormatTok->Tok.is(tok::l_brace)) {
2739     if (Style.BraceWrapping.AfterFunction)
2740       addUnwrappedLine();
2741     FormatTok->setType(TT_FunctionLBrace);
2742     parseBlock();
2743     addUnwrappedLine();
2744   } else {
2745     parseConstraintExpression(OriginalLevel);
2746   }
2747 }
2748 
2749 void UnwrappedLineParser::parseConstraintExpression(
2750     unsigned int OriginalLevel) {
2751   // requires Id<T> && Id<T> || Id<T>
2752   while (
2753       FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
2754     nextToken();
2755     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
2756                               tok::greater, tok::comma, tok::ellipsis)) {
2757       if (FormatTok->Tok.is(tok::less)) {
2758         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2759                         /*ClosingBraceKind=*/tok::greater);
2760         continue;
2761       }
2762       nextToken();
2763     }
2764     if (FormatTok->Tok.is(tok::kw_requires)) {
2765       parseRequiresExpression(OriginalLevel);
2766     }
2767     if (FormatTok->Tok.is(tok::less)) {
2768       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2769                       /*ClosingBraceKind=*/tok::greater);
2770     }
2771 
2772     if (FormatTok->Tok.is(tok::l_paren)) {
2773       parseParens();
2774     }
2775     if (FormatTok->Tok.is(tok::l_brace)) {
2776       if (Style.BraceWrapping.AfterFunction)
2777         addUnwrappedLine();
2778       FormatTok->setType(TT_FunctionLBrace);
2779       parseBlock();
2780     }
2781     if (FormatTok->Tok.is(tok::semi)) {
2782       // Eat any trailing semi.
2783       nextToken();
2784       addUnwrappedLine();
2785     }
2786     if (FormatTok->Tok.is(tok::colon)) {
2787       return;
2788     }
2789     if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
2790       if (FormatTok->Previous &&
2791           !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
2792                                         tok::coloncolon)) {
2793         addUnwrappedLine();
2794       }
2795       if (Style.IndentRequires && OriginalLevel != Line->Level) {
2796         --Line->Level;
2797       }
2798       break;
2799     } else {
2800       FormatTok->setType(TT_ConstraintJunctions);
2801     }
2802 
2803     nextToken();
2804   }
2805 }
2806 
2807 void UnwrappedLineParser::parseRequires() {
2808   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2809 
2810   unsigned OriginalLevel = Line->Level;
2811   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2812     addUnwrappedLine();
2813     if (Style.IndentRequires) {
2814       ++Line->Level;
2815     }
2816   }
2817   nextToken();
2818 
2819   parseRequiresExpression(OriginalLevel);
2820 }
2821 
2822 bool UnwrappedLineParser::parseEnum() {
2823   const FormatToken &InitialToken = *FormatTok;
2824 
2825   // Won't be 'enum' for NS_ENUMs.
2826   if (FormatTok->Tok.is(tok::kw_enum))
2827     nextToken();
2828 
2829   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2830   // declarations. An "enum" keyword followed by a colon would be a syntax
2831   // error and thus assume it is just an identifier.
2832   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
2833     return false;
2834 
2835   // In protobuf, "enum" can be used as a field name.
2836   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2837     return false;
2838 
2839   // Eat up enum class ...
2840   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2841     nextToken();
2842 
2843   while (FormatTok->Tok.getIdentifierInfo() ||
2844          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2845                             tok::greater, tok::comma, tok::question)) {
2846     nextToken();
2847     // We can have macros or attributes in between 'enum' and the enum name.
2848     if (FormatTok->is(tok::l_paren))
2849       parseParens();
2850     if (FormatTok->is(tok::identifier)) {
2851       nextToken();
2852       // If there are two identifiers in a row, this is likely an elaborate
2853       // return type. In Java, this can be "implements", etc.
2854       if (Style.isCpp() && FormatTok->is(tok::identifier))
2855         return false;
2856     }
2857   }
2858 
2859   // Just a declaration or something is wrong.
2860   if (FormatTok->isNot(tok::l_brace))
2861     return true;
2862   FormatTok->setBlockKind(BK_Block);
2863 
2864   if (Style.Language == FormatStyle::LK_Java) {
2865     // Java enums are different.
2866     parseJavaEnumBody();
2867     return true;
2868   }
2869   if (Style.Language == FormatStyle::LK_Proto) {
2870     parseBlock(/*MustBeDeclaration=*/true);
2871     return true;
2872   }
2873 
2874   if (!Style.AllowShortEnumsOnASingleLine &&
2875       ShouldBreakBeforeBrace(Style, InitialToken))
2876     addUnwrappedLine();
2877   // Parse enum body.
2878   nextToken();
2879   if (!Style.AllowShortEnumsOnASingleLine) {
2880     addUnwrappedLine();
2881     Line->Level += 1;
2882   }
2883   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2884                                    /*IsEnum=*/true);
2885   if (!Style.AllowShortEnumsOnASingleLine)
2886     Line->Level -= 1;
2887   if (HasError) {
2888     if (FormatTok->is(tok::semi))
2889       nextToken();
2890     addUnwrappedLine();
2891   }
2892   return true;
2893 
2894   // There is no addUnwrappedLine() here so that we fall through to parsing a
2895   // structural element afterwards. Thus, in "enum A {} n, m;",
2896   // "} n, m;" will end up in one unwrapped line.
2897 }
2898 
2899 bool UnwrappedLineParser::parseStructLike() {
2900   // parseRecord falls through and does not yet add an unwrapped line as a
2901   // record declaration or definition can start a structural element.
2902   parseRecord();
2903   // This does not apply to Java, JavaScript and C#.
2904   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
2905       Style.isCSharp()) {
2906     if (FormatTok->is(tok::semi))
2907       nextToken();
2908     addUnwrappedLine();
2909     return true;
2910   }
2911   return false;
2912 }
2913 
2914 namespace {
2915 // A class used to set and restore the Token position when peeking
2916 // ahead in the token source.
2917 class ScopedTokenPosition {
2918   unsigned StoredPosition;
2919   FormatTokenSource *Tokens;
2920 
2921 public:
2922   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2923     assert(Tokens && "Tokens expected to not be null");
2924     StoredPosition = Tokens->getPosition();
2925   }
2926 
2927   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2928 };
2929 } // namespace
2930 
2931 // Look to see if we have [[ by looking ahead, if
2932 // its not then rewind to the original position.
2933 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2934   ScopedTokenPosition AutoPosition(Tokens);
2935   FormatToken *Tok = Tokens->getNextToken();
2936   // We already read the first [ check for the second.
2937   if (!Tok->is(tok::l_square)) {
2938     return false;
2939   }
2940   // Double check that the attribute is just something
2941   // fairly simple.
2942   while (Tok->isNot(tok::eof)) {
2943     if (Tok->is(tok::r_square)) {
2944       break;
2945     }
2946     Tok = Tokens->getNextToken();
2947   }
2948   if (Tok->is(tok::eof))
2949     return false;
2950   Tok = Tokens->getNextToken();
2951   if (!Tok->is(tok::r_square)) {
2952     return false;
2953   }
2954   Tok = Tokens->getNextToken();
2955   if (Tok->is(tok::semi)) {
2956     return false;
2957   }
2958   return true;
2959 }
2960 
2961 void UnwrappedLineParser::parseJavaEnumBody() {
2962   // Determine whether the enum is simple, i.e. does not have a semicolon or
2963   // constants with class bodies. Simple enums can be formatted like braced
2964   // lists, contracted to a single line, etc.
2965   unsigned StoredPosition = Tokens->getPosition();
2966   bool IsSimple = true;
2967   FormatToken *Tok = Tokens->getNextToken();
2968   while (!Tok->is(tok::eof)) {
2969     if (Tok->is(tok::r_brace))
2970       break;
2971     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2972       IsSimple = false;
2973       break;
2974     }
2975     // FIXME: This will also mark enums with braces in the arguments to enum
2976     // constants as "not simple". This is probably fine in practice, though.
2977     Tok = Tokens->getNextToken();
2978   }
2979   FormatTok = Tokens->setPosition(StoredPosition);
2980 
2981   if (IsSimple) {
2982     nextToken();
2983     parseBracedList();
2984     addUnwrappedLine();
2985     return;
2986   }
2987 
2988   // Parse the body of a more complex enum.
2989   // First add a line for everything up to the "{".
2990   nextToken();
2991   addUnwrappedLine();
2992   ++Line->Level;
2993 
2994   // Parse the enum constants.
2995   while (FormatTok) {
2996     if (FormatTok->is(tok::l_brace)) {
2997       // Parse the constant's class body.
2998       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2999                  /*MunchSemi=*/false);
3000     } else if (FormatTok->is(tok::l_paren)) {
3001       parseParens();
3002     } else if (FormatTok->is(tok::comma)) {
3003       nextToken();
3004       addUnwrappedLine();
3005     } else if (FormatTok->is(tok::semi)) {
3006       nextToken();
3007       addUnwrappedLine();
3008       break;
3009     } else if (FormatTok->is(tok::r_brace)) {
3010       addUnwrappedLine();
3011       break;
3012     } else {
3013       nextToken();
3014     }
3015   }
3016 
3017   // Parse the class body after the enum's ";" if any.
3018   parseLevel(/*HasOpeningBrace=*/true);
3019   nextToken();
3020   --Line->Level;
3021   addUnwrappedLine();
3022 }
3023 
3024 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3025   const FormatToken &InitialToken = *FormatTok;
3026   nextToken();
3027 
3028   // The actual identifier can be a nested name specifier, and in macros
3029   // it is often token-pasted.
3030   // An [[attribute]] can be before the identifier.
3031   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3032                             tok::kw___attribute, tok::kw___declspec,
3033                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3034          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3035           FormatTok->isOneOf(tok::period, tok::comma))) {
3036     if (Style.isJavaScript() &&
3037         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3038       // JavaScript/TypeScript supports inline object types in
3039       // extends/implements positions:
3040       //     class Foo implements {bar: number} { }
3041       nextToken();
3042       if (FormatTok->is(tok::l_brace)) {
3043         tryToParseBracedList();
3044         continue;
3045       }
3046     }
3047     bool IsNonMacroIdentifier =
3048         FormatTok->is(tok::identifier) &&
3049         FormatTok->TokenText != FormatTok->TokenText.upper();
3050     nextToken();
3051     // We can have macros or attributes in between 'class' and the class name.
3052     if (!IsNonMacroIdentifier) {
3053       if (FormatTok->Tok.is(tok::l_paren)) {
3054         parseParens();
3055       } else if (FormatTok->is(TT_AttributeSquare)) {
3056         parseSquare();
3057         // Consume the closing TT_AttributeSquare.
3058         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3059           nextToken();
3060       }
3061     }
3062   }
3063 
3064   // Note that parsing away template declarations here leads to incorrectly
3065   // accepting function declarations as record declarations.
3066   // In general, we cannot solve this problem. Consider:
3067   // class A<int> B() {}
3068   // which can be a function definition or a class definition when B() is a
3069   // macro. If we find enough real-world cases where this is a problem, we
3070   // can parse for the 'template' keyword in the beginning of the statement,
3071   // and thus rule out the record production in case there is no template
3072   // (this would still leave us with an ambiguity between template function
3073   // and class declarations).
3074   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3075     while (!eof()) {
3076       if (FormatTok->is(tok::l_brace)) {
3077         calculateBraceTypes(/*ExpectClassBody=*/true);
3078         if (!tryToParseBracedList())
3079           break;
3080       }
3081       if (FormatTok->is(tok::l_square) && !tryToParseLambda())
3082         break;
3083       if (FormatTok->Tok.is(tok::semi))
3084         return;
3085       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3086         addUnwrappedLine();
3087         nextToken();
3088         parseCSharpGenericTypeConstraint();
3089         break;
3090       }
3091       nextToken();
3092     }
3093   }
3094   if (FormatTok->Tok.is(tok::l_brace)) {
3095     if (ParseAsExpr) {
3096       parseChildBlock();
3097     } else {
3098       if (ShouldBreakBeforeBrace(Style, InitialToken))
3099         addUnwrappedLine();
3100 
3101       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3102       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3103     }
3104   }
3105   // There is no addUnwrappedLine() here so that we fall through to parsing a
3106   // structural element afterwards. Thus, in "class A {} n, m;",
3107   // "} n, m;" will end up in one unwrapped line.
3108 }
3109 
3110 void UnwrappedLineParser::parseObjCMethod() {
3111   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
3112          "'(' or identifier expected.");
3113   do {
3114     if (FormatTok->Tok.is(tok::semi)) {
3115       nextToken();
3116       addUnwrappedLine();
3117       return;
3118     } else if (FormatTok->Tok.is(tok::l_brace)) {
3119       if (Style.BraceWrapping.AfterFunction)
3120         addUnwrappedLine();
3121       parseBlock();
3122       addUnwrappedLine();
3123       return;
3124     } else {
3125       nextToken();
3126     }
3127   } while (!eof());
3128 }
3129 
3130 void UnwrappedLineParser::parseObjCProtocolList() {
3131   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
3132   do {
3133     nextToken();
3134     // Early exit in case someone forgot a close angle.
3135     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3136         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3137       return;
3138   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
3139   nextToken(); // Skip '>'.
3140 }
3141 
3142 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3143   do {
3144     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
3145       nextToken();
3146       addUnwrappedLine();
3147       break;
3148     }
3149     if (FormatTok->is(tok::l_brace)) {
3150       parseBlock();
3151       // In ObjC interfaces, nothing should be following the "}".
3152       addUnwrappedLine();
3153     } else if (FormatTok->is(tok::r_brace)) {
3154       // Ignore stray "}". parseStructuralElement doesn't consume them.
3155       nextToken();
3156       addUnwrappedLine();
3157     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3158       nextToken();
3159       parseObjCMethod();
3160     } else {
3161       parseStructuralElement();
3162     }
3163   } while (!eof());
3164 }
3165 
3166 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3167   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3168          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3169   nextToken();
3170   nextToken(); // interface name
3171 
3172   // @interface can be followed by a lightweight generic
3173   // specialization list, then either a base class or a category.
3174   if (FormatTok->Tok.is(tok::less)) {
3175     parseObjCLightweightGenerics();
3176   }
3177   if (FormatTok->Tok.is(tok::colon)) {
3178     nextToken();
3179     nextToken(); // base class name
3180     // The base class can also have lightweight generics applied to it.
3181     if (FormatTok->Tok.is(tok::less)) {
3182       parseObjCLightweightGenerics();
3183     }
3184   } else if (FormatTok->Tok.is(tok::l_paren))
3185     // Skip category, if present.
3186     parseParens();
3187 
3188   if (FormatTok->Tok.is(tok::less))
3189     parseObjCProtocolList();
3190 
3191   if (FormatTok->Tok.is(tok::l_brace)) {
3192     if (Style.BraceWrapping.AfterObjCDeclaration)
3193       addUnwrappedLine();
3194     parseBlock(/*MustBeDeclaration=*/true);
3195   }
3196 
3197   // With instance variables, this puts '}' on its own line.  Without instance
3198   // variables, this ends the @interface line.
3199   addUnwrappedLine();
3200 
3201   parseObjCUntilAtEnd();
3202 }
3203 
3204 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3205   assert(FormatTok->Tok.is(tok::less));
3206   // Unlike protocol lists, generic parameterizations support
3207   // nested angles:
3208   //
3209   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3210   //     NSObject <NSCopying, NSSecureCoding>
3211   //
3212   // so we need to count how many open angles we have left.
3213   unsigned NumOpenAngles = 1;
3214   do {
3215     nextToken();
3216     // Early exit in case someone forgot a close angle.
3217     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3218         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3219       break;
3220     if (FormatTok->Tok.is(tok::less))
3221       ++NumOpenAngles;
3222     else if (FormatTok->Tok.is(tok::greater)) {
3223       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3224       --NumOpenAngles;
3225     }
3226   } while (!eof() && NumOpenAngles != 0);
3227   nextToken(); // Skip '>'.
3228 }
3229 
3230 // Returns true for the declaration/definition form of @protocol,
3231 // false for the expression form.
3232 bool UnwrappedLineParser::parseObjCProtocol() {
3233   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3234   nextToken();
3235 
3236   if (FormatTok->is(tok::l_paren))
3237     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3238     return false;
3239 
3240   // The definition/declaration form,
3241   // @protocol Foo
3242   // - (int)someMethod;
3243   // @end
3244 
3245   nextToken(); // protocol name
3246 
3247   if (FormatTok->Tok.is(tok::less))
3248     parseObjCProtocolList();
3249 
3250   // Check for protocol declaration.
3251   if (FormatTok->Tok.is(tok::semi)) {
3252     nextToken();
3253     addUnwrappedLine();
3254     return true;
3255   }
3256 
3257   addUnwrappedLine();
3258   parseObjCUntilAtEnd();
3259   return true;
3260 }
3261 
3262 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3263   bool IsImport = FormatTok->is(Keywords.kw_import);
3264   assert(IsImport || FormatTok->is(tok::kw_export));
3265   nextToken();
3266 
3267   // Consume the "default" in "export default class/function".
3268   if (FormatTok->is(tok::kw_default))
3269     nextToken();
3270 
3271   // Consume "async function", "function" and "default function", so that these
3272   // get parsed as free-standing JS functions, i.e. do not require a trailing
3273   // semicolon.
3274   if (FormatTok->is(Keywords.kw_async))
3275     nextToken();
3276   if (FormatTok->is(Keywords.kw_function)) {
3277     nextToken();
3278     return;
3279   }
3280 
3281   // For imports, `export *`, `export {...}`, consume the rest of the line up
3282   // to the terminating `;`. For everything else, just return and continue
3283   // parsing the structural element, i.e. the declaration or expression for
3284   // `export default`.
3285   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3286       !FormatTok->isStringLiteral())
3287     return;
3288 
3289   while (!eof()) {
3290     if (FormatTok->is(tok::semi))
3291       return;
3292     if (Line->Tokens.empty()) {
3293       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3294       // import statement should terminate.
3295       return;
3296     }
3297     if (FormatTok->is(tok::l_brace)) {
3298       FormatTok->setBlockKind(BK_Block);
3299       nextToken();
3300       parseBracedList();
3301     } else {
3302       nextToken();
3303     }
3304   }
3305 }
3306 
3307 void UnwrappedLineParser::parseStatementMacro() {
3308   nextToken();
3309   if (FormatTok->is(tok::l_paren))
3310     parseParens();
3311   if (FormatTok->is(tok::semi))
3312     nextToken();
3313   addUnwrappedLine();
3314 }
3315 
3316 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3317                                                  StringRef Prefix = "") {
3318   llvm::dbgs() << Prefix << "Line(" << Line.Level
3319                << ", FSC=" << Line.FirstStartColumn << ")"
3320                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3321   for (const auto &Node : Line.Tokens) {
3322     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3323                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3324                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3325   }
3326   for (const auto &Node : Line.Tokens)
3327     for (const auto &ChildNode : Node.Children)
3328       printDebugInfo(ChildNode, "\nChild: ");
3329 
3330   llvm::dbgs() << "\n";
3331 }
3332 
3333 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3334   if (Line->Tokens.empty())
3335     return;
3336   LLVM_DEBUG({
3337     if (CurrentLines == &Lines)
3338       printDebugInfo(*Line);
3339   });
3340 
3341   // If this line closes a block when in Whitesmiths mode, remember that
3342   // information so that the level can be decreased after the line is added.
3343   // This has to happen after the addition of the line since the line itself
3344   // needs to be indented.
3345   bool ClosesWhitesmithsBlock =
3346       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3347       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3348 
3349   CurrentLines->push_back(std::move(*Line));
3350   Line->Tokens.clear();
3351   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3352   Line->FirstStartColumn = 0;
3353 
3354   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3355     --Line->Level;
3356   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3357     CurrentLines->append(
3358         std::make_move_iterator(PreprocessorDirectives.begin()),
3359         std::make_move_iterator(PreprocessorDirectives.end()));
3360     PreprocessorDirectives.clear();
3361   }
3362   // Disconnect the current token from the last token on the previous line.
3363   FormatTok->Previous = nullptr;
3364 }
3365 
3366 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3367 
3368 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3369   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3370          FormatTok.NewlinesBefore > 0;
3371 }
3372 
3373 // Checks if \p FormatTok is a line comment that continues the line comment
3374 // section on \p Line.
3375 static bool
3376 continuesLineCommentSection(const FormatToken &FormatTok,
3377                             const UnwrappedLine &Line,
3378                             const llvm::Regex &CommentPragmasRegex) {
3379   if (Line.Tokens.empty())
3380     return false;
3381 
3382   StringRef IndentContent = FormatTok.TokenText;
3383   if (FormatTok.TokenText.startswith("//") ||
3384       FormatTok.TokenText.startswith("/*"))
3385     IndentContent = FormatTok.TokenText.substr(2);
3386   if (CommentPragmasRegex.match(IndentContent))
3387     return false;
3388 
3389   // If Line starts with a line comment, then FormatTok continues the comment
3390   // section if its original column is greater or equal to the original start
3391   // column of the line.
3392   //
3393   // Define the min column token of a line as follows: if a line ends in '{' or
3394   // contains a '{' followed by a line comment, then the min column token is
3395   // that '{'. Otherwise, the min column token of the line is the first token of
3396   // the line.
3397   //
3398   // If Line starts with a token other than a line comment, then FormatTok
3399   // continues the comment section if its original column is greater than the
3400   // original start column of the min column token of the line.
3401   //
3402   // For example, the second line comment continues the first in these cases:
3403   //
3404   // // first line
3405   // // second line
3406   //
3407   // and:
3408   //
3409   // // first line
3410   //  // second line
3411   //
3412   // and:
3413   //
3414   // int i; // first line
3415   //  // second line
3416   //
3417   // and:
3418   //
3419   // do { // first line
3420   //      // second line
3421   //   int i;
3422   // } while (true);
3423   //
3424   // and:
3425   //
3426   // enum {
3427   //   a, // first line
3428   //    // second line
3429   //   b
3430   // };
3431   //
3432   // The second line comment doesn't continue the first in these cases:
3433   //
3434   //   // first line
3435   //  // second line
3436   //
3437   // and:
3438   //
3439   // int i; // first line
3440   // // second line
3441   //
3442   // and:
3443   //
3444   // do { // first line
3445   //   // second line
3446   //   int i;
3447   // } while (true);
3448   //
3449   // and:
3450   //
3451   // enum {
3452   //   a, // first line
3453   //   // second line
3454   // };
3455   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3456 
3457   // Scan for '{//'. If found, use the column of '{' as a min column for line
3458   // comment section continuation.
3459   const FormatToken *PreviousToken = nullptr;
3460   for (const UnwrappedLineNode &Node : Line.Tokens) {
3461     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3462         isLineComment(*Node.Tok)) {
3463       MinColumnToken = PreviousToken;
3464       break;
3465     }
3466     PreviousToken = Node.Tok;
3467 
3468     // Grab the last newline preceding a token in this unwrapped line.
3469     if (Node.Tok->NewlinesBefore > 0) {
3470       MinColumnToken = Node.Tok;
3471     }
3472   }
3473   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3474     MinColumnToken = PreviousToken;
3475   }
3476 
3477   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3478                               MinColumnToken);
3479 }
3480 
3481 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3482   bool JustComments = Line->Tokens.empty();
3483   for (FormatToken *Tok : CommentsBeforeNextToken) {
3484     // Line comments that belong to the same line comment section are put on the
3485     // same line since later we might want to reflow content between them.
3486     // Additional fine-grained breaking of line comment sections is controlled
3487     // by the class BreakableLineCommentSection in case it is desirable to keep
3488     // several line comment sections in the same unwrapped line.
3489     //
3490     // FIXME: Consider putting separate line comment sections as children to the
3491     // unwrapped line instead.
3492     Tok->ContinuesLineCommentSection =
3493         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3494     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3495       addUnwrappedLine();
3496     pushToken(Tok);
3497   }
3498   if (NewlineBeforeNext && JustComments)
3499     addUnwrappedLine();
3500   CommentsBeforeNextToken.clear();
3501 }
3502 
3503 void UnwrappedLineParser::nextToken(int LevelDifference) {
3504   if (eof())
3505     return;
3506   flushComments(isOnNewLine(*FormatTok));
3507   pushToken(FormatTok);
3508   FormatToken *Previous = FormatTok;
3509   if (!Style.isJavaScript())
3510     readToken(LevelDifference);
3511   else
3512     readTokenWithJavaScriptASI();
3513   FormatTok->Previous = Previous;
3514 }
3515 
3516 void UnwrappedLineParser::distributeComments(
3517     const SmallVectorImpl<FormatToken *> &Comments,
3518     const FormatToken *NextTok) {
3519   // Whether or not a line comment token continues a line is controlled by
3520   // the method continuesLineCommentSection, with the following caveat:
3521   //
3522   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3523   // that each comment line from the trail is aligned with the next token, if
3524   // the next token exists. If a trail exists, the beginning of the maximal
3525   // trail is marked as a start of a new comment section.
3526   //
3527   // For example in this code:
3528   //
3529   // int a; // line about a
3530   //   // line 1 about b
3531   //   // line 2 about b
3532   //   int b;
3533   //
3534   // the two lines about b form a maximal trail, so there are two sections, the
3535   // first one consisting of the single comment "// line about a" and the
3536   // second one consisting of the next two comments.
3537   if (Comments.empty())
3538     return;
3539   bool ShouldPushCommentsInCurrentLine = true;
3540   bool HasTrailAlignedWithNextToken = false;
3541   unsigned StartOfTrailAlignedWithNextToken = 0;
3542   if (NextTok) {
3543     // We are skipping the first element intentionally.
3544     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3545       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3546         HasTrailAlignedWithNextToken = true;
3547         StartOfTrailAlignedWithNextToken = i;
3548       }
3549     }
3550   }
3551   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3552     FormatToken *FormatTok = Comments[i];
3553     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3554       FormatTok->ContinuesLineCommentSection = false;
3555     } else {
3556       FormatTok->ContinuesLineCommentSection =
3557           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3558     }
3559     if (!FormatTok->ContinuesLineCommentSection &&
3560         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3561       ShouldPushCommentsInCurrentLine = false;
3562     }
3563     if (ShouldPushCommentsInCurrentLine) {
3564       pushToken(FormatTok);
3565     } else {
3566       CommentsBeforeNextToken.push_back(FormatTok);
3567     }
3568   }
3569 }
3570 
3571 void UnwrappedLineParser::readToken(int LevelDifference) {
3572   SmallVector<FormatToken *, 1> Comments;
3573   do {
3574     FormatTok = Tokens->getNextToken();
3575     assert(FormatTok);
3576     while (FormatTok->getType() == TT_ConflictStart ||
3577            FormatTok->getType() == TT_ConflictEnd ||
3578            FormatTok->getType() == TT_ConflictAlternative) {
3579       if (FormatTok->getType() == TT_ConflictStart) {
3580         conditionalCompilationStart(/*Unreachable=*/false);
3581       } else if (FormatTok->getType() == TT_ConflictAlternative) {
3582         conditionalCompilationAlternative();
3583       } else if (FormatTok->getType() == TT_ConflictEnd) {
3584         conditionalCompilationEnd();
3585       }
3586       FormatTok = Tokens->getNextToken();
3587       FormatTok->MustBreakBefore = true;
3588     }
3589 
3590     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3591            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3592       distributeComments(Comments, FormatTok);
3593       Comments.clear();
3594       // If there is an unfinished unwrapped line, we flush the preprocessor
3595       // directives only after that unwrapped line was finished later.
3596       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3597       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3598       assert((LevelDifference >= 0 ||
3599               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3600              "LevelDifference makes Line->Level negative");
3601       Line->Level += LevelDifference;
3602       // Comments stored before the preprocessor directive need to be output
3603       // before the preprocessor directive, at the same level as the
3604       // preprocessor directive, as we consider them to apply to the directive.
3605       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3606           PPBranchLevel > 0)
3607         Line->Level += PPBranchLevel;
3608       flushComments(isOnNewLine(*FormatTok));
3609       parsePPDirective();
3610     }
3611 
3612     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3613         !Line->InPPDirective) {
3614       continue;
3615     }
3616 
3617     if (!FormatTok->Tok.is(tok::comment)) {
3618       distributeComments(Comments, FormatTok);
3619       Comments.clear();
3620       return;
3621     }
3622 
3623     Comments.push_back(FormatTok);
3624   } while (!eof());
3625 
3626   distributeComments(Comments, nullptr);
3627   Comments.clear();
3628 }
3629 
3630 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3631   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3632   if (MustBreakBeforeNextToken) {
3633     Line->Tokens.back().Tok->MustBreakBefore = true;
3634     MustBreakBeforeNextToken = false;
3635   }
3636 }
3637 
3638 } // end namespace format
3639 } // end namespace clang
3640