1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/Support/Debug.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 #include <algorithm>
23 
24 #define DEBUG_TYPE "format-parser"
25 
26 namespace clang {
27 namespace format {
28 
29 class FormatTokenSource {
30 public:
31   virtual ~FormatTokenSource() {}
32 
33   // Returns the next token in the token stream.
34   virtual FormatToken *getNextToken() = 0;
35 
36   // Returns the token preceding the token returned by the last call to
37   // getNextToken() in the token stream, or nullptr if no such token exists.
38   virtual FormatToken *getPreviousToken() = 0;
39 
40   // Returns the token that would be returned by the next call to
41   // getNextToken().
42   virtual FormatToken *peekNextToken() = 0;
43 
44   // Returns whether we are at the end of the file.
45   // This can be different from whether getNextToken() returned an eof token
46   // when the FormatTokenSource is a view on a part of the token stream.
47   virtual bool isEOF() = 0;
48 
49   // Gets the current position in the token stream, to be used by setPosition().
50   virtual unsigned getPosition() = 0;
51 
52   // Resets the token stream to the state it was in when getPosition() returned
53   // Position, and return the token at that position in the stream.
54   virtual FormatToken *setPosition(unsigned Position) = 0;
55 };
56 
57 namespace {
58 
59 class ScopedDeclarationState {
60 public:
61   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
62                          bool MustBeDeclaration)
63       : Line(Line), Stack(Stack) {
64     Line.MustBeDeclaration = MustBeDeclaration;
65     Stack.push_back(MustBeDeclaration);
66   }
67   ~ScopedDeclarationState() {
68     Stack.pop_back();
69     if (!Stack.empty())
70       Line.MustBeDeclaration = Stack.back();
71     else
72       Line.MustBeDeclaration = true;
73   }
74 
75 private:
76   UnwrappedLine &Line;
77   std::vector<bool> &Stack;
78 };
79 
80 static bool isLineComment(const FormatToken &FormatTok) {
81   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
82 }
83 
84 // Checks if \p FormatTok is a line comment that continues the line comment
85 // \p Previous. The original column of \p MinColumnToken is used to determine
86 // whether \p FormatTok is indented enough to the right to continue \p Previous.
87 static bool continuesLineComment(const FormatToken &FormatTok,
88                                  const FormatToken *Previous,
89                                  const FormatToken *MinColumnToken) {
90   if (!Previous || !MinColumnToken)
91     return false;
92   unsigned MinContinueColumn =
93       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
94   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
95          isLineComment(*Previous) &&
96          FormatTok.OriginalColumn >= MinContinueColumn;
97 }
98 
99 class ScopedMacroState : public FormatTokenSource {
100 public:
101   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
102                    FormatToken *&ResetToken)
103       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
104         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
105         Token(nullptr), PreviousToken(nullptr) {
106     FakeEOF.Tok.startToken();
107     FakeEOF.Tok.setKind(tok::eof);
108     TokenSource = this;
109     Line.Level = 0;
110     Line.InPPDirective = true;
111   }
112 
113   ~ScopedMacroState() override {
114     TokenSource = PreviousTokenSource;
115     ResetToken = Token;
116     Line.InPPDirective = false;
117     Line.Level = PreviousLineLevel;
118   }
119 
120   FormatToken *getNextToken() override {
121     // The \c UnwrappedLineParser guards against this by never calling
122     // \c getNextToken() after it has encountered the first eof token.
123     assert(!eof());
124     PreviousToken = Token;
125     Token = PreviousTokenSource->getNextToken();
126     if (eof())
127       return &FakeEOF;
128     return Token;
129   }
130 
131   FormatToken *getPreviousToken() override {
132     return PreviousTokenSource->getPreviousToken();
133   }
134 
135   FormatToken *peekNextToken() override {
136     if (eof())
137       return &FakeEOF;
138     return PreviousTokenSource->peekNextToken();
139   }
140 
141   bool isEOF() override { return PreviousTokenSource->isEOF(); }
142 
143   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
144 
145   FormatToken *setPosition(unsigned Position) override {
146     PreviousToken = nullptr;
147     Token = PreviousTokenSource->setPosition(Position);
148     return Token;
149   }
150 
151 private:
152   bool eof() {
153     return Token && Token->HasUnescapedNewline &&
154            !continuesLineComment(*Token, PreviousToken,
155                                  /*MinColumnToken=*/PreviousToken);
156   }
157 
158   FormatToken FakeEOF;
159   UnwrappedLine &Line;
160   FormatTokenSource *&TokenSource;
161   FormatToken *&ResetToken;
162   unsigned PreviousLineLevel;
163   FormatTokenSource *PreviousTokenSource;
164 
165   FormatToken *Token;
166   FormatToken *PreviousToken;
167 };
168 
169 } // end anonymous namespace
170 
171 class ScopedLineState {
172 public:
173   ScopedLineState(UnwrappedLineParser &Parser,
174                   bool SwitchToPreprocessorLines = false)
175       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
176     if (SwitchToPreprocessorLines)
177       Parser.CurrentLines = &Parser.PreprocessorDirectives;
178     else if (!Parser.Line->Tokens.empty())
179       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
180     PreBlockLine = std::move(Parser.Line);
181     Parser.Line = std::make_unique<UnwrappedLine>();
182     Parser.Line->Level = PreBlockLine->Level;
183     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
184   }
185 
186   ~ScopedLineState() {
187     if (!Parser.Line->Tokens.empty()) {
188       Parser.addUnwrappedLine();
189     }
190     assert(Parser.Line->Tokens.empty());
191     Parser.Line = std::move(PreBlockLine);
192     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
193       Parser.MustBreakBeforeNextToken = true;
194     Parser.CurrentLines = OriginalLines;
195   }
196 
197 private:
198   UnwrappedLineParser &Parser;
199 
200   std::unique_ptr<UnwrappedLine> PreBlockLine;
201   SmallVectorImpl<UnwrappedLine> *OriginalLines;
202 };
203 
204 class CompoundStatementIndenter {
205 public:
206   CompoundStatementIndenter(UnwrappedLineParser *Parser,
207                             const FormatStyle &Style, unsigned &LineLevel)
208       : CompoundStatementIndenter(Parser, LineLevel,
209                                   Style.BraceWrapping.AfterControlStatement,
210                                   Style.BraceWrapping.IndentBraces) {}
211   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
212                             bool WrapBrace, bool IndentBrace)
213       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
214     if (WrapBrace)
215       Parser->addUnwrappedLine();
216     if (IndentBrace)
217       ++LineLevel;
218   }
219   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
220 
221 private:
222   unsigned &LineLevel;
223   unsigned OldLineLevel;
224 };
225 
226 namespace {
227 
228 class IndexedTokenSource : public FormatTokenSource {
229 public:
230   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
231       : Tokens(Tokens), Position(-1) {}
232 
233   FormatToken *getNextToken() override {
234     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
235       LLVM_DEBUG({
236         llvm::dbgs() << "Next ";
237         dbgToken(Position);
238       });
239       return Tokens[Position];
240     }
241     ++Position;
242     LLVM_DEBUG({
243       llvm::dbgs() << "Next ";
244       dbgToken(Position);
245     });
246     return Tokens[Position];
247   }
248 
249   FormatToken *getPreviousToken() override {
250     return Position > 0 ? Tokens[Position - 1] : nullptr;
251   }
252 
253   FormatToken *peekNextToken() override {
254     int Next = Position + 1;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Peeking ";
257       dbgToken(Next);
258     });
259     return Tokens[Next];
260   }
261 
262   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
263 
264   unsigned getPosition() override {
265     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
266     assert(Position >= 0);
267     return Position;
268   }
269 
270   FormatToken *setPosition(unsigned P) override {
271     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
272     Position = P;
273     return Tokens[Position];
274   }
275 
276   void reset() { Position = -1; }
277 
278 private:
279   void dbgToken(int Position, llvm::StringRef Indent = "") {
280     FormatToken *Tok = Tokens[Position];
281     llvm::dbgs() << Indent << "[" << Position
282                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
283                  << ", Macro: " << !!Tok->MacroCtx << "\n";
284   }
285 
286   ArrayRef<FormatToken *> Tokens;
287   int Position;
288 };
289 
290 } // end anonymous namespace
291 
292 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
293                                          const AdditionalKeywords &Keywords,
294                                          unsigned FirstStartColumn,
295                                          ArrayRef<FormatToken *> Tokens,
296                                          UnwrappedLineConsumer &Callback)
297     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
298       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
299       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
300       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
301       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
302                        ? IG_Rejected
303                        : IG_Inited),
304       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
305 
306 void UnwrappedLineParser::reset() {
307   PPBranchLevel = -1;
308   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
309                      ? IG_Rejected
310                      : IG_Inited;
311   IncludeGuardToken = nullptr;
312   Line.reset(new UnwrappedLine);
313   CommentsBeforeNextToken.clear();
314   FormatTok = nullptr;
315   MustBreakBeforeNextToken = false;
316   PreprocessorDirectives.clear();
317   CurrentLines = &Lines;
318   DeclarationScopeStack.clear();
319   NestedTooDeep.clear();
320   PPStack.clear();
321   Line->FirstStartColumn = FirstStartColumn;
322 }
323 
324 void UnwrappedLineParser::parse() {
325   IndexedTokenSource TokenSource(AllTokens);
326   Line->FirstStartColumn = FirstStartColumn;
327   do {
328     LLVM_DEBUG(llvm::dbgs() << "----\n");
329     reset();
330     Tokens = &TokenSource;
331     TokenSource.reset();
332 
333     readToken();
334     parseFile();
335 
336     // If we found an include guard then all preprocessor directives (other than
337     // the guard) are over-indented by one.
338     if (IncludeGuard == IG_Found)
339       for (auto &Line : Lines)
340         if (Line.InPPDirective && Line.Level > 0)
341           --Line.Level;
342 
343     // Create line with eof token.
344     pushToken(FormatTok);
345     addUnwrappedLine();
346 
347     for (const UnwrappedLine &Line : Lines)
348       Callback.consumeUnwrappedLine(Line);
349 
350     Callback.finishRun();
351     Lines.clear();
352     while (!PPLevelBranchIndex.empty() &&
353            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
354       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
355       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
356     }
357     if (!PPLevelBranchIndex.empty()) {
358       ++PPLevelBranchIndex.back();
359       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
360       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
361     }
362   } while (!PPLevelBranchIndex.empty());
363 }
364 
365 void UnwrappedLineParser::parseFile() {
366   // The top-level context in a file always has declarations, except for pre-
367   // processor directives and JavaScript files.
368   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
369   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
370                                           MustBeDeclaration);
371   if (Style.Language == FormatStyle::LK_TextProto)
372     parseBracedList();
373   else
374     parseLevel(/*HasOpeningBrace=*/false);
375   // Make sure to format the remaining tokens.
376   //
377   // LK_TextProto is special since its top-level is parsed as the body of a
378   // braced list, which does not necessarily have natural line separators such
379   // as a semicolon. Comments after the last entry that have been determined to
380   // not belong to that line, as in:
381   //   key: value
382   //   // endfile comment
383   // do not have a chance to be put on a line of their own until this point.
384   // Here we add this newline before end-of-file comments.
385   if (Style.Language == FormatStyle::LK_TextProto &&
386       !CommentsBeforeNextToken.empty())
387     addUnwrappedLine();
388   flushComments(true);
389   addUnwrappedLine();
390 }
391 
392 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
393   do {
394     switch (FormatTok->Tok.getKind()) {
395     case tok::l_brace:
396       return;
397     default:
398       if (FormatTok->is(Keywords.kw_where)) {
399         addUnwrappedLine();
400         nextToken();
401         parseCSharpGenericTypeConstraint();
402         break;
403       }
404       nextToken();
405       break;
406     }
407   } while (!eof());
408 }
409 
410 void UnwrappedLineParser::parseCSharpAttribute() {
411   int UnpairedSquareBrackets = 1;
412   do {
413     switch (FormatTok->Tok.getKind()) {
414     case tok::r_square:
415       nextToken();
416       --UnpairedSquareBrackets;
417       if (UnpairedSquareBrackets == 0) {
418         addUnwrappedLine();
419         return;
420       }
421       break;
422     case tok::l_square:
423       ++UnpairedSquareBrackets;
424       nextToken();
425       break;
426     default:
427       nextToken();
428       break;
429     }
430   } while (!eof());
431 }
432 
433 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
434   if (!Lines.empty() && Lines.back().InPPDirective)
435     return true;
436 
437   const FormatToken *Previous = Tokens->getPreviousToken();
438   return Previous && Previous->is(tok::comment) &&
439          (Previous->IsMultiline || Previous->NewlinesBefore > 0);
440 }
441 
442 bool UnwrappedLineParser::mightFitOnOneLine() const {
443   const auto ColumnLimit = Style.ColumnLimit;
444   if (ColumnLimit == 0)
445     return true;
446 
447   if (Lines.empty())
448     return true;
449 
450   const auto &PreviousLine = Lines.back();
451   const auto &Tokens = PreviousLine.Tokens;
452   assert(!Tokens.empty());
453   const auto *LastToken = Tokens.back().Tok;
454   assert(LastToken);
455   if (!LastToken->isOneOf(tok::semi, tok::comment))
456     return true;
457 
458   AnnotatedLine Line(PreviousLine);
459   assert(Line.Last == LastToken);
460 
461   TokenAnnotator Annotator(Style, Keywords);
462   Annotator.annotate(Line);
463   Annotator.calculateFormattingInformation(Line);
464 
465   return Line.Level * Style.IndentWidth + LastToken->TotalLength <= ColumnLimit;
466 }
467 
468 // Returns true if a simple block, or false otherwise. (A simple block has a
469 // single statement that fits on a single line.)
470 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace, IfStmtKind *IfKind) {
471   const bool IsPrecededByCommentOrPPDirective =
472       !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
473   unsigned StatementCount = 0;
474   bool SwitchLabelEncountered = false;
475   do {
476     tok::TokenKind kind = FormatTok->Tok.getKind();
477     if (FormatTok->getType() == TT_MacroBlockBegin) {
478       kind = tok::l_brace;
479     } else if (FormatTok->getType() == TT_MacroBlockEnd) {
480       kind = tok::r_brace;
481     }
482 
483     switch (kind) {
484     case tok::comment:
485       nextToken();
486       addUnwrappedLine();
487       break;
488     case tok::l_brace:
489       // FIXME: Add parameter whether this can happen - if this happens, we must
490       // be in a non-declaration context.
491       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
492         continue;
493       parseBlock();
494       ++StatementCount;
495       assert(StatementCount > 0 && "StatementCount overflow!");
496       addUnwrappedLine();
497       break;
498     case tok::r_brace:
499       if (HasOpeningBrace) {
500         if (!Style.RemoveBracesLLVM)
501           return false;
502         if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 ||
503             IsPrecededByCommentOrPPDirective ||
504             precededByCommentOrPPDirective()) {
505           return false;
506         }
507         const FormatToken *Next = Tokens->peekNextToken();
508         if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
509           return false;
510         return mightFitOnOneLine();
511       }
512       nextToken();
513       addUnwrappedLine();
514       break;
515     case tok::kw_default: {
516       unsigned StoredPosition = Tokens->getPosition();
517       FormatToken *Next;
518       do {
519         Next = Tokens->getNextToken();
520       } while (Next->is(tok::comment));
521       FormatTok = Tokens->setPosition(StoredPosition);
522       if (Next && Next->isNot(tok::colon)) {
523         // default not followed by ':' is not a case label; treat it like
524         // an identifier.
525         parseStructuralElement();
526         break;
527       }
528       // Else, if it is 'default:', fall through to the case handling.
529       LLVM_FALLTHROUGH;
530     }
531     case tok::kw_case:
532       if (Style.isJavaScript() && Line->MustBeDeclaration) {
533         // A 'case: string' style field declaration.
534         parseStructuralElement();
535         break;
536       }
537       if (!SwitchLabelEncountered &&
538           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
539         ++Line->Level;
540       SwitchLabelEncountered = true;
541       parseStructuralElement();
542       break;
543     case tok::l_square:
544       if (Style.isCSharp()) {
545         nextToken();
546         parseCSharpAttribute();
547         break;
548       }
549       LLVM_FALLTHROUGH;
550     default:
551       parseStructuralElement(IfKind, !HasOpeningBrace);
552       ++StatementCount;
553       assert(StatementCount > 0 && "StatementCount overflow!");
554       break;
555     }
556   } while (!eof());
557   return false;
558 }
559 
560 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
561   // We'll parse forward through the tokens until we hit
562   // a closing brace or eof - note that getNextToken() will
563   // parse macros, so this will magically work inside macro
564   // definitions, too.
565   unsigned StoredPosition = Tokens->getPosition();
566   FormatToken *Tok = FormatTok;
567   const FormatToken *PrevTok = Tok->Previous;
568   // Keep a stack of positions of lbrace tokens. We will
569   // update information about whether an lbrace starts a
570   // braced init list or a different block during the loop.
571   SmallVector<FormatToken *, 8> LBraceStack;
572   assert(Tok->Tok.is(tok::l_brace));
573   do {
574     // Get next non-comment token.
575     FormatToken *NextTok;
576     unsigned ReadTokens = 0;
577     do {
578       NextTok = Tokens->getNextToken();
579       ++ReadTokens;
580     } while (NextTok->is(tok::comment));
581 
582     switch (Tok->Tok.getKind()) {
583     case tok::l_brace:
584       if (Style.isJavaScript() && PrevTok) {
585         if (PrevTok->isOneOf(tok::colon, tok::less))
586           // A ':' indicates this code is in a type, or a braced list
587           // following a label in an object literal ({a: {b: 1}}).
588           // A '<' could be an object used in a comparison, but that is nonsense
589           // code (can never return true), so more likely it is a generic type
590           // argument (`X<{a: string; b: number}>`).
591           // The code below could be confused by semicolons between the
592           // individual members in a type member list, which would normally
593           // trigger BK_Block. In both cases, this must be parsed as an inline
594           // braced init.
595           Tok->setBlockKind(BK_BracedInit);
596         else if (PrevTok->is(tok::r_paren))
597           // `) { }` can only occur in function or method declarations in JS.
598           Tok->setBlockKind(BK_Block);
599       } else {
600         Tok->setBlockKind(BK_Unknown);
601       }
602       LBraceStack.push_back(Tok);
603       break;
604     case tok::r_brace:
605       if (LBraceStack.empty())
606         break;
607       if (LBraceStack.back()->is(BK_Unknown)) {
608         bool ProbablyBracedList = false;
609         if (Style.Language == FormatStyle::LK_Proto) {
610           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
611         } else {
612           // Skip NextTok over preprocessor lines, otherwise we may not
613           // properly diagnose the block as a braced intializer
614           // if the comma separator appears after the pp directive.
615           while (NextTok->is(tok::hash)) {
616             ScopedMacroState MacroState(*Line, Tokens, NextTok);
617             do {
618               NextTok = Tokens->getNextToken();
619               ++ReadTokens;
620             } while (NextTok->isNot(tok::eof));
621           }
622 
623           // Using OriginalColumn to distinguish between ObjC methods and
624           // binary operators is a bit hacky.
625           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
626                                   NextTok->OriginalColumn == 0;
627 
628           // If there is a comma, semicolon or right paren after the closing
629           // brace, we assume this is a braced initializer list.  Note that
630           // regardless how we mark inner braces here, we will overwrite the
631           // BlockKind later if we parse a braced list (where all blocks
632           // inside are by default braced lists), or when we explicitly detect
633           // blocks (for example while parsing lambdas).
634           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
635           // braced list in JS.
636           ProbablyBracedList =
637               (Style.isJavaScript() &&
638                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
639                                 Keywords.kw_as)) ||
640               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
641               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
642                                tok::r_paren, tok::r_square, tok::l_brace,
643                                tok::ellipsis) ||
644               (NextTok->is(tok::identifier) &&
645                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
646               (NextTok->is(tok::semi) &&
647                (!ExpectClassBody || LBraceStack.size() != 1)) ||
648               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
649           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
650             // We can have an array subscript after a braced init
651             // list, but C++11 attributes are expected after blocks.
652             NextTok = Tokens->getNextToken();
653             ++ReadTokens;
654             ProbablyBracedList = NextTok->isNot(tok::l_square);
655           }
656         }
657         if (ProbablyBracedList) {
658           Tok->setBlockKind(BK_BracedInit);
659           LBraceStack.back()->setBlockKind(BK_BracedInit);
660         } else {
661           Tok->setBlockKind(BK_Block);
662           LBraceStack.back()->setBlockKind(BK_Block);
663         }
664       }
665       LBraceStack.pop_back();
666       break;
667     case tok::identifier:
668       if (!Tok->is(TT_StatementMacro))
669         break;
670       LLVM_FALLTHROUGH;
671     case tok::at:
672     case tok::semi:
673     case tok::kw_if:
674     case tok::kw_while:
675     case tok::kw_for:
676     case tok::kw_switch:
677     case tok::kw_try:
678     case tok::kw___try:
679       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
680         LBraceStack.back()->setBlockKind(BK_Block);
681       break;
682     default:
683       break;
684     }
685     PrevTok = Tok;
686     Tok = NextTok;
687   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
688 
689   // Assume other blocks for all unclosed opening braces.
690   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
691     if (LBraceStack[i]->is(BK_Unknown))
692       LBraceStack[i]->setBlockKind(BK_Block);
693   }
694 
695   FormatTok = Tokens->setPosition(StoredPosition);
696 }
697 
// Folds the std::hash of \p v into \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed = seed ^ Mixed;
}
703 
704 size_t UnwrappedLineParser::computePPHash() const {
705   size_t h = 0;
706   for (const auto &i : PPStack) {
707     hash_combine(h, size_t(i.Kind));
708     hash_combine(h, i.Line);
709   }
710   return h;
711 }
712 
713 UnwrappedLineParser::IfStmtKind
714 UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
715                                 bool MunchSemi,
716                                 bool UnindentWhitesmithsBraces) {
717   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
718          "'{' or macro block token expected");
719   FormatToken *Tok = FormatTok;
720   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
721   FormatTok->setBlockKind(BK_Block);
722 
723   // For Whitesmiths mode, jump to the next level prior to skipping over the
724   // braces.
725   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
726     ++Line->Level;
727 
728   size_t PPStartHash = computePPHash();
729 
730   unsigned InitialLevel = Line->Level;
731   nextToken(/*LevelDifference=*/AddLevels);
732 
733   if (MacroBlock && FormatTok->is(tok::l_paren))
734     parseParens();
735 
736   size_t NbPreprocessorDirectives =
737       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
738   addUnwrappedLine();
739   size_t OpeningLineIndex =
740       CurrentLines->empty()
741           ? (UnwrappedLine::kInvalidIndex)
742           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
743 
744   // Whitesmiths is weird here. The brace needs to be indented for the namespace
745   // block, but the block itself may not be indented depending on the style
746   // settings. This allows the format to back up one level in those cases.
747   if (UnindentWhitesmithsBraces)
748     --Line->Level;
749 
750   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
751                                           MustBeDeclaration);
752   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
753     Line->Level += AddLevels;
754 
755   IfStmtKind IfKind = IfStmtKind::NotIf;
756   const bool SimpleBlock = parseLevel(/*HasOpeningBrace=*/true, &IfKind);
757 
758   if (eof())
759     return IfKind;
760 
761   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
762                  : !FormatTok->is(tok::r_brace)) {
763     Line->Level = InitialLevel;
764     FormatTok->setBlockKind(BK_Block);
765     return IfKind;
766   }
767 
768   if (SimpleBlock && Tok->is(tok::l_brace)) {
769     assert(FormatTok->is(tok::r_brace));
770     const FormatToken *Previous = Tokens->getPreviousToken();
771     assert(Previous);
772     if (Previous->isNot(tok::r_brace) || Previous->Optional) {
773       Tok->MatchingParen = FormatTok;
774       FormatTok->MatchingParen = Tok;
775     }
776   }
777 
778   size_t PPEndHash = computePPHash();
779 
780   // Munch the closing brace.
781   nextToken(/*LevelDifference=*/-AddLevels);
782 
783   if (MacroBlock && FormatTok->is(tok::l_paren))
784     parseParens();
785 
786   if (FormatTok->is(tok::arrow)) {
787     // Following the } we can find a trailing return type arrow
788     // as part of an implicit conversion constraint.
789     nextToken();
790     parseStructuralElement();
791   }
792 
793   if (MunchSemi && FormatTok->Tok.is(tok::semi))
794     nextToken();
795 
796   Line->Level = InitialLevel;
797 
798   if (PPStartHash == PPEndHash) {
799     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
800     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
801       // Update the opening line to add the forward reference as well
802       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
803           CurrentLines->size() - 1;
804     }
805   }
806 
807   return IfKind;
808 }
809 
810 static bool isGoogScope(const UnwrappedLine &Line) {
811   // FIXME: Closure-library specific stuff should not be hard-coded but be
812   // configurable.
813   if (Line.Tokens.size() < 4)
814     return false;
815   auto I = Line.Tokens.begin();
816   if (I->Tok->TokenText != "goog")
817     return false;
818   ++I;
819   if (I->Tok->isNot(tok::period))
820     return false;
821   ++I;
822   if (I->Tok->TokenText != "scope")
823     return false;
824   ++I;
825   return I->Tok->is(tok::l_paren);
826 }
827 
828 static bool isIIFE(const UnwrappedLine &Line,
829                    const AdditionalKeywords &Keywords) {
830   // Look for the start of an immediately invoked anonymous function.
831   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
832   // This is commonly done in JavaScript to create a new, anonymous scope.
833   // Example: (function() { ... })()
834   if (Line.Tokens.size() < 3)
835     return false;
836   auto I = Line.Tokens.begin();
837   if (I->Tok->isNot(tok::l_paren))
838     return false;
839   ++I;
840   if (I->Tok->isNot(Keywords.kw_function))
841     return false;
842   ++I;
843   return I->Tok->is(tok::l_paren);
844 }
845 
846 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
847                                    const FormatToken &InitialToken) {
848   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
849     return Style.BraceWrapping.AfterNamespace;
850   if (InitialToken.is(tok::kw_class))
851     return Style.BraceWrapping.AfterClass;
852   if (InitialToken.is(tok::kw_union))
853     return Style.BraceWrapping.AfterUnion;
854   if (InitialToken.is(tok::kw_struct))
855     return Style.BraceWrapping.AfterStruct;
856   if (InitialToken.is(tok::kw_enum))
857     return Style.BraceWrapping.AfterEnum;
858   return false;
859 }
860 
861 void UnwrappedLineParser::parseChildBlock() {
862   FormatTok->setBlockKind(BK_Block);
863   nextToken();
864   {
865     bool SkipIndent = (Style.isJavaScript() &&
866                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
867     ScopedLineState LineState(*this);
868     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
869                                             /*MustBeDeclaration=*/false);
870     Line->Level += SkipIndent ? 0 : 1;
871     parseLevel(/*HasOpeningBrace=*/true);
872     flushComments(isOnNewLine(*FormatTok));
873     Line->Level -= SkipIndent ? 0 : 1;
874   }
875   nextToken();
876 }
877 
878 void UnwrappedLineParser::parsePPDirective() {
879   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
880   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
881 
882   nextToken();
883 
884   if (!FormatTok->Tok.getIdentifierInfo()) {
885     parsePPUnknown();
886     return;
887   }
888 
889   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
890   case tok::pp_define:
891     parsePPDefine();
892     return;
893   case tok::pp_if:
894     parsePPIf(/*IfDef=*/false);
895     break;
896   case tok::pp_ifdef:
897   case tok::pp_ifndef:
898     parsePPIf(/*IfDef=*/true);
899     break;
900   case tok::pp_else:
901     parsePPElse();
902     break;
903   case tok::pp_elifdef:
904   case tok::pp_elifndef:
905   case tok::pp_elif:
906     parsePPElIf();
907     break;
908   case tok::pp_endif:
909     parsePPEndIf();
910     break;
911   default:
912     parsePPUnknown();
913     break;
914   }
915 }
916 
917 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
918   size_t Line = CurrentLines->size();
919   if (CurrentLines == &PreprocessorDirectives)
920     Line += Lines.size();
921 
922   if (Unreachable ||
923       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
924     PPStack.push_back({PP_Unreachable, Line});
925   else
926     PPStack.push_back({PP_Conditional, Line});
927 }
928 
929 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
930   ++PPBranchLevel;
931   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
932   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
933     PPLevelBranchIndex.push_back(0);
934     PPLevelBranchCount.push_back(0);
935   }
936   PPChainBranchIndex.push(0);
937   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
938   conditionalCompilationCondition(Unreachable || Skip);
939 }
940 
941 void UnwrappedLineParser::conditionalCompilationAlternative() {
942   if (!PPStack.empty())
943     PPStack.pop_back();
944   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
945   if (!PPChainBranchIndex.empty())
946     ++PPChainBranchIndex.top();
947   conditionalCompilationCondition(
948       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
949       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
950 }
951 
952 void UnwrappedLineParser::conditionalCompilationEnd() {
953   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
954   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
955     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
956       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
957     }
958   }
959   // Guard against #endif's without #if.
960   if (PPBranchLevel > -1)
961     --PPBranchLevel;
962   if (!PPChainBranchIndex.empty())
963     PPChainBranchIndex.pop();
964   if (!PPStack.empty())
965     PPStack.pop_back();
966 }
967 
968 void UnwrappedLineParser::parsePPIf(bool IfDef) {
969   bool IfNDef = FormatTok->is(tok::pp_ifndef);
970   nextToken();
971   bool Unreachable = false;
972   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
973     Unreachable = true;
974   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
975     Unreachable = true;
976   conditionalCompilationStart(Unreachable);
977   FormatToken *IfCondition = FormatTok;
978   // If there's a #ifndef on the first line, and the only lines before it are
979   // comments, it could be an include guard.
980   bool MaybeIncludeGuard = IfNDef;
981   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
982     for (auto &Line : Lines) {
983       if (!Line.Tokens.front().Tok->is(tok::comment)) {
984         MaybeIncludeGuard = false;
985         IncludeGuard = IG_Rejected;
986         break;
987       }
988     }
989   --PPBranchLevel;
990   parsePPUnknown();
991   ++PPBranchLevel;
992   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
993     IncludeGuard = IG_IfNdefed;
994     IncludeGuardToken = IfCondition;
995   }
996 }
997 
998 void UnwrappedLineParser::parsePPElse() {
999   // If a potential include guard has an #else, it's not an include guard.
1000   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1001     IncludeGuard = IG_Rejected;
1002   conditionalCompilationAlternative();
1003   if (PPBranchLevel > -1)
1004     --PPBranchLevel;
1005   parsePPUnknown();
1006   ++PPBranchLevel;
1007 }
1008 
1009 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1010 
1011 void UnwrappedLineParser::parsePPEndIf() {
1012   conditionalCompilationEnd();
1013   parsePPUnknown();
1014   // If the #endif of a potential include guard is the last thing in the file,
1015   // then we found an include guard.
1016   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1017       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1018     IncludeGuard = IG_Found;
1019 }
1020 
1021 void UnwrappedLineParser::parsePPDefine() {
1022   nextToken();
1023 
1024   if (!FormatTok->Tok.getIdentifierInfo()) {
1025     IncludeGuard = IG_Rejected;
1026     IncludeGuardToken = nullptr;
1027     parsePPUnknown();
1028     return;
1029   }
1030 
1031   if (IncludeGuard == IG_IfNdefed &&
1032       IncludeGuardToken->TokenText == FormatTok->TokenText) {
1033     IncludeGuard = IG_Defined;
1034     IncludeGuardToken = nullptr;
1035     for (auto &Line : Lines) {
1036       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1037         IncludeGuard = IG_Rejected;
1038         break;
1039       }
1040     }
1041   }
1042 
1043   nextToken();
1044   if (FormatTok->Tok.getKind() == tok::l_paren &&
1045       FormatTok->WhitespaceRange.getBegin() ==
1046           FormatTok->WhitespaceRange.getEnd()) {
1047     parseParens();
1048   }
1049   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1050     Line->Level += PPBranchLevel + 1;
1051   addUnwrappedLine();
1052   ++Line->Level;
1053 
1054   // Errors during a preprocessor directive can only affect the layout of the
1055   // preprocessor directive, and thus we ignore them. An alternative approach
1056   // would be to use the same approach we use on the file level (no
1057   // re-indentation if there was a structural error) within the macro
1058   // definition.
1059   parseFile();
1060 }
1061 
1062 void UnwrappedLineParser::parsePPUnknown() {
1063   do {
1064     nextToken();
1065   } while (!eof());
1066   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1067     Line->Level += PPBranchLevel + 1;
1068   addUnwrappedLine();
1069 }
1070 
1071 // Here we exclude certain tokens that are not usually the first token in an
1072 // unwrapped line. This is used in attempt to distinguish macro calls without
1073 // trailing semicolons from other constructs split to several lines.
1074 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1075   // Semicolon can be a null-statement, l_square can be a start of a macro or
1076   // a C++11 attribute, but this doesn't seem to be common.
1077   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1078          Tok.isNot(TT_AttributeSquare) &&
1079          // Tokens that can only be used as binary operators and a part of
1080          // overloaded operator names.
1081          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1082          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1083          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1084          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1085          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1086          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1087          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1088          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1089          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1090          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1091          Tok.isNot(tok::lesslessequal) &&
1092          // Colon is used in labels, base class lists, initializer lists,
1093          // range-based for loops, ternary operator, but should never be the
1094          // first token in an unwrapped line.
1095          Tok.isNot(tok::colon) &&
1096          // 'noexcept' is a trailing annotation.
1097          Tok.isNot(tok::kw_noexcept);
1098 }
1099 
1100 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1101                           const FormatToken *FormatTok) {
1102   // FIXME: This returns true for C/C++ keywords like 'struct'.
1103   return FormatTok->is(tok::identifier) &&
1104          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1105           !FormatTok->isOneOf(
1106               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1107               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1108               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1109               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1110               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1111               Keywords.kw_instanceof, Keywords.kw_interface,
1112               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1113 }
1114 
1115 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1116                                  const FormatToken *FormatTok) {
1117   return FormatTok->Tok.isLiteral() ||
1118          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1119          mustBeJSIdent(Keywords, FormatTok);
1120 }
1121 
1122 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1123 // when encountered after a value (see mustBeJSIdentOrValue).
1124 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1125                            const FormatToken *FormatTok) {
1126   return FormatTok->isOneOf(
1127       tok::kw_return, Keywords.kw_yield,
1128       // conditionals
1129       tok::kw_if, tok::kw_else,
1130       // loops
1131       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1132       // switch/case
1133       tok::kw_switch, tok::kw_case,
1134       // exceptions
1135       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1136       // declaration
1137       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1138       Keywords.kw_async, Keywords.kw_function,
1139       // import/export
1140       Keywords.kw_import, tok::kw_export);
1141 }
1142 
1143 // Checks whether a token is a type in K&R C (aka C78).
1144 static bool isC78Type(const FormatToken &Tok) {
1145   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1146                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1147                      tok::identifier);
1148 }
1149 
1150 // This function checks whether a token starts the first parameter declaration
1151 // in a K&R C (aka C78) function definition, e.g.:
1152 //   int f(a, b)
1153 //   short a, b;
1154 //   {
1155 //      return a + b;
1156 //   }
1157 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1158                                const FormatToken *FuncName) {
1159   assert(Tok);
1160   assert(Next);
1161   assert(FuncName);
1162 
1163   if (FuncName->isNot(tok::identifier))
1164     return false;
1165 
1166   const FormatToken *Prev = FuncName->Previous;
1167   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1168     return false;
1169 
1170   if (!isC78Type(*Tok) &&
1171       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1172     return false;
1173 
1174   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1175     return false;
1176 
1177   Tok = Tok->Previous;
1178   if (!Tok || Tok->isNot(tok::r_paren))
1179     return false;
1180 
1181   Tok = Tok->Previous;
1182   if (!Tok || Tok->isNot(tok::identifier))
1183     return false;
1184 
1185   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1186 }
1187 
1188 void UnwrappedLineParser::parseModuleImport() {
1189   nextToken();
1190   while (!eof()) {
1191     if (FormatTok->is(tok::colon)) {
1192       FormatTok->setType(TT_ModulePartitionColon);
1193     }
1194     // Handle import <foo/bar.h> as we would an include statement.
1195     else if (FormatTok->is(tok::less)) {
1196       nextToken();
1197       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1198         // Mark tokens up to the trailing line comments as implicit string
1199         // literals.
1200         if (FormatTok->isNot(tok::comment) &&
1201             !FormatTok->TokenText.startswith("//"))
1202           FormatTok->setType(TT_ImplicitStringLiteral);
1203         nextToken();
1204       }
1205     }
1206     if (FormatTok->is(tok::semi)) {
1207       nextToken();
1208       break;
1209     }
1210     nextToken();
1211   }
1212 
1213   addUnwrappedLine();
1214 }
1215 
1216 // readTokenWithJavaScriptASI reads the next token and terminates the current
1217 // line if JavaScript Automatic Semicolon Insertion must
1218 // happen between the current token and the next token.
1219 //
1220 // This method is conservative - it cannot cover all edge cases of JavaScript,
1221 // but only aims to correctly handle certain well known cases. It *must not*
1222 // return true in speculative cases.
1223 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1224   FormatToken *Previous = FormatTok;
1225   readToken();
1226   FormatToken *Next = FormatTok;
1227 
1228   bool IsOnSameLine =
1229       CommentsBeforeNextToken.empty()
1230           ? Next->NewlinesBefore == 0
1231           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1232   if (IsOnSameLine)
1233     return;
1234 
1235   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1236   bool PreviousStartsTemplateExpr =
1237       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1238   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1239     // If the line contains an '@' sign, the previous token might be an
1240     // annotation, which can precede another identifier/value.
1241     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1242       return LineNode.Tok->is(tok::at);
1243     });
1244     if (HasAt)
1245       return;
1246   }
1247   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1248     return addUnwrappedLine();
1249   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1250   bool NextEndsTemplateExpr =
1251       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1252   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1253       (PreviousMustBeValue ||
1254        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1255                          tok::minusminus)))
1256     return addUnwrappedLine();
1257   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1258       isJSDeclOrStmt(Keywords, Next))
1259     return addUnwrappedLine();
1260 }
1261 
// Parses one structural element starting at the current token and emits the
// unwrapped line(s) for it.
//
// A first switch dispatches constructs that are recognized by their leading
// token (asm blocks, namespaces, access specifiers, control-flow statements,
// extern blocks, export/inline namespaces, imports and several macro kinds).
// Whatever is not completely handled there falls into the token loop below,
// which consumes tokens until one of its cases ends the element (typically by
// calling addUnwrappedLine() and returning) or eof() is reached.
//
// \p IfKind is only forwarded to parseIfThenElse(). \p IsTopLevel enables the
// K&R C parameter-declaration check after a parenthesized list.
void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
                                                 bool IsTopLevel) {
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        // NOTE(review): presumably marks the asm body tokens as not to be
        // reformatted - confirm against FormatToken::Finalized.
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // For Java, JavaScript and C# the keyword is only consumed here; no line
    // is emitted for it.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse(IfKind);
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic token loop: each iteration dispatches on the current token and
  // either consumes it (and possibly a nested construct) or ends the element.
  do {
    // Token preceding the one this iteration dispatches on.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      return;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      // Previous is the token in front of the '(' and serves as the candidate
      // function name.
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.Language == FormatStyle::LK_Java &&
            Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
          // If necessary, we could set the type to something different than
          // TT_FunctionLBrace.
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
        } else if (Style.BraceWrapping.AfterFunction) {
          addUnwrappedLine();
        }
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very like to be an actual
          // interface declaration.
          // Look one token ahead, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      // The identifier's spelling is saved first because the macro heuristic
      // below inspects it after the token has been consumed.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // Only an identifier that is the first token of the line (optionally
      // after a leading comment) is considered as a label or macro.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier must equal its upper-cased form, be at
        // least five characters long or function-like, and be followed by a
        // line break and a token that can start a new line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1755 
1756 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1757   assert(FormatTok->is(tok::l_brace));
1758   if (!Style.isCSharp())
1759     return false;
1760   // See if it's a property accessor.
1761   if (FormatTok->Previous->isNot(tok::identifier))
1762     return false;
1763 
1764   // See if we are inside a property accessor.
1765   //
1766   // Record the current tokenPosition so that we can advance and
1767   // reset the current token. `Next` is not set yet so we need
1768   // another way to advance along the token stream.
1769   unsigned int StoredPosition = Tokens->getPosition();
1770   FormatToken *Tok = Tokens->getNextToken();
1771 
1772   // A trivial property accessor is of the form:
1773   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1774   // Track these as they do not require line breaks to be introduced.
1775   bool HasGetOrSet = false;
1776   bool IsTrivialPropertyAccessor = true;
1777   while (!eof()) {
1778     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1779                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1780                      Keywords.kw_set)) {
1781       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1782         HasGetOrSet = true;
1783       Tok = Tokens->getNextToken();
1784       continue;
1785     }
1786     if (Tok->isNot(tok::r_brace))
1787       IsTrivialPropertyAccessor = false;
1788     break;
1789   }
1790 
1791   if (!HasGetOrSet) {
1792     Tokens->setPosition(StoredPosition);
1793     return false;
1794   }
1795 
1796   // Try to parse the property accessor:
1797   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1798   Tokens->setPosition(StoredPosition);
1799   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1800     addUnwrappedLine();
1801   nextToken();
1802   do {
1803     switch (FormatTok->Tok.getKind()) {
1804     case tok::r_brace:
1805       nextToken();
1806       if (FormatTok->is(tok::equal)) {
1807         while (!eof() && FormatTok->isNot(tok::semi))
1808           nextToken();
1809         nextToken();
1810       }
1811       addUnwrappedLine();
1812       return true;
1813     case tok::l_brace:
1814       ++Line->Level;
1815       parseBlock(/*MustBeDeclaration=*/true);
1816       addUnwrappedLine();
1817       --Line->Level;
1818       break;
1819     case tok::equal:
1820       if (FormatTok->is(TT_FatArrow)) {
1821         ++Line->Level;
1822         do {
1823           nextToken();
1824         } while (!eof() && FormatTok->isNot(tok::semi));
1825         nextToken();
1826         addUnwrappedLine();
1827         --Line->Level;
1828         break;
1829       }
1830       nextToken();
1831       break;
1832     default:
1833       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1834           !IsTrivialPropertyAccessor) {
1835         // Non-trivial get/set needs to be on its own line.
1836         addUnwrappedLine();
1837       }
1838       nextToken();
1839     }
1840   } while (!eof());
1841 
1842   // Unreachable for well-formed code (paired '{' and '}').
1843   return true;
1844 }
1845 
1846 bool UnwrappedLineParser::tryToParseLambda() {
1847   if (!Style.isCpp()) {
1848     nextToken();
1849     return false;
1850   }
1851   assert(FormatTok->is(tok::l_square));
1852   FormatToken &LSquare = *FormatTok;
1853   if (!tryToParseLambdaIntroducer())
1854     return false;
1855 
1856   bool SeenArrow = false;
1857 
1858   while (FormatTok->isNot(tok::l_brace)) {
1859     if (FormatTok->isSimpleTypeSpecifier()) {
1860       nextToken();
1861       continue;
1862     }
1863     switch (FormatTok->Tok.getKind()) {
1864     case tok::l_brace:
1865       break;
1866     case tok::l_paren:
1867       parseParens();
1868       break;
1869     case tok::l_square:
1870       parseSquare();
1871       break;
1872     case tok::amp:
1873     case tok::star:
1874     case tok::kw_const:
1875     case tok::comma:
1876     case tok::less:
1877     case tok::greater:
1878     case tok::identifier:
1879     case tok::numeric_constant:
1880     case tok::coloncolon:
1881     case tok::kw_class:
1882     case tok::kw_mutable:
1883     case tok::kw_noexcept:
1884     case tok::kw_template:
1885     case tok::kw_typename:
1886       nextToken();
1887       break;
1888     // Specialization of a template with an integer parameter can contain
1889     // arithmetic, logical, comparison and ternary operators.
1890     //
1891     // FIXME: This also accepts sequences of operators that are not in the scope
1892     // of a template argument list.
1893     //
1894     // In a C++ lambda a template type can only occur after an arrow. We use
1895     // this as an heuristic to distinguish between Objective-C expressions
1896     // followed by an `a->b` expression, such as:
1897     // ([obj func:arg] + a->b)
1898     // Otherwise the code below would parse as a lambda.
1899     //
1900     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1901     // explicit template lists: []<bool b = true && false>(U &&u){}
1902     case tok::plus:
1903     case tok::minus:
1904     case tok::exclaim:
1905     case tok::tilde:
1906     case tok::slash:
1907     case tok::percent:
1908     case tok::lessless:
1909     case tok::pipe:
1910     case tok::pipepipe:
1911     case tok::ampamp:
1912     case tok::caret:
1913     case tok::equalequal:
1914     case tok::exclaimequal:
1915     case tok::greaterequal:
1916     case tok::lessequal:
1917     case tok::question:
1918     case tok::colon:
1919     case tok::ellipsis:
1920     case tok::kw_true:
1921     case tok::kw_false:
1922       if (SeenArrow) {
1923         nextToken();
1924         break;
1925       }
1926       return true;
1927     case tok::arrow:
1928       // This might or might not actually be a lambda arrow (this could be an
1929       // ObjC method invocation followed by a dereferencing arrow). We might
1930       // reset this back to TT_Unknown in TokenAnnotator.
1931       FormatTok->setType(TT_LambdaArrow);
1932       SeenArrow = true;
1933       nextToken();
1934       break;
1935     default:
1936       return true;
1937     }
1938   }
1939   FormatTok->setType(TT_LambdaLBrace);
1940   LSquare.setType(TT_LambdaLSquare);
1941   parseChildBlock();
1942   return true;
1943 }
1944 
1945 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1946   const FormatToken *Previous = FormatTok->Previous;
1947   if (Previous &&
1948       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1949                          tok::kw_delete, tok::l_square) ||
1950        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1951        Previous->isSimpleTypeSpecifier())) {
1952     nextToken();
1953     return false;
1954   }
1955   nextToken();
1956   if (FormatTok->is(tok::l_square)) {
1957     return false;
1958   }
1959   parseSquare(/*LambdaIntroducer=*/true);
1960   return true;
1961 }
1962 
1963 void UnwrappedLineParser::tryToParseJSFunction() {
1964   assert(FormatTok->is(Keywords.kw_function) ||
1965          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1966   if (FormatTok->is(Keywords.kw_async))
1967     nextToken();
1968   // Consume "function".
1969   nextToken();
1970 
1971   // Consume * (generator function). Treat it like C++'s overloaded operators.
1972   if (FormatTok->is(tok::star)) {
1973     FormatTok->setType(TT_OverloadedOperator);
1974     nextToken();
1975   }
1976 
1977   // Consume function name.
1978   if (FormatTok->is(tok::identifier))
1979     nextToken();
1980 
1981   if (FormatTok->isNot(tok::l_paren))
1982     return;
1983 
1984   // Parse formal parameter list.
1985   parseParens();
1986 
1987   if (FormatTok->is(tok::colon)) {
1988     // Parse a type definition.
1989     nextToken();
1990 
1991     // Eat the type declaration. For braced inline object types, balance braces,
1992     // otherwise just parse until finding an l_brace for the function body.
1993     if (FormatTok->is(tok::l_brace))
1994       tryToParseBracedList();
1995     else
1996       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1997         nextToken();
1998   }
1999 
2000   if (FormatTok->is(tok::semi))
2001     return;
2002 
2003   parseChildBlock();
2004 }
2005 
2006 bool UnwrappedLineParser::tryToParseBracedList() {
2007   if (FormatTok->is(BK_Unknown))
2008     calculateBraceTypes();
2009   assert(FormatTok->isNot(BK_Unknown));
2010   if (FormatTok->is(BK_Block))
2011     return false;
2012   nextToken();
2013   parseBracedList();
2014   return true;
2015 }
2016 
2017 bool UnwrappedLineParser::tryToParseChildBlock() {
2018   assert(Style.isJavaScript() || Style.isCSharp());
2019   assert(FormatTok->is(TT_FatArrow));
2020   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2021   // They always start an expression or a child block if followed by a curly
2022   // brace.
2023   nextToken();
2024   if (FormatTok->isNot(tok::l_brace))
2025     return false;
2026   parseChildBlock();
2027   return true;
2028 }
2029 
2030 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2031                                           bool IsEnum,
2032                                           tok::TokenKind ClosingBraceKind) {
2033   bool HasError = false;
2034 
2035   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2036   // replace this by using parseAssignmentExpression() inside.
2037   do {
2038     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2039         tryToParseChildBlock())
2040       continue;
2041     if (Style.isJavaScript()) {
2042       if (FormatTok->is(Keywords.kw_function) ||
2043           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2044         tryToParseJSFunction();
2045         continue;
2046       }
2047       if (FormatTok->is(tok::l_brace)) {
2048         // Could be a method inside of a braced list `{a() { return 1; }}`.
2049         if (tryToParseBracedList())
2050           continue;
2051         parseChildBlock();
2052       }
2053     }
2054     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2055       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2056         addUnwrappedLine();
2057       nextToken();
2058       return !HasError;
2059     }
2060     switch (FormatTok->Tok.getKind()) {
2061     case tok::l_square:
2062       if (Style.isCSharp())
2063         parseSquare();
2064       else
2065         tryToParseLambda();
2066       break;
2067     case tok::l_paren:
2068       parseParens();
2069       // JavaScript can just have free standing methods and getters/setters in
2070       // object literals. Detect them by a "{" following ")".
2071       if (Style.isJavaScript()) {
2072         if (FormatTok->is(tok::l_brace))
2073           parseChildBlock();
2074         break;
2075       }
2076       break;
2077     case tok::l_brace:
2078       // Assume there are no blocks inside a braced init list apart
2079       // from the ones we explicitly parse out (like lambdas).
2080       FormatTok->setBlockKind(BK_BracedInit);
2081       nextToken();
2082       parseBracedList();
2083       break;
2084     case tok::less:
2085       if (Style.Language == FormatStyle::LK_Proto) {
2086         nextToken();
2087         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2088                         /*ClosingBraceKind=*/tok::greater);
2089       } else {
2090         nextToken();
2091       }
2092       break;
2093     case tok::semi:
2094       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2095       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2096       // used for error recovery if we have otherwise determined that this is
2097       // a braced list.
2098       if (Style.isJavaScript()) {
2099         nextToken();
2100         break;
2101       }
2102       HasError = true;
2103       if (!ContinueOnSemicolons)
2104         return !HasError;
2105       nextToken();
2106       break;
2107     case tok::comma:
2108       nextToken();
2109       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2110         addUnwrappedLine();
2111       break;
2112     default:
2113       nextToken();
2114       break;
2115     }
2116   } while (!eof());
2117   return false;
2118 }
2119 
2120 void UnwrappedLineParser::parseParens() {
2121   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2122   nextToken();
2123   do {
2124     switch (FormatTok->Tok.getKind()) {
2125     case tok::l_paren:
2126       parseParens();
2127       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2128         parseChildBlock();
2129       break;
2130     case tok::r_paren:
2131       nextToken();
2132       return;
2133     case tok::r_brace:
2134       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2135       return;
2136     case tok::l_square:
2137       tryToParseLambda();
2138       break;
2139     case tok::l_brace:
2140       if (!tryToParseBracedList())
2141         parseChildBlock();
2142       break;
2143     case tok::at:
2144       nextToken();
2145       if (FormatTok->Tok.is(tok::l_brace)) {
2146         nextToken();
2147         parseBracedList();
2148       }
2149       break;
2150     case tok::equal:
2151       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2152         tryToParseChildBlock();
2153       else
2154         nextToken();
2155       break;
2156     case tok::kw_class:
2157       if (Style.isJavaScript())
2158         parseRecord(/*ParseAsExpr=*/true);
2159       else
2160         nextToken();
2161       break;
2162     case tok::identifier:
2163       if (Style.isJavaScript() &&
2164           (FormatTok->is(Keywords.kw_function) ||
2165            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2166         tryToParseJSFunction();
2167       else
2168         nextToken();
2169       break;
2170     default:
2171       nextToken();
2172       break;
2173     }
2174   } while (!eof());
2175 }
2176 
2177 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2178   if (!LambdaIntroducer) {
2179     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2180     if (tryToParseLambda())
2181       return;
2182   }
2183   do {
2184     switch (FormatTok->Tok.getKind()) {
2185     case tok::l_paren:
2186       parseParens();
2187       break;
2188     case tok::r_square:
2189       nextToken();
2190       return;
2191     case tok::r_brace:
2192       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2193       return;
2194     case tok::l_square:
2195       parseSquare();
2196       break;
2197     case tok::l_brace: {
2198       if (!tryToParseBracedList())
2199         parseChildBlock();
2200       break;
2201     }
2202     case tok::at:
2203       nextToken();
2204       if (FormatTok->Tok.is(tok::l_brace)) {
2205         nextToken();
2206         parseBracedList();
2207       }
2208       break;
2209     default:
2210       nextToken();
2211       break;
2212     }
2213   } while (!eof());
2214 }
2215 
2216 void UnwrappedLineParser::keepAncestorBraces() {
2217   if (!Style.RemoveBracesLLVM)
2218     return;
2219 
2220   const int MaxNestingLevels = 2;
2221   const int Size = NestedTooDeep.size();
2222   if (Size >= MaxNestingLevels)
2223     NestedTooDeep[Size - MaxNestingLevels] = true;
2224   NestedTooDeep.push_back(false);
2225 }
2226 
2227 static void markOptionalBraces(FormatToken *LeftBrace) {
2228   if (!LeftBrace)
2229     return;
2230 
2231   assert(LeftBrace->is(tok::l_brace));
2232 
2233   FormatToken *RightBrace = LeftBrace->MatchingParen;
2234   if (!RightBrace) {
2235     assert(!LeftBrace->Optional);
2236     return;
2237   }
2238 
2239   assert(RightBrace->is(tok::r_brace));
2240   assert(RightBrace->MatchingParen == LeftBrace);
2241   assert(LeftBrace->Optional == RightBrace->Optional);
2242 
2243   LeftBrace->Optional = true;
2244   RightBrace->Optional = true;
2245 }
2246 
2247 FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
2248                                                   bool KeepBraces) {
2249   auto HandleAttributes = [this]() {
2250     // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2251     if (FormatTok->is(TT_AttributeMacro))
2252       nextToken();
2253     // Handle [[likely]] / [[unlikely]] attributes.
2254     if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2255       parseSquare();
2256   };
2257 
2258   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2259   nextToken();
2260   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2261     nextToken();
2262   if (FormatTok->Tok.is(tok::l_paren))
2263     parseParens();
2264   HandleAttributes();
2265 
2266   bool NeedsUnwrappedLine = false;
2267   keepAncestorBraces();
2268 
2269   FormatToken *IfLeftBrace = nullptr;
2270   IfStmtKind IfBlockKind = IfStmtKind::NotIf;
2271 
2272   if (FormatTok->Tok.is(tok::l_brace)) {
2273     IfLeftBrace = FormatTok;
2274     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2275     IfBlockKind = parseBlock();
2276     if (Style.BraceWrapping.BeforeElse)
2277       addUnwrappedLine();
2278     else
2279       NeedsUnwrappedLine = true;
2280   } else {
2281     addUnwrappedLine();
2282     ++Line->Level;
2283     parseStructuralElement();
2284     --Line->Level;
2285   }
2286 
2287   bool KeepIfBraces = false;
2288   if (Style.RemoveBracesLLVM) {
2289     assert(!NestedTooDeep.empty());
2290     KeepIfBraces = (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
2291                    NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
2292                    IfBlockKind == IfStmtKind::IfElseIf;
2293   }
2294 
2295   FormatToken *ElseLeftBrace = nullptr;
2296   IfStmtKind Kind = IfStmtKind::IfOnly;
2297 
2298   if (FormatTok->Tok.is(tok::kw_else)) {
2299     if (Style.RemoveBracesLLVM) {
2300       NestedTooDeep.back() = false;
2301       Kind = IfStmtKind::IfElse;
2302     }
2303     nextToken();
2304     HandleAttributes();
2305     if (FormatTok->Tok.is(tok::l_brace)) {
2306       ElseLeftBrace = FormatTok;
2307       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2308       if (parseBlock() == IfStmtKind::IfOnly)
2309         Kind = IfStmtKind::IfElseIf;
2310       addUnwrappedLine();
2311     } else if (FormatTok->Tok.is(tok::kw_if)) {
2312       FormatToken *Previous = Tokens->getPreviousToken();
2313       const bool IsPrecededByComment = Previous && Previous->is(tok::comment);
2314       if (IsPrecededByComment) {
2315         addUnwrappedLine();
2316         ++Line->Level;
2317       }
2318       bool TooDeep = true;
2319       if (Style.RemoveBracesLLVM) {
2320         Kind = IfStmtKind::IfElseIf;
2321         TooDeep = NestedTooDeep.pop_back_val();
2322       }
2323       ElseLeftBrace =
2324           parseIfThenElse(/*IfKind=*/nullptr, KeepBraces || KeepIfBraces);
2325       if (Style.RemoveBracesLLVM)
2326         NestedTooDeep.push_back(TooDeep);
2327       if (IsPrecededByComment)
2328         --Line->Level;
2329     } else {
2330       addUnwrappedLine();
2331       ++Line->Level;
2332       parseStructuralElement();
2333       if (FormatTok->is(tok::eof))
2334         addUnwrappedLine();
2335       --Line->Level;
2336     }
2337   } else {
2338     if (Style.RemoveBracesLLVM)
2339       KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
2340     if (NeedsUnwrappedLine)
2341       addUnwrappedLine();
2342   }
2343 
2344   if (!Style.RemoveBracesLLVM)
2345     return nullptr;
2346 
2347   assert(!NestedTooDeep.empty());
2348   const bool KeepElseBraces =
2349       (ElseLeftBrace && !ElseLeftBrace->MatchingParen) || NestedTooDeep.back();
2350 
2351   NestedTooDeep.pop_back();
2352 
2353   if (!KeepBraces && !KeepIfBraces && !KeepElseBraces) {
2354     markOptionalBraces(IfLeftBrace);
2355     markOptionalBraces(ElseLeftBrace);
2356   } else if (IfLeftBrace) {
2357     FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
2358     if (IfRightBrace) {
2359       assert(IfRightBrace->MatchingParen == IfLeftBrace);
2360       assert(!IfLeftBrace->Optional);
2361       assert(!IfRightBrace->Optional);
2362       IfLeftBrace->MatchingParen = nullptr;
2363       IfRightBrace->MatchingParen = nullptr;
2364     }
2365   }
2366 
2367   if (IfKind)
2368     *IfKind = Kind;
2369 
2370   return IfLeftBrace;
2371 }
2372 
2373 void UnwrappedLineParser::parseTryCatch() {
2374   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2375   nextToken();
2376   bool NeedsUnwrappedLine = false;
2377   if (FormatTok->is(tok::colon)) {
2378     // We are in a function try block, what comes is an initializer list.
2379     nextToken();
2380 
2381     // In case identifiers were removed by clang-tidy, what might follow is
2382     // multiple commas in sequence - before the first identifier.
2383     while (FormatTok->is(tok::comma))
2384       nextToken();
2385 
2386     while (FormatTok->is(tok::identifier)) {
2387       nextToken();
2388       if (FormatTok->is(tok::l_paren))
2389         parseParens();
2390       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2391           FormatTok->is(tok::l_brace)) {
2392         do {
2393           nextToken();
2394         } while (!FormatTok->is(tok::r_brace));
2395         nextToken();
2396       }
2397 
2398       // In case identifiers were removed by clang-tidy, what might follow is
2399       // multiple commas in sequence - after the first identifier.
2400       while (FormatTok->is(tok::comma))
2401         nextToken();
2402     }
2403   }
2404   // Parse try with resource.
2405   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2406     parseParens();
2407   }
2408 
2409   keepAncestorBraces();
2410 
2411   if (FormatTok->is(tok::l_brace)) {
2412     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2413     parseBlock();
2414     if (Style.BraceWrapping.BeforeCatch) {
2415       addUnwrappedLine();
2416     } else {
2417       NeedsUnwrappedLine = true;
2418     }
2419   } else if (!FormatTok->is(tok::kw_catch)) {
2420     // The C++ standard requires a compound-statement after a try.
2421     // If there's none, we try to assume there's a structuralElement
2422     // and try to continue.
2423     addUnwrappedLine();
2424     ++Line->Level;
2425     parseStructuralElement();
2426     --Line->Level;
2427   }
2428   while (true) {
2429     if (FormatTok->is(tok::at))
2430       nextToken();
2431     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2432                              tok::kw___finally) ||
2433           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2434            FormatTok->is(Keywords.kw_finally)) ||
2435           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2436            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2437       break;
2438     nextToken();
2439     while (FormatTok->isNot(tok::l_brace)) {
2440       if (FormatTok->is(tok::l_paren)) {
2441         parseParens();
2442         continue;
2443       }
2444       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2445         if (Style.RemoveBracesLLVM)
2446           NestedTooDeep.pop_back();
2447         return;
2448       }
2449       nextToken();
2450     }
2451     NeedsUnwrappedLine = false;
2452     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2453     parseBlock();
2454     if (Style.BraceWrapping.BeforeCatch)
2455       addUnwrappedLine();
2456     else
2457       NeedsUnwrappedLine = true;
2458   }
2459 
2460   if (Style.RemoveBracesLLVM)
2461     NestedTooDeep.pop_back();
2462 
2463   if (NeedsUnwrappedLine)
2464     addUnwrappedLine();
2465 }
2466 
2467 void UnwrappedLineParser::parseNamespace() {
2468   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2469          "'namespace' expected");
2470 
2471   const FormatToken &InitialToken = *FormatTok;
2472   nextToken();
2473   if (InitialToken.is(TT_NamespaceMacro)) {
2474     parseParens();
2475   } else {
2476     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2477                               tok::l_square, tok::period)) {
2478       if (FormatTok->is(tok::l_square))
2479         parseSquare();
2480       else
2481         nextToken();
2482     }
2483   }
2484   if (FormatTok->Tok.is(tok::l_brace)) {
2485     if (ShouldBreakBeforeBrace(Style, InitialToken))
2486       addUnwrappedLine();
2487 
2488     unsigned AddLevels =
2489         Style.NamespaceIndentation == FormatStyle::NI_All ||
2490                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2491                  DeclarationScopeStack.size() > 1)
2492             ? 1u
2493             : 0u;
2494     bool ManageWhitesmithsBraces =
2495         AddLevels == 0u &&
2496         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2497 
2498     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2499     // the whole block.
2500     if (ManageWhitesmithsBraces)
2501       ++Line->Level;
2502 
2503     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2504                /*MunchSemi=*/true,
2505                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2506 
2507     // Munch the semicolon after a namespace. This is more common than one would
2508     // think. Putting the semicolon into its own line is very ugly.
2509     if (FormatTok->Tok.is(tok::semi))
2510       nextToken();
2511 
2512     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2513 
2514     if (ManageWhitesmithsBraces)
2515       --Line->Level;
2516   }
2517   // FIXME: Add error handling.
2518 }
2519 
2520 void UnwrappedLineParser::parseNew() {
2521   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2522   nextToken();
2523 
2524   if (Style.isCSharp()) {
2525     do {
2526       if (FormatTok->is(tok::l_brace))
2527         parseBracedList();
2528 
2529       if (FormatTok->isOneOf(tok::semi, tok::comma))
2530         return;
2531 
2532       nextToken();
2533     } while (!eof());
2534   }
2535 
2536   if (Style.Language != FormatStyle::LK_Java)
2537     return;
2538 
2539   // In Java, we can parse everything up to the parens, which aren't optional.
2540   do {
2541     // There should not be a ;, { or } before the new's open paren.
2542     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2543       return;
2544 
2545     // Consume the parens.
2546     if (FormatTok->is(tok::l_paren)) {
2547       parseParens();
2548 
2549       // If there is a class body of an anonymous class, consume that as child.
2550       if (FormatTok->is(tok::l_brace))
2551         parseChildBlock();
2552       return;
2553     }
2554     nextToken();
2555   } while (!eof());
2556 }
2557 
2558 void UnwrappedLineParser::parseForOrWhileLoop() {
2559   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2560          "'for', 'while' or foreach macro expected");
2561   nextToken();
2562   // JS' for await ( ...
2563   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2564     nextToken();
2565   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2566     nextToken();
2567   if (FormatTok->Tok.is(tok::l_paren))
2568     parseParens();
2569 
2570   keepAncestorBraces();
2571 
2572   if (FormatTok->Tok.is(tok::l_brace)) {
2573     FormatToken *LeftBrace = FormatTok;
2574     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2575     parseBlock();
2576     if (Style.RemoveBracesLLVM) {
2577       assert(!NestedTooDeep.empty());
2578       if (!NestedTooDeep.back())
2579         markOptionalBraces(LeftBrace);
2580     }
2581     addUnwrappedLine();
2582   } else {
2583     addUnwrappedLine();
2584     ++Line->Level;
2585     parseStructuralElement();
2586     --Line->Level;
2587   }
2588 
2589   if (Style.RemoveBracesLLVM)
2590     NestedTooDeep.pop_back();
2591 }
2592 
2593 void UnwrappedLineParser::parseDoWhile() {
2594   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2595   nextToken();
2596 
2597   keepAncestorBraces();
2598 
2599   if (FormatTok->Tok.is(tok::l_brace)) {
2600     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2601     parseBlock();
2602     if (Style.BraceWrapping.BeforeWhile)
2603       addUnwrappedLine();
2604   } else {
2605     addUnwrappedLine();
2606     ++Line->Level;
2607     parseStructuralElement();
2608     --Line->Level;
2609   }
2610 
2611   if (Style.RemoveBracesLLVM)
2612     NestedTooDeep.pop_back();
2613 
2614   // FIXME: Add error handling.
2615   if (!FormatTok->Tok.is(tok::kw_while)) {
2616     addUnwrappedLine();
2617     return;
2618   }
2619 
2620   // If in Whitesmiths mode, the line with the while() needs to be indented
2621   // to the same level as the block.
2622   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2623     ++Line->Level;
2624 
2625   nextToken();
2626   parseStructuralElement();
2627 }
2628 
2629 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2630   nextToken();
2631   unsigned OldLineLevel = Line->Level;
2632   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2633     --Line->Level;
2634   if (LeftAlignLabel)
2635     Line->Level = 0;
2636 
2637   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2638       FormatTok->Tok.is(tok::l_brace)) {
2639 
2640     CompoundStatementIndenter Indenter(this, Line->Level,
2641                                        Style.BraceWrapping.AfterCaseLabel,
2642                                        Style.BraceWrapping.IndentBraces);
2643     parseBlock();
2644     if (FormatTok->Tok.is(tok::kw_break)) {
2645       if (Style.BraceWrapping.AfterControlStatement ==
2646           FormatStyle::BWACS_Always) {
2647         addUnwrappedLine();
2648         if (!Style.IndentCaseBlocks &&
2649             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2650           ++Line->Level;
2651         }
2652       }
2653       parseStructuralElement();
2654     }
2655     addUnwrappedLine();
2656   } else {
2657     if (FormatTok->is(tok::semi))
2658       nextToken();
2659     addUnwrappedLine();
2660   }
2661   Line->Level = OldLineLevel;
2662   if (FormatTok->isNot(tok::l_brace)) {
2663     parseStructuralElement();
2664     addUnwrappedLine();
2665   }
2666 }
2667 
2668 void UnwrappedLineParser::parseCaseLabel() {
2669   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2670 
2671   // FIXME: fix handling of complex expressions here.
2672   do {
2673     nextToken();
2674   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2675   parseLabel();
2676 }
2677 
2678 void UnwrappedLineParser::parseSwitch() {
2679   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2680   nextToken();
2681   if (FormatTok->Tok.is(tok::l_paren))
2682     parseParens();
2683 
2684   keepAncestorBraces();
2685 
2686   if (FormatTok->Tok.is(tok::l_brace)) {
2687     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2688     parseBlock();
2689     addUnwrappedLine();
2690   } else {
2691     addUnwrappedLine();
2692     ++Line->Level;
2693     parseStructuralElement();
2694     --Line->Level;
2695   }
2696 
2697   if (Style.RemoveBracesLLVM)
2698     NestedTooDeep.pop_back();
2699 }
2700 
2701 void UnwrappedLineParser::parseAccessSpecifier() {
2702   nextToken();
2703   // Understand Qt's slots.
2704   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2705     nextToken();
2706   // Otherwise, we don't know what it is, and we'd better keep the next token.
2707   if (FormatTok->Tok.is(tok::colon))
2708     nextToken();
2709   addUnwrappedLine();
2710 }
2711 
2712 void UnwrappedLineParser::parseConcept() {
2713   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2714   nextToken();
2715   if (!FormatTok->Tok.is(tok::identifier))
2716     return;
2717   nextToken();
2718   if (!FormatTok->Tok.is(tok::equal))
2719     return;
2720   nextToken();
2721   if (FormatTok->Tok.is(tok::kw_requires)) {
2722     nextToken();
2723     parseRequiresExpression(Line->Level);
2724   } else {
2725     parseConstraintExpression(Line->Level);
2726   }
2727 }
2728 
2729 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2730   // requires (R range)
2731   if (FormatTok->Tok.is(tok::l_paren)) {
2732     parseParens();
2733     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2734       addUnwrappedLine();
2735       --Line->Level;
2736     }
2737   }
2738 
2739   if (FormatTok->Tok.is(tok::l_brace)) {
2740     if (Style.BraceWrapping.AfterFunction)
2741       addUnwrappedLine();
2742     FormatTok->setType(TT_FunctionLBrace);
2743     parseBlock();
2744     addUnwrappedLine();
2745   } else {
2746     parseConstraintExpression(OriginalLevel);
2747   }
2748 }
2749 
2750 void UnwrappedLineParser::parseConstraintExpression(
2751     unsigned int OriginalLevel) {
2752   // requires Id<T> && Id<T> || Id<T>
2753   while (
2754       FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
2755     nextToken();
2756     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
2757                               tok::greater, tok::comma, tok::ellipsis)) {
2758       if (FormatTok->Tok.is(tok::less)) {
2759         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2760                         /*ClosingBraceKind=*/tok::greater);
2761         continue;
2762       }
2763       nextToken();
2764     }
2765     if (FormatTok->Tok.is(tok::kw_requires)) {
2766       parseRequiresExpression(OriginalLevel);
2767     }
2768     if (FormatTok->Tok.is(tok::less)) {
2769       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2770                       /*ClosingBraceKind=*/tok::greater);
2771     }
2772 
2773     if (FormatTok->Tok.is(tok::l_paren)) {
2774       parseParens();
2775     }
2776     if (FormatTok->Tok.is(tok::l_brace)) {
2777       if (Style.BraceWrapping.AfterFunction)
2778         addUnwrappedLine();
2779       FormatTok->setType(TT_FunctionLBrace);
2780       parseBlock();
2781     }
2782     if (FormatTok->Tok.is(tok::semi)) {
2783       // Eat any trailing semi.
2784       nextToken();
2785       addUnwrappedLine();
2786     }
2787     if (FormatTok->Tok.is(tok::colon)) {
2788       return;
2789     }
2790     if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
2791       if (FormatTok->Previous &&
2792           !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
2793                                         tok::coloncolon)) {
2794         addUnwrappedLine();
2795       }
2796       if (Style.IndentRequires && OriginalLevel != Line->Level) {
2797         --Line->Level;
2798       }
2799       break;
2800     } else {
2801       FormatTok->setType(TT_ConstraintJunctions);
2802     }
2803 
2804     nextToken();
2805   }
2806 }
2807 
2808 void UnwrappedLineParser::parseRequires() {
2809   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2810 
2811   unsigned OriginalLevel = Line->Level;
2812   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2813     addUnwrappedLine();
2814     if (Style.IndentRequires) {
2815       ++Line->Level;
2816     }
2817   }
2818   nextToken();
2819 
2820   parseRequiresExpression(OriginalLevel);
2821 }
2822 
2823 bool UnwrappedLineParser::parseEnum() {
2824   const FormatToken &InitialToken = *FormatTok;
2825 
2826   // Won't be 'enum' for NS_ENUMs.
2827   if (FormatTok->Tok.is(tok::kw_enum))
2828     nextToken();
2829 
2830   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2831   // declarations. An "enum" keyword followed by a colon would be a syntax
2832   // error and thus assume it is just an identifier.
2833   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
2834     return false;
2835 
2836   // In protobuf, "enum" can be used as a field name.
2837   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2838     return false;
2839 
2840   // Eat up enum class ...
2841   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2842     nextToken();
2843 
2844   while (FormatTok->Tok.getIdentifierInfo() ||
2845          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2846                             tok::greater, tok::comma, tok::question)) {
2847     nextToken();
2848     // We can have macros or attributes in between 'enum' and the enum name.
2849     if (FormatTok->is(tok::l_paren))
2850       parseParens();
2851     if (FormatTok->is(tok::identifier)) {
2852       nextToken();
2853       // If there are two identifiers in a row, this is likely an elaborate
2854       // return type. In Java, this can be "implements", etc.
2855       if (Style.isCpp() && FormatTok->is(tok::identifier))
2856         return false;
2857     }
2858   }
2859 
2860   // Just a declaration or something is wrong.
2861   if (FormatTok->isNot(tok::l_brace))
2862     return true;
2863   FormatTok->setBlockKind(BK_Block);
2864 
2865   if (Style.Language == FormatStyle::LK_Java) {
2866     // Java enums are different.
2867     parseJavaEnumBody();
2868     return true;
2869   }
2870   if (Style.Language == FormatStyle::LK_Proto) {
2871     parseBlock(/*MustBeDeclaration=*/true);
2872     return true;
2873   }
2874 
2875   if (!Style.AllowShortEnumsOnASingleLine &&
2876       ShouldBreakBeforeBrace(Style, InitialToken))
2877     addUnwrappedLine();
2878   // Parse enum body.
2879   nextToken();
2880   if (!Style.AllowShortEnumsOnASingleLine) {
2881     addUnwrappedLine();
2882     Line->Level += 1;
2883   }
2884   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2885                                    /*IsEnum=*/true);
2886   if (!Style.AllowShortEnumsOnASingleLine)
2887     Line->Level -= 1;
2888   if (HasError) {
2889     if (FormatTok->is(tok::semi))
2890       nextToken();
2891     addUnwrappedLine();
2892   }
2893   return true;
2894 
2895   // There is no addUnwrappedLine() here so that we fall through to parsing a
2896   // structural element afterwards. Thus, in "enum A {} n, m;",
2897   // "} n, m;" will end up in one unwrapped line.
2898 }
2899 
2900 bool UnwrappedLineParser::parseStructLike() {
2901   // parseRecord falls through and does not yet add an unwrapped line as a
2902   // record declaration or definition can start a structural element.
2903   parseRecord();
2904   // This does not apply to Java, JavaScript and C#.
2905   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
2906       Style.isCSharp()) {
2907     if (FormatTok->is(tok::semi))
2908       nextToken();
2909     addUnwrappedLine();
2910     return true;
2911   }
2912   return false;
2913 }
2914 
2915 namespace {
2916 // A class used to set and restore the Token position when peeking
2917 // ahead in the token source.
2918 class ScopedTokenPosition {
2919   unsigned StoredPosition;
2920   FormatTokenSource *Tokens;
2921 
2922 public:
2923   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2924     assert(Tokens && "Tokens expected to not be null");
2925     StoredPosition = Tokens->getPosition();
2926   }
2927 
2928   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2929 };
2930 } // namespace
2931 
2932 // Look to see if we have [[ by looking ahead, if
2933 // its not then rewind to the original position.
2934 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2935   ScopedTokenPosition AutoPosition(Tokens);
2936   FormatToken *Tok = Tokens->getNextToken();
2937   // We already read the first [ check for the second.
2938   if (!Tok->is(tok::l_square)) {
2939     return false;
2940   }
2941   // Double check that the attribute is just something
2942   // fairly simple.
2943   while (Tok->isNot(tok::eof)) {
2944     if (Tok->is(tok::r_square)) {
2945       break;
2946     }
2947     Tok = Tokens->getNextToken();
2948   }
2949   if (Tok->is(tok::eof))
2950     return false;
2951   Tok = Tokens->getNextToken();
2952   if (!Tok->is(tok::r_square)) {
2953     return false;
2954   }
2955   Tok = Tokens->getNextToken();
2956   if (Tok->is(tok::semi)) {
2957     return false;
2958   }
2959   return true;
2960 }
2961 
2962 void UnwrappedLineParser::parseJavaEnumBody() {
2963   // Determine whether the enum is simple, i.e. does not have a semicolon or
2964   // constants with class bodies. Simple enums can be formatted like braced
2965   // lists, contracted to a single line, etc.
2966   unsigned StoredPosition = Tokens->getPosition();
2967   bool IsSimple = true;
2968   FormatToken *Tok = Tokens->getNextToken();
2969   while (!Tok->is(tok::eof)) {
2970     if (Tok->is(tok::r_brace))
2971       break;
2972     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2973       IsSimple = false;
2974       break;
2975     }
2976     // FIXME: This will also mark enums with braces in the arguments to enum
2977     // constants as "not simple". This is probably fine in practice, though.
2978     Tok = Tokens->getNextToken();
2979   }
2980   FormatTok = Tokens->setPosition(StoredPosition);
2981 
2982   if (IsSimple) {
2983     nextToken();
2984     parseBracedList();
2985     addUnwrappedLine();
2986     return;
2987   }
2988 
2989   // Parse the body of a more complex enum.
2990   // First add a line for everything up to the "{".
2991   nextToken();
2992   addUnwrappedLine();
2993   ++Line->Level;
2994 
2995   // Parse the enum constants.
2996   while (FormatTok) {
2997     if (FormatTok->is(tok::l_brace)) {
2998       // Parse the constant's class body.
2999       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3000                  /*MunchSemi=*/false);
3001     } else if (FormatTok->is(tok::l_paren)) {
3002       parseParens();
3003     } else if (FormatTok->is(tok::comma)) {
3004       nextToken();
3005       addUnwrappedLine();
3006     } else if (FormatTok->is(tok::semi)) {
3007       nextToken();
3008       addUnwrappedLine();
3009       break;
3010     } else if (FormatTok->is(tok::r_brace)) {
3011       addUnwrappedLine();
3012       break;
3013     } else {
3014       nextToken();
3015     }
3016   }
3017 
3018   // Parse the class body after the enum's ";" if any.
3019   parseLevel(/*HasOpeningBrace=*/true);
3020   nextToken();
3021   --Line->Level;
3022   addUnwrappedLine();
3023 }
3024 
3025 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3026   const FormatToken &InitialToken = *FormatTok;
3027   nextToken();
3028 
3029   // The actual identifier can be a nested name specifier, and in macros
3030   // it is often token-pasted.
3031   // An [[attribute]] can be before the identifier.
3032   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3033                             tok::kw___attribute, tok::kw___declspec,
3034                             tok::kw_alignas, tok::l_square, tok::r_square) ||
3035          ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3036           FormatTok->isOneOf(tok::period, tok::comma))) {
3037     if (Style.isJavaScript() &&
3038         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3039       // JavaScript/TypeScript supports inline object types in
3040       // extends/implements positions:
3041       //     class Foo implements {bar: number} { }
3042       nextToken();
3043       if (FormatTok->is(tok::l_brace)) {
3044         tryToParseBracedList();
3045         continue;
3046       }
3047     }
3048     bool IsNonMacroIdentifier =
3049         FormatTok->is(tok::identifier) &&
3050         FormatTok->TokenText != FormatTok->TokenText.upper();
3051     nextToken();
3052     // We can have macros or attributes in between 'class' and the class name.
3053     if (!IsNonMacroIdentifier) {
3054       if (FormatTok->Tok.is(tok::l_paren)) {
3055         parseParens();
3056       } else if (FormatTok->is(TT_AttributeSquare)) {
3057         parseSquare();
3058         // Consume the closing TT_AttributeSquare.
3059         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3060           nextToken();
3061       }
3062     }
3063   }
3064 
3065   // Note that parsing away template declarations here leads to incorrectly
3066   // accepting function declarations as record declarations.
3067   // In general, we cannot solve this problem. Consider:
3068   // class A<int> B() {}
3069   // which can be a function definition or a class definition when B() is a
3070   // macro. If we find enough real-world cases where this is a problem, we
3071   // can parse for the 'template' keyword in the beginning of the statement,
3072   // and thus rule out the record production in case there is no template
3073   // (this would still leave us with an ambiguity between template function
3074   // and class declarations).
3075   if (FormatTok->isOneOf(tok::colon, tok::less)) {
3076     while (!eof()) {
3077       if (FormatTok->is(tok::l_brace)) {
3078         calculateBraceTypes(/*ExpectClassBody=*/true);
3079         if (!tryToParseBracedList())
3080           break;
3081       }
3082       if (FormatTok->is(tok::l_square) && !tryToParseLambda())
3083         break;
3084       if (FormatTok->Tok.is(tok::semi))
3085         return;
3086       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3087         addUnwrappedLine();
3088         nextToken();
3089         parseCSharpGenericTypeConstraint();
3090         break;
3091       }
3092       nextToken();
3093     }
3094   }
3095   if (FormatTok->Tok.is(tok::l_brace)) {
3096     if (ParseAsExpr) {
3097       parseChildBlock();
3098     } else {
3099       if (ShouldBreakBeforeBrace(Style, InitialToken))
3100         addUnwrappedLine();
3101 
3102       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3103       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3104     }
3105   }
3106   // There is no addUnwrappedLine() here so that we fall through to parsing a
3107   // structural element afterwards. Thus, in "class A {} n, m;",
3108   // "} n, m;" will end up in one unwrapped line.
3109 }
3110 
3111 void UnwrappedLineParser::parseObjCMethod() {
3112   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
3113          "'(' or identifier expected.");
3114   do {
3115     if (FormatTok->Tok.is(tok::semi)) {
3116       nextToken();
3117       addUnwrappedLine();
3118       return;
3119     } else if (FormatTok->Tok.is(tok::l_brace)) {
3120       if (Style.BraceWrapping.AfterFunction)
3121         addUnwrappedLine();
3122       parseBlock();
3123       addUnwrappedLine();
3124       return;
3125     } else {
3126       nextToken();
3127     }
3128   } while (!eof());
3129 }
3130 
3131 void UnwrappedLineParser::parseObjCProtocolList() {
3132   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
3133   do {
3134     nextToken();
3135     // Early exit in case someone forgot a close angle.
3136     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3137         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3138       return;
3139   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
3140   nextToken(); // Skip '>'.
3141 }
3142 
3143 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3144   do {
3145     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
3146       nextToken();
3147       addUnwrappedLine();
3148       break;
3149     }
3150     if (FormatTok->is(tok::l_brace)) {
3151       parseBlock();
3152       // In ObjC interfaces, nothing should be following the "}".
3153       addUnwrappedLine();
3154     } else if (FormatTok->is(tok::r_brace)) {
3155       // Ignore stray "}". parseStructuralElement doesn't consume them.
3156       nextToken();
3157       addUnwrappedLine();
3158     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3159       nextToken();
3160       parseObjCMethod();
3161     } else {
3162       parseStructuralElement();
3163     }
3164   } while (!eof());
3165 }
3166 
3167 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3168   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3169          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3170   nextToken();
3171   nextToken(); // interface name
3172 
3173   // @interface can be followed by a lightweight generic
3174   // specialization list, then either a base class or a category.
3175   if (FormatTok->Tok.is(tok::less)) {
3176     parseObjCLightweightGenerics();
3177   }
3178   if (FormatTok->Tok.is(tok::colon)) {
3179     nextToken();
3180     nextToken(); // base class name
3181     // The base class can also have lightweight generics applied to it.
3182     if (FormatTok->Tok.is(tok::less)) {
3183       parseObjCLightweightGenerics();
3184     }
3185   } else if (FormatTok->Tok.is(tok::l_paren))
3186     // Skip category, if present.
3187     parseParens();
3188 
3189   if (FormatTok->Tok.is(tok::less))
3190     parseObjCProtocolList();
3191 
3192   if (FormatTok->Tok.is(tok::l_brace)) {
3193     if (Style.BraceWrapping.AfterObjCDeclaration)
3194       addUnwrappedLine();
3195     parseBlock(/*MustBeDeclaration=*/true);
3196   }
3197 
3198   // With instance variables, this puts '}' on its own line.  Without instance
3199   // variables, this ends the @interface line.
3200   addUnwrappedLine();
3201 
3202   parseObjCUntilAtEnd();
3203 }
3204 
3205 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3206   assert(FormatTok->Tok.is(tok::less));
3207   // Unlike protocol lists, generic parameterizations support
3208   // nested angles:
3209   //
3210   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3211   //     NSObject <NSCopying, NSSecureCoding>
3212   //
3213   // so we need to count how many open angles we have left.
3214   unsigned NumOpenAngles = 1;
3215   do {
3216     nextToken();
3217     // Early exit in case someone forgot a close angle.
3218     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3219         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3220       break;
3221     if (FormatTok->Tok.is(tok::less))
3222       ++NumOpenAngles;
3223     else if (FormatTok->Tok.is(tok::greater)) {
3224       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3225       --NumOpenAngles;
3226     }
3227   } while (!eof() && NumOpenAngles != 0);
3228   nextToken(); // Skip '>'.
3229 }
3230 
3231 // Returns true for the declaration/definition form of @protocol,
3232 // false for the expression form.
3233 bool UnwrappedLineParser::parseObjCProtocol() {
3234   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3235   nextToken();
3236 
3237   if (FormatTok->is(tok::l_paren))
3238     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3239     return false;
3240 
3241   // The definition/declaration form,
3242   // @protocol Foo
3243   // - (int)someMethod;
3244   // @end
3245 
3246   nextToken(); // protocol name
3247 
3248   if (FormatTok->Tok.is(tok::less))
3249     parseObjCProtocolList();
3250 
3251   // Check for protocol declaration.
3252   if (FormatTok->Tok.is(tok::semi)) {
3253     nextToken();
3254     addUnwrappedLine();
3255     return true;
3256   }
3257 
3258   addUnwrappedLine();
3259   parseObjCUntilAtEnd();
3260   return true;
3261 }
3262 
3263 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3264   bool IsImport = FormatTok->is(Keywords.kw_import);
3265   assert(IsImport || FormatTok->is(tok::kw_export));
3266   nextToken();
3267 
3268   // Consume the "default" in "export default class/function".
3269   if (FormatTok->is(tok::kw_default))
3270     nextToken();
3271 
3272   // Consume "async function", "function" and "default function", so that these
3273   // get parsed as free-standing JS functions, i.e. do not require a trailing
3274   // semicolon.
3275   if (FormatTok->is(Keywords.kw_async))
3276     nextToken();
3277   if (FormatTok->is(Keywords.kw_function)) {
3278     nextToken();
3279     return;
3280   }
3281 
3282   // For imports, `export *`, `export {...}`, consume the rest of the line up
3283   // to the terminating `;`. For everything else, just return and continue
3284   // parsing the structural element, i.e. the declaration or expression for
3285   // `export default`.
3286   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3287       !FormatTok->isStringLiteral())
3288     return;
3289 
3290   while (!eof()) {
3291     if (FormatTok->is(tok::semi))
3292       return;
3293     if (Line->Tokens.empty()) {
3294       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3295       // import statement should terminate.
3296       return;
3297     }
3298     if (FormatTok->is(tok::l_brace)) {
3299       FormatTok->setBlockKind(BK_Block);
3300       nextToken();
3301       parseBracedList();
3302     } else {
3303       nextToken();
3304     }
3305   }
3306 }
3307 
3308 void UnwrappedLineParser::parseStatementMacro() {
3309   nextToken();
3310   if (FormatTok->is(tok::l_paren))
3311     parseParens();
3312   if (FormatTok->is(tok::semi))
3313     nextToken();
3314   addUnwrappedLine();
3315 }
3316 
3317 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3318                                                  StringRef Prefix = "") {
3319   llvm::dbgs() << Prefix << "Line(" << Line.Level
3320                << ", FSC=" << Line.FirstStartColumn << ")"
3321                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3322   for (const auto &Node : Line.Tokens) {
3323     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3324                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3325                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3326   }
3327   for (const auto &Node : Line.Tokens)
3328     for (const auto &ChildNode : Node.Children)
3329       printDebugInfo(ChildNode, "\nChild: ");
3330 
3331   llvm::dbgs() << "\n";
3332 }
3333 
3334 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3335   if (Line->Tokens.empty())
3336     return;
3337   LLVM_DEBUG({
3338     if (CurrentLines == &Lines)
3339       printDebugInfo(*Line);
3340   });
3341 
3342   // If this line closes a block when in Whitesmiths mode, remember that
3343   // information so that the level can be decreased after the line is added.
3344   // This has to happen after the addition of the line since the line itself
3345   // needs to be indented.
3346   bool ClosesWhitesmithsBlock =
3347       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3348       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3349 
3350   CurrentLines->push_back(std::move(*Line));
3351   Line->Tokens.clear();
3352   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3353   Line->FirstStartColumn = 0;
3354 
3355   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3356     --Line->Level;
3357   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3358     CurrentLines->append(
3359         std::make_move_iterator(PreprocessorDirectives.begin()),
3360         std::make_move_iterator(PreprocessorDirectives.end()));
3361     PreprocessorDirectives.clear();
3362   }
3363   // Disconnect the current token from the last token on the previous line.
3364   FormatTok->Previous = nullptr;
3365 }
3366 
3367 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3368 
3369 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3370   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3371          FormatTok.NewlinesBefore > 0;
3372 }
3373 
3374 // Checks if \p FormatTok is a line comment that continues the line comment
3375 // section on \p Line.
3376 static bool
3377 continuesLineCommentSection(const FormatToken &FormatTok,
3378                             const UnwrappedLine &Line,
3379                             const llvm::Regex &CommentPragmasRegex) {
3380   if (Line.Tokens.empty())
3381     return false;
3382 
3383   StringRef IndentContent = FormatTok.TokenText;
3384   if (FormatTok.TokenText.startswith("//") ||
3385       FormatTok.TokenText.startswith("/*"))
3386     IndentContent = FormatTok.TokenText.substr(2);
3387   if (CommentPragmasRegex.match(IndentContent))
3388     return false;
3389 
3390   // If Line starts with a line comment, then FormatTok continues the comment
3391   // section if its original column is greater or equal to the original start
3392   // column of the line.
3393   //
3394   // Define the min column token of a line as follows: if a line ends in '{' or
3395   // contains a '{' followed by a line comment, then the min column token is
3396   // that '{'. Otherwise, the min column token of the line is the first token of
3397   // the line.
3398   //
3399   // If Line starts with a token other than a line comment, then FormatTok
3400   // continues the comment section if its original column is greater than the
3401   // original start column of the min column token of the line.
3402   //
3403   // For example, the second line comment continues the first in these cases:
3404   //
3405   // // first line
3406   // // second line
3407   //
3408   // and:
3409   //
3410   // // first line
3411   //  // second line
3412   //
3413   // and:
3414   //
3415   // int i; // first line
3416   //  // second line
3417   //
3418   // and:
3419   //
3420   // do { // first line
3421   //      // second line
3422   //   int i;
3423   // } while (true);
3424   //
3425   // and:
3426   //
3427   // enum {
3428   //   a, // first line
3429   //    // second line
3430   //   b
3431   // };
3432   //
3433   // The second line comment doesn't continue the first in these cases:
3434   //
3435   //   // first line
3436   //  // second line
3437   //
3438   // and:
3439   //
3440   // int i; // first line
3441   // // second line
3442   //
3443   // and:
3444   //
3445   // do { // first line
3446   //   // second line
3447   //   int i;
3448   // } while (true);
3449   //
3450   // and:
3451   //
3452   // enum {
3453   //   a, // first line
3454   //   // second line
3455   // };
3456   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3457 
3458   // Scan for '{//'. If found, use the column of '{' as a min column for line
3459   // comment section continuation.
3460   const FormatToken *PreviousToken = nullptr;
3461   for (const UnwrappedLineNode &Node : Line.Tokens) {
3462     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3463         isLineComment(*Node.Tok)) {
3464       MinColumnToken = PreviousToken;
3465       break;
3466     }
3467     PreviousToken = Node.Tok;
3468 
3469     // Grab the last newline preceding a token in this unwrapped line.
3470     if (Node.Tok->NewlinesBefore > 0) {
3471       MinColumnToken = Node.Tok;
3472     }
3473   }
3474   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3475     MinColumnToken = PreviousToken;
3476   }
3477 
3478   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3479                               MinColumnToken);
3480 }
3481 
3482 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3483   bool JustComments = Line->Tokens.empty();
3484   for (FormatToken *Tok : CommentsBeforeNextToken) {
3485     // Line comments that belong to the same line comment section are put on the
3486     // same line since later we might want to reflow content between them.
3487     // Additional fine-grained breaking of line comment sections is controlled
3488     // by the class BreakableLineCommentSection in case it is desirable to keep
3489     // several line comment sections in the same unwrapped line.
3490     //
3491     // FIXME: Consider putting separate line comment sections as children to the
3492     // unwrapped line instead.
3493     Tok->ContinuesLineCommentSection =
3494         continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
3495     if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
3496       addUnwrappedLine();
3497     pushToken(Tok);
3498   }
3499   if (NewlineBeforeNext && JustComments)
3500     addUnwrappedLine();
3501   CommentsBeforeNextToken.clear();
3502 }
3503 
3504 void UnwrappedLineParser::nextToken(int LevelDifference) {
3505   if (eof())
3506     return;
3507   flushComments(isOnNewLine(*FormatTok));
3508   pushToken(FormatTok);
3509   FormatToken *Previous = FormatTok;
3510   if (!Style.isJavaScript())
3511     readToken(LevelDifference);
3512   else
3513     readTokenWithJavaScriptASI();
3514   FormatTok->Previous = Previous;
3515 }
3516 
3517 void UnwrappedLineParser::distributeComments(
3518     const SmallVectorImpl<FormatToken *> &Comments,
3519     const FormatToken *NextTok) {
3520   // Whether or not a line comment token continues a line is controlled by
3521   // the method continuesLineCommentSection, with the following caveat:
3522   //
3523   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3524   // that each comment line from the trail is aligned with the next token, if
3525   // the next token exists. If a trail exists, the beginning of the maximal
3526   // trail is marked as a start of a new comment section.
3527   //
3528   // For example in this code:
3529   //
3530   // int a; // line about a
3531   //   // line 1 about b
3532   //   // line 2 about b
3533   //   int b;
3534   //
3535   // the two lines about b form a maximal trail, so there are two sections, the
3536   // first one consisting of the single comment "// line about a" and the
3537   // second one consisting of the next two comments.
3538   if (Comments.empty())
3539     return;
3540   bool ShouldPushCommentsInCurrentLine = true;
3541   bool HasTrailAlignedWithNextToken = false;
3542   unsigned StartOfTrailAlignedWithNextToken = 0;
3543   if (NextTok) {
3544     // We are skipping the first element intentionally.
3545     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3546       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3547         HasTrailAlignedWithNextToken = true;
3548         StartOfTrailAlignedWithNextToken = i;
3549       }
3550     }
3551   }
3552   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3553     FormatToken *FormatTok = Comments[i];
3554     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3555       FormatTok->ContinuesLineCommentSection = false;
3556     } else {
3557       FormatTok->ContinuesLineCommentSection =
3558           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3559     }
3560     if (!FormatTok->ContinuesLineCommentSection &&
3561         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3562       ShouldPushCommentsInCurrentLine = false;
3563     }
3564     if (ShouldPushCommentsInCurrentLine) {
3565       pushToken(FormatTok);
3566     } else {
3567       CommentsBeforeNextToken.push_back(FormatTok);
3568     }
3569   }
3570 }
3571 
3572 void UnwrappedLineParser::readToken(int LevelDifference) {
3573   SmallVector<FormatToken *, 1> Comments;
3574   do {
3575     FormatTok = Tokens->getNextToken();
3576     assert(FormatTok);
3577     while (FormatTok->getType() == TT_ConflictStart ||
3578            FormatTok->getType() == TT_ConflictEnd ||
3579            FormatTok->getType() == TT_ConflictAlternative) {
3580       if (FormatTok->getType() == TT_ConflictStart) {
3581         conditionalCompilationStart(/*Unreachable=*/false);
3582       } else if (FormatTok->getType() == TT_ConflictAlternative) {
3583         conditionalCompilationAlternative();
3584       } else if (FormatTok->getType() == TT_ConflictEnd) {
3585         conditionalCompilationEnd();
3586       }
3587       FormatTok = Tokens->getNextToken();
3588       FormatTok->MustBreakBefore = true;
3589     }
3590 
3591     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3592            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3593       distributeComments(Comments, FormatTok);
3594       Comments.clear();
3595       // If there is an unfinished unwrapped line, we flush the preprocessor
3596       // directives only after that unwrapped line was finished later.
3597       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3598       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3599       assert((LevelDifference >= 0 ||
3600               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3601              "LevelDifference makes Line->Level negative");
3602       Line->Level += LevelDifference;
3603       // Comments stored before the preprocessor directive need to be output
3604       // before the preprocessor directive, at the same level as the
3605       // preprocessor directive, as we consider them to apply to the directive.
3606       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3607           PPBranchLevel > 0)
3608         Line->Level += PPBranchLevel;
3609       flushComments(isOnNewLine(*FormatTok));
3610       parsePPDirective();
3611     }
3612 
3613     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3614         !Line->InPPDirective) {
3615       continue;
3616     }
3617 
3618     if (!FormatTok->Tok.is(tok::comment)) {
3619       distributeComments(Comments, FormatTok);
3620       Comments.clear();
3621       return;
3622     }
3623 
3624     Comments.push_back(FormatTok);
3625   } while (!eof());
3626 
3627   distributeComments(Comments, nullptr);
3628   Comments.clear();
3629 }
3630 
3631 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3632   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3633   if (MustBreakBeforeNextToken) {
3634     Line->Tokens.back().Tok->MustBreakBefore = true;
3635     MustBreakBeforeNextToken = false;
3636   }
3637 }
3638 
3639 } // end namespace format
3640 } // end namespace clang
3641