1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31 
32   // Returns the next token in the token stream.
33   virtual FormatToken *getNextToken() = 0;
34 
35   // Returns the token precedint the token returned by the last call to
36   // getNextToken() in the token stream, or nullptr if no such token exists.
37   virtual FormatToken *getPreviousToken() = 0;
38 
39   // Returns the token that would be returned by the next call to
40   // getNextToken().
41   virtual FormatToken *peekNextToken() = 0;
42 
43   // Returns whether we are at the end of the file.
44   // This can be different from whether getNextToken() returned an eof token
45   // when the FormatTokenSource is a view on a part of the token stream.
46   virtual bool isEOF() = 0;
47 
48   // Gets the current position in the token stream, to be used by setPosition().
49   virtual unsigned getPosition() = 0;
50 
51   // Resets the token stream to the state it was in when getPosition() returned
52   // Position, and return the token at that position in the stream.
53   virtual FormatToken *setPosition(unsigned Position) = 0;
54 };
55 
56 namespace {
57 
58 class ScopedDeclarationState {
59 public:
60   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
61                          bool MustBeDeclaration)
62       : Line(Line), Stack(Stack) {
63     Line.MustBeDeclaration = MustBeDeclaration;
64     Stack.push_back(MustBeDeclaration);
65   }
66   ~ScopedDeclarationState() {
67     Stack.pop_back();
68     if (!Stack.empty())
69       Line.MustBeDeclaration = Stack.back();
70     else
71       Line.MustBeDeclaration = true;
72   }
73 
74 private:
75   UnwrappedLine &Line;
76   std::vector<bool> &Stack;
77 };
78 
79 static bool isLineComment(const FormatToken &FormatTok) {
80   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
81 }
82 
83 // Checks if \p FormatTok is a line comment that continues the line comment
84 // \p Previous. The original column of \p MinColumnToken is used to determine
85 // whether \p FormatTok is indented enough to the right to continue \p Previous.
86 static bool continuesLineComment(const FormatToken &FormatTok,
87                                  const FormatToken *Previous,
88                                  const FormatToken *MinColumnToken) {
89   if (!Previous || !MinColumnToken)
90     return false;
91   unsigned MinContinueColumn =
92       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
93   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
94          isLineComment(*Previous) &&
95          FormatTok.OriginalColumn >= MinContinueColumn;
96 }
97 
98 class ScopedMacroState : public FormatTokenSource {
99 public:
100   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
101                    FormatToken *&ResetToken)
102       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
103         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
104         Token(nullptr), PreviousToken(nullptr) {
105     FakeEOF.Tok.startToken();
106     FakeEOF.Tok.setKind(tok::eof);
107     TokenSource = this;
108     Line.Level = 0;
109     Line.InPPDirective = true;
110   }
111 
112   ~ScopedMacroState() override {
113     TokenSource = PreviousTokenSource;
114     ResetToken = Token;
115     Line.InPPDirective = false;
116     Line.Level = PreviousLineLevel;
117   }
118 
119   FormatToken *getNextToken() override {
120     // The \c UnwrappedLineParser guards against this by never calling
121     // \c getNextToken() after it has encountered the first eof token.
122     assert(!eof());
123     PreviousToken = Token;
124     Token = PreviousTokenSource->getNextToken();
125     if (eof())
126       return &FakeEOF;
127     return Token;
128   }
129 
130   FormatToken *getPreviousToken() override {
131     return PreviousTokenSource->getPreviousToken();
132   }
133 
134   FormatToken *peekNextToken() override {
135     if (eof())
136       return &FakeEOF;
137     return PreviousTokenSource->peekNextToken();
138   }
139 
140   bool isEOF() override { return PreviousTokenSource->isEOF(); }
141 
142   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
143 
144   FormatToken *setPosition(unsigned Position) override {
145     PreviousToken = nullptr;
146     Token = PreviousTokenSource->setPosition(Position);
147     return Token;
148   }
149 
150 private:
151   bool eof() {
152     return Token && Token->HasUnescapedNewline &&
153            !continuesLineComment(*Token, PreviousToken,
154                                  /*MinColumnToken=*/PreviousToken);
155   }
156 
157   FormatToken FakeEOF;
158   UnwrappedLine &Line;
159   FormatTokenSource *&TokenSource;
160   FormatToken *&ResetToken;
161   unsigned PreviousLineLevel;
162   FormatTokenSource *PreviousTokenSource;
163 
164   FormatToken *Token;
165   FormatToken *PreviousToken;
166 };
167 
168 } // end anonymous namespace
169 
170 class ScopedLineState {
171 public:
172   ScopedLineState(UnwrappedLineParser &Parser,
173                   bool SwitchToPreprocessorLines = false)
174       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
175     if (SwitchToPreprocessorLines)
176       Parser.CurrentLines = &Parser.PreprocessorDirectives;
177     else if (!Parser.Line->Tokens.empty())
178       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
179     PreBlockLine = std::move(Parser.Line);
180     Parser.Line = std::make_unique<UnwrappedLine>();
181     Parser.Line->Level = PreBlockLine->Level;
182     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
183   }
184 
185   ~ScopedLineState() {
186     if (!Parser.Line->Tokens.empty()) {
187       Parser.addUnwrappedLine();
188     }
189     assert(Parser.Line->Tokens.empty());
190     Parser.Line = std::move(PreBlockLine);
191     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
192       Parser.MustBreakBeforeNextToken = true;
193     Parser.CurrentLines = OriginalLines;
194   }
195 
196 private:
197   UnwrappedLineParser &Parser;
198 
199   std::unique_ptr<UnwrappedLine> PreBlockLine;
200   SmallVectorImpl<UnwrappedLine> *OriginalLines;
201 };
202 
203 class CompoundStatementIndenter {
204 public:
205   CompoundStatementIndenter(UnwrappedLineParser *Parser,
206                             const FormatStyle &Style, unsigned &LineLevel)
207       : CompoundStatementIndenter(Parser, LineLevel,
208                                   Style.BraceWrapping.AfterControlStatement,
209                                   Style.BraceWrapping.IndentBraces) {}
210   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
211                             bool WrapBrace, bool IndentBrace)
212       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
213     if (WrapBrace)
214       Parser->addUnwrappedLine();
215     if (IndentBrace)
216       ++LineLevel;
217   }
218   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
219 
220 private:
221   unsigned &LineLevel;
222   unsigned OldLineLevel;
223 };
224 
225 namespace {
226 
227 class IndexedTokenSource : public FormatTokenSource {
228 public:
229   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
230       : Tokens(Tokens), Position(-1) {}
231 
232   FormatToken *getNextToken() override {
233     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
234       LLVM_DEBUG({
235         llvm::dbgs() << "Next ";
236         dbgToken(Position);
237       });
238       return Tokens[Position];
239     }
240     ++Position;
241     LLVM_DEBUG({
242       llvm::dbgs() << "Next ";
243       dbgToken(Position);
244     });
245     return Tokens[Position];
246   }
247 
248   FormatToken *getPreviousToken() override {
249     assert(Position > 0);
250     return Tokens[Position - 1];
251   }
252 
253   FormatToken *peekNextToken() override {
254     int Next = Position + 1;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Peeking ";
257       dbgToken(Next);
258     });
259     return Tokens[Next];
260   }
261 
262   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
263 
264   unsigned getPosition() override {
265     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
266     assert(Position >= 0);
267     return Position;
268   }
269 
270   FormatToken *setPosition(unsigned P) override {
271     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
272     Position = P;
273     return Tokens[Position];
274   }
275 
276   void reset() { Position = -1; }
277 
278 private:
279   void dbgToken(int Position, llvm::StringRef Indent = "") {
280     FormatToken *Tok = Tokens[Position];
281     llvm::dbgs() << Indent << "[" << Position
282                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
283                  << ", Macro: " << !!Tok->MacroCtx << "\n";
284   }
285 
286   ArrayRef<FormatToken *> Tokens;
287   int Position;
288 };
289 
290 } // end anonymous namespace
291 
/// Sets up a parser over \p Tokens that reports finished lines to
/// \p Callback.
///
/// The parser starts with a fresh, empty line, collects lines into \c Lines,
/// has no active token source yet (\c Tokens member is null until parse()
/// installs one) and is outside any preprocessor branch (level -1).
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      // Include-guard detection is switched off up front when preprocessor
      // directives are not indented at all.
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
305 
306 void UnwrappedLineParser::reset() {
307   PPBranchLevel = -1;
308   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
309                      ? IG_Rejected
310                      : IG_Inited;
311   IncludeGuardToken = nullptr;
312   Line.reset(new UnwrappedLine);
313   CommentsBeforeNextToken.clear();
314   FormatTok = nullptr;
315   MustBreakBeforeNextToken = false;
316   PreprocessorDirectives.clear();
317   CurrentLines = &Lines;
318   DeclarationScopeStack.clear();
319   PPStack.clear();
320   Line->FirstStartColumn = FirstStartColumn;
321 }
322 
323 void UnwrappedLineParser::parse() {
324   IndexedTokenSource TokenSource(AllTokens);
325   Line->FirstStartColumn = FirstStartColumn;
326   do {
327     LLVM_DEBUG(llvm::dbgs() << "----\n");
328     reset();
329     Tokens = &TokenSource;
330     TokenSource.reset();
331 
332     readToken();
333     parseFile();
334 
335     // If we found an include guard then all preprocessor directives (other than
336     // the guard) are over-indented by one.
337     if (IncludeGuard == IG_Found)
338       for (auto &Line : Lines)
339         if (Line.InPPDirective && Line.Level > 0)
340           --Line.Level;
341 
342     // Create line with eof token.
343     pushToken(FormatTok);
344     addUnwrappedLine();
345 
346     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
347                                                   E = Lines.end();
348          I != E; ++I) {
349       Callback.consumeUnwrappedLine(*I);
350     }
351     Callback.finishRun();
352     Lines.clear();
353     while (!PPLevelBranchIndex.empty() &&
354            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
355       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
356       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
357     }
358     if (!PPLevelBranchIndex.empty()) {
359       ++PPLevelBranchIndex.back();
360       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
361       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
362     }
363   } while (!PPLevelBranchIndex.empty());
364 }
365 
366 void UnwrappedLineParser::parseFile() {
367   // The top-level context in a file always has declarations, except for pre-
368   // processor directives and JavaScript files.
369   bool MustBeDeclaration =
370       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
371   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
372                                           MustBeDeclaration);
373   if (Style.Language == FormatStyle::LK_TextProto)
374     parseBracedList();
375   else
376     parseLevel(/*HasOpeningBrace=*/false);
377   // Make sure to format the remaining tokens.
378   //
379   // LK_TextProto is special since its top-level is parsed as the body of a
380   // braced list, which does not necessarily have natural line separators such
381   // as a semicolon. Comments after the last entry that have been determined to
382   // not belong to that line, as in:
383   //   key: value
384   //   // endfile comment
385   // do not have a chance to be put on a line of their own until this point.
386   // Here we add this newline before end-of-file comments.
387   if (Style.Language == FormatStyle::LK_TextProto &&
388       !CommentsBeforeNextToken.empty())
389     addUnwrappedLine();
390   flushComments(true);
391   addUnwrappedLine();
392 }
393 
394 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
395   do {
396     switch (FormatTok->Tok.getKind()) {
397     case tok::l_brace:
398       return;
399     default:
400       if (FormatTok->is(Keywords.kw_where)) {
401         addUnwrappedLine();
402         nextToken();
403         parseCSharpGenericTypeConstraint();
404         break;
405       }
406       nextToken();
407       break;
408     }
409   } while (!eof());
410 }
411 
412 void UnwrappedLineParser::parseCSharpAttribute() {
413   int UnpairedSquareBrackets = 1;
414   do {
415     switch (FormatTok->Tok.getKind()) {
416     case tok::r_square:
417       nextToken();
418       --UnpairedSquareBrackets;
419       if (UnpairedSquareBrackets == 0) {
420         addUnwrappedLine();
421         return;
422       }
423       break;
424     case tok::l_square:
425       ++UnpairedSquareBrackets;
426       nextToken();
427       break;
428     default:
429       nextToken();
430       break;
431     }
432   } while (!eof());
433 }
434 
435 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
436   bool SwitchLabelEncountered = false;
437   do {
438     tok::TokenKind kind = FormatTok->Tok.getKind();
439     if (FormatTok->getType() == TT_MacroBlockBegin) {
440       kind = tok::l_brace;
441     } else if (FormatTok->getType() == TT_MacroBlockEnd) {
442       kind = tok::r_brace;
443     }
444 
445     switch (kind) {
446     case tok::comment:
447       nextToken();
448       addUnwrappedLine();
449       break;
450     case tok::l_brace:
451       // FIXME: Add parameter whether this can happen - if this happens, we must
452       // be in a non-declaration context.
453       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
454         continue;
455       parseBlock();
456       addUnwrappedLine();
457       break;
458     case tok::r_brace:
459       if (HasOpeningBrace)
460         return;
461       nextToken();
462       addUnwrappedLine();
463       break;
464     case tok::kw_default: {
465       unsigned StoredPosition = Tokens->getPosition();
466       FormatToken *Next;
467       do {
468         Next = Tokens->getNextToken();
469       } while (Next->is(tok::comment));
470       FormatTok = Tokens->setPosition(StoredPosition);
471       if (Next && Next->isNot(tok::colon)) {
472         // default not followed by ':' is not a case label; treat it like
473         // an identifier.
474         parseStructuralElement();
475         break;
476       }
477       // Else, if it is 'default:', fall through to the case handling.
478       LLVM_FALLTHROUGH;
479     }
480     case tok::kw_case:
481       if (Style.Language == FormatStyle::LK_JavaScript &&
482           Line->MustBeDeclaration) {
483         // A 'case: string' style field declaration.
484         parseStructuralElement();
485         break;
486       }
487       if (!SwitchLabelEncountered &&
488           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
489         ++Line->Level;
490       SwitchLabelEncountered = true;
491       parseStructuralElement();
492       break;
493     case tok::l_square:
494       if (Style.isCSharp()) {
495         nextToken();
496         parseCSharpAttribute();
497         break;
498       }
499       LLVM_FALLTHROUGH;
500     default:
501       parseStructuralElement(!HasOpeningBrace);
502       break;
503     }
504   } while (!eof());
505 }
506 
507 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
508   // We'll parse forward through the tokens until we hit
509   // a closing brace or eof - note that getNextToken() will
510   // parse macros, so this will magically work inside macro
511   // definitions, too.
512   unsigned StoredPosition = Tokens->getPosition();
513   FormatToken *Tok = FormatTok;
514   const FormatToken *PrevTok = Tok->Previous;
515   // Keep a stack of positions of lbrace tokens. We will
516   // update information about whether an lbrace starts a
517   // braced init list or a different block during the loop.
518   SmallVector<FormatToken *, 8> LBraceStack;
519   assert(Tok->Tok.is(tok::l_brace));
520   do {
521     // Get next non-comment token.
522     FormatToken *NextTok;
523     unsigned ReadTokens = 0;
524     do {
525       NextTok = Tokens->getNextToken();
526       ++ReadTokens;
527     } while (NextTok->is(tok::comment));
528 
529     switch (Tok->Tok.getKind()) {
530     case tok::l_brace:
531       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
532         if (PrevTok->isOneOf(tok::colon, tok::less))
533           // A ':' indicates this code is in a type, or a braced list
534           // following a label in an object literal ({a: {b: 1}}).
535           // A '<' could be an object used in a comparison, but that is nonsense
536           // code (can never return true), so more likely it is a generic type
537           // argument (`X<{a: string; b: number}>`).
538           // The code below could be confused by semicolons between the
539           // individual members in a type member list, which would normally
540           // trigger BK_Block. In both cases, this must be parsed as an inline
541           // braced init.
542           Tok->setBlockKind(BK_BracedInit);
543         else if (PrevTok->is(tok::r_paren))
544           // `) { }` can only occur in function or method declarations in JS.
545           Tok->setBlockKind(BK_Block);
546       } else {
547         Tok->setBlockKind(BK_Unknown);
548       }
549       LBraceStack.push_back(Tok);
550       break;
551     case tok::r_brace:
552       if (LBraceStack.empty())
553         break;
554       if (LBraceStack.back()->is(BK_Unknown)) {
555         bool ProbablyBracedList = false;
556         if (Style.Language == FormatStyle::LK_Proto) {
557           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
558         } else {
559           // Skip NextTok over preprocessor lines, otherwise we may not
560           // properly diagnose the block as a braced intializer
561           // if the comma separator appears after the pp directive.
562           while (NextTok->is(tok::hash)) {
563             ScopedMacroState MacroState(*Line, Tokens, NextTok);
564             do {
565               NextTok = Tokens->getNextToken();
566               ++ReadTokens;
567             } while (NextTok->isNot(tok::eof));
568           }
569 
570           // Using OriginalColumn to distinguish between ObjC methods and
571           // binary operators is a bit hacky.
572           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
573                                   NextTok->OriginalColumn == 0;
574 
575           // If there is a comma, semicolon or right paren after the closing
576           // brace, we assume this is a braced initializer list.  Note that
577           // regardless how we mark inner braces here, we will overwrite the
578           // BlockKind later if we parse a braced list (where all blocks
579           // inside are by default braced lists), or when we explicitly detect
580           // blocks (for example while parsing lambdas).
581           ProbablyBracedList =
582               (Style.Language == FormatStyle::LK_JavaScript &&
583                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
584                                 Keywords.kw_as)) ||
585               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
586               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
587                                tok::r_paren, tok::r_square, tok::ellipsis) ||
588               (NextTok->isOneOf(tok::l_brace, tok::identifier) &&
589                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
590               (NextTok->is(tok::semi) &&
591                (!ExpectClassBody || LBraceStack.size() != 1)) ||
592               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
593           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
594             // We can have an array subscript after a braced init
595             // list, but C++11 attributes are expected after blocks.
596             NextTok = Tokens->getNextToken();
597             ++ReadTokens;
598             ProbablyBracedList = NextTok->isNot(tok::l_square);
599           }
600         }
601         if (ProbablyBracedList) {
602           Tok->setBlockKind(BK_BracedInit);
603           LBraceStack.back()->setBlockKind(BK_BracedInit);
604         } else {
605           Tok->setBlockKind(BK_Block);
606           LBraceStack.back()->setBlockKind(BK_Block);
607         }
608       }
609       LBraceStack.pop_back();
610       break;
611     case tok::identifier:
612       if (!Tok->is(TT_StatementMacro))
613         break;
614       LLVM_FALLTHROUGH;
615     case tok::at:
616     case tok::semi:
617     case tok::kw_if:
618     case tok::kw_while:
619     case tok::kw_for:
620     case tok::kw_switch:
621     case tok::kw_try:
622     case tok::kw___try:
623       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
624         LBraceStack.back()->setBlockKind(BK_Block);
625       break;
626     default:
627       break;
628     }
629     PrevTok = Tok;
630     Tok = NextTok;
631   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
632 
633   // Assume other blocks for all unclosed opening braces.
634   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
635     if (LBraceStack[i]->is(BK_Unknown))
636       LBraceStack[i]->setBlockKind(BK_Block);
637   }
638 
639   FormatTok = Tokens->setPosition(StoredPosition);
640 }
641 
/// Folds the std::hash of \p v into \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
647 
648 size_t UnwrappedLineParser::computePPHash() const {
649   size_t h = 0;
650   for (const auto &i : PPStack) {
651     hash_combine(h, size_t(i.Kind));
652     hash_combine(h, i.Line);
653   }
654   return h;
655 }
656 
657 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
658                                      bool MunchSemi,
659                                      bool UnindentWhitesmithsBraces) {
660   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
661          "'{' or macro block token expected");
662   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
663   FormatTok->setBlockKind(BK_Block);
664 
665   // For Whitesmiths mode, jump to the next level prior to skipping over the
666   // braces.
667   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
668     ++Line->Level;
669 
670   size_t PPStartHash = computePPHash();
671 
672   unsigned InitialLevel = Line->Level;
673   nextToken(/*LevelDifference=*/AddLevels);
674 
675   if (MacroBlock && FormatTok->is(tok::l_paren))
676     parseParens();
677 
678   size_t NbPreprocessorDirectives =
679       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
680   addUnwrappedLine();
681   size_t OpeningLineIndex =
682       CurrentLines->empty()
683           ? (UnwrappedLine::kInvalidIndex)
684           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
685 
686   // Whitesmiths is weird here. The brace needs to be indented for the namespace
687   // block, but the block itself may not be indented depending on the style
688   // settings. This allows the format to back up one level in those cases.
689   if (UnindentWhitesmithsBraces)
690     --Line->Level;
691 
692   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
693                                           MustBeDeclaration);
694   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
695     Line->Level += AddLevels;
696   parseLevel(/*HasOpeningBrace=*/true);
697 
698   if (eof())
699     return;
700 
701   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
702                  : !FormatTok->is(tok::r_brace)) {
703     Line->Level = InitialLevel;
704     FormatTok->setBlockKind(BK_Block);
705     return;
706   }
707 
708   size_t PPEndHash = computePPHash();
709 
710   // Munch the closing brace.
711   nextToken(/*LevelDifference=*/-AddLevels);
712 
713   if (MacroBlock && FormatTok->is(tok::l_paren))
714     parseParens();
715 
716   if (FormatTok->is(tok::arrow)) {
717     // Following the } we can find a trailing return type arrow
718     // as part of an implicit conversion constraint.
719     nextToken();
720     parseStructuralElement();
721   }
722 
723   if (MunchSemi && FormatTok->Tok.is(tok::semi))
724     nextToken();
725 
726   Line->Level = InitialLevel;
727 
728   if (PPStartHash == PPEndHash) {
729     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
730     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
731       // Update the opening line to add the forward reference as well
732       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
733           CurrentLines->size() - 1;
734     }
735   }
736 }
737 
738 static bool isGoogScope(const UnwrappedLine &Line) {
739   // FIXME: Closure-library specific stuff should not be hard-coded but be
740   // configurable.
741   if (Line.Tokens.size() < 4)
742     return false;
743   auto I = Line.Tokens.begin();
744   if (I->Tok->TokenText != "goog")
745     return false;
746   ++I;
747   if (I->Tok->isNot(tok::period))
748     return false;
749   ++I;
750   if (I->Tok->TokenText != "scope")
751     return false;
752   ++I;
753   return I->Tok->is(tok::l_paren);
754 }
755 
756 static bool isIIFE(const UnwrappedLine &Line,
757                    const AdditionalKeywords &Keywords) {
758   // Look for the start of an immediately invoked anonymous function.
759   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
760   // This is commonly done in JavaScript to create a new, anonymous scope.
761   // Example: (function() { ... })()
762   if (Line.Tokens.size() < 3)
763     return false;
764   auto I = Line.Tokens.begin();
765   if (I->Tok->isNot(tok::l_paren))
766     return false;
767   ++I;
768   if (I->Tok->isNot(Keywords.kw_function))
769     return false;
770   ++I;
771   return I->Tok->is(tok::l_paren);
772 }
773 
774 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
775                                    const FormatToken &InitialToken) {
776   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
777     return Style.BraceWrapping.AfterNamespace;
778   if (InitialToken.is(tok::kw_class))
779     return Style.BraceWrapping.AfterClass;
780   if (InitialToken.is(tok::kw_union))
781     return Style.BraceWrapping.AfterUnion;
782   if (InitialToken.is(tok::kw_struct))
783     return Style.BraceWrapping.AfterStruct;
784   return false;
785 }
786 
787 void UnwrappedLineParser::parseChildBlock() {
788   FormatTok->setBlockKind(BK_Block);
789   nextToken();
790   {
791     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
792                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
793     ScopedLineState LineState(*this);
794     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
795                                             /*MustBeDeclaration=*/false);
796     Line->Level += SkipIndent ? 0 : 1;
797     parseLevel(/*HasOpeningBrace=*/true);
798     flushComments(isOnNewLine(*FormatTok));
799     Line->Level -= SkipIndent ? 0 : 1;
800   }
801   nextToken();
802 }
803 
804 void UnwrappedLineParser::parsePPDirective() {
805   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
806   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
807 
808   nextToken();
809 
810   if (!FormatTok->Tok.getIdentifierInfo()) {
811     parsePPUnknown();
812     return;
813   }
814 
815   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
816   case tok::pp_define:
817     parsePPDefine();
818     return;
819   case tok::pp_if:
820     parsePPIf(/*IfDef=*/false);
821     break;
822   case tok::pp_ifdef:
823   case tok::pp_ifndef:
824     parsePPIf(/*IfDef=*/true);
825     break;
826   case tok::pp_else:
827     parsePPElse();
828     break;
829   case tok::pp_elifdef:
830   case tok::pp_elifndef:
831   case tok::pp_elif:
832     parsePPElIf();
833     break;
834   case tok::pp_endif:
835     parsePPEndIf();
836     break;
837   default:
838     parsePPUnknown();
839     break;
840   }
841 }
842 
843 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
844   size_t Line = CurrentLines->size();
845   if (CurrentLines == &PreprocessorDirectives)
846     Line += Lines.size();
847 
848   if (Unreachable ||
849       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
850     PPStack.push_back({PP_Unreachable, Line});
851   else
852     PPStack.push_back({PP_Conditional, Line});
853 }
854 
855 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
856   ++PPBranchLevel;
857   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
858   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
859     PPLevelBranchIndex.push_back(0);
860     PPLevelBranchCount.push_back(0);
861   }
862   PPChainBranchIndex.push(0);
863   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
864   conditionalCompilationCondition(Unreachable || Skip);
865 }
866 
867 void UnwrappedLineParser::conditionalCompilationAlternative() {
868   if (!PPStack.empty())
869     PPStack.pop_back();
870   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
871   if (!PPChainBranchIndex.empty())
872     ++PPChainBranchIndex.top();
873   conditionalCompilationCondition(
874       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
875       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
876 }
877 
878 void UnwrappedLineParser::conditionalCompilationEnd() {
879   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
880   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
881     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
882       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
883     }
884   }
885   // Guard against #endif's without #if.
886   if (PPBranchLevel > -1)
887     --PPBranchLevel;
888   if (!PPChainBranchIndex.empty())
889     PPChainBranchIndex.pop();
890   if (!PPStack.empty())
891     PPStack.pop_back();
892 }
893 
894 void UnwrappedLineParser::parsePPIf(bool IfDef) {
895   bool IfNDef = FormatTok->is(tok::pp_ifndef);
896   nextToken();
897   bool Unreachable = false;
898   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
899     Unreachable = true;
900   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
901     Unreachable = true;
902   conditionalCompilationStart(Unreachable);
903   FormatToken *IfCondition = FormatTok;
904   // If there's a #ifndef on the first line, and the only lines before it are
905   // comments, it could be an include guard.
906   bool MaybeIncludeGuard = IfNDef;
907   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
908     for (auto &Line : Lines) {
909       if (!Line.Tokens.front().Tok->is(tok::comment)) {
910         MaybeIncludeGuard = false;
911         IncludeGuard = IG_Rejected;
912         break;
913       }
914     }
915   --PPBranchLevel;
916   parsePPUnknown();
917   ++PPBranchLevel;
918   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
919     IncludeGuard = IG_IfNdefed;
920     IncludeGuardToken = IfCondition;
921   }
922 }
923 
924 void UnwrappedLineParser::parsePPElse() {
925   // If a potential include guard has an #else, it's not an include guard.
926   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
927     IncludeGuard = IG_Rejected;
928   conditionalCompilationAlternative();
929   if (PPBranchLevel > -1)
930     --PPBranchLevel;
931   parsePPUnknown();
932   ++PPBranchLevel;
933 }
934 
935 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
936 
937 void UnwrappedLineParser::parsePPEndIf() {
938   conditionalCompilationEnd();
939   parsePPUnknown();
940   // If the #endif of a potential include guard is the last thing in the file,
941   // then we found an include guard.
942   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
943       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
944     IncludeGuard = IG_Found;
945 }
946 
947 void UnwrappedLineParser::parsePPDefine() {
948   nextToken();
949 
950   if (!FormatTok->Tok.getIdentifierInfo()) {
951     IncludeGuard = IG_Rejected;
952     IncludeGuardToken = nullptr;
953     parsePPUnknown();
954     return;
955   }
956 
957   if (IncludeGuard == IG_IfNdefed &&
958       IncludeGuardToken->TokenText == FormatTok->TokenText) {
959     IncludeGuard = IG_Defined;
960     IncludeGuardToken = nullptr;
961     for (auto &Line : Lines) {
962       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
963         IncludeGuard = IG_Rejected;
964         break;
965       }
966     }
967   }
968 
969   nextToken();
970   if (FormatTok->Tok.getKind() == tok::l_paren &&
971       FormatTok->WhitespaceRange.getBegin() ==
972           FormatTok->WhitespaceRange.getEnd()) {
973     parseParens();
974   }
975   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
976     Line->Level += PPBranchLevel + 1;
977   addUnwrappedLine();
978   ++Line->Level;
979 
980   // Errors during a preprocessor directive can only affect the layout of the
981   // preprocessor directive, and thus we ignore them. An alternative approach
982   // would be to use the same approach we use on the file level (no
983   // re-indentation if there was a structural error) within the macro
984   // definition.
985   parseFile();
986 }
987 
988 void UnwrappedLineParser::parsePPUnknown() {
989   do {
990     nextToken();
991   } while (!eof());
992   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
993     Line->Level += PPBranchLevel + 1;
994   addUnwrappedLine();
995 }
996 
997 // Here we exclude certain tokens that are not usually the first token in an
998 // unwrapped line. This is used in attempt to distinguish macro calls without
999 // trailing semicolons from other constructs split to several lines.
1000 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1001   // Semicolon can be a null-statement, l_square can be a start of a macro or
1002   // a C++11 attribute, but this doesn't seem to be common.
1003   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1004          Tok.isNot(TT_AttributeSquare) &&
1005          // Tokens that can only be used as binary operators and a part of
1006          // overloaded operator names.
1007          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1008          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1009          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1010          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1011          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1012          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1013          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1014          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1015          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1016          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1017          Tok.isNot(tok::lesslessequal) &&
1018          // Colon is used in labels, base class lists, initializer lists,
1019          // range-based for loops, ternary operator, but should never be the
1020          // first token in an unwrapped line.
1021          Tok.isNot(tok::colon) &&
1022          // 'noexcept' is a trailing annotation.
1023          Tok.isNot(tok::kw_noexcept);
1024 }
1025 
1026 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1027                           const FormatToken *FormatTok) {
1028   // FIXME: This returns true for C/C++ keywords like 'struct'.
1029   return FormatTok->is(tok::identifier) &&
1030          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1031           !FormatTok->isOneOf(
1032               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1033               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1034               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1035               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1036               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1037               Keywords.kw_instanceof, Keywords.kw_interface,
1038               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1039 }
1040 
1041 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1042                                  const FormatToken *FormatTok) {
1043   return FormatTok->Tok.isLiteral() ||
1044          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1045          mustBeJSIdent(Keywords, FormatTok);
1046 }
1047 
1048 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1049 // when encountered after a value (see mustBeJSIdentOrValue).
1050 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1051                            const FormatToken *FormatTok) {
1052   return FormatTok->isOneOf(
1053       tok::kw_return, Keywords.kw_yield,
1054       // conditionals
1055       tok::kw_if, tok::kw_else,
1056       // loops
1057       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1058       // switch/case
1059       tok::kw_switch, tok::kw_case,
1060       // exceptions
1061       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1062       // declaration
1063       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1064       Keywords.kw_async, Keywords.kw_function,
1065       // import/export
1066       Keywords.kw_import, tok::kw_export);
1067 }
1068 
1069 // Checks whether a token is a type in K&R C (aka C78).
1070 static bool isC78Type(const FormatToken &Tok) {
1071   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1072                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1073                      tok::identifier);
1074 }
1075 
1076 // This function checks whether a token starts the first parameter declaration
1077 // in a K&R C (aka C78) function definition, e.g.:
1078 //   int f(a, b)
1079 //   short a, b;
1080 //   {
1081 //      return a + b;
1082 //   }
1083 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1084                                const FormatToken *FuncName) {
1085   assert(Tok);
1086   assert(Next);
1087   assert(FuncName);
1088 
1089   if (FuncName->isNot(tok::identifier))
1090     return false;
1091 
1092   const FormatToken *Prev = FuncName->Previous;
1093   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1094     return false;
1095 
1096   if (!isC78Type(*Tok) &&
1097       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1098     return false;
1099 
1100   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1101     return false;
1102 
1103   Tok = Tok->Previous;
1104   if (!Tok || Tok->isNot(tok::r_paren))
1105     return false;
1106 
1107   Tok = Tok->Previous;
1108   if (!Tok || Tok->isNot(tok::identifier))
1109     return false;
1110 
1111   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1112 }
1113 
1114 void UnwrappedLineParser::parseModuleImport() {
1115   nextToken();
1116   while (!eof()) {
1117     if (FormatTok->is(tok::colon)) {
1118       FormatTok->setType(TT_ModulePartitionColon);
1119     }
1120     // Handle import <foo/bar.h> as we would an include statement.
1121     else if (FormatTok->is(tok::less)) {
1122       nextToken();
1123       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1124         // Mark tokens up to the trailing line comments as implicit string
1125         // literals.
1126         if (FormatTok->isNot(tok::comment) &&
1127             !FormatTok->TokenText.startswith("//"))
1128           FormatTok->setType(TT_ImplicitStringLiteral);
1129         nextToken();
1130       }
1131     }
1132     if (FormatTok->is(tok::semi)) {
1133       nextToken();
1134       break;
1135     }
1136     nextToken();
1137   }
1138 
1139   addUnwrappedLine();
1140   return;
1141 }
1142 
1143 // readTokenWithJavaScriptASI reads the next token and terminates the current
1144 // line if JavaScript Automatic Semicolon Insertion must
1145 // happen between the current token and the next token.
1146 //
1147 // This method is conservative - it cannot cover all edge cases of JavaScript,
1148 // but only aims to correctly handle certain well known cases. It *must not*
1149 // return true in speculative cases.
1150 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1151   FormatToken *Previous = FormatTok;
1152   readToken();
1153   FormatToken *Next = FormatTok;
1154 
1155   bool IsOnSameLine =
1156       CommentsBeforeNextToken.empty()
1157           ? Next->NewlinesBefore == 0
1158           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1159   if (IsOnSameLine)
1160     return;
1161 
1162   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1163   bool PreviousStartsTemplateExpr =
1164       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1165   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1166     // If the line contains an '@' sign, the previous token might be an
1167     // annotation, which can precede another identifier/value.
1168     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1169       return LineNode.Tok->is(tok::at);
1170     });
1171     if (HasAt)
1172       return;
1173   }
1174   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1175     return addUnwrappedLine();
1176   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1177   bool NextEndsTemplateExpr =
1178       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1179   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1180       (PreviousMustBeValue ||
1181        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1182                          tok::minusminus)))
1183     return addUnwrappedLine();
1184   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1185       isJSDeclOrStmt(Keywords, Next))
1186     return addUnwrappedLine();
1187 }
1188 
// Parses one structural element beginning at the current token (FormatTok)
// and emits the resulting unwrapped line(s) through addUnwrappedLine().
//
// The work happens in two stages:
//  1. After a special case for TableGen includes, a switch on the kind of the
//     first token hands well-known constructs (namespaces, access specifiers,
//     if/for/while/do/switch/case/try, labels, extern "..." blocks,
//     export/inline namespaces, and identifier-introduced constructs such as
//     foreach macros, macro blocks, imports, signals/slots sections,
//     statement macros and namespace macros) to their dedicated parsers and
//     returns. Cases that do not recognize their construct break out to
//     stage 2. In JavaScript, when Line->MustBeDeclaration is set, the
//     control-flow keywords are treated as field/method names and also fall
//     through to stage 2.
//  2. A do/while loop consumes the remaining tokens one construct at a time
//     until eof() or until a construct that terminates the element (e.g. a
//     ';', a '}' or a function block) has been handled.
//
// \p IsTopLevel is only consulted after a parenthesized list, where it gates
// the detection of K&R C parameter declarations (see isC78ParameterDecl).
1189 void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
1190   if (Style.Language == FormatStyle::LK_TableGen &&
1191       FormatTok->is(tok::pp_include)) {
1192     nextToken();
1193     if (FormatTok->is(tok::string_literal))
1194       nextToken();
1195     addUnwrappedLine();
1196     return;
1197   }
1198   switch (FormatTok->Tok.getKind()) { // Stage 1: dispatch on first token.
1199   case tok::kw_asm:
1200     nextToken();
1201     if (FormatTok->is(tok::l_brace)) {
1202       FormatTok->setType(TT_InlineASMBrace);
1203       nextToken();
1204       while (FormatTok && FormatTok->isNot(tok::eof)) {
1205         if (FormatTok->is(tok::r_brace)) {
1206           FormatTok->setType(TT_InlineASMBrace);
1207           nextToken();
1208           addUnwrappedLine();
1209           break;
1210         }
1211         FormatTok->Finalized = true;
1212         nextToken();
1213       }
1214     }
1215     break;
1216   case tok::kw_namespace:
1217     parseNamespace();
1218     return;
1219   case tok::kw_public:
1220   case tok::kw_protected:
1221   case tok::kw_private:
1222     if (Style.Language == FormatStyle::LK_Java ||
1223         Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
1224       nextToken();
1225     else
1226       parseAccessSpecifier();
1227     return;
1228   case tok::kw_if:
1229     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1230       // field/method declaration.
1231       break;
1232     parseIfThenElse();
1233     return;
1234   case tok::kw_for:
1235   case tok::kw_while:
1236     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1237       // field/method declaration.
1238       break;
1239     parseForOrWhileLoop();
1240     return;
1241   case tok::kw_do:
1242     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1243       // field/method declaration.
1244       break;
1245     parseDoWhile();
1246     return;
1247   case tok::kw_switch:
1248     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1249       // 'switch: string' field declaration.
1250       break;
1251     parseSwitch();
1252     return;
1253   case tok::kw_default:
1254     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1255       // 'default: string' field declaration.
1256       break;
1257     nextToken();
1258     if (FormatTok->is(tok::colon)) {
1259       parseLabel();
1260       return;
1261     }
1262     // e.g. "default void f() {}" in a Java interface.
1263     break;
1264   case tok::kw_case:
1265     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1266       // 'case: string' field declaration.
1267       break;
1268     parseCaseLabel();
1269     return;
1270   case tok::kw_try:
1271   case tok::kw___try:
1272     if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
1273       // field/method declaration.
1274       break;
1275     parseTryCatch();
1276     return;
1277   case tok::kw_extern:
1278     nextToken();
1279     if (FormatTok->Tok.is(tok::string_literal)) {
1280       nextToken();
1281       if (FormatTok->Tok.is(tok::l_brace)) {
1282         if (!Style.IndentExternBlock) {
1283           if (Style.BraceWrapping.AfterExternBlock) {
1284             addUnwrappedLine();
1285           }
1286           unsigned AddLevels = Style.BraceWrapping.AfterExternBlock ? 1u : 0u;
1287           parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1288         } else {
1289           unsigned AddLevels =
1290               Style.IndentExternBlock == FormatStyle::IEBS_Indent ? 1u : 0u;
1291           parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1292         }
1293         addUnwrappedLine();
1294         return;
1295       }
1296     }
1297     break;
1298   case tok::kw_export:
1299     if (Style.Language == FormatStyle::LK_JavaScript) {
1300       parseJavaScriptEs6ImportExport();
1301       return;
1302     }
1303     if (!Style.isCpp())
1304       break;
1305     // Handle C++ "(inline|export) namespace".
1306     LLVM_FALLTHROUGH;
1307   case tok::kw_inline:
1308     nextToken();
1309     if (FormatTok->Tok.is(tok::kw_namespace)) {
1310       parseNamespace();
1311       return;
1312     }
1313     break;
1314   case tok::identifier:
1315     if (FormatTok->is(TT_ForEachMacro)) {
1316       parseForOrWhileLoop();
1317       return;
1318     }
1319     if (FormatTok->is(TT_MacroBlockBegin)) {
1320       parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1321                  /*MunchSemi=*/false);
1322       return;
1323     }
1324     if (FormatTok->is(Keywords.kw_import)) {
1325       if (Style.Language == FormatStyle::LK_JavaScript) {
1326         parseJavaScriptEs6ImportExport();
1327         return;
1328       }
1329       if (Style.Language == FormatStyle::LK_Proto) {
1330         nextToken();
1331         if (FormatTok->is(tok::kw_public))
1332           nextToken();
1333         if (!FormatTok->is(tok::string_literal))
1334           return;
1335         nextToken();
1336         if (FormatTok->is(tok::semi))
1337           nextToken();
1338         addUnwrappedLine();
1339         return;
1340       }
1341       if (Style.isCpp()) {
1342         parseModuleImport();
1343         return;
1344       }
1345     }
1346     if (Style.isCpp() &&
1347         FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1348                            Keywords.kw_slots, Keywords.kw_qslots)) {
1349       nextToken();
1350       if (FormatTok->is(tok::colon)) {
1351         nextToken();
1352         addUnwrappedLine();
1353         return;
1354       }
1355     }
1356     if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1357       parseStatementMacro();
1358       return;
1359     }
1360     if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1361       parseNamespace();
1362       return;
1363     }
1364     // In all other cases, parse the declaration.
1365     break;
1366   default:
1367     break;
1368   }
1369   do { // Stage 2: consume the element one construct at a time.
1370     const FormatToken *Previous = FormatTok->Previous;
1371     switch (FormatTok->Tok.getKind()) {
1372     case tok::at:
1373       nextToken();
1374       if (FormatTok->Tok.is(tok::l_brace)) {
1375         nextToken();
1376         parseBracedList();
1377         break;
1378       } else if (Style.Language == FormatStyle::LK_Java &&
1379                  FormatTok->is(Keywords.kw_interface)) {
1380         nextToken();
1381         break;
1382       }
1383       switch (FormatTok->Tok.getObjCKeywordID()) {
1384       case tok::objc_public:
1385       case tok::objc_protected:
1386       case tok::objc_package:
1387       case tok::objc_private:
1388         return parseAccessSpecifier();
1389       case tok::objc_interface:
1390       case tok::objc_implementation:
1391         return parseObjCInterfaceOrImplementation();
1392       case tok::objc_protocol:
1393         if (parseObjCProtocol())
1394           return;
1395         break;
1396       case tok::objc_end:
1397         return; // Handled by the caller.
1398       case tok::objc_optional:
1399       case tok::objc_required:
1400         nextToken();
1401         addUnwrappedLine();
1402         return;
1403       case tok::objc_autoreleasepool:
1404         nextToken();
1405         if (FormatTok->Tok.is(tok::l_brace)) {
1406           if (Style.BraceWrapping.AfterControlStatement ==
1407               FormatStyle::BWACS_Always)
1408             addUnwrappedLine();
1409           parseBlock();
1410         }
1411         addUnwrappedLine();
1412         return;
1413       case tok::objc_synchronized:
1414         nextToken();
1415         if (FormatTok->Tok.is(tok::l_paren))
1416           // Skip synchronization object
1417           parseParens();
1418         if (FormatTok->Tok.is(tok::l_brace)) {
1419           if (Style.BraceWrapping.AfterControlStatement ==
1420               FormatStyle::BWACS_Always)
1421             addUnwrappedLine();
1422           parseBlock();
1423         }
1424         addUnwrappedLine();
1425         return;
1426       case tok::objc_try:
1427         // This branch isn't strictly necessary (the kw_try case below would
1428         // do this too after the tok::at is parsed above).  But be explicit.
1429         parseTryCatch();
1430         return;
1431       default:
1432         break;
1433       }
1434       break;
1435     case tok::kw_concept:
1436       parseConcept();
1437       break;
1438     case tok::kw_requires:
1439       parseRequires();
1440       break;
1441     case tok::kw_enum:
1442       // Ignore if this is part of "template <enum ...".
1443       if (Previous && Previous->is(tok::less)) {
1444         nextToken();
1445         break;
1446       }
1447
1448       // parseEnum falls through and does not yet add an unwrapped line as an
1449       // enum definition can start a structural element.
1450       if (!parseEnum())
1451         break;
1452       // This only applies for C++.
1453       if (!Style.isCpp()) {
1454         addUnwrappedLine();
1455         return;
1456       }
1457       break;
1458     case tok::kw_typedef:
1459       nextToken();
1460       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1461                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1462                              Keywords.kw_CF_CLOSED_ENUM,
1463                              Keywords.kw_NS_CLOSED_ENUM))
1464         parseEnum();
1465       break;
1466     case tok::kw_struct:
1467     case tok::kw_union:
1468     case tok::kw_class:
1469       if (parseStructLike()) {
1470         return;
1471       }
1472       break;
1473     case tok::period:
1474       nextToken();
1475       // In Java, classes have an implicit static member "class".
1476       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1477           FormatTok->is(tok::kw_class))
1478         nextToken();
1479       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
1480           FormatTok->Tok.getIdentifierInfo())
1481         // JavaScript only has pseudo keywords, all keywords are allowed to
1482         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1483         nextToken();
1484       break;
1485     case tok::semi:
1486       nextToken();
1487       addUnwrappedLine();
1488       return;
1489     case tok::r_brace:
1490       addUnwrappedLine();
1491       return;
1492     case tok::l_paren: {
1493       parseParens();
1494       // Break the unwrapped line if a K&R C function definition has a parameter
1495       // declaration.
1496       if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1497         break;
1498       if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1499         addUnwrappedLine();
1500         return;
1501       }
1502       break;
1503     }
1504     case tok::kw_operator:
1505       nextToken();
1506       if (FormatTok->isBinaryOperator())
1507         nextToken();
1508       break;
1509     case tok::caret:
1510       nextToken();
1511       if (FormatTok->Tok.isAnyIdentifier() ||
1512           FormatTok->isSimpleTypeSpecifier())
1513         nextToken();
1514       if (FormatTok->is(tok::l_paren))
1515         parseParens();
1516       if (FormatTok->is(tok::l_brace))
1517         parseChildBlock();
1518       break;
1519     case tok::l_brace:
1520       if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1521         // A block outside of parentheses must be the last part of a
1522         // structural element.
1523         // FIXME: Figure out cases where this is not true, and add projections
1524         // for them (the one we know is missing are lambdas).
1525         if (Style.BraceWrapping.AfterFunction)
1526           addUnwrappedLine();
1527         FormatTok->setType(TT_FunctionLBrace);
1528         parseBlock();
1529         addUnwrappedLine();
1530         return;
1531       }
1532       // Otherwise this was a braced init list, and the structural
1533       // element continues.
1534       break;
1535     case tok::kw_try:
1536       if (Style.Language == FormatStyle::LK_JavaScript &&
1537           Line->MustBeDeclaration) {
1538         // field/method declaration.
1539         nextToken();
1540         break;
1541       }
1542       // We arrive here when parsing function-try blocks.
1543       if (Style.BraceWrapping.AfterFunction)
1544         addUnwrappedLine();
1545       parseTryCatch();
1546       return;
1547     case tok::identifier: {
1548       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1549           Line->MustBeDeclaration) {
1550         addUnwrappedLine();
1551         parseCSharpGenericTypeConstraint();
1552         break;
1553       }
1554       if (FormatTok->is(TT_MacroBlockEnd)) {
1555         addUnwrappedLine();
1556         return;
1557       }
1558
1559       // Function declarations (as opposed to function expressions) are parsed
1560       // on their own unwrapped line by continuing this loop. Function
1561       // expressions (functions that are not on their own line) must not create
1562       // a new unwrapped line, so they are special cased below.
1563       size_t TokenCount = Line->Tokens.size();
1564       if (Style.Language == FormatStyle::LK_JavaScript &&
1565           FormatTok->is(Keywords.kw_function) &&
1566           (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1567                                                      Keywords.kw_async)))) {
1568         tryToParseJSFunction();
1569         break;
1570       }
1571       if ((Style.Language == FormatStyle::LK_JavaScript ||
1572            Style.Language == FormatStyle::LK_Java) &&
1573           FormatTok->is(Keywords.kw_interface)) {
1574         if (Style.Language == FormatStyle::LK_JavaScript) {
1575           // In JavaScript/TypeScript, "interface" can be used as a standalone
1576           // identifier, e.g. in `var interface = 1;`. If "interface" is
1577           // followed by another identifier, it is very likely to be an actual
1578           // interface declaration.
1579           unsigned StoredPosition = Tokens->getPosition();
1580           FormatToken *Next = Tokens->getNextToken();
1581           FormatTok = Tokens->setPosition(StoredPosition);
1582           if (!mustBeJSIdent(Keywords, Next)) {
1583             nextToken();
1584             break;
1585           }
1586         }
1587         parseRecord();
1588         addUnwrappedLine();
1589         return;
1590       }
1591
1592       if (FormatTok->is(Keywords.kw_interface)) {
1593         if (parseStructLike()) {
1594           return;
1595         }
1596         break;
1597       }
1598
1599       if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1600         parseStatementMacro();
1601         return;
1602       }
1603
1604       // See if the following token should start a new unwrapped line.
1605       StringRef Text = FormatTok->TokenText;
1606       nextToken();
1607
1608       // JS doesn't have macros, and within classes colons indicate fields, not
1609       // labels.
1610       if (Style.Language == FormatStyle::LK_JavaScript)
1611         break;
1612
1613       TokenCount = Line->Tokens.size();
1614       if (TokenCount == 1 ||
1615           (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
1616         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
1617           Line->Tokens.begin()->Tok->MustBreakBefore = true;
1618           parseLabel(!Style.IndentGotoLabels);
1619           return;
1620         }
1621         // Recognize function-like macro usages without trailing semicolon as
1622         // well as free-standing macros like Q_OBJECT.
1623         bool FunctionLike = FormatTok->is(tok::l_paren);
1624         if (FunctionLike)
1625           parseParens();
1626
1627         bool FollowedByNewline =
1628             CommentsBeforeNextToken.empty()
1629                 ? FormatTok->NewlinesBefore > 0
1630                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1631
1632         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1633             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1634           addUnwrappedLine();
1635           return;
1636         }
1637       }
1638       break;
1639     }
1640     case tok::equal:
1641       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1642       // TT_FatArrow. They always start an expression or a child block if
1643       // followed by a curly brace.
1644       if (FormatTok->is(TT_FatArrow)) {
1645         nextToken();
1646         if (FormatTok->is(tok::l_brace)) {
1647           // C# may break after => if the next character is a newline.
1648           if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
1649             // calling `addUnwrappedLine()` here causes odd parsing errors.
1650             FormatTok->MustBreakBefore = true;
1651           }
1652           parseChildBlock();
1653         }
1654         break;
1655       }
1656
1657       nextToken();
1658       if (FormatTok->Tok.is(tok::l_brace)) {
1659         // Block kind should probably be set to BK_BracedInit for any language.
1660         // C# needs this change to ensure that array initialisers and object
1661         // initialisers are indented the same way.
1662         if (Style.isCSharp())
1663           FormatTok->setBlockKind(BK_BracedInit);
1664         nextToken();
1665         parseBracedList();
1666       } else if (Style.Language == FormatStyle::LK_Proto &&
1667                  FormatTok->Tok.is(tok::less)) {
1668         nextToken();
1669         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1670                         /*ClosingBraceKind=*/tok::greater);
1671       }
1672       break;
1673     case tok::l_square:
1674       parseSquare();
1675       break;
1676     case tok::kw_new:
1677       parseNew();
1678       break;
1679     default:
1680       nextToken();
1681       break;
1682     }
1683   } while (!eof());
1684 }
1685 
1686 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1687   assert(FormatTok->is(tok::l_brace));
1688   if (!Style.isCSharp())
1689     return false;
1690   // See if it's a property accessor.
1691   if (FormatTok->Previous->isNot(tok::identifier))
1692     return false;
1693 
1694   // See if we are inside a property accessor.
1695   //
1696   // Record the current tokenPosition so that we can advance and
1697   // reset the current token. `Next` is not set yet so we need
1698   // another way to advance along the token stream.
1699   unsigned int StoredPosition = Tokens->getPosition();
1700   FormatToken *Tok = Tokens->getNextToken();
1701 
1702   // A trivial property accessor is of the form:
1703   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1704   // Track these as they do not require line breaks to be introduced.
1705   bool HasGetOrSet = false;
1706   bool IsTrivialPropertyAccessor = true;
1707   while (!eof()) {
1708     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1709                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1710                      Keywords.kw_set)) {
1711       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1712         HasGetOrSet = true;
1713       Tok = Tokens->getNextToken();
1714       continue;
1715     }
1716     if (Tok->isNot(tok::r_brace))
1717       IsTrivialPropertyAccessor = false;
1718     break;
1719   }
1720 
1721   if (!HasGetOrSet) {
1722     Tokens->setPosition(StoredPosition);
1723     return false;
1724   }
1725 
1726   // Try to parse the property accessor:
1727   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1728   Tokens->setPosition(StoredPosition);
1729   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1730     addUnwrappedLine();
1731   nextToken();
1732   do {
1733     switch (FormatTok->Tok.getKind()) {
1734     case tok::r_brace:
1735       nextToken();
1736       if (FormatTok->is(tok::equal)) {
1737         while (!eof() && FormatTok->isNot(tok::semi))
1738           nextToken();
1739         nextToken();
1740       }
1741       addUnwrappedLine();
1742       return true;
1743     case tok::l_brace:
1744       ++Line->Level;
1745       parseBlock(/*MustBeDeclaration=*/true);
1746       addUnwrappedLine();
1747       --Line->Level;
1748       break;
1749     case tok::equal:
1750       if (FormatTok->is(TT_FatArrow)) {
1751         ++Line->Level;
1752         do {
1753           nextToken();
1754         } while (!eof() && FormatTok->isNot(tok::semi));
1755         nextToken();
1756         addUnwrappedLine();
1757         --Line->Level;
1758         break;
1759       }
1760       nextToken();
1761       break;
1762     default:
1763       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1764           !IsTrivialPropertyAccessor) {
1765         // Non-trivial get/set needs to be on its own line.
1766         addUnwrappedLine();
1767       }
1768       nextToken();
1769     }
1770   } while (!eof());
1771 
1772   // Unreachable for well-formed code (paired '{' and '}').
1773   return true;
1774 }
1775 
1776 bool UnwrappedLineParser::tryToParseLambda() {
1777   if (!Style.isCpp()) {
1778     nextToken();
1779     return false;
1780   }
1781   assert(FormatTok->is(tok::l_square));
1782   FormatToken &LSquare = *FormatTok;
1783   if (!tryToParseLambdaIntroducer())
1784     return false;
1785 
1786   bool SeenArrow = false;
1787 
1788   while (FormatTok->isNot(tok::l_brace)) {
1789     if (FormatTok->isSimpleTypeSpecifier()) {
1790       nextToken();
1791       continue;
1792     }
1793     switch (FormatTok->Tok.getKind()) {
1794     case tok::l_brace:
1795       break;
1796     case tok::l_paren:
1797       parseParens();
1798       break;
1799     case tok::amp:
1800     case tok::star:
1801     case tok::kw_const:
1802     case tok::comma:
1803     case tok::less:
1804     case tok::greater:
1805     case tok::identifier:
1806     case tok::numeric_constant:
1807     case tok::coloncolon:
1808     case tok::kw_class:
1809     case tok::kw_mutable:
1810     case tok::kw_noexcept:
1811     case tok::kw_template:
1812     case tok::kw_typename:
1813       nextToken();
1814       break;
1815     // Specialization of a template with an integer parameter can contain
1816     // arithmetic, logical, comparison and ternary operators.
1817     //
1818     // FIXME: This also accepts sequences of operators that are not in the scope
1819     // of a template argument list.
1820     //
1821     // In a C++ lambda a template type can only occur after an arrow. We use
1822     // this as an heuristic to distinguish between Objective-C expressions
1823     // followed by an `a->b` expression, such as:
1824     // ([obj func:arg] + a->b)
1825     // Otherwise the code below would parse as a lambda.
1826     //
1827     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1828     // explicit template lists: []<bool b = true && false>(U &&u){}
1829     case tok::plus:
1830     case tok::minus:
1831     case tok::exclaim:
1832     case tok::tilde:
1833     case tok::slash:
1834     case tok::percent:
1835     case tok::lessless:
1836     case tok::pipe:
1837     case tok::pipepipe:
1838     case tok::ampamp:
1839     case tok::caret:
1840     case tok::equalequal:
1841     case tok::exclaimequal:
1842     case tok::greaterequal:
1843     case tok::lessequal:
1844     case tok::question:
1845     case tok::colon:
1846     case tok::ellipsis:
1847     case tok::kw_true:
1848     case tok::kw_false:
1849       if (SeenArrow) {
1850         nextToken();
1851         break;
1852       }
1853       return true;
1854     case tok::arrow:
1855       // This might or might not actually be a lambda arrow (this could be an
1856       // ObjC method invocation followed by a dereferencing arrow). We might
1857       // reset this back to TT_Unknown in TokenAnnotator.
1858       FormatTok->setType(TT_LambdaArrow);
1859       SeenArrow = true;
1860       nextToken();
1861       break;
1862     default:
1863       return true;
1864     }
1865   }
1866   FormatTok->setType(TT_LambdaLBrace);
1867   LSquare.setType(TT_LambdaLSquare);
1868   parseChildBlock();
1869   return true;
1870 }
1871 
1872 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1873   const FormatToken *Previous = FormatTok->Previous;
1874   if (Previous &&
1875       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1876                          tok::kw_delete, tok::l_square) ||
1877        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1878        Previous->isSimpleTypeSpecifier())) {
1879     nextToken();
1880     return false;
1881   }
1882   nextToken();
1883   if (FormatTok->is(tok::l_square)) {
1884     return false;
1885   }
1886   parseSquare(/*LambdaIntroducer=*/true);
1887   return true;
1888 }
1889 
1890 void UnwrappedLineParser::tryToParseJSFunction() {
1891   assert(FormatTok->is(Keywords.kw_function) ||
1892          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1893   if (FormatTok->is(Keywords.kw_async))
1894     nextToken();
1895   // Consume "function".
1896   nextToken();
1897 
1898   // Consume * (generator function). Treat it like C++'s overloaded operators.
1899   if (FormatTok->is(tok::star)) {
1900     FormatTok->setType(TT_OverloadedOperator);
1901     nextToken();
1902   }
1903 
1904   // Consume function name.
1905   if (FormatTok->is(tok::identifier))
1906     nextToken();
1907 
1908   if (FormatTok->isNot(tok::l_paren))
1909     return;
1910 
1911   // Parse formal parameter list.
1912   parseParens();
1913 
1914   if (FormatTok->is(tok::colon)) {
1915     // Parse a type definition.
1916     nextToken();
1917 
1918     // Eat the type declaration. For braced inline object types, balance braces,
1919     // otherwise just parse until finding an l_brace for the function body.
1920     if (FormatTok->is(tok::l_brace))
1921       tryToParseBracedList();
1922     else
1923       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1924         nextToken();
1925   }
1926 
1927   if (FormatTok->is(tok::semi))
1928     return;
1929 
1930   parseChildBlock();
1931 }
1932 
1933 bool UnwrappedLineParser::tryToParseBracedList() {
1934   if (FormatTok->is(BK_Unknown))
1935     calculateBraceTypes();
1936   assert(FormatTok->isNot(BK_Unknown));
1937   if (FormatTok->is(BK_Block))
1938     return false;
1939   nextToken();
1940   parseBracedList();
1941   return true;
1942 }
1943 
// Parses the elements of a braced list, consuming tokens until a token of kind
// ClosingBraceKind is found (that token is consumed as well).
//
// ContinueOnSemicolons: for languages other than JavaScript a ';' is treated
//   as an error. If this flag is false, parsing stops at the ';' and false is
//   returned; if true, the ';' is skipped and parsing continues.
// IsEnum: when set and Style.AllowShortEnumsOnASingleLine is off, an unwrapped
//   line is added after every ',' and before the closing token.
// ClosingBraceKind: the token kind that terminates the list.
//
// Returns true if the closing token was reached and no ';' error was seen.
// Returns false otherwise, including when the end of file is reached first.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          bool IsEnum,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    if (Style.isCSharp()) {
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_FatArrow. They always start an expression or a child block if
      // followed by a curly brace.
      if (FormatTok->is(TT_FatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          // C# may break after => if the next character is a newline.
          if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
            // calling `addUnwrappedLine()` here causes odd parsing errors.
            FormatTok->MustBreakBefore = true;
          }
          parseChildBlock();
          continue;
        }
      }
    }
    if (Style.Language == FormatStyle::LK_JavaScript) {
      // Function expressions are parsed as a unit.
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(TT_FatArrow)) {
        nextToken();
        // Fat arrows can be followed by simple expressions or by child blocks
        // in curly braces.
        if (FormatTok->is(tok::l_brace)) {
          parseChildBlock();
          continue;
        }
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    // Check for the end of the list before dispatching on the token kind, so
    // that the closing token is never handled by the switch below.
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::caret:
      // A '^' directly followed by '{' introduces a child block.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        parseChildBlock();
      }
      break;
    case tok::l_square:
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->setBlockKind(BK_BracedInit);
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // Only in Proto does '<' open a nested list (closed by '>'); elsewhere
      // it is just another token.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.Language == FormatStyle::LK_JavaScript) {
        nextToken();
        break;
      }
      HasError = true;
      // HasError was just set, so this early exit always returns false and
      // leaves the ';' unconsumed.
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      // Enumerators go on separate lines when short enums are not allowed.
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // Reached the end of file without finding the closing token.
  return false;
}
2062 
2063 void UnwrappedLineParser::parseParens() {
2064   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2065   nextToken();
2066   do {
2067     switch (FormatTok->Tok.getKind()) {
2068     case tok::l_paren:
2069       parseParens();
2070       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2071         parseChildBlock();
2072       break;
2073     case tok::r_paren:
2074       nextToken();
2075       return;
2076     case tok::r_brace:
2077       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2078       return;
2079     case tok::l_square:
2080       tryToParseLambda();
2081       break;
2082     case tok::l_brace:
2083       if (!tryToParseBracedList())
2084         parseChildBlock();
2085       break;
2086     case tok::at:
2087       nextToken();
2088       if (FormatTok->Tok.is(tok::l_brace)) {
2089         nextToken();
2090         parseBracedList();
2091       }
2092       break;
2093     case tok::equal:
2094       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2095         parseStructuralElement();
2096       else
2097         nextToken();
2098       break;
2099     case tok::kw_class:
2100       if (Style.Language == FormatStyle::LK_JavaScript)
2101         parseRecord(/*ParseAsExpr=*/true);
2102       else
2103         nextToken();
2104       break;
2105     case tok::identifier:
2106       if (Style.Language == FormatStyle::LK_JavaScript &&
2107           (FormatTok->is(Keywords.kw_function) ||
2108            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2109         tryToParseJSFunction();
2110       else
2111         nextToken();
2112       break;
2113     default:
2114       nextToken();
2115       break;
2116     }
2117   } while (!eof());
2118 }
2119 
2120 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2121   if (!LambdaIntroducer) {
2122     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2123     if (tryToParseLambda())
2124       return;
2125   }
2126   do {
2127     switch (FormatTok->Tok.getKind()) {
2128     case tok::l_paren:
2129       parseParens();
2130       break;
2131     case tok::r_square:
2132       nextToken();
2133       return;
2134     case tok::r_brace:
2135       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2136       return;
2137     case tok::l_square:
2138       parseSquare();
2139       break;
2140     case tok::l_brace: {
2141       if (!tryToParseBracedList())
2142         parseChildBlock();
2143       break;
2144     }
2145     case tok::at:
2146       nextToken();
2147       if (FormatTok->Tok.is(tok::l_brace)) {
2148         nextToken();
2149         parseBracedList();
2150       }
2151       break;
2152     default:
2153       nextToken();
2154       break;
2155     }
2156   } while (!eof());
2157 }
2158 
2159 void UnwrappedLineParser::parseIfThenElse() {
2160   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2161   nextToken();
2162   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2163     nextToken();
2164   if (FormatTok->Tok.is(tok::l_paren))
2165     parseParens();
2166   // handle  AttributeMacro  if (x) UNLIKELY
2167   if (FormatTok->is(TT_AttributeMacro))
2168     nextToken();
2169   // handle [[likely]] / [[unlikely]]
2170   if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2171     parseSquare();
2172   bool NeedsUnwrappedLine = false;
2173   if (FormatTok->Tok.is(tok::l_brace)) {
2174     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2175     parseBlock();
2176     if (Style.BraceWrapping.BeforeElse)
2177       addUnwrappedLine();
2178     else
2179       NeedsUnwrappedLine = true;
2180   } else {
2181     addUnwrappedLine();
2182     ++Line->Level;
2183     parseStructuralElement();
2184     --Line->Level;
2185   }
2186   if (FormatTok->Tok.is(tok::kw_else)) {
2187     nextToken();
2188     // handle  AttributeMacro  else UNLIKELY
2189     if (FormatTok->is(TT_AttributeMacro))
2190       nextToken();
2191     // handle [[likely]] / [[unlikely]]
2192     if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute())
2193       parseSquare();
2194     if (FormatTok->Tok.is(tok::l_brace)) {
2195       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2196       parseBlock();
2197       addUnwrappedLine();
2198     } else if (FormatTok->Tok.is(tok::kw_if)) {
2199       FormatToken *Previous = Tokens->getPreviousToken();
2200       bool PrecededByComment = Previous && Previous->is(tok::comment);
2201       if (PrecededByComment) {
2202         addUnwrappedLine();
2203         ++Line->Level;
2204       }
2205       parseIfThenElse();
2206       if (PrecededByComment)
2207         --Line->Level;
2208     } else {
2209       addUnwrappedLine();
2210       ++Line->Level;
2211       parseStructuralElement();
2212       if (FormatTok->is(tok::eof))
2213         addUnwrappedLine();
2214       --Line->Level;
2215     }
2216   } else if (NeedsUnwrappedLine) {
2217     addUnwrappedLine();
2218   }
2219 }
2220 
2221 void UnwrappedLineParser::parseTryCatch() {
2222   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2223   nextToken();
2224   bool NeedsUnwrappedLine = false;
2225   if (FormatTok->is(tok::colon)) {
2226     // We are in a function try block, what comes is an initializer list.
2227     nextToken();
2228 
2229     // In case identifiers were removed by clang-tidy, what might follow is
2230     // multiple commas in sequence - before the first identifier.
2231     while (FormatTok->is(tok::comma))
2232       nextToken();
2233 
2234     while (FormatTok->is(tok::identifier)) {
2235       nextToken();
2236       if (FormatTok->is(tok::l_paren))
2237         parseParens();
2238       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2239           FormatTok->is(tok::l_brace)) {
2240         do {
2241           nextToken();
2242         } while (!FormatTok->is(tok::r_brace));
2243         nextToken();
2244       }
2245 
2246       // In case identifiers were removed by clang-tidy, what might follow is
2247       // multiple commas in sequence - after the first identifier.
2248       while (FormatTok->is(tok::comma))
2249         nextToken();
2250     }
2251   }
2252   // Parse try with resource.
2253   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2254     parseParens();
2255   }
2256   if (FormatTok->is(tok::l_brace)) {
2257     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2258     parseBlock();
2259     if (Style.BraceWrapping.BeforeCatch) {
2260       addUnwrappedLine();
2261     } else {
2262       NeedsUnwrappedLine = true;
2263     }
2264   } else if (!FormatTok->is(tok::kw_catch)) {
2265     // The C++ standard requires a compound-statement after a try.
2266     // If there's none, we try to assume there's a structuralElement
2267     // and try to continue.
2268     addUnwrappedLine();
2269     ++Line->Level;
2270     parseStructuralElement();
2271     --Line->Level;
2272   }
2273   while (1) {
2274     if (FormatTok->is(tok::at))
2275       nextToken();
2276     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2277                              tok::kw___finally) ||
2278           ((Style.Language == FormatStyle::LK_Java ||
2279             Style.Language == FormatStyle::LK_JavaScript) &&
2280            FormatTok->is(Keywords.kw_finally)) ||
2281           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2282            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2283       break;
2284     nextToken();
2285     while (FormatTok->isNot(tok::l_brace)) {
2286       if (FormatTok->is(tok::l_paren)) {
2287         parseParens();
2288         continue;
2289       }
2290       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2291         return;
2292       nextToken();
2293     }
2294     NeedsUnwrappedLine = false;
2295     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2296     parseBlock();
2297     if (Style.BraceWrapping.BeforeCatch)
2298       addUnwrappedLine();
2299     else
2300       NeedsUnwrappedLine = true;
2301   }
2302   if (NeedsUnwrappedLine)
2303     addUnwrappedLine();
2304 }
2305 
2306 void UnwrappedLineParser::parseNamespace() {
2307   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2308          "'namespace' expected");
2309 
2310   const FormatToken &InitialToken = *FormatTok;
2311   nextToken();
2312   if (InitialToken.is(TT_NamespaceMacro)) {
2313     parseParens();
2314   } else {
2315     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2316                               tok::l_square, tok::period)) {
2317       if (FormatTok->is(tok::l_square))
2318         parseSquare();
2319       else
2320         nextToken();
2321     }
2322   }
2323   if (FormatTok->Tok.is(tok::l_brace)) {
2324     if (ShouldBreakBeforeBrace(Style, InitialToken))
2325       addUnwrappedLine();
2326 
2327     unsigned AddLevels =
2328         Style.NamespaceIndentation == FormatStyle::NI_All ||
2329                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2330                  DeclarationScopeStack.size() > 1)
2331             ? 1u
2332             : 0u;
2333     bool ManageWhitesmithsBraces =
2334         AddLevels == 0u &&
2335         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2336 
2337     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2338     // the whole block.
2339     if (ManageWhitesmithsBraces)
2340       ++Line->Level;
2341 
2342     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2343                /*MunchSemi=*/true,
2344                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2345 
2346     // Munch the semicolon after a namespace. This is more common than one would
2347     // think. Putting the semicolon into its own line is very ugly.
2348     if (FormatTok->Tok.is(tok::semi))
2349       nextToken();
2350 
2351     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2352 
2353     if (ManageWhitesmithsBraces)
2354       --Line->Level;
2355   }
2356   // FIXME: Add error handling.
2357 }
2358 
2359 void UnwrappedLineParser::parseNew() {
2360   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2361   nextToken();
2362 
2363   if (Style.isCSharp()) {
2364     do {
2365       if (FormatTok->is(tok::l_brace))
2366         parseBracedList();
2367 
2368       if (FormatTok->isOneOf(tok::semi, tok::comma))
2369         return;
2370 
2371       nextToken();
2372     } while (!eof());
2373   }
2374 
2375   if (Style.Language != FormatStyle::LK_Java)
2376     return;
2377 
2378   // In Java, we can parse everything up to the parens, which aren't optional.
2379   do {
2380     // There should not be a ;, { or } before the new's open paren.
2381     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2382       return;
2383 
2384     // Consume the parens.
2385     if (FormatTok->is(tok::l_paren)) {
2386       parseParens();
2387 
2388       // If there is a class body of an anonymous class, consume that as child.
2389       if (FormatTok->is(tok::l_brace))
2390         parseChildBlock();
2391       return;
2392     }
2393     nextToken();
2394   } while (!eof());
2395 }
2396 
2397 void UnwrappedLineParser::parseForOrWhileLoop() {
2398   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2399          "'for', 'while' or foreach macro expected");
2400   nextToken();
2401   // JS' for await ( ...
2402   if (Style.Language == FormatStyle::LK_JavaScript &&
2403       FormatTok->is(Keywords.kw_await))
2404     nextToken();
2405   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2406     nextToken();
2407   if (FormatTok->Tok.is(tok::l_paren))
2408     parseParens();
2409   if (FormatTok->Tok.is(tok::l_brace)) {
2410     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2411     parseBlock();
2412     addUnwrappedLine();
2413   } else {
2414     addUnwrappedLine();
2415     ++Line->Level;
2416     parseStructuralElement();
2417     --Line->Level;
2418   }
2419 }
2420 
2421 void UnwrappedLineParser::parseDoWhile() {
2422   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2423   nextToken();
2424   if (FormatTok->Tok.is(tok::l_brace)) {
2425     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2426     parseBlock();
2427     if (Style.BraceWrapping.BeforeWhile)
2428       addUnwrappedLine();
2429   } else {
2430     addUnwrappedLine();
2431     ++Line->Level;
2432     parseStructuralElement();
2433     --Line->Level;
2434   }
2435 
2436   // FIXME: Add error handling.
2437   if (!FormatTok->Tok.is(tok::kw_while)) {
2438     addUnwrappedLine();
2439     return;
2440   }
2441 
2442   // If in Whitesmiths mode, the line with the while() needs to be indented
2443   // to the same level as the block.
2444   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2445     ++Line->Level;
2446 
2447   nextToken();
2448   parseStructuralElement();
2449 }
2450 
2451 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2452   nextToken();
2453   unsigned OldLineLevel = Line->Level;
2454   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2455     --Line->Level;
2456   if (LeftAlignLabel)
2457     Line->Level = 0;
2458 
2459   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2460       FormatTok->Tok.is(tok::l_brace)) {
2461 
2462     CompoundStatementIndenter Indenter(this, Line->Level,
2463                                        Style.BraceWrapping.AfterCaseLabel,
2464                                        Style.BraceWrapping.IndentBraces);
2465     parseBlock();
2466     if (FormatTok->Tok.is(tok::kw_break)) {
2467       if (Style.BraceWrapping.AfterControlStatement ==
2468           FormatStyle::BWACS_Always) {
2469         addUnwrappedLine();
2470         if (!Style.IndentCaseBlocks &&
2471             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2472           Line->Level++;
2473         }
2474       }
2475       parseStructuralElement();
2476     }
2477     addUnwrappedLine();
2478   } else {
2479     if (FormatTok->is(tok::semi))
2480       nextToken();
2481     addUnwrappedLine();
2482   }
2483   Line->Level = OldLineLevel;
2484   if (FormatTok->isNot(tok::l_brace)) {
2485     parseStructuralElement();
2486     addUnwrappedLine();
2487   }
2488 }
2489 
2490 void UnwrappedLineParser::parseCaseLabel() {
2491   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2492 
2493   // FIXME: fix handling of complex expressions here.
2494   do {
2495     nextToken();
2496   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2497   parseLabel();
2498 }
2499 
2500 void UnwrappedLineParser::parseSwitch() {
2501   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2502   nextToken();
2503   if (FormatTok->Tok.is(tok::l_paren))
2504     parseParens();
2505   if (FormatTok->Tok.is(tok::l_brace)) {
2506     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2507     parseBlock();
2508     addUnwrappedLine();
2509   } else {
2510     addUnwrappedLine();
2511     ++Line->Level;
2512     parseStructuralElement();
2513     --Line->Level;
2514   }
2515 }
2516 
2517 void UnwrappedLineParser::parseAccessSpecifier() {
2518   nextToken();
2519   // Understand Qt's slots.
2520   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2521     nextToken();
2522   // Otherwise, we don't know what it is, and we'd better keep the next token.
2523   if (FormatTok->Tok.is(tok::colon))
2524     nextToken();
2525   addUnwrappedLine();
2526 }
2527 
2528 void UnwrappedLineParser::parseConcept() {
2529   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2530   nextToken();
2531   if (!FormatTok->Tok.is(tok::identifier))
2532     return;
2533   nextToken();
2534   if (!FormatTok->Tok.is(tok::equal))
2535     return;
2536   nextToken();
2537   if (FormatTok->Tok.is(tok::kw_requires)) {
2538     nextToken();
2539     parseRequiresExpression(Line->Level);
2540   } else {
2541     parseConstraintExpression(Line->Level);
2542   }
2543 }
2544 
2545 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2546   // requires (R range)
2547   if (FormatTok->Tok.is(tok::l_paren)) {
2548     parseParens();
2549     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2550       addUnwrappedLine();
2551       --Line->Level;
2552     }
2553   }
2554 
2555   if (FormatTok->Tok.is(tok::l_brace)) {
2556     if (Style.BraceWrapping.AfterFunction)
2557       addUnwrappedLine();
2558     FormatTok->setType(TT_FunctionLBrace);
2559     parseBlock();
2560     addUnwrappedLine();
2561   } else {
2562     parseConstraintExpression(OriginalLevel);
2563   }
2564 }
2565 
// Parses a constraint expression: a sequence of terms joined by `&&` / `||`,
// such as `Id<T> && Id<T> || Id<T>`.
//
// Each loop iteration handles one term (which must start with an identifier,
// `requires` or `::`) and the junction following it. Parsing ends at a ':' or
// at the first term that is not followed by `&&` / `||`.
//
// OriginalLevel is the line level to compare against when parsing ends: with
// Style.IndentRequires set and the current level different from it, the level
// is decremented.
void UnwrappedLineParser::parseConstraintExpression(
    unsigned int OriginalLevel) {
  // requires Id<T> && Id<T> || Id<T>
  while (
      FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
    nextToken();
    // Consume the rest of a (possibly qualified, possibly templated) name.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
                              tok::greater, tok::comma, tok::ellipsis)) {
      if (FormatTok->Tok.is(tok::less)) {
        // Template arguments are parsed as a list closed by '>'.
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
        continue;
      }
      nextToken();
    }
    // A nested requires-expression as a term.
    if (FormatTok->Tok.is(tok::kw_requires)) {
      parseRequiresExpression(OriginalLevel);
    }
    if (FormatTok->Tok.is(tok::less)) {
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
    }

    if (FormatTok->Tok.is(tok::l_paren)) {
      parseParens();
    }
    // A '{' here is typed as a function brace and parsed as a block.
    if (FormatTok->Tok.is(tok::l_brace)) {
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      FormatTok->setType(TT_FunctionLBrace);
      parseBlock();
    }
    if (FormatTok->Tok.is(tok::semi)) {
      // Eat any trailing semi.
      nextToken();
      addUnwrappedLine();
    }
    // A ':' ends the expression; it is left for the caller.
    if (FormatTok->Tok.is(tok::colon)) {
      return;
    }
    if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
      // No junction follows, so the expression is complete. End the line
      // unless the previous token is one that could start another term.
      if (FormatTok->Previous &&
          !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
                                        tok::coloncolon)) {
        addUnwrappedLine();
      }
      if (Style.IndentRequires && OriginalLevel != Line->Level) {
        --Line->Level;
      }
      break;
    } else {
      // `&&` / `||` joining two terms.
      FormatTok->setType(TT_ConstraintJunctions);
    }

    // Consume the junction and continue with the next term.
    nextToken();
  }
}
2623 
2624 void UnwrappedLineParser::parseRequires() {
2625   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2626 
2627   unsigned OriginalLevel = Line->Level;
2628   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2629     addUnwrappedLine();
2630     if (Style.IndentRequires) {
2631       Line->Level++;
2632     }
2633   }
2634   nextToken();
2635 
2636   parseRequiresExpression(OriginalLevel);
2637 }
2638 
// Parses an enum declaration or definition.
//
// On entry FormatTok is either the 'enum' keyword (which is consumed) or, for
// NS_ENUM-like macros, some other token that is left in place.
//
// Returns false when the tokens turn out not to introduce an enum:
//   - JavaScript: the next token is ':' or '?' ("enum" used as a name),
//   - Protobuf: the next token is '=' ("enum" used as a field name),
//   - C++: two identifiers in a row are seen while skipping to the body.
// Returns true in every other case, including when no '{' follows.
bool UnwrappedLineParser::parseEnum() {
  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->Tok.is(tok::kw_enum))
    nextToken();

  // Remembered so that the brace-wrapping decision further down is made on
  // the token that followed 'enum', not on whatever FormatTok is by then.
  const FormatToken &InitialToken = *FormatTok;

  // In TypeScript, "enum" can also be used as property name, e.g. in interface
  // declarations. An "enum" keyword followed by a colon would be a syntax
  // error and thus assume it is just an identifier.
  if (Style.Language == FormatStyle::LK_JavaScript &&
      FormatTok->isOneOf(tok::colon, tok::question))
    return false;

  // In protobuf, "enum" can be used as a field name.
  if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
    return false;

  // Eat up enum class ...
  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
    nextToken();

  // Skip the name and anything else in front of the body: identifiers and
  // keywords (anything with identifier info) as well as ':', '::', '<', '>',
  // ',' and '?'.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.isCpp() && FormatTok->is(tok::identifier))
        return false;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return true;
  FormatTok->setBlockKind(BK_Block);

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return true;
  }
  if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return true;
  }

  // With AllowShortEnumsOnASingleLine disabled, the '{' may be wrapped onto
  // its own line and the enumerators always start a new, deeper line.
  if (!Style.AllowShortEnumsOnASingleLine &&
      ShouldBreakBeforeBrace(Style, InitialToken))
    addUnwrappedLine();
  // Parse enum body.
  nextToken();
  if (!Style.AllowShortEnumsOnASingleLine) {
    addUnwrappedLine();
    Line->Level += 1;
  }
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
                                   /*IsEnum=*/true);
  // Undo the level increase applied before the braced list.
  if (!Style.AllowShortEnumsOnASingleLine)
    Line->Level -= 1;
  // Only when the braced list failed to parse is the line finished here
  // (after an optional ';').
  if (HasError) {
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }
  // There is no addUnwrappedLine() on the success path so that we fall through
  // to parsing a structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
  return true;
}
2716 
2717 bool UnwrappedLineParser::parseStructLike() {
2718   // parseRecord falls through and does not yet add an unwrapped line as a
2719   // record declaration or definition can start a structural element.
2720   parseRecord();
2721   // This does not apply to Java, JavaScript and C#.
2722   if (Style.Language == FormatStyle::LK_Java ||
2723       Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
2724     if (FormatTok->is(tok::semi))
2725       nextToken();
2726     addUnwrappedLine();
2727     return true;
2728   }
2729   return false;
2730 }
2731 
2732 namespace {
2733 // A class used to set and restore the Token position when peeking
2734 // ahead in the token source.
2735 class ScopedTokenPosition {
2736   unsigned StoredPosition;
2737   FormatTokenSource *Tokens;
2738 
2739 public:
2740   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2741     assert(Tokens && "Tokens expected to not be null");
2742     StoredPosition = Tokens->getPosition();
2743   }
2744 
2745   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2746 };
2747 } // namespace
2748 
2749 // Look to see if we have [[ by looking ahead, if
2750 // its not then rewind to the original position.
2751 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2752   ScopedTokenPosition AutoPosition(Tokens);
2753   FormatToken *Tok = Tokens->getNextToken();
2754   // We already read the first [ check for the second.
2755   if (!Tok->is(tok::l_square)) {
2756     return false;
2757   }
2758   // Double check that the attribute is just something
2759   // fairly simple.
2760   while (Tok->isNot(tok::eof)) {
2761     if (Tok->is(tok::r_square)) {
2762       break;
2763     }
2764     Tok = Tokens->getNextToken();
2765   }
2766   if (Tok->is(tok::eof))
2767     return false;
2768   Tok = Tokens->getNextToken();
2769   if (!Tok->is(tok::r_square)) {
2770     return false;
2771   }
2772   Tok = Tokens->getNextToken();
2773   if (Tok->is(tok::semi)) {
2774     return false;
2775   }
2776   return true;
2777 }
2778 
2779 void UnwrappedLineParser::parseJavaEnumBody() {
2780   // Determine whether the enum is simple, i.e. does not have a semicolon or
2781   // constants with class bodies. Simple enums can be formatted like braced
2782   // lists, contracted to a single line, etc.
2783   unsigned StoredPosition = Tokens->getPosition();
2784   bool IsSimple = true;
2785   FormatToken *Tok = Tokens->getNextToken();
2786   while (!Tok->is(tok::eof)) {
2787     if (Tok->is(tok::r_brace))
2788       break;
2789     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2790       IsSimple = false;
2791       break;
2792     }
2793     // FIXME: This will also mark enums with braces in the arguments to enum
2794     // constants as "not simple". This is probably fine in practice, though.
2795     Tok = Tokens->getNextToken();
2796   }
2797   FormatTok = Tokens->setPosition(StoredPosition);
2798 
2799   if (IsSimple) {
2800     nextToken();
2801     parseBracedList();
2802     addUnwrappedLine();
2803     return;
2804   }
2805 
2806   // Parse the body of a more complex enum.
2807   // First add a line for everything up to the "{".
2808   nextToken();
2809   addUnwrappedLine();
2810   ++Line->Level;
2811 
2812   // Parse the enum constants.
2813   while (FormatTok) {
2814     if (FormatTok->is(tok::l_brace)) {
2815       // Parse the constant's class body.
2816       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2817                  /*MunchSemi=*/false);
2818     } else if (FormatTok->is(tok::l_paren)) {
2819       parseParens();
2820     } else if (FormatTok->is(tok::comma)) {
2821       nextToken();
2822       addUnwrappedLine();
2823     } else if (FormatTok->is(tok::semi)) {
2824       nextToken();
2825       addUnwrappedLine();
2826       break;
2827     } else if (FormatTok->is(tok::r_brace)) {
2828       addUnwrappedLine();
2829       break;
2830     } else {
2831       nextToken();
2832     }
2833   }
2834 
2835   // Parse the class body after the enum's ";" if any.
2836   parseLevel(/*HasOpeningBrace=*/true);
2837   nextToken();
2838   --Line->Level;
2839   addUnwrappedLine();
2840 }
2841 
// Parses a record (class/struct-like) declaration or definition. The token
// FormatTok points to on entry is consumed unconditionally as the record
// keyword.
//
// \p ParseAsExpr: when true, a '{' body is parsed with parseChildBlock();
// otherwise it is parsed as a declaration block via parseBlock(), optionally
// preceded by a line break as decided by ShouldBreakBeforeBrace().
//
// No unwrapped line is added after the body; see the note at the end.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Remembered for the brace-wrapping decision made once the body is reached.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  // For Java and JavaScript, '.' and ',' are skipped here as well.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.Language == FormatStyle::LK_JavaScript &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        parseBracedList();
        continue;
      }
    }
    // An identifier whose text is not entirely upper-case is taken to be a
    // real name rather than a macro; only after a possible macro (or a
    // non-identifier token) are a following '(' or attribute '[' parsed away.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip the base-clause / template-argument tokens up to the body.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        // A '{' that does not parse as a braced list is taken to be the
        // record body; leave the loop with FormatTok still on it.
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      // A ';' means there is no body to parse.
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // C# generic constraint: 'where' starts a new unwrapped line.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // Members are indented two levels when access modifiers get their own
      // level (Style.IndentAccessModifiers), one level otherwise.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
2926 
2927 void UnwrappedLineParser::parseObjCMethod() {
2928   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2929          "'(' or identifier expected.");
2930   do {
2931     if (FormatTok->Tok.is(tok::semi)) {
2932       nextToken();
2933       addUnwrappedLine();
2934       return;
2935     } else if (FormatTok->Tok.is(tok::l_brace)) {
2936       if (Style.BraceWrapping.AfterFunction)
2937         addUnwrappedLine();
2938       parseBlock();
2939       addUnwrappedLine();
2940       return;
2941     } else {
2942       nextToken();
2943     }
2944   } while (!eof());
2945 }
2946 
2947 void UnwrappedLineParser::parseObjCProtocolList() {
2948   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2949   do {
2950     nextToken();
2951     // Early exit in case someone forgot a close angle.
2952     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2953         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2954       return;
2955   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2956   nextToken(); // Skip '>'.
2957 }
2958 
2959 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2960   do {
2961     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2962       nextToken();
2963       addUnwrappedLine();
2964       break;
2965     }
2966     if (FormatTok->is(tok::l_brace)) {
2967       parseBlock();
2968       // In ObjC interfaces, nothing should be following the "}".
2969       addUnwrappedLine();
2970     } else if (FormatTok->is(tok::r_brace)) {
2971       // Ignore stray "}". parseStructuralElement doesn't consume them.
2972       nextToken();
2973       addUnwrappedLine();
2974     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2975       nextToken();
2976       parseObjCMethod();
2977     } else {
2978       parseStructuralElement();
2979     }
2980   } while (!eof());
2981 }
2982 
2983 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2984   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2985          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2986   nextToken();
2987   nextToken(); // interface name
2988 
2989   // @interface can be followed by a lightweight generic
2990   // specialization list, then either a base class or a category.
2991   if (FormatTok->Tok.is(tok::less)) {
2992     parseObjCLightweightGenerics();
2993   }
2994   if (FormatTok->Tok.is(tok::colon)) {
2995     nextToken();
2996     nextToken(); // base class name
2997     // The base class can also have lightweight generics applied to it.
2998     if (FormatTok->Tok.is(tok::less)) {
2999       parseObjCLightweightGenerics();
3000     }
3001   } else if (FormatTok->Tok.is(tok::l_paren))
3002     // Skip category, if present.
3003     parseParens();
3004 
3005   if (FormatTok->Tok.is(tok::less))
3006     parseObjCProtocolList();
3007 
3008   if (FormatTok->Tok.is(tok::l_brace)) {
3009     if (Style.BraceWrapping.AfterObjCDeclaration)
3010       addUnwrappedLine();
3011     parseBlock(/*MustBeDeclaration=*/true);
3012   }
3013 
3014   // With instance variables, this puts '}' on its own line.  Without instance
3015   // variables, this ends the @interface line.
3016   addUnwrappedLine();
3017 
3018   parseObjCUntilAtEnd();
3019 }
3020 
3021 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3022   assert(FormatTok->Tok.is(tok::less));
3023   // Unlike protocol lists, generic parameterizations support
3024   // nested angles:
3025   //
3026   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3027   //     NSObject <NSCopying, NSSecureCoding>
3028   //
3029   // so we need to count how many open angles we have left.
3030   unsigned NumOpenAngles = 1;
3031   do {
3032     nextToken();
3033     // Early exit in case someone forgot a close angle.
3034     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3035         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3036       break;
3037     if (FormatTok->Tok.is(tok::less))
3038       ++NumOpenAngles;
3039     else if (FormatTok->Tok.is(tok::greater)) {
3040       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3041       --NumOpenAngles;
3042     }
3043   } while (!eof() && NumOpenAngles != 0);
3044   nextToken(); // Skip '>'.
3045 }
3046 
3047 // Returns true for the declaration/definition form of @protocol,
3048 // false for the expression form.
3049 bool UnwrappedLineParser::parseObjCProtocol() {
3050   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3051   nextToken();
3052 
3053   if (FormatTok->is(tok::l_paren))
3054     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3055     return false;
3056 
3057   // The definition/declaration form,
3058   // @protocol Foo
3059   // - (int)someMethod;
3060   // @end
3061 
3062   nextToken(); // protocol name
3063 
3064   if (FormatTok->Tok.is(tok::less))
3065     parseObjCProtocolList();
3066 
3067   // Check for protocol declaration.
3068   if (FormatTok->Tok.is(tok::semi)) {
3069     nextToken();
3070     addUnwrappedLine();
3071     return true;
3072   }
3073 
3074   addUnwrappedLine();
3075   parseObjCUntilAtEnd();
3076   return true;
3077 }
3078 
3079 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3080   bool IsImport = FormatTok->is(Keywords.kw_import);
3081   assert(IsImport || FormatTok->is(tok::kw_export));
3082   nextToken();
3083 
3084   // Consume the "default" in "export default class/function".
3085   if (FormatTok->is(tok::kw_default))
3086     nextToken();
3087 
3088   // Consume "async function", "function" and "default function", so that these
3089   // get parsed as free-standing JS functions, i.e. do not require a trailing
3090   // semicolon.
3091   if (FormatTok->is(Keywords.kw_async))
3092     nextToken();
3093   if (FormatTok->is(Keywords.kw_function)) {
3094     nextToken();
3095     return;
3096   }
3097 
3098   // For imports, `export *`, `export {...}`, consume the rest of the line up
3099   // to the terminating `;`. For everything else, just return and continue
3100   // parsing the structural element, i.e. the declaration or expression for
3101   // `export default`.
3102   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3103       !FormatTok->isStringLiteral())
3104     return;
3105 
3106   while (!eof()) {
3107     if (FormatTok->is(tok::semi))
3108       return;
3109     if (Line->Tokens.empty()) {
3110       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3111       // import statement should terminate.
3112       return;
3113     }
3114     if (FormatTok->is(tok::l_brace)) {
3115       FormatTok->setBlockKind(BK_Block);
3116       nextToken();
3117       parseBracedList();
3118     } else {
3119       nextToken();
3120     }
3121   }
3122 }
3123 
3124 void UnwrappedLineParser::parseStatementMacro() {
3125   nextToken();
3126   if (FormatTok->is(tok::l_paren))
3127     parseParens();
3128   if (FormatTok->is(tok::semi))
3129     nextToken();
3130   addUnwrappedLine();
3131 }
3132 
3133 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3134                                                  StringRef Prefix = "") {
3135   llvm::dbgs() << Prefix << "Line(" << Line.Level
3136                << ", FSC=" << Line.FirstStartColumn << ")"
3137                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3138   for (const auto &Node : Line.Tokens) {
3139     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3140                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3141                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3142   }
3143   for (const auto &Node : Line.Tokens)
3144     for (const auto &ChildNode : Node.Children)
3145       printDebugInfo(ChildNode, "\nChild: ");
3146 
3147   llvm::dbgs() << "\n";
3148 }
3149 
3150 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3151   if (Line->Tokens.empty())
3152     return;
3153   LLVM_DEBUG({
3154     if (CurrentLines == &Lines)
3155       printDebugInfo(*Line);
3156   });
3157 
3158   // If this line closes a block when in Whitesmiths mode, remember that
3159   // information so that the level can be decreased after the line is added.
3160   // This has to happen after the addition of the line since the line itself
3161   // needs to be indented.
3162   bool ClosesWhitesmithsBlock =
3163       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3164       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3165 
3166   CurrentLines->push_back(std::move(*Line));
3167   Line->Tokens.clear();
3168   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3169   Line->FirstStartColumn = 0;
3170 
3171   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3172     --Line->Level;
3173   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3174     CurrentLines->append(
3175         std::make_move_iterator(PreprocessorDirectives.begin()),
3176         std::make_move_iterator(PreprocessorDirectives.end()));
3177     PreprocessorDirectives.clear();
3178   }
3179   // Disconnect the current token from the last token on the previous line.
3180   FormatTok->Previous = nullptr;
3181 }
3182 
3183 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3184 
3185 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3186   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3187          FormatTok.NewlinesBefore > 0;
3188 }
3189 
3190 // Checks if \p FormatTok is a line comment that continues the line comment
3191 // section on \p Line.
3192 static bool
3193 continuesLineCommentSection(const FormatToken &FormatTok,
3194                             const UnwrappedLine &Line,
3195                             const llvm::Regex &CommentPragmasRegex) {
3196   if (Line.Tokens.empty())
3197     return false;
3198 
3199   StringRef IndentContent = FormatTok.TokenText;
3200   if (FormatTok.TokenText.startswith("//") ||
3201       FormatTok.TokenText.startswith("/*"))
3202     IndentContent = FormatTok.TokenText.substr(2);
3203   if (CommentPragmasRegex.match(IndentContent))
3204     return false;
3205 
3206   // If Line starts with a line comment, then FormatTok continues the comment
3207   // section if its original column is greater or equal to the original start
3208   // column of the line.
3209   //
3210   // Define the min column token of a line as follows: if a line ends in '{' or
3211   // contains a '{' followed by a line comment, then the min column token is
3212   // that '{'. Otherwise, the min column token of the line is the first token of
3213   // the line.
3214   //
3215   // If Line starts with a token other than a line comment, then FormatTok
3216   // continues the comment section if its original column is greater than the
3217   // original start column of the min column token of the line.
3218   //
3219   // For example, the second line comment continues the first in these cases:
3220   //
3221   // // first line
3222   // // second line
3223   //
3224   // and:
3225   //
3226   // // first line
3227   //  // second line
3228   //
3229   // and:
3230   //
3231   // int i; // first line
3232   //  // second line
3233   //
3234   // and:
3235   //
3236   // do { // first line
3237   //      // second line
3238   //   int i;
3239   // } while (true);
3240   //
3241   // and:
3242   //
3243   // enum {
3244   //   a, // first line
3245   //    // second line
3246   //   b
3247   // };
3248   //
3249   // The second line comment doesn't continue the first in these cases:
3250   //
3251   //   // first line
3252   //  // second line
3253   //
3254   // and:
3255   //
3256   // int i; // first line
3257   // // second line
3258   //
3259   // and:
3260   //
3261   // do { // first line
3262   //   // second line
3263   //   int i;
3264   // } while (true);
3265   //
3266   // and:
3267   //
3268   // enum {
3269   //   a, // first line
3270   //   // second line
3271   // };
3272   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3273 
3274   // Scan for '{//'. If found, use the column of '{' as a min column for line
3275   // comment section continuation.
3276   const FormatToken *PreviousToken = nullptr;
3277   for (const UnwrappedLineNode &Node : Line.Tokens) {
3278     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3279         isLineComment(*Node.Tok)) {
3280       MinColumnToken = PreviousToken;
3281       break;
3282     }
3283     PreviousToken = Node.Tok;
3284 
3285     // Grab the last newline preceding a token in this unwrapped line.
3286     if (Node.Tok->NewlinesBefore > 0) {
3287       MinColumnToken = Node.Tok;
3288     }
3289   }
3290   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3291     MinColumnToken = PreviousToken;
3292   }
3293 
3294   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3295                               MinColumnToken);
3296 }
3297 
3298 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3299   bool JustComments = Line->Tokens.empty();
3300   for (SmallVectorImpl<FormatToken *>::const_iterator
3301            I = CommentsBeforeNextToken.begin(),
3302            E = CommentsBeforeNextToken.end();
3303        I != E; ++I) {
3304     // Line comments that belong to the same line comment section are put on the
3305     // same line since later we might want to reflow content between them.
3306     // Additional fine-grained breaking of line comment sections is controlled
3307     // by the class BreakableLineCommentSection in case it is desirable to keep
3308     // several line comment sections in the same unwrapped line.
3309     //
3310     // FIXME: Consider putting separate line comment sections as children to the
3311     // unwrapped line instead.
3312     (*I)->ContinuesLineCommentSection =
3313         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3314     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3315       addUnwrappedLine();
3316     pushToken(*I);
3317   }
3318   if (NewlineBeforeNext && JustComments)
3319     addUnwrappedLine();
3320   CommentsBeforeNextToken.clear();
3321 }
3322 
3323 void UnwrappedLineParser::nextToken(int LevelDifference) {
3324   if (eof())
3325     return;
3326   flushComments(isOnNewLine(*FormatTok));
3327   pushToken(FormatTok);
3328   FormatToken *Previous = FormatTok;
3329   if (Style.Language != FormatStyle::LK_JavaScript)
3330     readToken(LevelDifference);
3331   else
3332     readTokenWithJavaScriptASI();
3333   FormatTok->Previous = Previous;
3334 }
3335 
3336 void UnwrappedLineParser::distributeComments(
3337     const SmallVectorImpl<FormatToken *> &Comments,
3338     const FormatToken *NextTok) {
3339   // Whether or not a line comment token continues a line is controlled by
3340   // the method continuesLineCommentSection, with the following caveat:
3341   //
3342   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3343   // that each comment line from the trail is aligned with the next token, if
3344   // the next token exists. If a trail exists, the beginning of the maximal
3345   // trail is marked as a start of a new comment section.
3346   //
3347   // For example in this code:
3348   //
3349   // int a; // line about a
3350   //   // line 1 about b
3351   //   // line 2 about b
3352   //   int b;
3353   //
3354   // the two lines about b form a maximal trail, so there are two sections, the
3355   // first one consisting of the single comment "// line about a" and the
3356   // second one consisting of the next two comments.
3357   if (Comments.empty())
3358     return;
3359   bool ShouldPushCommentsInCurrentLine = true;
3360   bool HasTrailAlignedWithNextToken = false;
3361   unsigned StartOfTrailAlignedWithNextToken = 0;
3362   if (NextTok) {
3363     // We are skipping the first element intentionally.
3364     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3365       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3366         HasTrailAlignedWithNextToken = true;
3367         StartOfTrailAlignedWithNextToken = i;
3368       }
3369     }
3370   }
3371   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3372     FormatToken *FormatTok = Comments[i];
3373     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3374       FormatTok->ContinuesLineCommentSection = false;
3375     } else {
3376       FormatTok->ContinuesLineCommentSection =
3377           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3378     }
3379     if (!FormatTok->ContinuesLineCommentSection &&
3380         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3381       ShouldPushCommentsInCurrentLine = false;
3382     }
3383     if (ShouldPushCommentsInCurrentLine) {
3384       pushToken(FormatTok);
3385     } else {
3386       CommentsBeforeNextToken.push_back(FormatTok);
3387     }
3388   }
3389 }
3390 
// Reads tokens from the token source until FormatTok is a token that is
// neither a comment, a conflict marker, part of a preprocessor directive nor
// inside an unreachable preprocessor branch, or until the end of the file.
//
// Comments met on the way are collected and handed to distributeComments()
// together with the token that follows them (nullptr at the end of the file).
//
// \p LevelDifference is added to Line->Level while a preprocessor directive
// (and the comments in front of it) is parsed.
void UnwrappedLineParser::readToken(int LevelDifference) {
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // Conflict markers are handled through the conditional compilation
    // machinery; the token following a marker must start a new line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->getType() == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->getType() == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // A '#' that starts a line (or the file) outside of a directive begins a
    // preprocessor directive. This is a loop because parsePPDirective() may
    // leave FormatTok on the '#' of another directive.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      // NOTE(review): ScopedLineState presumably saves the current line state
      // and restores it at the end of this iteration - confirm at its
      // definition.
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }

    // Tokens inside an unreachable preprocessor branch are skipped. Note that
    // 'continue' in a do/while jumps to the loop condition, so the loop still
    // ends at the end of the file.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // A non-comment token ends the search; the comments collected so far are
    // distributed relative to it.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // End of file reached: there is no next token for the comments.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
3449 
3450 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3451   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3452   if (MustBreakBeforeNextToken) {
3453     Line->Tokens.back().Tok->MustBreakBefore = true;
3454     MustBreakBeforeNextToken = false;
3455   }
3456 }
3457 
3458 } // end namespace format
3459 } // end namespace clang
3460