1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31 
32   // Returns the next token in the token stream.
33   virtual FormatToken *getNextToken() = 0;
34 
35   // Returns the token precedint the token returned by the last call to
36   // getNextToken() in the token stream, or nullptr if no such token exists.
37   virtual FormatToken *getPreviousToken() = 0;
38 
39   // Returns the token that would be returned by the next call to
40   // getNextToken().
41   virtual FormatToken *peekNextToken() = 0;
42 
43   // Returns whether we are at the end of the file.
44   // This can be different from whether getNextToken() returned an eof token
45   // when the FormatTokenSource is a view on a part of the token stream.
46   virtual bool isEOF() = 0;
47 
48   // Gets the current position in the token stream, to be used by setPosition().
49   virtual unsigned getPosition() = 0;
50 
51   // Resets the token stream to the state it was in when getPosition() returned
52   // Position, and return the token at that position in the stream.
53   virtual FormatToken *setPosition(unsigned Position) = 0;
54 };
55 
56 namespace {
57 
58 class ScopedDeclarationState {
59 public:
60   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
61                          bool MustBeDeclaration)
62       : Line(Line), Stack(Stack) {
63     Line.MustBeDeclaration = MustBeDeclaration;
64     Stack.push_back(MustBeDeclaration);
65   }
66   ~ScopedDeclarationState() {
67     Stack.pop_back();
68     if (!Stack.empty())
69       Line.MustBeDeclaration = Stack.back();
70     else
71       Line.MustBeDeclaration = true;
72   }
73 
74 private:
75   UnwrappedLine &Line;
76   std::vector<bool> &Stack;
77 };
78 
79 static bool isLineComment(const FormatToken &FormatTok) {
80   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
81 }
82 
83 // Checks if \p FormatTok is a line comment that continues the line comment
84 // \p Previous. The original column of \p MinColumnToken is used to determine
85 // whether \p FormatTok is indented enough to the right to continue \p Previous.
86 static bool continuesLineComment(const FormatToken &FormatTok,
87                                  const FormatToken *Previous,
88                                  const FormatToken *MinColumnToken) {
89   if (!Previous || !MinColumnToken)
90     return false;
91   unsigned MinContinueColumn =
92       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
93   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
94          isLineComment(*Previous) &&
95          FormatTok.OriginalColumn >= MinContinueColumn;
96 }
97 
98 class ScopedMacroState : public FormatTokenSource {
99 public:
100   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
101                    FormatToken *&ResetToken)
102       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
103         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
104         Token(nullptr), PreviousToken(nullptr) {
105     FakeEOF.Tok.startToken();
106     FakeEOF.Tok.setKind(tok::eof);
107     TokenSource = this;
108     Line.Level = 0;
109     Line.InPPDirective = true;
110   }
111 
112   ~ScopedMacroState() override {
113     TokenSource = PreviousTokenSource;
114     ResetToken = Token;
115     Line.InPPDirective = false;
116     Line.Level = PreviousLineLevel;
117   }
118 
119   FormatToken *getNextToken() override {
120     // The \c UnwrappedLineParser guards against this by never calling
121     // \c getNextToken() after it has encountered the first eof token.
122     assert(!eof());
123     PreviousToken = Token;
124     Token = PreviousTokenSource->getNextToken();
125     if (eof())
126       return &FakeEOF;
127     return Token;
128   }
129 
130   FormatToken *getPreviousToken() override {
131     return PreviousTokenSource->getPreviousToken();
132   }
133 
134   FormatToken *peekNextToken() override {
135     if (eof())
136       return &FakeEOF;
137     return PreviousTokenSource->peekNextToken();
138   }
139 
140   bool isEOF() override { return PreviousTokenSource->isEOF(); }
141 
142   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
143 
144   FormatToken *setPosition(unsigned Position) override {
145     PreviousToken = nullptr;
146     Token = PreviousTokenSource->setPosition(Position);
147     return Token;
148   }
149 
150 private:
151   bool eof() {
152     return Token && Token->HasUnescapedNewline &&
153            !continuesLineComment(*Token, PreviousToken,
154                                  /*MinColumnToken=*/PreviousToken);
155   }
156 
157   FormatToken FakeEOF;
158   UnwrappedLine &Line;
159   FormatTokenSource *&TokenSource;
160   FormatToken *&ResetToken;
161   unsigned PreviousLineLevel;
162   FormatTokenSource *PreviousTokenSource;
163 
164   FormatToken *Token;
165   FormatToken *PreviousToken;
166 };
167 
168 } // end anonymous namespace
169 
170 class ScopedLineState {
171 public:
172   ScopedLineState(UnwrappedLineParser &Parser,
173                   bool SwitchToPreprocessorLines = false)
174       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
175     if (SwitchToPreprocessorLines)
176       Parser.CurrentLines = &Parser.PreprocessorDirectives;
177     else if (!Parser.Line->Tokens.empty())
178       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
179     PreBlockLine = std::move(Parser.Line);
180     Parser.Line = std::make_unique<UnwrappedLine>();
181     Parser.Line->Level = PreBlockLine->Level;
182     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
183   }
184 
185   ~ScopedLineState() {
186     if (!Parser.Line->Tokens.empty()) {
187       Parser.addUnwrappedLine();
188     }
189     assert(Parser.Line->Tokens.empty());
190     Parser.Line = std::move(PreBlockLine);
191     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
192       Parser.MustBreakBeforeNextToken = true;
193     Parser.CurrentLines = OriginalLines;
194   }
195 
196 private:
197   UnwrappedLineParser &Parser;
198 
199   std::unique_ptr<UnwrappedLine> PreBlockLine;
200   SmallVectorImpl<UnwrappedLine> *OriginalLines;
201 };
202 
203 class CompoundStatementIndenter {
204 public:
205   CompoundStatementIndenter(UnwrappedLineParser *Parser,
206                             const FormatStyle &Style, unsigned &LineLevel)
207       : CompoundStatementIndenter(Parser, LineLevel,
208                                   Style.BraceWrapping.AfterControlStatement,
209                                   Style.BraceWrapping.IndentBraces) {}
210   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
211                             bool WrapBrace, bool IndentBrace)
212       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
213     if (WrapBrace)
214       Parser->addUnwrappedLine();
215     if (IndentBrace)
216       ++LineLevel;
217   }
218   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
219 
220 private:
221   unsigned &LineLevel;
222   unsigned OldLineLevel;
223 };
224 
225 namespace {
226 
227 class IndexedTokenSource : public FormatTokenSource {
228 public:
229   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
230       : Tokens(Tokens), Position(-1) {}
231 
232   FormatToken *getNextToken() override {
233     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
234       LLVM_DEBUG({
235         llvm::dbgs() << "Next ";
236         dbgToken(Position);
237       });
238       return Tokens[Position];
239     }
240     ++Position;
241     LLVM_DEBUG({
242       llvm::dbgs() << "Next ";
243       dbgToken(Position);
244     });
245     return Tokens[Position];
246   }
247 
248   FormatToken *getPreviousToken() override {
249     assert(Position > 0);
250     return Tokens[Position - 1];
251   }
252 
253   FormatToken *peekNextToken() override {
254     int Next = Position + 1;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Peeking ";
257       dbgToken(Next);
258     });
259     return Tokens[Next];
260   }
261 
262   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
263 
264   unsigned getPosition() override {
265     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
266     assert(Position >= 0);
267     return Position;
268   }
269 
270   FormatToken *setPosition(unsigned P) override {
271     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
272     Position = P;
273     return Tokens[Position];
274   }
275 
276   void reset() { Position = -1; }
277 
278 private:
279   void dbgToken(int Position, llvm::StringRef Indent = "") {
280     FormatToken *Tok = Tokens[Position];
281     llvm::dbgs() << Indent << "[" << Position
282                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
283                  << ", Macro: " << !!Tok->MacroCtx << "\n";
284   }
285 
286   ArrayRef<FormatToken *> Tokens;
287   int Position;
288 };
289 
290 } // end anonymous namespace
291 
292 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
293                                          const AdditionalKeywords &Keywords,
294                                          unsigned FirstStartColumn,
295                                          ArrayRef<FormatToken *> Tokens,
296                                          UnwrappedLineConsumer &Callback)
297     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
298       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
299       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
300       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
301       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
302                        ? IG_Rejected
303                        : IG_Inited),
304       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
305 
306 void UnwrappedLineParser::reset() {
307   PPBranchLevel = -1;
308   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
309                      ? IG_Rejected
310                      : IG_Inited;
311   IncludeGuardToken = nullptr;
312   Line.reset(new UnwrappedLine);
313   CommentsBeforeNextToken.clear();
314   FormatTok = nullptr;
315   MustBreakBeforeNextToken = false;
316   PreprocessorDirectives.clear();
317   CurrentLines = &Lines;
318   DeclarationScopeStack.clear();
319   PPStack.clear();
320   Line->FirstStartColumn = FirstStartColumn;
321 }
322 
323 void UnwrappedLineParser::parse() {
324   IndexedTokenSource TokenSource(AllTokens);
325   Line->FirstStartColumn = FirstStartColumn;
326   do {
327     LLVM_DEBUG(llvm::dbgs() << "----\n");
328     reset();
329     Tokens = &TokenSource;
330     TokenSource.reset();
331 
332     readToken();
333     parseFile();
334 
335     // If we found an include guard then all preprocessor directives (other than
336     // the guard) are over-indented by one.
337     if (IncludeGuard == IG_Found)
338       for (auto &Line : Lines)
339         if (Line.InPPDirective && Line.Level > 0)
340           --Line.Level;
341 
342     // Create line with eof token.
343     pushToken(FormatTok);
344     addUnwrappedLine();
345 
346     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
347                                                   E = Lines.end();
348          I != E; ++I) {
349       Callback.consumeUnwrappedLine(*I);
350     }
351     Callback.finishRun();
352     Lines.clear();
353     while (!PPLevelBranchIndex.empty() &&
354            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
355       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
356       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
357     }
358     if (!PPLevelBranchIndex.empty()) {
359       ++PPLevelBranchIndex.back();
360       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
361       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
362     }
363   } while (!PPLevelBranchIndex.empty());
364 }
365 
366 void UnwrappedLineParser::parseFile() {
367   // The top-level context in a file always has declarations, except for pre-
368   // processor directives and JavaScript files.
369   bool MustBeDeclaration =
370       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
371   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
372                                           MustBeDeclaration);
373   if (Style.Language == FormatStyle::LK_TextProto)
374     parseBracedList();
375   else
376     parseLevel(/*HasOpeningBrace=*/false);
377   // Make sure to format the remaining tokens.
378   //
379   // LK_TextProto is special since its top-level is parsed as the body of a
380   // braced list, which does not necessarily have natural line separators such
381   // as a semicolon. Comments after the last entry that have been determined to
382   // not belong to that line, as in:
383   //   key: value
384   //   // endfile comment
385   // do not have a chance to be put on a line of their own until this point.
386   // Here we add this newline before end-of-file comments.
387   if (Style.Language == FormatStyle::LK_TextProto &&
388       !CommentsBeforeNextToken.empty())
389     addUnwrappedLine();
390   flushComments(true);
391   addUnwrappedLine();
392 }
393 
394 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
395   do {
396     switch (FormatTok->Tok.getKind()) {
397     case tok::l_brace:
398       return;
399     default:
400       if (FormatTok->is(Keywords.kw_where)) {
401         addUnwrappedLine();
402         nextToken();
403         parseCSharpGenericTypeConstraint();
404         break;
405       }
406       nextToken();
407       break;
408     }
409   } while (!eof());
410 }
411 
412 void UnwrappedLineParser::parseCSharpAttribute() {
413   int UnpairedSquareBrackets = 1;
414   do {
415     switch (FormatTok->Tok.getKind()) {
416     case tok::r_square:
417       nextToken();
418       --UnpairedSquareBrackets;
419       if (UnpairedSquareBrackets == 0) {
420         addUnwrappedLine();
421         return;
422       }
423       break;
424     case tok::l_square:
425       ++UnpairedSquareBrackets;
426       nextToken();
427       break;
428     default:
429       nextToken();
430       break;
431     }
432   } while (!eof());
433 }
434 
435 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
436   bool SwitchLabelEncountered = false;
437   do {
438     tok::TokenKind kind = FormatTok->Tok.getKind();
439     if (FormatTok->getType() == TT_MacroBlockBegin) {
440       kind = tok::l_brace;
441     } else if (FormatTok->getType() == TT_MacroBlockEnd) {
442       kind = tok::r_brace;
443     }
444 
445     switch (kind) {
446     case tok::comment:
447       nextToken();
448       addUnwrappedLine();
449       break;
450     case tok::l_brace:
451       // FIXME: Add parameter whether this can happen - if this happens, we must
452       // be in a non-declaration context.
453       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
454         continue;
455       parseBlock();
456       addUnwrappedLine();
457       break;
458     case tok::r_brace:
459       if (HasOpeningBrace)
460         return;
461       nextToken();
462       addUnwrappedLine();
463       break;
464     case tok::kw_default: {
465       unsigned StoredPosition = Tokens->getPosition();
466       FormatToken *Next;
467       do {
468         Next = Tokens->getNextToken();
469       } while (Next->is(tok::comment));
470       FormatTok = Tokens->setPosition(StoredPosition);
471       if (Next && Next->isNot(tok::colon)) {
472         // default not followed by ':' is not a case label; treat it like
473         // an identifier.
474         parseStructuralElement();
475         break;
476       }
477       // Else, if it is 'default:', fall through to the case handling.
478       LLVM_FALLTHROUGH;
479     }
480     case tok::kw_case:
481       if (Style.Language == FormatStyle::LK_JavaScript &&
482           Line->MustBeDeclaration) {
483         // A 'case: string' style field declaration.
484         parseStructuralElement();
485         break;
486       }
487       if (!SwitchLabelEncountered &&
488           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
489         ++Line->Level;
490       SwitchLabelEncountered = true;
491       parseStructuralElement();
492       break;
493     case tok::l_square:
494       if (Style.isCSharp()) {
495         nextToken();
496         parseCSharpAttribute();
497         break;
498       }
499       LLVM_FALLTHROUGH;
500     default:
501       parseStructuralElement(!HasOpeningBrace);
502       break;
503     }
504   } while (!eof());
505 }
506 
507 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
508   // We'll parse forward through the tokens until we hit
509   // a closing brace or eof - note that getNextToken() will
510   // parse macros, so this will magically work inside macro
511   // definitions, too.
512   unsigned StoredPosition = Tokens->getPosition();
513   FormatToken *Tok = FormatTok;
514   const FormatToken *PrevTok = Tok->Previous;
515   // Keep a stack of positions of lbrace tokens. We will
516   // update information about whether an lbrace starts a
517   // braced init list or a different block during the loop.
518   SmallVector<FormatToken *, 8> LBraceStack;
519   assert(Tok->Tok.is(tok::l_brace));
520   do {
521     // Get next non-comment token.
522     FormatToken *NextTok;
523     unsigned ReadTokens = 0;
524     do {
525       NextTok = Tokens->getNextToken();
526       ++ReadTokens;
527     } while (NextTok->is(tok::comment));
528 
529     switch (Tok->Tok.getKind()) {
530     case tok::l_brace:
531       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
532         if (PrevTok->isOneOf(tok::colon, tok::less))
533           // A ':' indicates this code is in a type, or a braced list
534           // following a label in an object literal ({a: {b: 1}}).
535           // A '<' could be an object used in a comparison, but that is nonsense
536           // code (can never return true), so more likely it is a generic type
537           // argument (`X<{a: string; b: number}>`).
538           // The code below could be confused by semicolons between the
539           // individual members in a type member list, which would normally
540           // trigger BK_Block. In both cases, this must be parsed as an inline
541           // braced init.
542           Tok->setBlockKind(BK_BracedInit);
543         else if (PrevTok->is(tok::r_paren))
544           // `) { }` can only occur in function or method declarations in JS.
545           Tok->setBlockKind(BK_Block);
546       } else {
547         Tok->setBlockKind(BK_Unknown);
548       }
549       LBraceStack.push_back(Tok);
550       break;
551     case tok::r_brace:
552       if (LBraceStack.empty())
553         break;
554       if (LBraceStack.back()->is(BK_Unknown)) {
555         bool ProbablyBracedList = false;
556         if (Style.Language == FormatStyle::LK_Proto) {
557           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
558         } else {
559           // Skip NextTok over preprocessor lines, otherwise we may not
560           // properly diagnose the block as a braced intializer
561           // if the comma separator appears after the pp directive.
562           while (NextTok->is(tok::hash)) {
563             ScopedMacroState MacroState(*Line, Tokens, NextTok);
564             do {
565               NextTok = Tokens->getNextToken();
566               ++ReadTokens;
567             } while (NextTok->isNot(tok::eof));
568           }
569 
570           // Using OriginalColumn to distinguish between ObjC methods and
571           // binary operators is a bit hacky.
572           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
573                                   NextTok->OriginalColumn == 0;
574 
575           // If there is a comma, semicolon or right paren after the closing
576           // brace, we assume this is a braced initializer list.  Note that
577           // regardless how we mark inner braces here, we will overwrite the
578           // BlockKind later if we parse a braced list (where all blocks
579           // inside are by default braced lists), or when we explicitly detect
580           // blocks (for example while parsing lambdas).
581           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
582           // braced list in JS.
583           ProbablyBracedList =
584               (Style.Language == FormatStyle::LK_JavaScript &&
585                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
586                                 Keywords.kw_as)) ||
587               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
588               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
589                                tok::r_paren, tok::r_square, tok::l_brace,
590                                tok::ellipsis) ||
591               (NextTok->is(tok::identifier) &&
592                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
593               (NextTok->is(tok::semi) &&
594                (!ExpectClassBody || LBraceStack.size() != 1)) ||
595               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
596           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
597             // We can have an array subscript after a braced init
598             // list, but C++11 attributes are expected after blocks.
599             NextTok = Tokens->getNextToken();
600             ++ReadTokens;
601             ProbablyBracedList = NextTok->isNot(tok::l_square);
602           }
603         }
604         if (ProbablyBracedList) {
605           Tok->setBlockKind(BK_BracedInit);
606           LBraceStack.back()->setBlockKind(BK_BracedInit);
607         } else {
608           Tok->setBlockKind(BK_Block);
609           LBraceStack.back()->setBlockKind(BK_Block);
610         }
611       }
612       LBraceStack.pop_back();
613       break;
614     case tok::identifier:
615       if (!Tok->is(TT_StatementMacro))
616         break;
617       LLVM_FALLTHROUGH;
618     case tok::at:
619     case tok::semi:
620     case tok::kw_if:
621     case tok::kw_while:
622     case tok::kw_for:
623     case tok::kw_switch:
624     case tok::kw_try:
625     case tok::kw___try:
626       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
627         LBraceStack.back()->setBlockKind(BK_Block);
628       break;
629     default:
630       break;
631     }
632     PrevTok = Tok;
633     Tok = NextTok;
634   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
635 
636   // Assume other blocks for all unclosed opening braces.
637   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
638     if (LBraceStack[i]->is(BK_Unknown))
639       LBraceStack[i]->setBlockKind(BK_Block);
640   }
641 
642   FormatTok = Tokens->setPosition(StoredPosition);
643 }
644 
// Folds std::hash<T>(v) into \p seed, mixing in the constant 0x9e3779b9 and
// two shifted copies of the old seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
650 
651 size_t UnwrappedLineParser::computePPHash() const {
652   size_t h = 0;
653   for (const auto &i : PPStack) {
654     hash_combine(h, size_t(i.Kind));
655     hash_combine(h, i.Line);
656   }
657   return h;
658 }
659 
660 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
661                                      bool MunchSemi,
662                                      bool UnindentWhitesmithsBraces) {
663   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
664          "'{' or macro block token expected");
665   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
666   FormatTok->setBlockKind(BK_Block);
667 
668   // For Whitesmiths mode, jump to the next level prior to skipping over the
669   // braces.
670   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
671     ++Line->Level;
672 
673   size_t PPStartHash = computePPHash();
674 
675   unsigned InitialLevel = Line->Level;
676   nextToken(/*LevelDifference=*/AddLevels);
677 
678   if (MacroBlock && FormatTok->is(tok::l_paren))
679     parseParens();
680 
681   size_t NbPreprocessorDirectives =
682       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
683   addUnwrappedLine();
684   size_t OpeningLineIndex =
685       CurrentLines->empty()
686           ? (UnwrappedLine::kInvalidIndex)
687           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
688 
689   // Whitesmiths is weird here. The brace needs to be indented for the namespace
690   // block, but the block itself may not be indented depending on the style
691   // settings. This allows the format to back up one level in those cases.
692   if (UnindentWhitesmithsBraces)
693     --Line->Level;
694 
695   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
696                                           MustBeDeclaration);
697   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
698     Line->Level += AddLevels;
699   parseLevel(/*HasOpeningBrace=*/true);
700 
701   if (eof())
702     return;
703 
704   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
705                  : !FormatTok->is(tok::r_brace)) {
706     Line->Level = InitialLevel;
707     FormatTok->setBlockKind(BK_Block);
708     return;
709   }
710 
711   size_t PPEndHash = computePPHash();
712 
713   // Munch the closing brace.
714   nextToken(/*LevelDifference=*/-AddLevels);
715 
716   if (MacroBlock && FormatTok->is(tok::l_paren))
717     parseParens();
718 
719   if (FormatTok->is(tok::arrow)) {
720     // Following the } we can find a trailing return type arrow
721     // as part of an implicit conversion constraint.
722     nextToken();
723     parseStructuralElement();
724   }
725 
726   if (MunchSemi && FormatTok->Tok.is(tok::semi))
727     nextToken();
728 
729   Line->Level = InitialLevel;
730 
731   if (PPStartHash == PPEndHash) {
732     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
733     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
734       // Update the opening line to add the forward reference as well
735       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
736           CurrentLines->size() - 1;
737     }
738   }
739 }
740 
741 static bool isGoogScope(const UnwrappedLine &Line) {
742   // FIXME: Closure-library specific stuff should not be hard-coded but be
743   // configurable.
744   if (Line.Tokens.size() < 4)
745     return false;
746   auto I = Line.Tokens.begin();
747   if (I->Tok->TokenText != "goog")
748     return false;
749   ++I;
750   if (I->Tok->isNot(tok::period))
751     return false;
752   ++I;
753   if (I->Tok->TokenText != "scope")
754     return false;
755   ++I;
756   return I->Tok->is(tok::l_paren);
757 }
758 
759 static bool isIIFE(const UnwrappedLine &Line,
760                    const AdditionalKeywords &Keywords) {
761   // Look for the start of an immediately invoked anonymous function.
762   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
763   // This is commonly done in JavaScript to create a new, anonymous scope.
764   // Example: (function() { ... })()
765   if (Line.Tokens.size() < 3)
766     return false;
767   auto I = Line.Tokens.begin();
768   if (I->Tok->isNot(tok::l_paren))
769     return false;
770   ++I;
771   if (I->Tok->isNot(Keywords.kw_function))
772     return false;
773   ++I;
774   return I->Tok->is(tok::l_paren);
775 }
776 
777 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
778                                    const FormatToken &InitialToken) {
779   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
780     return Style.BraceWrapping.AfterNamespace;
781   if (InitialToken.is(tok::kw_class))
782     return Style.BraceWrapping.AfterClass;
783   if (InitialToken.is(tok::kw_union))
784     return Style.BraceWrapping.AfterUnion;
785   if (InitialToken.is(tok::kw_struct))
786     return Style.BraceWrapping.AfterStruct;
787   return false;
788 }
789 
790 void UnwrappedLineParser::parseChildBlock() {
791   FormatTok->setBlockKind(BK_Block);
792   nextToken();
793   {
794     bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
795                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
796     ScopedLineState LineState(*this);
797     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
798                                             /*MustBeDeclaration=*/false);
799     Line->Level += SkipIndent ? 0 : 1;
800     parseLevel(/*HasOpeningBrace=*/true);
801     flushComments(isOnNewLine(*FormatTok));
802     Line->Level -= SkipIndent ? 0 : 1;
803   }
804   nextToken();
805 }
806 
807 void UnwrappedLineParser::parsePPDirective() {
808   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
809   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
810 
811   nextToken();
812 
813   if (!FormatTok->Tok.getIdentifierInfo()) {
814     parsePPUnknown();
815     return;
816   }
817 
818   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
819   case tok::pp_define:
820     parsePPDefine();
821     return;
822   case tok::pp_if:
823     parsePPIf(/*IfDef=*/false);
824     break;
825   case tok::pp_ifdef:
826   case tok::pp_ifndef:
827     parsePPIf(/*IfDef=*/true);
828     break;
829   case tok::pp_else:
830     parsePPElse();
831     break;
832   case tok::pp_elifdef:
833   case tok::pp_elifndef:
834   case tok::pp_elif:
835     parsePPElIf();
836     break;
837   case tok::pp_endif:
838     parsePPEndIf();
839     break;
840   default:
841     parsePPUnknown();
842     break;
843   }
844 }
845 
846 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
847   size_t Line = CurrentLines->size();
848   if (CurrentLines == &PreprocessorDirectives)
849     Line += Lines.size();
850 
851   if (Unreachable ||
852       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
853     PPStack.push_back({PP_Unreachable, Line});
854   else
855     PPStack.push_back({PP_Conditional, Line});
856 }
857 
858 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
859   ++PPBranchLevel;
860   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
861   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
862     PPLevelBranchIndex.push_back(0);
863     PPLevelBranchCount.push_back(0);
864   }
865   PPChainBranchIndex.push(0);
866   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
867   conditionalCompilationCondition(Unreachable || Skip);
868 }
869 
870 void UnwrappedLineParser::conditionalCompilationAlternative() {
871   if (!PPStack.empty())
872     PPStack.pop_back();
873   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
874   if (!PPChainBranchIndex.empty())
875     ++PPChainBranchIndex.top();
876   conditionalCompilationCondition(
877       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
878       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
879 }
880 
881 void UnwrappedLineParser::conditionalCompilationEnd() {
882   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
883   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
884     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
885       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
886     }
887   }
888   // Guard against #endif's without #if.
889   if (PPBranchLevel > -1)
890     --PPBranchLevel;
891   if (!PPChainBranchIndex.empty())
892     PPChainBranchIndex.pop();
893   if (!PPStack.empty())
894     PPStack.pop_back();
895 }
896 
897 void UnwrappedLineParser::parsePPIf(bool IfDef) {
898   bool IfNDef = FormatTok->is(tok::pp_ifndef);
899   nextToken();
900   bool Unreachable = false;
901   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
902     Unreachable = true;
903   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
904     Unreachable = true;
905   conditionalCompilationStart(Unreachable);
906   FormatToken *IfCondition = FormatTok;
907   // If there's a #ifndef on the first line, and the only lines before it are
908   // comments, it could be an include guard.
909   bool MaybeIncludeGuard = IfNDef;
910   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
911     for (auto &Line : Lines) {
912       if (!Line.Tokens.front().Tok->is(tok::comment)) {
913         MaybeIncludeGuard = false;
914         IncludeGuard = IG_Rejected;
915         break;
916       }
917     }
918   --PPBranchLevel;
919   parsePPUnknown();
920   ++PPBranchLevel;
921   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
922     IncludeGuard = IG_IfNdefed;
923     IncludeGuardToken = IfCondition;
924   }
925 }
926 
927 void UnwrappedLineParser::parsePPElse() {
928   // If a potential include guard has an #else, it's not an include guard.
929   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
930     IncludeGuard = IG_Rejected;
931   conditionalCompilationAlternative();
932   if (PPBranchLevel > -1)
933     --PPBranchLevel;
934   parsePPUnknown();
935   ++PPBranchLevel;
936 }
937 
938 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
939 
940 void UnwrappedLineParser::parsePPEndIf() {
941   conditionalCompilationEnd();
942   parsePPUnknown();
943   // If the #endif of a potential include guard is the last thing in the file,
944   // then we found an include guard.
945   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
946       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
947     IncludeGuard = IG_Found;
948 }
949 
950 void UnwrappedLineParser::parsePPDefine() {
951   nextToken();
952 
953   if (!FormatTok->Tok.getIdentifierInfo()) {
954     IncludeGuard = IG_Rejected;
955     IncludeGuardToken = nullptr;
956     parsePPUnknown();
957     return;
958   }
959 
960   if (IncludeGuard == IG_IfNdefed &&
961       IncludeGuardToken->TokenText == FormatTok->TokenText) {
962     IncludeGuard = IG_Defined;
963     IncludeGuardToken = nullptr;
964     for (auto &Line : Lines) {
965       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
966         IncludeGuard = IG_Rejected;
967         break;
968       }
969     }
970   }
971 
972   nextToken();
973   if (FormatTok->Tok.getKind() == tok::l_paren &&
974       FormatTok->WhitespaceRange.getBegin() ==
975           FormatTok->WhitespaceRange.getEnd()) {
976     parseParens();
977   }
978   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
979     Line->Level += PPBranchLevel + 1;
980   addUnwrappedLine();
981   ++Line->Level;
982 
983   // Errors during a preprocessor directive can only affect the layout of the
984   // preprocessor directive, and thus we ignore them. An alternative approach
985   // would be to use the same approach we use on the file level (no
986   // re-indentation if there was a structural error) within the macro
987   // definition.
988   parseFile();
989 }
990 
991 void UnwrappedLineParser::parsePPUnknown() {
992   do {
993     nextToken();
994   } while (!eof());
995   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
996     Line->Level += PPBranchLevel + 1;
997   addUnwrappedLine();
998 }
999 
1000 // Here we exclude certain tokens that are not usually the first token in an
1001 // unwrapped line. This is used in attempt to distinguish macro calls without
1002 // trailing semicolons from other constructs split to several lines.
1003 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1004   // Semicolon can be a null-statement, l_square can be a start of a macro or
1005   // a C++11 attribute, but this doesn't seem to be common.
1006   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1007          Tok.isNot(TT_AttributeSquare) &&
1008          // Tokens that can only be used as binary operators and a part of
1009          // overloaded operator names.
1010          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1011          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1012          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1013          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1014          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1015          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1016          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1017          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1018          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1019          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1020          Tok.isNot(tok::lesslessequal) &&
1021          // Colon is used in labels, base class lists, initializer lists,
1022          // range-based for loops, ternary operator, but should never be the
1023          // first token in an unwrapped line.
1024          Tok.isNot(tok::colon) &&
1025          // 'noexcept' is a trailing annotation.
1026          Tok.isNot(tok::kw_noexcept);
1027 }
1028 
1029 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1030                           const FormatToken *FormatTok) {
1031   // FIXME: This returns true for C/C++ keywords like 'struct'.
1032   return FormatTok->is(tok::identifier) &&
1033          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1034           !FormatTok->isOneOf(
1035               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1036               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1037               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1038               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1039               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1040               Keywords.kw_instanceof, Keywords.kw_interface,
1041               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1042 }
1043 
1044 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1045                                  const FormatToken *FormatTok) {
1046   return FormatTok->Tok.isLiteral() ||
1047          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1048          mustBeJSIdent(Keywords, FormatTok);
1049 }
1050 
1051 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1052 // when encountered after a value (see mustBeJSIdentOrValue).
1053 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1054                            const FormatToken *FormatTok) {
1055   return FormatTok->isOneOf(
1056       tok::kw_return, Keywords.kw_yield,
1057       // conditionals
1058       tok::kw_if, tok::kw_else,
1059       // loops
1060       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1061       // switch/case
1062       tok::kw_switch, tok::kw_case,
1063       // exceptions
1064       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1065       // declaration
1066       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1067       Keywords.kw_async, Keywords.kw_function,
1068       // import/export
1069       Keywords.kw_import, tok::kw_export);
1070 }
1071 
1072 // Checks whether a token is a type in K&R C (aka C78).
1073 static bool isC78Type(const FormatToken &Tok) {
1074   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1075                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1076                      tok::identifier);
1077 }
1078 
1079 // This function checks whether a token starts the first parameter declaration
1080 // in a K&R C (aka C78) function definition, e.g.:
1081 //   int f(a, b)
1082 //   short a, b;
1083 //   {
1084 //      return a + b;
1085 //   }
1086 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1087                                const FormatToken *FuncName) {
1088   assert(Tok);
1089   assert(Next);
1090   assert(FuncName);
1091 
1092   if (FuncName->isNot(tok::identifier))
1093     return false;
1094 
1095   const FormatToken *Prev = FuncName->Previous;
1096   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1097     return false;
1098 
1099   if (!isC78Type(*Tok) &&
1100       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1101     return false;
1102 
1103   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1104     return false;
1105 
1106   Tok = Tok->Previous;
1107   if (!Tok || Tok->isNot(tok::r_paren))
1108     return false;
1109 
1110   Tok = Tok->Previous;
1111   if (!Tok || Tok->isNot(tok::identifier))
1112     return false;
1113 
1114   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1115 }
1116 
1117 void UnwrappedLineParser::parseModuleImport() {
1118   nextToken();
1119   while (!eof()) {
1120     if (FormatTok->is(tok::colon)) {
1121       FormatTok->setType(TT_ModulePartitionColon);
1122     }
1123     // Handle import <foo/bar.h> as we would an include statement.
1124     else if (FormatTok->is(tok::less)) {
1125       nextToken();
1126       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1127         // Mark tokens up to the trailing line comments as implicit string
1128         // literals.
1129         if (FormatTok->isNot(tok::comment) &&
1130             !FormatTok->TokenText.startswith("//"))
1131           FormatTok->setType(TT_ImplicitStringLiteral);
1132         nextToken();
1133       }
1134     }
1135     if (FormatTok->is(tok::semi)) {
1136       nextToken();
1137       break;
1138     }
1139     nextToken();
1140   }
1141 
1142   addUnwrappedLine();
1143   return;
1144 }
1145 
1146 // readTokenWithJavaScriptASI reads the next token and terminates the current
1147 // line if JavaScript Automatic Semicolon Insertion must
1148 // happen between the current token and the next token.
1149 //
1150 // This method is conservative - it cannot cover all edge cases of JavaScript,
1151 // but only aims to correctly handle certain well known cases. It *must not*
1152 // return true in speculative cases.
1153 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1154   FormatToken *Previous = FormatTok;
1155   readToken();
1156   FormatToken *Next = FormatTok;
1157 
1158   bool IsOnSameLine =
1159       CommentsBeforeNextToken.empty()
1160           ? Next->NewlinesBefore == 0
1161           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1162   if (IsOnSameLine)
1163     return;
1164 
1165   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1166   bool PreviousStartsTemplateExpr =
1167       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1168   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1169     // If the line contains an '@' sign, the previous token might be an
1170     // annotation, which can precede another identifier/value.
1171     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1172       return LineNode.Tok->is(tok::at);
1173     });
1174     if (HasAt)
1175       return;
1176   }
1177   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1178     return addUnwrappedLine();
1179   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1180   bool NextEndsTemplateExpr =
1181       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1182   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1183       (PreviousMustBeValue ||
1184        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1185                          tok::minusminus)))
1186     return addUnwrappedLine();
1187   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1188       isJSDeclOrStmt(Keywords, Next))
1189     return addUnwrappedLine();
1190 }
1191 
// Parses one structural element starting at the current token and emits the
// unwrapped line(s) for it.
//
// The work happens in two stages:
//  1. A switch on the first token dispatches constructs that are recognizable
//     from their leading keyword (namespaces, control statements, labels,
//     extern blocks, imports, ...) to dedicated parsers. A case that `return`s
//     has handled the whole element; a case that `break`s continues with
//     stage 2.
//  2. A loop consumes the remaining tokens one construct at a time until
//     something ends the element (e.g. ';', a '}', a function body) or eof()
//     is reached.
//
// IsTopLevel is only consulted when deciding whether a parenthesized list may
// be followed by K&R C style parameter declarations.
void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
  // TableGen 'include "file"' is a complete line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Stage 1: dispatch on the leading token.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Every token up to the closing brace of the asm block is marked as
      // Finalized.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are ordinary modifiers; elsewhere they
    // are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "..." {' is handled here; any other use of 'extern' goes
    // through the generic loop below.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    // 'import' is handled per language: ES6 import in JavaScript, proto
    // import, or C++ module import.
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // Qt-style 'signals:' / 'slots:' section markers form their own line when
    // followed by a colon.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Stage 2: generic loop over the remaining tokens of the element.
  do {
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // Apple's NS_ENUM-style macros after 'typedef' introduce an enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the structural element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // End the current line but leave the '}' unconsumed.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type, a parameter list and a body that is
      // parsed as a child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Look one token ahead, then rewind the token stream.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // The checks below only apply when the identifier was the first token of
      // the line, optionally preceded by a comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier is all upper case, is either at least five
        // characters long or function-like, and is followed by a line break
        // and a token that may start a line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_FatArrow. They always start an expression or a child block if
      // followed by a curly brace.
      if (FormatTok->is(TT_FatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          // C# may break after => if the next character is a newline.
          if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
            // calling `addUnwrappedLine()` here causes odd parsing errors.
            FormatTok->MustBreakBefore = true;
          }
          parseChildBlock();
        }
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // In proto files '= <' starts a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1689 
1690 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1691   assert(FormatTok->is(tok::l_brace));
1692   if (!Style.isCSharp())
1693     return false;
1694   // See if it's a property accessor.
1695   if (FormatTok->Previous->isNot(tok::identifier))
1696     return false;
1697 
1698   // See if we are inside a property accessor.
1699   //
1700   // Record the current tokenPosition so that we can advance and
1701   // reset the current token. `Next` is not set yet so we need
1702   // another way to advance along the token stream.
1703   unsigned int StoredPosition = Tokens->getPosition();
1704   FormatToken *Tok = Tokens->getNextToken();
1705 
1706   // A trivial property accessor is of the form:
1707   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1708   // Track these as they do not require line breaks to be introduced.
1709   bool HasGetOrSet = false;
1710   bool IsTrivialPropertyAccessor = true;
1711   while (!eof()) {
1712     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1713                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1714                      Keywords.kw_set)) {
1715       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1716         HasGetOrSet = true;
1717       Tok = Tokens->getNextToken();
1718       continue;
1719     }
1720     if (Tok->isNot(tok::r_brace))
1721       IsTrivialPropertyAccessor = false;
1722     break;
1723   }
1724 
1725   if (!HasGetOrSet) {
1726     Tokens->setPosition(StoredPosition);
1727     return false;
1728   }
1729 
1730   // Try to parse the property accessor:
1731   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1732   Tokens->setPosition(StoredPosition);
1733   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1734     addUnwrappedLine();
1735   nextToken();
1736   do {
1737     switch (FormatTok->Tok.getKind()) {
1738     case tok::r_brace:
1739       nextToken();
1740       if (FormatTok->is(tok::equal)) {
1741         while (!eof() && FormatTok->isNot(tok::semi))
1742           nextToken();
1743         nextToken();
1744       }
1745       addUnwrappedLine();
1746       return true;
1747     case tok::l_brace:
1748       ++Line->Level;
1749       parseBlock(/*MustBeDeclaration=*/true);
1750       addUnwrappedLine();
1751       --Line->Level;
1752       break;
1753     case tok::equal:
1754       if (FormatTok->is(TT_FatArrow)) {
1755         ++Line->Level;
1756         do {
1757           nextToken();
1758         } while (!eof() && FormatTok->isNot(tok::semi));
1759         nextToken();
1760         addUnwrappedLine();
1761         --Line->Level;
1762         break;
1763       }
1764       nextToken();
1765       break;
1766     default:
1767       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1768           !IsTrivialPropertyAccessor) {
1769         // Non-trivial get/set needs to be on its own line.
1770         addUnwrappedLine();
1771       }
1772       nextToken();
1773     }
1774   } while (!eof());
1775 
1776   // Unreachable for well-formed code (paired '{' and '}').
1777   return true;
1778 }
1779 
// Tries to parse a C++ lambda expression starting at the current '['.
//
// Returns false when only the '[' token has been consumed: either the style
// is not C++, or tryToParseLambdaIntroducer() rejected the introducer.
// Returns true in every other case, i.e. once the bracketed introducer has
// been parsed -- both when a full lambda (including its body) was parsed and
// when the tokens following the introducer turned out not to fit a lambda.
// Only in the former case are the '[' and '{' tokens typed as
// TT_LambdaLSquare / TT_LambdaLBrace.
bool UnwrappedLineParser::tryToParseLambda() {
  if (!Style.isCpp()) {
    nextToken();
    return false;
  }
  assert(FormatTok->is(tok::l_square));
  // Remember the '[' so that it can be typed once the lambda body is found.
  FormatToken &LSquare = *FormatTok;
  if (!tryToParseLambdaIntroducer())
    return false;

  bool SeenArrow = false;

  // Consume everything between the introducer and the '{' that opens the
  // body; give up (returning true) on any token that cannot appear there.
  while (FormatTok->isNot(tok::l_brace)) {
    if (FormatTok->isSimpleTypeSpecifier()) {
      nextToken();
      continue;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::l_brace:
      // Not reachable: the loop condition already excludes '{'.
      break;
    case tok::l_paren:
      parseParens();
      break;
    case tok::amp:
    case tok::star:
    case tok::kw_const:
    case tok::comma:
    case tok::less:
    case tok::greater:
    case tok::identifier:
    case tok::numeric_constant:
    case tok::coloncolon:
    case tok::kw_class:
    case tok::kw_mutable:
    case tok::kw_noexcept:
    case tok::kw_template:
    case tok::kw_typename:
      nextToken();
      break;
    // Specialization of a template with an integer parameter can contain
    // arithmetic, logical, comparison and ternary operators.
    //
    // FIXME: This also accepts sequences of operators that are not in the scope
    // of a template argument list.
    //
    // In a C++ lambda a template type can only occur after an arrow. We use
    // this as an heuristic to distinguish between Objective-C expressions
    // followed by an `a->b` expression, such as:
    // ([obj func:arg] + a->b)
    // Otherwise the code below would parse as a lambda.
    //
    // FIXME: This heuristic is incorrect for C++20 generic lambdas with
    // explicit template lists: []<bool b = true && false>(U &&u){}
    case tok::plus:
    case tok::minus:
    case tok::exclaim:
    case tok::tilde:
    case tok::slash:
    case tok::percent:
    case tok::lessless:
    case tok::pipe:
    case tok::pipepipe:
    case tok::ampamp:
    case tok::caret:
    case tok::equalequal:
    case tok::exclaimequal:
    case tok::greaterequal:
    case tok::lessequal:
    case tok::question:
    case tok::colon:
    case tok::ellipsis:
    case tok::kw_true:
    case tok::kw_false:
      // These are only accepted as part of a trailing return type.
      if (SeenArrow) {
        nextToken();
        break;
      }
      return true;
    case tok::arrow:
      // This might or might not actually be a lambda arrow (this could be an
      // ObjC method invocation followed by a dereferencing arrow). We might
      // reset this back to TT_Unknown in TokenAnnotator.
      FormatTok->setType(TT_LambdaArrow);
      SeenArrow = true;
      nextToken();
      break;
    default:
      // Any other token (including end of input): not a lambda after all.
      return true;
    }
  }
  // The current token is the '{' of the lambda body.
  FormatTok->setType(TT_LambdaLBrace);
  LSquare.setType(TT_LambdaLSquare);
  parseChildBlock();
  return true;
}
1875 
1876 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1877   const FormatToken *Previous = FormatTok->Previous;
1878   if (Previous &&
1879       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1880                          tok::kw_delete, tok::l_square) ||
1881        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1882        Previous->isSimpleTypeSpecifier())) {
1883     nextToken();
1884     return false;
1885   }
1886   nextToken();
1887   if (FormatTok->is(tok::l_square)) {
1888     return false;
1889   }
1890   parseSquare(/*LambdaIntroducer=*/true);
1891   return true;
1892 }
1893 
1894 void UnwrappedLineParser::tryToParseJSFunction() {
1895   assert(FormatTok->is(Keywords.kw_function) ||
1896          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1897   if (FormatTok->is(Keywords.kw_async))
1898     nextToken();
1899   // Consume "function".
1900   nextToken();
1901 
1902   // Consume * (generator function). Treat it like C++'s overloaded operators.
1903   if (FormatTok->is(tok::star)) {
1904     FormatTok->setType(TT_OverloadedOperator);
1905     nextToken();
1906   }
1907 
1908   // Consume function name.
1909   if (FormatTok->is(tok::identifier))
1910     nextToken();
1911 
1912   if (FormatTok->isNot(tok::l_paren))
1913     return;
1914 
1915   // Parse formal parameter list.
1916   parseParens();
1917 
1918   if (FormatTok->is(tok::colon)) {
1919     // Parse a type definition.
1920     nextToken();
1921 
1922     // Eat the type declaration. For braced inline object types, balance braces,
1923     // otherwise just parse until finding an l_brace for the function body.
1924     if (FormatTok->is(tok::l_brace))
1925       tryToParseBracedList();
1926     else
1927       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1928         nextToken();
1929   }
1930 
1931   if (FormatTok->is(tok::semi))
1932     return;
1933 
1934   parseChildBlock();
1935 }
1936 
1937 bool UnwrappedLineParser::tryToParseBracedList() {
1938   if (FormatTok->is(BK_Unknown))
1939     calculateBraceTypes();
1940   assert(FormatTok->isNot(BK_Unknown));
1941   if (FormatTok->is(BK_Block))
1942     return false;
1943   nextToken();
1944   parseBracedList();
1945   return true;
1946 }
1947 
1948 bool UnwrappedLineParser::tryToParseCSharpLambda() {
1949   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1950   // TT_FatArrow. They always start an expression or a child block if
1951   // followed by a curly brace.
1952   nextToken();
1953   if (FormatTok->isNot(tok::l_brace))
1954     return false;
1955   // C# may break after => if the next character is a newline.
1956   if (Style.BraceWrapping.AfterFunction) {
1957     // calling `addUnwrappedLine()` here causes odd parsing errors.
1958     FormatTok->MustBreakBefore = true;
1959   }
1960   parseChildBlock();
1961   return true;
1962 }
1963 
// Parses the elements of a braced list, consuming tokens until a token of
// kind ClosingBraceKind has been consumed, a semicolon aborts the list, or
// the end of input is reached.
//
// ContinueOnSemicolons: if true, a semicolon is recorded as an error but
//   parsing continues; if false, parsing stops at the semicolon (which is
//   not consumed). Semicolons are never treated as errors for JavaScript.
// IsEnum: if true and Style.AllowShortEnumsOnASingleLine is off, an unwrapped
//   line is added after every comma and before the closing token.
// ClosingBraceKind: the token kind that terminates the list.
//
// Returns true only if the closing token was found and no semicolon error
// was recorded; returns false otherwise, including on end of input.
bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
                                          bool IsEnum,
                                          tok::TokenKind ClosingBraceKind) {
  bool HasError = false;

  // FIXME: Once we have an expression parser in the UnwrappedLineParser,
  // replace this by using parseAssignmentExpression() inside.
  do {
    // Note: `continue` in this do-while jumps to the `!eof()` check below.
    if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
      if (tryToParseCSharpLambda())
        continue;
    if (Style.Language == FormatStyle::LK_JavaScript) {
      if (FormatTok->is(Keywords.kw_function) ||
          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
        tryToParseJSFunction();
        continue;
      }
      if (FormatTok->is(TT_FatArrow)) {
        nextToken();
        // Fat arrows can be followed by simple expressions or by child blocks
        // in curly braces.
        if (FormatTok->is(tok::l_brace)) {
          parseChildBlock();
          continue;
        }
      }
      if (FormatTok->is(tok::l_brace)) {
        // Could be a method inside of a braced list `{a() { return 1; }}`.
        if (tryToParseBracedList())
          continue;
        parseChildBlock();
      }
    }
    // The closing token ends the list; it is consumed before returning.
    if (FormatTok->Tok.getKind() == ClosingBraceKind) {
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      nextToken();
      return !HasError;
    }
    switch (FormatTok->Tok.getKind()) {
    case tok::caret:
      // A '^' directly followed by '{' introduces a child block.
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        parseChildBlock();
      }
      break;
    case tok::l_square:
      if (Style.isCSharp())
        parseSquare();
      else
        tryToParseLambda();
      break;
    case tok::l_paren:
      parseParens();
      // JavaScript can just have free standing methods and getters/setters in
      // object literals. Detect them by a "{" following ")".
      if (Style.Language == FormatStyle::LK_JavaScript) {
        if (FormatTok->is(tok::l_brace))
          parseChildBlock();
        break;
      }
      break;
    case tok::l_brace:
      // Assume there are no blocks inside a braced init list apart
      // from the ones we explicitly parse out (like lambdas).
      FormatTok->setBlockKind(BK_BracedInit);
      nextToken();
      parseBracedList();
      break;
    case tok::less:
      // Only for Proto is '<' ... '>' parsed as a nested list.
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      } else {
        nextToken();
      }
      break;
    case tok::semi:
      // JavaScript (or more precisely TypeScript) can have semicolons in braced
      // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
      // used for error recovery if we have otherwise determined that this is
      // a braced list.
      if (Style.Language == FormatStyle::LK_JavaScript) {
        nextToken();
        break;
      }
      HasError = true;
      // HasError was just set, so this returns false; the ';' is left in place.
      if (!ContinueOnSemicolons)
        return !HasError;
      nextToken();
      break;
    case tok::comma:
      nextToken();
      if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
        addUnwrappedLine();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
  // End of input was reached without finding the closing token.
  return false;
}
2068 
2069 void UnwrappedLineParser::parseParens() {
2070   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2071   nextToken();
2072   do {
2073     switch (FormatTok->Tok.getKind()) {
2074     case tok::l_paren:
2075       parseParens();
2076       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2077         parseChildBlock();
2078       break;
2079     case tok::r_paren:
2080       nextToken();
2081       return;
2082     case tok::r_brace:
2083       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2084       return;
2085     case tok::l_square:
2086       tryToParseLambda();
2087       break;
2088     case tok::l_brace:
2089       if (!tryToParseBracedList())
2090         parseChildBlock();
2091       break;
2092     case tok::at:
2093       nextToken();
2094       if (FormatTok->Tok.is(tok::l_brace)) {
2095         nextToken();
2096         parseBracedList();
2097       }
2098       break;
2099     case tok::equal:
2100       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2101         tryToParseCSharpLambda();
2102       else
2103         nextToken();
2104       break;
2105     case tok::kw_class:
2106       if (Style.Language == FormatStyle::LK_JavaScript)
2107         parseRecord(/*ParseAsExpr=*/true);
2108       else
2109         nextToken();
2110       break;
2111     case tok::identifier:
2112       if (Style.Language == FormatStyle::LK_JavaScript &&
2113           (FormatTok->is(Keywords.kw_function) ||
2114            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2115         tryToParseJSFunction();
2116       else
2117         nextToken();
2118       break;
2119     default:
2120       nextToken();
2121       break;
2122     }
2123   } while (!eof());
2124 }
2125 
2126 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2127   if (!LambdaIntroducer) {
2128     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2129     if (tryToParseLambda())
2130       return;
2131   }
2132   do {
2133     switch (FormatTok->Tok.getKind()) {
2134     case tok::l_paren:
2135       parseParens();
2136       break;
2137     case tok::r_square:
2138       nextToken();
2139       return;
2140     case tok::r_brace:
2141       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2142       return;
2143     case tok::l_square:
2144       parseSquare();
2145       break;
2146     case tok::l_brace: {
2147       if (!tryToParseBracedList())
2148         parseChildBlock();
2149       break;
2150     }
2151     case tok::at:
2152       nextToken();
2153       if (FormatTok->Tok.is(tok::l_brace)) {
2154         nextToken();
2155         parseBracedList();
2156       }
2157       break;
2158     default:
2159       nextToken();
2160       break;
2161     }
2162   } while (!eof());
2163 }
2164 
2165 void UnwrappedLineParser::parseIfThenElse() {
2166   auto HandleAttributes = [this]() {
2167     // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2168     if (FormatTok->is(TT_AttributeMacro))
2169       nextToken();
2170     // Handle [[likely]] / [[unlikely]] attributes.
2171     if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2172       parseSquare();
2173   };
2174 
2175   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2176   nextToken();
2177   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2178     nextToken();
2179   if (FormatTok->Tok.is(tok::l_paren))
2180     parseParens();
2181   HandleAttributes();
2182   bool NeedsUnwrappedLine = false;
2183   if (FormatTok->Tok.is(tok::l_brace)) {
2184     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2185     parseBlock();
2186     if (Style.BraceWrapping.BeforeElse)
2187       addUnwrappedLine();
2188     else
2189       NeedsUnwrappedLine = true;
2190   } else {
2191     addUnwrappedLine();
2192     ++Line->Level;
2193     parseStructuralElement();
2194     --Line->Level;
2195   }
2196   if (FormatTok->Tok.is(tok::kw_else)) {
2197     nextToken();
2198     HandleAttributes();
2199     if (FormatTok->Tok.is(tok::l_brace)) {
2200       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2201       parseBlock();
2202       addUnwrappedLine();
2203     } else if (FormatTok->Tok.is(tok::kw_if)) {
2204       FormatToken *Previous = Tokens->getPreviousToken();
2205       bool PrecededByComment = Previous && Previous->is(tok::comment);
2206       if (PrecededByComment) {
2207         addUnwrappedLine();
2208         ++Line->Level;
2209       }
2210       parseIfThenElse();
2211       if (PrecededByComment)
2212         --Line->Level;
2213     } else {
2214       addUnwrappedLine();
2215       ++Line->Level;
2216       parseStructuralElement();
2217       if (FormatTok->is(tok::eof))
2218         addUnwrappedLine();
2219       --Line->Level;
2220     }
2221   } else if (NeedsUnwrappedLine) {
2222     addUnwrappedLine();
2223   }
2224 }
2225 
2226 void UnwrappedLineParser::parseTryCatch() {
2227   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2228   nextToken();
2229   bool NeedsUnwrappedLine = false;
2230   if (FormatTok->is(tok::colon)) {
2231     // We are in a function try block, what comes is an initializer list.
2232     nextToken();
2233 
2234     // In case identifiers were removed by clang-tidy, what might follow is
2235     // multiple commas in sequence - before the first identifier.
2236     while (FormatTok->is(tok::comma))
2237       nextToken();
2238 
2239     while (FormatTok->is(tok::identifier)) {
2240       nextToken();
2241       if (FormatTok->is(tok::l_paren))
2242         parseParens();
2243       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2244           FormatTok->is(tok::l_brace)) {
2245         do {
2246           nextToken();
2247         } while (!FormatTok->is(tok::r_brace));
2248         nextToken();
2249       }
2250 
2251       // In case identifiers were removed by clang-tidy, what might follow is
2252       // multiple commas in sequence - after the first identifier.
2253       while (FormatTok->is(tok::comma))
2254         nextToken();
2255     }
2256   }
2257   // Parse try with resource.
2258   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2259     parseParens();
2260   }
2261   if (FormatTok->is(tok::l_brace)) {
2262     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2263     parseBlock();
2264     if (Style.BraceWrapping.BeforeCatch) {
2265       addUnwrappedLine();
2266     } else {
2267       NeedsUnwrappedLine = true;
2268     }
2269   } else if (!FormatTok->is(tok::kw_catch)) {
2270     // The C++ standard requires a compound-statement after a try.
2271     // If there's none, we try to assume there's a structuralElement
2272     // and try to continue.
2273     addUnwrappedLine();
2274     ++Line->Level;
2275     parseStructuralElement();
2276     --Line->Level;
2277   }
2278   while (1) {
2279     if (FormatTok->is(tok::at))
2280       nextToken();
2281     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2282                              tok::kw___finally) ||
2283           ((Style.Language == FormatStyle::LK_Java ||
2284             Style.Language == FormatStyle::LK_JavaScript) &&
2285            FormatTok->is(Keywords.kw_finally)) ||
2286           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2287            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2288       break;
2289     nextToken();
2290     while (FormatTok->isNot(tok::l_brace)) {
2291       if (FormatTok->is(tok::l_paren)) {
2292         parseParens();
2293         continue;
2294       }
2295       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2296         return;
2297       nextToken();
2298     }
2299     NeedsUnwrappedLine = false;
2300     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2301     parseBlock();
2302     if (Style.BraceWrapping.BeforeCatch)
2303       addUnwrappedLine();
2304     else
2305       NeedsUnwrappedLine = true;
2306   }
2307   if (NeedsUnwrappedLine)
2308     addUnwrappedLine();
2309 }
2310 
2311 void UnwrappedLineParser::parseNamespace() {
2312   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2313          "'namespace' expected");
2314 
2315   const FormatToken &InitialToken = *FormatTok;
2316   nextToken();
2317   if (InitialToken.is(TT_NamespaceMacro)) {
2318     parseParens();
2319   } else {
2320     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2321                               tok::l_square, tok::period)) {
2322       if (FormatTok->is(tok::l_square))
2323         parseSquare();
2324       else
2325         nextToken();
2326     }
2327   }
2328   if (FormatTok->Tok.is(tok::l_brace)) {
2329     if (ShouldBreakBeforeBrace(Style, InitialToken))
2330       addUnwrappedLine();
2331 
2332     unsigned AddLevels =
2333         Style.NamespaceIndentation == FormatStyle::NI_All ||
2334                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2335                  DeclarationScopeStack.size() > 1)
2336             ? 1u
2337             : 0u;
2338     bool ManageWhitesmithsBraces =
2339         AddLevels == 0u &&
2340         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2341 
2342     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2343     // the whole block.
2344     if (ManageWhitesmithsBraces)
2345       ++Line->Level;
2346 
2347     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2348                /*MunchSemi=*/true,
2349                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2350 
2351     // Munch the semicolon after a namespace. This is more common than one would
2352     // think. Putting the semicolon into its own line is very ugly.
2353     if (FormatTok->Tok.is(tok::semi))
2354       nextToken();
2355 
2356     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2357 
2358     if (ManageWhitesmithsBraces)
2359       --Line->Level;
2360   }
2361   // FIXME: Add error handling.
2362 }
2363 
2364 void UnwrappedLineParser::parseNew() {
2365   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2366   nextToken();
2367 
2368   if (Style.isCSharp()) {
2369     do {
2370       if (FormatTok->is(tok::l_brace))
2371         parseBracedList();
2372 
2373       if (FormatTok->isOneOf(tok::semi, tok::comma))
2374         return;
2375 
2376       nextToken();
2377     } while (!eof());
2378   }
2379 
2380   if (Style.Language != FormatStyle::LK_Java)
2381     return;
2382 
2383   // In Java, we can parse everything up to the parens, which aren't optional.
2384   do {
2385     // There should not be a ;, { or } before the new's open paren.
2386     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2387       return;
2388 
2389     // Consume the parens.
2390     if (FormatTok->is(tok::l_paren)) {
2391       parseParens();
2392 
2393       // If there is a class body of an anonymous class, consume that as child.
2394       if (FormatTok->is(tok::l_brace))
2395         parseChildBlock();
2396       return;
2397     }
2398     nextToken();
2399   } while (!eof());
2400 }
2401 
2402 void UnwrappedLineParser::parseForOrWhileLoop() {
2403   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2404          "'for', 'while' or foreach macro expected");
2405   nextToken();
2406   // JS' for await ( ...
2407   if (Style.Language == FormatStyle::LK_JavaScript &&
2408       FormatTok->is(Keywords.kw_await))
2409     nextToken();
2410   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2411     nextToken();
2412   if (FormatTok->Tok.is(tok::l_paren))
2413     parseParens();
2414   if (FormatTok->Tok.is(tok::l_brace)) {
2415     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2416     parseBlock();
2417     addUnwrappedLine();
2418   } else {
2419     addUnwrappedLine();
2420     ++Line->Level;
2421     parseStructuralElement();
2422     --Line->Level;
2423   }
2424 }
2425 
2426 void UnwrappedLineParser::parseDoWhile() {
2427   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2428   nextToken();
2429   if (FormatTok->Tok.is(tok::l_brace)) {
2430     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2431     parseBlock();
2432     if (Style.BraceWrapping.BeforeWhile)
2433       addUnwrappedLine();
2434   } else {
2435     addUnwrappedLine();
2436     ++Line->Level;
2437     parseStructuralElement();
2438     --Line->Level;
2439   }
2440 
2441   // FIXME: Add error handling.
2442   if (!FormatTok->Tok.is(tok::kw_while)) {
2443     addUnwrappedLine();
2444     return;
2445   }
2446 
2447   // If in Whitesmiths mode, the line with the while() needs to be indented
2448   // to the same level as the block.
2449   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2450     ++Line->Level;
2451 
2452   nextToken();
2453   parseStructuralElement();
2454 }
2455 
2456 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2457   nextToken();
2458   unsigned OldLineLevel = Line->Level;
2459   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2460     --Line->Level;
2461   if (LeftAlignLabel)
2462     Line->Level = 0;
2463 
2464   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2465       FormatTok->Tok.is(tok::l_brace)) {
2466 
2467     CompoundStatementIndenter Indenter(this, Line->Level,
2468                                        Style.BraceWrapping.AfterCaseLabel,
2469                                        Style.BraceWrapping.IndentBraces);
2470     parseBlock();
2471     if (FormatTok->Tok.is(tok::kw_break)) {
2472       if (Style.BraceWrapping.AfterControlStatement ==
2473           FormatStyle::BWACS_Always) {
2474         addUnwrappedLine();
2475         if (!Style.IndentCaseBlocks &&
2476             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2477           Line->Level++;
2478         }
2479       }
2480       parseStructuralElement();
2481     }
2482     addUnwrappedLine();
2483   } else {
2484     if (FormatTok->is(tok::semi))
2485       nextToken();
2486     addUnwrappedLine();
2487   }
2488   Line->Level = OldLineLevel;
2489   if (FormatTok->isNot(tok::l_brace)) {
2490     parseStructuralElement();
2491     addUnwrappedLine();
2492   }
2493 }
2494 
2495 void UnwrappedLineParser::parseCaseLabel() {
2496   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2497 
2498   // FIXME: fix handling of complex expressions here.
2499   do {
2500     nextToken();
2501   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2502   parseLabel();
2503 }
2504 
2505 void UnwrappedLineParser::parseSwitch() {
2506   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2507   nextToken();
2508   if (FormatTok->Tok.is(tok::l_paren))
2509     parseParens();
2510   if (FormatTok->Tok.is(tok::l_brace)) {
2511     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2512     parseBlock();
2513     addUnwrappedLine();
2514   } else {
2515     addUnwrappedLine();
2516     ++Line->Level;
2517     parseStructuralElement();
2518     --Line->Level;
2519   }
2520 }
2521 
2522 void UnwrappedLineParser::parseAccessSpecifier() {
2523   nextToken();
2524   // Understand Qt's slots.
2525   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2526     nextToken();
2527   // Otherwise, we don't know what it is, and we'd better keep the next token.
2528   if (FormatTok->Tok.is(tok::colon))
2529     nextToken();
2530   addUnwrappedLine();
2531 }
2532 
2533 void UnwrappedLineParser::parseConcept() {
2534   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2535   nextToken();
2536   if (!FormatTok->Tok.is(tok::identifier))
2537     return;
2538   nextToken();
2539   if (!FormatTok->Tok.is(tok::equal))
2540     return;
2541   nextToken();
2542   if (FormatTok->Tok.is(tok::kw_requires)) {
2543     nextToken();
2544     parseRequiresExpression(Line->Level);
2545   } else {
2546     parseConstraintExpression(Line->Level);
2547   }
2548 }
2549 
2550 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2551   // requires (R range)
2552   if (FormatTok->Tok.is(tok::l_paren)) {
2553     parseParens();
2554     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2555       addUnwrappedLine();
2556       --Line->Level;
2557     }
2558   }
2559 
2560   if (FormatTok->Tok.is(tok::l_brace)) {
2561     if (Style.BraceWrapping.AfterFunction)
2562       addUnwrappedLine();
2563     FormatTok->setType(TT_FunctionLBrace);
2564     parseBlock();
2565     addUnwrappedLine();
2566   } else {
2567     parseConstraintExpression(OriginalLevel);
2568   }
2569 }
2570 
// Parses a constraint expression: a sequence of terms joined by `&&` / `||`,
// where each term starts with an identifier, `requires` or `::`.
//
// OriginalLevel is the line level to compare against when deciding whether
// the indentation added for the requires clause has to be removed again
// (only relevant with Style.IndentRequires).
void UnwrappedLineParser::parseConstraintExpression(
    unsigned int OriginalLevel) {
  // requires Id<T> && Id<T> || Id<T>
  // Each iteration of the outer loop handles one term of the expression.
  while (
      FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
    nextToken();
    // Consume the rest of a (possibly qualified, possibly templated) name.
    while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
                              tok::greater, tok::comma, tok::ellipsis)) {
      if (FormatTok->Tok.is(tok::less)) {
        // Template argument list: parse it as a list closed by '>'.
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
        continue;
      }
      nextToken();
    }
    if (FormatTok->Tok.is(tok::kw_requires)) {
      parseRequiresExpression(OriginalLevel);
    }
    if (FormatTok->Tok.is(tok::less)) {
      parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                      /*ClosingBraceKind=*/tok::greater);
    }

    if (FormatTok->Tok.is(tok::l_paren)) {
      parseParens();
    }
    if (FormatTok->Tok.is(tok::l_brace)) {
      // A braced body is typed and wrapped like a function body.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      FormatTok->setType(TT_FunctionLBrace);
      parseBlock();
    }
    if (FormatTok->Tok.is(tok::semi)) {
      // Eat any trailing semi.
      nextToken();
      addUnwrappedLine();
    }
    // A ':' ends the expression; it is left for the caller.
    if (FormatTok->Tok.is(tok::colon)) {
      return;
    }
    if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
      // No `&&` / `||` follows, so the expression ends here. End the line
      // unless the previous token is an identifier, `requires` or `::`.
      if (FormatTok->Previous &&
          !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
                                        tok::coloncolon)) {
        addUnwrappedLine();
      }
      if (Style.IndentRequires && OriginalLevel != Line->Level) {
        --Line->Level;
      }
      break;
    } else {
      // Mark the `&&` / `||` as joining two constraints.
      FormatTok->setType(TT_ConstraintJunctions);
    }

    // Consume the `&&` / `||` and continue with the next term.
    nextToken();
  }
}
2628 
2629 void UnwrappedLineParser::parseRequires() {
2630   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2631 
2632   unsigned OriginalLevel = Line->Level;
2633   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2634     addUnwrappedLine();
2635     if (Style.IndentRequires) {
2636       Line->Level++;
2637     }
2638   }
2639   nextToken();
2640 
2641   parseRequiresExpression(OriginalLevel);
2642 }
2643 
2644 bool UnwrappedLineParser::parseEnum() {
2645   // Won't be 'enum' for NS_ENUMs.
2646   if (FormatTok->Tok.is(tok::kw_enum))
2647     nextToken();
2648 
2649   const FormatToken &InitialToken = *FormatTok;
2650 
2651   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2652   // declarations. An "enum" keyword followed by a colon would be a syntax
2653   // error and thus assume it is just an identifier.
2654   if (Style.Language == FormatStyle::LK_JavaScript &&
2655       FormatTok->isOneOf(tok::colon, tok::question))
2656     return false;
2657 
2658   // In protobuf, "enum" can be used as a field name.
2659   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2660     return false;
2661 
2662   // Eat up enum class ...
2663   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2664     nextToken();
2665 
2666   while (FormatTok->Tok.getIdentifierInfo() ||
2667          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2668                             tok::greater, tok::comma, tok::question)) {
2669     nextToken();
2670     // We can have macros or attributes in between 'enum' and the enum name.
2671     if (FormatTok->is(tok::l_paren))
2672       parseParens();
2673     if (FormatTok->is(tok::identifier)) {
2674       nextToken();
2675       // If there are two identifiers in a row, this is likely an elaborate
2676       // return type. In Java, this can be "implements", etc.
2677       if (Style.isCpp() && FormatTok->is(tok::identifier))
2678         return false;
2679     }
2680   }
2681 
2682   // Just a declaration or something is wrong.
2683   if (FormatTok->isNot(tok::l_brace))
2684     return true;
2685   FormatTok->setBlockKind(BK_Block);
2686 
2687   if (Style.Language == FormatStyle::LK_Java) {
2688     // Java enums are different.
2689     parseJavaEnumBody();
2690     return true;
2691   }
2692   if (Style.Language == FormatStyle::LK_Proto) {
2693     parseBlock(/*MustBeDeclaration=*/true);
2694     return true;
2695   }
2696 
2697   if (!Style.AllowShortEnumsOnASingleLine &&
2698       ShouldBreakBeforeBrace(Style, InitialToken))
2699     addUnwrappedLine();
2700   // Parse enum body.
2701   nextToken();
2702   if (!Style.AllowShortEnumsOnASingleLine) {
2703     addUnwrappedLine();
2704     Line->Level += 1;
2705   }
2706   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2707                                    /*IsEnum=*/true);
2708   if (!Style.AllowShortEnumsOnASingleLine)
2709     Line->Level -= 1;
2710   if (HasError) {
2711     if (FormatTok->is(tok::semi))
2712       nextToken();
2713     addUnwrappedLine();
2714   }
2715   return true;
2716 
2717   // There is no addUnwrappedLine() here so that we fall through to parsing a
2718   // structural element afterwards. Thus, in "enum A {} n, m;",
2719   // "} n, m;" will end up in one unwrapped line.
2720 }
2721 
2722 bool UnwrappedLineParser::parseStructLike() {
2723   // parseRecord falls through and does not yet add an unwrapped line as a
2724   // record declaration or definition can start a structural element.
2725   parseRecord();
2726   // This does not apply to Java, JavaScript and C#.
2727   if (Style.Language == FormatStyle::LK_Java ||
2728       Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
2729     if (FormatTok->is(tok::semi))
2730       nextToken();
2731     addUnwrappedLine();
2732     return true;
2733   }
2734   return false;
2735 }
2736 
2737 namespace {
2738 // A class used to set and restore the Token position when peeking
2739 // ahead in the token source.
2740 class ScopedTokenPosition {
2741   unsigned StoredPosition;
2742   FormatTokenSource *Tokens;
2743 
2744 public:
2745   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2746     assert(Tokens && "Tokens expected to not be null");
2747     StoredPosition = Tokens->getPosition();
2748   }
2749 
2750   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2751 };
2752 } // namespace
2753 
2754 // Look to see if we have [[ by looking ahead, if
2755 // its not then rewind to the original position.
2756 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2757   ScopedTokenPosition AutoPosition(Tokens);
2758   FormatToken *Tok = Tokens->getNextToken();
2759   // We already read the first [ check for the second.
2760   if (!Tok->is(tok::l_square)) {
2761     return false;
2762   }
2763   // Double check that the attribute is just something
2764   // fairly simple.
2765   while (Tok->isNot(tok::eof)) {
2766     if (Tok->is(tok::r_square)) {
2767       break;
2768     }
2769     Tok = Tokens->getNextToken();
2770   }
2771   if (Tok->is(tok::eof))
2772     return false;
2773   Tok = Tokens->getNextToken();
2774   if (!Tok->is(tok::r_square)) {
2775     return false;
2776   }
2777   Tok = Tokens->getNextToken();
2778   if (Tok->is(tok::semi)) {
2779     return false;
2780   }
2781   return true;
2782 }
2783 
2784 void UnwrappedLineParser::parseJavaEnumBody() {
2785   // Determine whether the enum is simple, i.e. does not have a semicolon or
2786   // constants with class bodies. Simple enums can be formatted like braced
2787   // lists, contracted to a single line, etc.
2788   unsigned StoredPosition = Tokens->getPosition();
2789   bool IsSimple = true;
2790   FormatToken *Tok = Tokens->getNextToken();
2791   while (!Tok->is(tok::eof)) {
2792     if (Tok->is(tok::r_brace))
2793       break;
2794     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2795       IsSimple = false;
2796       break;
2797     }
2798     // FIXME: This will also mark enums with braces in the arguments to enum
2799     // constants as "not simple". This is probably fine in practice, though.
2800     Tok = Tokens->getNextToken();
2801   }
2802   FormatTok = Tokens->setPosition(StoredPosition);
2803 
2804   if (IsSimple) {
2805     nextToken();
2806     parseBracedList();
2807     addUnwrappedLine();
2808     return;
2809   }
2810 
2811   // Parse the body of a more complex enum.
2812   // First add a line for everything up to the "{".
2813   nextToken();
2814   addUnwrappedLine();
2815   ++Line->Level;
2816 
2817   // Parse the enum constants.
2818   while (FormatTok) {
2819     if (FormatTok->is(tok::l_brace)) {
2820       // Parse the constant's class body.
2821       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2822                  /*MunchSemi=*/false);
2823     } else if (FormatTok->is(tok::l_paren)) {
2824       parseParens();
2825     } else if (FormatTok->is(tok::comma)) {
2826       nextToken();
2827       addUnwrappedLine();
2828     } else if (FormatTok->is(tok::semi)) {
2829       nextToken();
2830       addUnwrappedLine();
2831       break;
2832     } else if (FormatTok->is(tok::r_brace)) {
2833       addUnwrappedLine();
2834       break;
2835     } else {
2836       nextToken();
2837     }
2838   }
2839 
2840   // Parse the class body after the enum's ";" if any.
2841   parseLevel(/*HasOpeningBrace=*/true);
2842   nextToken();
2843   --Line->Level;
2844   addUnwrappedLine();
2845 }
2846 
2847 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2848   const FormatToken &InitialToken = *FormatTok;
2849   nextToken();
2850 
2851   // The actual identifier can be a nested name specifier, and in macros
2852   // it is often token-pasted.
2853   // An [[attribute]] can be before the identifier.
2854   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2855                             tok::kw___attribute, tok::kw___declspec,
2856                             tok::kw_alignas, tok::l_square, tok::r_square) ||
2857          ((Style.Language == FormatStyle::LK_Java ||
2858            Style.Language == FormatStyle::LK_JavaScript) &&
2859           FormatTok->isOneOf(tok::period, tok::comma))) {
2860     if (Style.Language == FormatStyle::LK_JavaScript &&
2861         FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2862       // JavaScript/TypeScript supports inline object types in
2863       // extends/implements positions:
2864       //     class Foo implements {bar: number} { }
2865       nextToken();
2866       if (FormatTok->is(tok::l_brace)) {
2867         tryToParseBracedList();
2868         continue;
2869       }
2870     }
2871     bool IsNonMacroIdentifier =
2872         FormatTok->is(tok::identifier) &&
2873         FormatTok->TokenText != FormatTok->TokenText.upper();
2874     nextToken();
2875     // We can have macros or attributes in between 'class' and the class name.
2876     if (!IsNonMacroIdentifier) {
2877       if (FormatTok->Tok.is(tok::l_paren)) {
2878         parseParens();
2879       } else if (FormatTok->is(TT_AttributeSquare)) {
2880         parseSquare();
2881         // Consume the closing TT_AttributeSquare.
2882         if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
2883           nextToken();
2884       }
2885     }
2886   }
2887 
2888   // Note that parsing away template declarations here leads to incorrectly
2889   // accepting function declarations as record declarations.
2890   // In general, we cannot solve this problem. Consider:
2891   // class A<int> B() {}
2892   // which can be a function definition or a class definition when B() is a
2893   // macro. If we find enough real-world cases where this is a problem, we
2894   // can parse for the 'template' keyword in the beginning of the statement,
2895   // and thus rule out the record production in case there is no template
2896   // (this would still leave us with an ambiguity between template function
2897   // and class declarations).
2898   if (FormatTok->isOneOf(tok::colon, tok::less)) {
2899     while (!eof()) {
2900       if (FormatTok->is(tok::l_brace)) {
2901         calculateBraceTypes(/*ExpectClassBody=*/true);
2902         if (!tryToParseBracedList())
2903           break;
2904       }
2905       if (FormatTok->Tok.is(tok::semi))
2906         return;
2907       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
2908         addUnwrappedLine();
2909         nextToken();
2910         parseCSharpGenericTypeConstraint();
2911         break;
2912       }
2913       nextToken();
2914     }
2915   }
2916   if (FormatTok->Tok.is(tok::l_brace)) {
2917     if (ParseAsExpr) {
2918       parseChildBlock();
2919     } else {
2920       if (ShouldBreakBeforeBrace(Style, InitialToken))
2921         addUnwrappedLine();
2922 
2923       unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
2924       parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
2925     }
2926   }
2927   // There is no addUnwrappedLine() here so that we fall through to parsing a
2928   // structural element afterwards. Thus, in "class A {} n, m;",
2929   // "} n, m;" will end up in one unwrapped line.
2930 }
2931 
2932 void UnwrappedLineParser::parseObjCMethod() {
2933   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2934          "'(' or identifier expected.");
2935   do {
2936     if (FormatTok->Tok.is(tok::semi)) {
2937       nextToken();
2938       addUnwrappedLine();
2939       return;
2940     } else if (FormatTok->Tok.is(tok::l_brace)) {
2941       if (Style.BraceWrapping.AfterFunction)
2942         addUnwrappedLine();
2943       parseBlock();
2944       addUnwrappedLine();
2945       return;
2946     } else {
2947       nextToken();
2948     }
2949   } while (!eof());
2950 }
2951 
2952 void UnwrappedLineParser::parseObjCProtocolList() {
2953   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2954   do {
2955     nextToken();
2956     // Early exit in case someone forgot a close angle.
2957     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2958         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2959       return;
2960   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2961   nextToken(); // Skip '>'.
2962 }
2963 
2964 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2965   do {
2966     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2967       nextToken();
2968       addUnwrappedLine();
2969       break;
2970     }
2971     if (FormatTok->is(tok::l_brace)) {
2972       parseBlock();
2973       // In ObjC interfaces, nothing should be following the "}".
2974       addUnwrappedLine();
2975     } else if (FormatTok->is(tok::r_brace)) {
2976       // Ignore stray "}". parseStructuralElement doesn't consume them.
2977       nextToken();
2978       addUnwrappedLine();
2979     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2980       nextToken();
2981       parseObjCMethod();
2982     } else {
2983       parseStructuralElement();
2984     }
2985   } while (!eof());
2986 }
2987 
2988 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2989   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2990          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2991   nextToken();
2992   nextToken(); // interface name
2993 
2994   // @interface can be followed by a lightweight generic
2995   // specialization list, then either a base class or a category.
2996   if (FormatTok->Tok.is(tok::less)) {
2997     parseObjCLightweightGenerics();
2998   }
2999   if (FormatTok->Tok.is(tok::colon)) {
3000     nextToken();
3001     nextToken(); // base class name
3002     // The base class can also have lightweight generics applied to it.
3003     if (FormatTok->Tok.is(tok::less)) {
3004       parseObjCLightweightGenerics();
3005     }
3006   } else if (FormatTok->Tok.is(tok::l_paren))
3007     // Skip category, if present.
3008     parseParens();
3009 
3010   if (FormatTok->Tok.is(tok::less))
3011     parseObjCProtocolList();
3012 
3013   if (FormatTok->Tok.is(tok::l_brace)) {
3014     if (Style.BraceWrapping.AfterObjCDeclaration)
3015       addUnwrappedLine();
3016     parseBlock(/*MustBeDeclaration=*/true);
3017   }
3018 
3019   // With instance variables, this puts '}' on its own line.  Without instance
3020   // variables, this ends the @interface line.
3021   addUnwrappedLine();
3022 
3023   parseObjCUntilAtEnd();
3024 }
3025 
3026 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3027   assert(FormatTok->Tok.is(tok::less));
3028   // Unlike protocol lists, generic parameterizations support
3029   // nested angles:
3030   //
3031   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3032   //     NSObject <NSCopying, NSSecureCoding>
3033   //
3034   // so we need to count how many open angles we have left.
3035   unsigned NumOpenAngles = 1;
3036   do {
3037     nextToken();
3038     // Early exit in case someone forgot a close angle.
3039     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3040         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3041       break;
3042     if (FormatTok->Tok.is(tok::less))
3043       ++NumOpenAngles;
3044     else if (FormatTok->Tok.is(tok::greater)) {
3045       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3046       --NumOpenAngles;
3047     }
3048   } while (!eof() && NumOpenAngles != 0);
3049   nextToken(); // Skip '>'.
3050 }
3051 
3052 // Returns true for the declaration/definition form of @protocol,
3053 // false for the expression form.
3054 bool UnwrappedLineParser::parseObjCProtocol() {
3055   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3056   nextToken();
3057 
3058   if (FormatTok->is(tok::l_paren))
3059     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3060     return false;
3061 
3062   // The definition/declaration form,
3063   // @protocol Foo
3064   // - (int)someMethod;
3065   // @end
3066 
3067   nextToken(); // protocol name
3068 
3069   if (FormatTok->Tok.is(tok::less))
3070     parseObjCProtocolList();
3071 
3072   // Check for protocol declaration.
3073   if (FormatTok->Tok.is(tok::semi)) {
3074     nextToken();
3075     addUnwrappedLine();
3076     return true;
3077   }
3078 
3079   addUnwrappedLine();
3080   parseObjCUntilAtEnd();
3081   return true;
3082 }
3083 
3084 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3085   bool IsImport = FormatTok->is(Keywords.kw_import);
3086   assert(IsImport || FormatTok->is(tok::kw_export));
3087   nextToken();
3088 
3089   // Consume the "default" in "export default class/function".
3090   if (FormatTok->is(tok::kw_default))
3091     nextToken();
3092 
3093   // Consume "async function", "function" and "default function", so that these
3094   // get parsed as free-standing JS functions, i.e. do not require a trailing
3095   // semicolon.
3096   if (FormatTok->is(Keywords.kw_async))
3097     nextToken();
3098   if (FormatTok->is(Keywords.kw_function)) {
3099     nextToken();
3100     return;
3101   }
3102 
3103   // For imports, `export *`, `export {...}`, consume the rest of the line up
3104   // to the terminating `;`. For everything else, just return and continue
3105   // parsing the structural element, i.e. the declaration or expression for
3106   // `export default`.
3107   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3108       !FormatTok->isStringLiteral())
3109     return;
3110 
3111   while (!eof()) {
3112     if (FormatTok->is(tok::semi))
3113       return;
3114     if (Line->Tokens.empty()) {
3115       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3116       // import statement should terminate.
3117       return;
3118     }
3119     if (FormatTok->is(tok::l_brace)) {
3120       FormatTok->setBlockKind(BK_Block);
3121       nextToken();
3122       parseBracedList();
3123     } else {
3124       nextToken();
3125     }
3126   }
3127 }
3128 
3129 void UnwrappedLineParser::parseStatementMacro() {
3130   nextToken();
3131   if (FormatTok->is(tok::l_paren))
3132     parseParens();
3133   if (FormatTok->is(tok::semi))
3134     nextToken();
3135   addUnwrappedLine();
3136 }
3137 
3138 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3139                                                  StringRef Prefix = "") {
3140   llvm::dbgs() << Prefix << "Line(" << Line.Level
3141                << ", FSC=" << Line.FirstStartColumn << ")"
3142                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3143   for (const auto &Node : Line.Tokens) {
3144     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3145                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3146                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3147   }
3148   for (const auto &Node : Line.Tokens)
3149     for (const auto &ChildNode : Node.Children)
3150       printDebugInfo(ChildNode, "\nChild: ");
3151 
3152   llvm::dbgs() << "\n";
3153 }
3154 
3155 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3156   if (Line->Tokens.empty())
3157     return;
3158   LLVM_DEBUG({
3159     if (CurrentLines == &Lines)
3160       printDebugInfo(*Line);
3161   });
3162 
3163   // If this line closes a block when in Whitesmiths mode, remember that
3164   // information so that the level can be decreased after the line is added.
3165   // This has to happen after the addition of the line since the line itself
3166   // needs to be indented.
3167   bool ClosesWhitesmithsBlock =
3168       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3169       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3170 
3171   CurrentLines->push_back(std::move(*Line));
3172   Line->Tokens.clear();
3173   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3174   Line->FirstStartColumn = 0;
3175 
3176   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3177     --Line->Level;
3178   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3179     CurrentLines->append(
3180         std::make_move_iterator(PreprocessorDirectives.begin()),
3181         std::make_move_iterator(PreprocessorDirectives.end()));
3182     PreprocessorDirectives.clear();
3183   }
3184   // Disconnect the current token from the last token on the previous line.
3185   FormatTok->Previous = nullptr;
3186 }
3187 
3188 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3189 
3190 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3191   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3192          FormatTok.NewlinesBefore > 0;
3193 }
3194 
3195 // Checks if \p FormatTok is a line comment that continues the line comment
3196 // section on \p Line.
3197 static bool
3198 continuesLineCommentSection(const FormatToken &FormatTok,
3199                             const UnwrappedLine &Line,
3200                             const llvm::Regex &CommentPragmasRegex) {
3201   if (Line.Tokens.empty())
3202     return false;
3203 
3204   StringRef IndentContent = FormatTok.TokenText;
3205   if (FormatTok.TokenText.startswith("//") ||
3206       FormatTok.TokenText.startswith("/*"))
3207     IndentContent = FormatTok.TokenText.substr(2);
3208   if (CommentPragmasRegex.match(IndentContent))
3209     return false;
3210 
3211   // If Line starts with a line comment, then FormatTok continues the comment
3212   // section if its original column is greater or equal to the original start
3213   // column of the line.
3214   //
3215   // Define the min column token of a line as follows: if a line ends in '{' or
3216   // contains a '{' followed by a line comment, then the min column token is
3217   // that '{'. Otherwise, the min column token of the line is the first token of
3218   // the line.
3219   //
3220   // If Line starts with a token other than a line comment, then FormatTok
3221   // continues the comment section if its original column is greater than the
3222   // original start column of the min column token of the line.
3223   //
3224   // For example, the second line comment continues the first in these cases:
3225   //
3226   // // first line
3227   // // second line
3228   //
3229   // and:
3230   //
3231   // // first line
3232   //  // second line
3233   //
3234   // and:
3235   //
3236   // int i; // first line
3237   //  // second line
3238   //
3239   // and:
3240   //
3241   // do { // first line
3242   //      // second line
3243   //   int i;
3244   // } while (true);
3245   //
3246   // and:
3247   //
3248   // enum {
3249   //   a, // first line
3250   //    // second line
3251   //   b
3252   // };
3253   //
3254   // The second line comment doesn't continue the first in these cases:
3255   //
3256   //   // first line
3257   //  // second line
3258   //
3259   // and:
3260   //
3261   // int i; // first line
3262   // // second line
3263   //
3264   // and:
3265   //
3266   // do { // first line
3267   //   // second line
3268   //   int i;
3269   // } while (true);
3270   //
3271   // and:
3272   //
3273   // enum {
3274   //   a, // first line
3275   //   // second line
3276   // };
3277   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3278 
3279   // Scan for '{//'. If found, use the column of '{' as a min column for line
3280   // comment section continuation.
3281   const FormatToken *PreviousToken = nullptr;
3282   for (const UnwrappedLineNode &Node : Line.Tokens) {
3283     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3284         isLineComment(*Node.Tok)) {
3285       MinColumnToken = PreviousToken;
3286       break;
3287     }
3288     PreviousToken = Node.Tok;
3289 
3290     // Grab the last newline preceding a token in this unwrapped line.
3291     if (Node.Tok->NewlinesBefore > 0) {
3292       MinColumnToken = Node.Tok;
3293     }
3294   }
3295   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3296     MinColumnToken = PreviousToken;
3297   }
3298 
3299   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3300                               MinColumnToken);
3301 }
3302 
3303 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3304   bool JustComments = Line->Tokens.empty();
3305   for (SmallVectorImpl<FormatToken *>::const_iterator
3306            I = CommentsBeforeNextToken.begin(),
3307            E = CommentsBeforeNextToken.end();
3308        I != E; ++I) {
3309     // Line comments that belong to the same line comment section are put on the
3310     // same line since later we might want to reflow content between them.
3311     // Additional fine-grained breaking of line comment sections is controlled
3312     // by the class BreakableLineCommentSection in case it is desirable to keep
3313     // several line comment sections in the same unwrapped line.
3314     //
3315     // FIXME: Consider putting separate line comment sections as children to the
3316     // unwrapped line instead.
3317     (*I)->ContinuesLineCommentSection =
3318         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3319     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3320       addUnwrappedLine();
3321     pushToken(*I);
3322   }
3323   if (NewlineBeforeNext && JustComments)
3324     addUnwrappedLine();
3325   CommentsBeforeNextToken.clear();
3326 }
3327 
3328 void UnwrappedLineParser::nextToken(int LevelDifference) {
3329   if (eof())
3330     return;
3331   flushComments(isOnNewLine(*FormatTok));
3332   pushToken(FormatTok);
3333   FormatToken *Previous = FormatTok;
3334   if (Style.Language != FormatStyle::LK_JavaScript)
3335     readToken(LevelDifference);
3336   else
3337     readTokenWithJavaScriptASI();
3338   FormatTok->Previous = Previous;
3339 }
3340 
3341 void UnwrappedLineParser::distributeComments(
3342     const SmallVectorImpl<FormatToken *> &Comments,
3343     const FormatToken *NextTok) {
3344   // Whether or not a line comment token continues a line is controlled by
3345   // the method continuesLineCommentSection, with the following caveat:
3346   //
3347   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3348   // that each comment line from the trail is aligned with the next token, if
3349   // the next token exists. If a trail exists, the beginning of the maximal
3350   // trail is marked as a start of a new comment section.
3351   //
3352   // For example in this code:
3353   //
3354   // int a; // line about a
3355   //   // line 1 about b
3356   //   // line 2 about b
3357   //   int b;
3358   //
3359   // the two lines about b form a maximal trail, so there are two sections, the
3360   // first one consisting of the single comment "// line about a" and the
3361   // second one consisting of the next two comments.
3362   if (Comments.empty())
3363     return;
3364   bool ShouldPushCommentsInCurrentLine = true;
3365   bool HasTrailAlignedWithNextToken = false;
3366   unsigned StartOfTrailAlignedWithNextToken = 0;
3367   if (NextTok) {
3368     // We are skipping the first element intentionally.
3369     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3370       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3371         HasTrailAlignedWithNextToken = true;
3372         StartOfTrailAlignedWithNextToken = i;
3373       }
3374     }
3375   }
3376   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3377     FormatToken *FormatTok = Comments[i];
3378     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3379       FormatTok->ContinuesLineCommentSection = false;
3380     } else {
3381       FormatTok->ContinuesLineCommentSection =
3382           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3383     }
3384     if (!FormatTok->ContinuesLineCommentSection &&
3385         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3386       ShouldPushCommentsInCurrentLine = false;
3387     }
3388     if (ShouldPushCommentsInCurrentLine) {
3389       pushToken(FormatTok);
3390     } else {
3391       CommentsBeforeNextToken.push_back(FormatTok);
3392     }
3393   }
3394 }
3395 
3396 void UnwrappedLineParser::readToken(int LevelDifference) {
3397   SmallVector<FormatToken *, 1> Comments;
3398   do {
3399     FormatTok = Tokens->getNextToken();
3400     assert(FormatTok);
3401     while (FormatTok->getType() == TT_ConflictStart ||
3402            FormatTok->getType() == TT_ConflictEnd ||
3403            FormatTok->getType() == TT_ConflictAlternative) {
3404       if (FormatTok->getType() == TT_ConflictStart) {
3405         conditionalCompilationStart(/*Unreachable=*/false);
3406       } else if (FormatTok->getType() == TT_ConflictAlternative) {
3407         conditionalCompilationAlternative();
3408       } else if (FormatTok->getType() == TT_ConflictEnd) {
3409         conditionalCompilationEnd();
3410       }
3411       FormatTok = Tokens->getNextToken();
3412       FormatTok->MustBreakBefore = true;
3413     }
3414 
3415     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3416            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3417       distributeComments(Comments, FormatTok);
3418       Comments.clear();
3419       // If there is an unfinished unwrapped line, we flush the preprocessor
3420       // directives only after that unwrapped line was finished later.
3421       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3422       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3423       assert((LevelDifference >= 0 ||
3424               static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3425              "LevelDifference makes Line->Level negative");
3426       Line->Level += LevelDifference;
3427       // Comments stored before the preprocessor directive need to be output
3428       // before the preprocessor directive, at the same level as the
3429       // preprocessor directive, as we consider them to apply to the directive.
3430       if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3431           PPBranchLevel > 0)
3432         Line->Level += PPBranchLevel;
3433       flushComments(isOnNewLine(*FormatTok));
3434       parsePPDirective();
3435     }
3436 
3437     if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3438         !Line->InPPDirective) {
3439       continue;
3440     }
3441 
3442     if (!FormatTok->Tok.is(tok::comment)) {
3443       distributeComments(Comments, FormatTok);
3444       Comments.clear();
3445       return;
3446     }
3447 
3448     Comments.push_back(FormatTok);
3449   } while (!eof());
3450 
3451   distributeComments(Comments, nullptr);
3452   Comments.clear();
3453 }
3454 
3455 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3456   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3457   if (MustBreakBeforeNextToken) {
3458     Line->Tokens.back().Tok->MustBreakBefore = true;
3459     MustBreakBeforeNextToken = false;
3460   }
3461 }
3462 
3463 } // end namespace format
3464 } // end namespace clang
3465