1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31 
32   // Returns the next token in the token stream.
33   virtual FormatToken *getNextToken() = 0;
34 
35   // Returns the token precedint the token returned by the last call to
36   // getNextToken() in the token stream, or nullptr if no such token exists.
37   virtual FormatToken *getPreviousToken() = 0;
38 
39   // Returns the token that would be returned by the next call to
40   // getNextToken().
41   virtual FormatToken *peekNextToken() = 0;
42 
43   // Returns whether we are at the end of the file.
44   // This can be different from whether getNextToken() returned an eof token
45   // when the FormatTokenSource is a view on a part of the token stream.
46   virtual bool isEOF() = 0;
47 
48   // Gets the current position in the token stream, to be used by setPosition().
49   virtual unsigned getPosition() = 0;
50 
51   // Resets the token stream to the state it was in when getPosition() returned
52   // Position, and return the token at that position in the stream.
53   virtual FormatToken *setPosition(unsigned Position) = 0;
54 };
55 
56 namespace {
57 
58 class ScopedDeclarationState {
59 public:
60   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
61                          bool MustBeDeclaration)
62       : Line(Line), Stack(Stack) {
63     Line.MustBeDeclaration = MustBeDeclaration;
64     Stack.push_back(MustBeDeclaration);
65   }
66   ~ScopedDeclarationState() {
67     Stack.pop_back();
68     if (!Stack.empty())
69       Line.MustBeDeclaration = Stack.back();
70     else
71       Line.MustBeDeclaration = true;
72   }
73 
74 private:
75   UnwrappedLine &Line;
76   std::vector<bool> &Stack;
77 };
78 
79 static bool isLineComment(const FormatToken &FormatTok) {
80   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
81 }
82 
83 // Checks if \p FormatTok is a line comment that continues the line comment
84 // \p Previous. The original column of \p MinColumnToken is used to determine
85 // whether \p FormatTok is indented enough to the right to continue \p Previous.
86 static bool continuesLineComment(const FormatToken &FormatTok,
87                                  const FormatToken *Previous,
88                                  const FormatToken *MinColumnToken) {
89   if (!Previous || !MinColumnToken)
90     return false;
91   unsigned MinContinueColumn =
92       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
93   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
94          isLineComment(*Previous) &&
95          FormatTok.OriginalColumn >= MinContinueColumn;
96 }
97 
98 class ScopedMacroState : public FormatTokenSource {
99 public:
100   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
101                    FormatToken *&ResetToken)
102       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
103         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
104         Token(nullptr), PreviousToken(nullptr) {
105     FakeEOF.Tok.startToken();
106     FakeEOF.Tok.setKind(tok::eof);
107     TokenSource = this;
108     Line.Level = 0;
109     Line.InPPDirective = true;
110   }
111 
112   ~ScopedMacroState() override {
113     TokenSource = PreviousTokenSource;
114     ResetToken = Token;
115     Line.InPPDirective = false;
116     Line.Level = PreviousLineLevel;
117   }
118 
119   FormatToken *getNextToken() override {
120     // The \c UnwrappedLineParser guards against this by never calling
121     // \c getNextToken() after it has encountered the first eof token.
122     assert(!eof());
123     PreviousToken = Token;
124     Token = PreviousTokenSource->getNextToken();
125     if (eof())
126       return &FakeEOF;
127     return Token;
128   }
129 
130   FormatToken *getPreviousToken() override {
131     return PreviousTokenSource->getPreviousToken();
132   }
133 
134   FormatToken *peekNextToken() override {
135     if (eof())
136       return &FakeEOF;
137     return PreviousTokenSource->peekNextToken();
138   }
139 
140   bool isEOF() override { return PreviousTokenSource->isEOF(); }
141 
142   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
143 
144   FormatToken *setPosition(unsigned Position) override {
145     PreviousToken = nullptr;
146     Token = PreviousTokenSource->setPosition(Position);
147     return Token;
148   }
149 
150 private:
151   bool eof() {
152     return Token && Token->HasUnescapedNewline &&
153            !continuesLineComment(*Token, PreviousToken,
154                                  /*MinColumnToken=*/PreviousToken);
155   }
156 
157   FormatToken FakeEOF;
158   UnwrappedLine &Line;
159   FormatTokenSource *&TokenSource;
160   FormatToken *&ResetToken;
161   unsigned PreviousLineLevel;
162   FormatTokenSource *PreviousTokenSource;
163 
164   FormatToken *Token;
165   FormatToken *PreviousToken;
166 };
167 
168 } // end anonymous namespace
169 
170 class ScopedLineState {
171 public:
172   ScopedLineState(UnwrappedLineParser &Parser,
173                   bool SwitchToPreprocessorLines = false)
174       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
175     if (SwitchToPreprocessorLines)
176       Parser.CurrentLines = &Parser.PreprocessorDirectives;
177     else if (!Parser.Line->Tokens.empty())
178       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
179     PreBlockLine = std::move(Parser.Line);
180     Parser.Line = std::make_unique<UnwrappedLine>();
181     Parser.Line->Level = PreBlockLine->Level;
182     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
183   }
184 
185   ~ScopedLineState() {
186     if (!Parser.Line->Tokens.empty()) {
187       Parser.addUnwrappedLine();
188     }
189     assert(Parser.Line->Tokens.empty());
190     Parser.Line = std::move(PreBlockLine);
191     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
192       Parser.MustBreakBeforeNextToken = true;
193     Parser.CurrentLines = OriginalLines;
194   }
195 
196 private:
197   UnwrappedLineParser &Parser;
198 
199   std::unique_ptr<UnwrappedLine> PreBlockLine;
200   SmallVectorImpl<UnwrappedLine> *OriginalLines;
201 };
202 
203 class CompoundStatementIndenter {
204 public:
205   CompoundStatementIndenter(UnwrappedLineParser *Parser,
206                             const FormatStyle &Style, unsigned &LineLevel)
207       : CompoundStatementIndenter(Parser, LineLevel,
208                                   Style.BraceWrapping.AfterControlStatement,
209                                   Style.BraceWrapping.IndentBraces) {}
210   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
211                             bool WrapBrace, bool IndentBrace)
212       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
213     if (WrapBrace)
214       Parser->addUnwrappedLine();
215     if (IndentBrace)
216       ++LineLevel;
217   }
218   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
219 
220 private:
221   unsigned &LineLevel;
222   unsigned OldLineLevel;
223 };
224 
225 namespace {
226 
227 class IndexedTokenSource : public FormatTokenSource {
228 public:
229   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
230       : Tokens(Tokens), Position(-1) {}
231 
232   FormatToken *getNextToken() override {
233     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
234       LLVM_DEBUG({
235         llvm::dbgs() << "Next ";
236         dbgToken(Position);
237       });
238       return Tokens[Position];
239     }
240     ++Position;
241     LLVM_DEBUG({
242       llvm::dbgs() << "Next ";
243       dbgToken(Position);
244     });
245     return Tokens[Position];
246   }
247 
248   FormatToken *getPreviousToken() override {
249     assert(Position > 0);
250     return Tokens[Position - 1];
251   }
252 
253   FormatToken *peekNextToken() override {
254     int Next = Position + 1;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Peeking ";
257       dbgToken(Next);
258     });
259     return Tokens[Next];
260   }
261 
262   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
263 
264   unsigned getPosition() override {
265     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
266     assert(Position >= 0);
267     return Position;
268   }
269 
270   FormatToken *setPosition(unsigned P) override {
271     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
272     Position = P;
273     return Tokens[Position];
274   }
275 
276   void reset() { Position = -1; }
277 
278 private:
279   void dbgToken(int Position, llvm::StringRef Indent = "") {
280     FormatToken *Tok = Tokens[Position];
281     llvm::dbgs() << Indent << "[" << Position
282                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
283                  << ", Macro: " << !!Tok->MacroCtx << "\n";
284   }
285 
286   ArrayRef<FormatToken *> Tokens;
287   int Position;
288 };
289 
290 } // end anonymous namespace
291 
292 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
293                                          const AdditionalKeywords &Keywords,
294                                          unsigned FirstStartColumn,
295                                          ArrayRef<FormatToken *> Tokens,
296                                          UnwrappedLineConsumer &Callback)
297     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
298       CurrentLines(&Lines), Style(Style), Keywords(Keywords),
299       CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
300       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
301       IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
302                        ? IG_Rejected
303                        : IG_Inited),
304       IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
305 
306 void UnwrappedLineParser::reset() {
307   PPBranchLevel = -1;
308   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
309                      ? IG_Rejected
310                      : IG_Inited;
311   IncludeGuardToken = nullptr;
312   Line.reset(new UnwrappedLine);
313   CommentsBeforeNextToken.clear();
314   FormatTok = nullptr;
315   MustBreakBeforeNextToken = false;
316   PreprocessorDirectives.clear();
317   CurrentLines = &Lines;
318   DeclarationScopeStack.clear();
319   PPStack.clear();
320   Line->FirstStartColumn = FirstStartColumn;
321 }
322 
323 void UnwrappedLineParser::parse() {
324   IndexedTokenSource TokenSource(AllTokens);
325   Line->FirstStartColumn = FirstStartColumn;
326   do {
327     LLVM_DEBUG(llvm::dbgs() << "----\n");
328     reset();
329     Tokens = &TokenSource;
330     TokenSource.reset();
331 
332     readToken();
333     parseFile();
334 
335     // If we found an include guard then all preprocessor directives (other than
336     // the guard) are over-indented by one.
337     if (IncludeGuard == IG_Found)
338       for (auto &Line : Lines)
339         if (Line.InPPDirective && Line.Level > 0)
340           --Line.Level;
341 
342     // Create line with eof token.
343     pushToken(FormatTok);
344     addUnwrappedLine();
345 
346     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
347                                                   E = Lines.end();
348          I != E; ++I) {
349       Callback.consumeUnwrappedLine(*I);
350     }
351     Callback.finishRun();
352     Lines.clear();
353     while (!PPLevelBranchIndex.empty() &&
354            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
355       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
356       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
357     }
358     if (!PPLevelBranchIndex.empty()) {
359       ++PPLevelBranchIndex.back();
360       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
361       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
362     }
363   } while (!PPLevelBranchIndex.empty());
364 }
365 
366 void UnwrappedLineParser::parseFile() {
367   // The top-level context in a file always has declarations, except for pre-
368   // processor directives and JavaScript files.
369   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
370   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
371                                           MustBeDeclaration);
372   if (Style.Language == FormatStyle::LK_TextProto)
373     parseBracedList();
374   else
375     parseLevel(/*HasOpeningBrace=*/false);
376   // Make sure to format the remaining tokens.
377   //
378   // LK_TextProto is special since its top-level is parsed as the body of a
379   // braced list, which does not necessarily have natural line separators such
380   // as a semicolon. Comments after the last entry that have been determined to
381   // not belong to that line, as in:
382   //   key: value
383   //   // endfile comment
384   // do not have a chance to be put on a line of their own until this point.
385   // Here we add this newline before end-of-file comments.
386   if (Style.Language == FormatStyle::LK_TextProto &&
387       !CommentsBeforeNextToken.empty())
388     addUnwrappedLine();
389   flushComments(true);
390   addUnwrappedLine();
391 }
392 
393 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
394   do {
395     switch (FormatTok->Tok.getKind()) {
396     case tok::l_brace:
397       return;
398     default:
399       if (FormatTok->is(Keywords.kw_where)) {
400         addUnwrappedLine();
401         nextToken();
402         parseCSharpGenericTypeConstraint();
403         break;
404       }
405       nextToken();
406       break;
407     }
408   } while (!eof());
409 }
410 
411 void UnwrappedLineParser::parseCSharpAttribute() {
412   int UnpairedSquareBrackets = 1;
413   do {
414     switch (FormatTok->Tok.getKind()) {
415     case tok::r_square:
416       nextToken();
417       --UnpairedSquareBrackets;
418       if (UnpairedSquareBrackets == 0) {
419         addUnwrappedLine();
420         return;
421       }
422       break;
423     case tok::l_square:
424       ++UnpairedSquareBrackets;
425       nextToken();
426       break;
427     default:
428       nextToken();
429       break;
430     }
431   } while (!eof());
432 }
433 
434 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
435   bool SwitchLabelEncountered = false;
436   do {
437     tok::TokenKind kind = FormatTok->Tok.getKind();
438     if (FormatTok->getType() == TT_MacroBlockBegin) {
439       kind = tok::l_brace;
440     } else if (FormatTok->getType() == TT_MacroBlockEnd) {
441       kind = tok::r_brace;
442     }
443 
444     switch (kind) {
445     case tok::comment:
446       nextToken();
447       addUnwrappedLine();
448       break;
449     case tok::l_brace:
450       // FIXME: Add parameter whether this can happen - if this happens, we must
451       // be in a non-declaration context.
452       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
453         continue;
454       parseBlock();
455       addUnwrappedLine();
456       break;
457     case tok::r_brace:
458       if (HasOpeningBrace)
459         return;
460       nextToken();
461       addUnwrappedLine();
462       break;
463     case tok::kw_default: {
464       unsigned StoredPosition = Tokens->getPosition();
465       FormatToken *Next;
466       do {
467         Next = Tokens->getNextToken();
468       } while (Next->is(tok::comment));
469       FormatTok = Tokens->setPosition(StoredPosition);
470       if (Next && Next->isNot(tok::colon)) {
471         // default not followed by ':' is not a case label; treat it like
472         // an identifier.
473         parseStructuralElement();
474         break;
475       }
476       // Else, if it is 'default:', fall through to the case handling.
477       LLVM_FALLTHROUGH;
478     }
479     case tok::kw_case:
480       if (Style.isJavaScript() && Line->MustBeDeclaration) {
481         // A 'case: string' style field declaration.
482         parseStructuralElement();
483         break;
484       }
485       if (!SwitchLabelEncountered &&
486           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
487         ++Line->Level;
488       SwitchLabelEncountered = true;
489       parseStructuralElement();
490       break;
491     case tok::l_square:
492       if (Style.isCSharp()) {
493         nextToken();
494         parseCSharpAttribute();
495         break;
496       }
497       LLVM_FALLTHROUGH;
498     default:
499       parseStructuralElement(!HasOpeningBrace);
500       break;
501     }
502   } while (!eof());
503 }
504 
505 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
506   // We'll parse forward through the tokens until we hit
507   // a closing brace or eof - note that getNextToken() will
508   // parse macros, so this will magically work inside macro
509   // definitions, too.
510   unsigned StoredPosition = Tokens->getPosition();
511   FormatToken *Tok = FormatTok;
512   const FormatToken *PrevTok = Tok->Previous;
513   // Keep a stack of positions of lbrace tokens. We will
514   // update information about whether an lbrace starts a
515   // braced init list or a different block during the loop.
516   SmallVector<FormatToken *, 8> LBraceStack;
517   assert(Tok->Tok.is(tok::l_brace));
518   do {
519     // Get next non-comment token.
520     FormatToken *NextTok;
521     unsigned ReadTokens = 0;
522     do {
523       NextTok = Tokens->getNextToken();
524       ++ReadTokens;
525     } while (NextTok->is(tok::comment));
526 
527     switch (Tok->Tok.getKind()) {
528     case tok::l_brace:
529       if (Style.isJavaScript() && PrevTok) {
530         if (PrevTok->isOneOf(tok::colon, tok::less))
531           // A ':' indicates this code is in a type, or a braced list
532           // following a label in an object literal ({a: {b: 1}}).
533           // A '<' could be an object used in a comparison, but that is nonsense
534           // code (can never return true), so more likely it is a generic type
535           // argument (`X<{a: string; b: number}>`).
536           // The code below could be confused by semicolons between the
537           // individual members in a type member list, which would normally
538           // trigger BK_Block. In both cases, this must be parsed as an inline
539           // braced init.
540           Tok->setBlockKind(BK_BracedInit);
541         else if (PrevTok->is(tok::r_paren))
542           // `) { }` can only occur in function or method declarations in JS.
543           Tok->setBlockKind(BK_Block);
544       } else {
545         Tok->setBlockKind(BK_Unknown);
546       }
547       LBraceStack.push_back(Tok);
548       break;
549     case tok::r_brace:
550       if (LBraceStack.empty())
551         break;
552       if (LBraceStack.back()->is(BK_Unknown)) {
553         bool ProbablyBracedList = false;
554         if (Style.Language == FormatStyle::LK_Proto) {
555           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
556         } else {
557           // Skip NextTok over preprocessor lines, otherwise we may not
558           // properly diagnose the block as a braced intializer
559           // if the comma separator appears after the pp directive.
560           while (NextTok->is(tok::hash)) {
561             ScopedMacroState MacroState(*Line, Tokens, NextTok);
562             do {
563               NextTok = Tokens->getNextToken();
564               ++ReadTokens;
565             } while (NextTok->isNot(tok::eof));
566           }
567 
568           // Using OriginalColumn to distinguish between ObjC methods and
569           // binary operators is a bit hacky.
570           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
571                                   NextTok->OriginalColumn == 0;
572 
573           // If there is a comma, semicolon or right paren after the closing
574           // brace, we assume this is a braced initializer list.  Note that
575           // regardless how we mark inner braces here, we will overwrite the
576           // BlockKind later if we parse a braced list (where all blocks
577           // inside are by default braced lists), or when we explicitly detect
578           // blocks (for example while parsing lambdas).
579           // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
580           // braced list in JS.
581           ProbablyBracedList =
582               (Style.isJavaScript() &&
583                NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
584                                 Keywords.kw_as)) ||
585               (Style.isCpp() && NextTok->is(tok::l_paren)) ||
586               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
587                                tok::r_paren, tok::r_square, tok::l_brace,
588                                tok::ellipsis) ||
589               (NextTok->is(tok::identifier) &&
590                !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
591               (NextTok->is(tok::semi) &&
592                (!ExpectClassBody || LBraceStack.size() != 1)) ||
593               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
594           if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
595             // We can have an array subscript after a braced init
596             // list, but C++11 attributes are expected after blocks.
597             NextTok = Tokens->getNextToken();
598             ++ReadTokens;
599             ProbablyBracedList = NextTok->isNot(tok::l_square);
600           }
601         }
602         if (ProbablyBracedList) {
603           Tok->setBlockKind(BK_BracedInit);
604           LBraceStack.back()->setBlockKind(BK_BracedInit);
605         } else {
606           Tok->setBlockKind(BK_Block);
607           LBraceStack.back()->setBlockKind(BK_Block);
608         }
609       }
610       LBraceStack.pop_back();
611       break;
612     case tok::identifier:
613       if (!Tok->is(TT_StatementMacro))
614         break;
615       LLVM_FALLTHROUGH;
616     case tok::at:
617     case tok::semi:
618     case tok::kw_if:
619     case tok::kw_while:
620     case tok::kw_for:
621     case tok::kw_switch:
622     case tok::kw_try:
623     case tok::kw___try:
624       if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
625         LBraceStack.back()->setBlockKind(BK_Block);
626       break;
627     default:
628       break;
629     }
630     PrevTok = Tok;
631     Tok = NextTok;
632   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
633 
634   // Assume other blocks for all unclosed opening braces.
635   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
636     if (LBraceStack[i]->is(BK_Unknown))
637       LBraceStack[i]->setBlockKind(BK_Block);
638   }
639 
640   FormatTok = Tokens->setPosition(StoredPosition);
641 }
642 
// Mixes the std::hash of \p v into \p seed in place.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
648 
649 size_t UnwrappedLineParser::computePPHash() const {
650   size_t h = 0;
651   for (const auto &i : PPStack) {
652     hash_combine(h, size_t(i.Kind));
653     hash_combine(h, i.Line);
654   }
655   return h;
656 }
657 
658 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
659                                      bool MunchSemi,
660                                      bool UnindentWhitesmithsBraces) {
661   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
662          "'{' or macro block token expected");
663   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
664   FormatTok->setBlockKind(BK_Block);
665 
666   // For Whitesmiths mode, jump to the next level prior to skipping over the
667   // braces.
668   if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
669     ++Line->Level;
670 
671   size_t PPStartHash = computePPHash();
672 
673   unsigned InitialLevel = Line->Level;
674   nextToken(/*LevelDifference=*/AddLevels);
675 
676   if (MacroBlock && FormatTok->is(tok::l_paren))
677     parseParens();
678 
679   size_t NbPreprocessorDirectives =
680       CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
681   addUnwrappedLine();
682   size_t OpeningLineIndex =
683       CurrentLines->empty()
684           ? (UnwrappedLine::kInvalidIndex)
685           : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
686 
687   // Whitesmiths is weird here. The brace needs to be indented for the namespace
688   // block, but the block itself may not be indented depending on the style
689   // settings. This allows the format to back up one level in those cases.
690   if (UnindentWhitesmithsBraces)
691     --Line->Level;
692 
693   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
694                                           MustBeDeclaration);
695   if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
696     Line->Level += AddLevels;
697   parseLevel(/*HasOpeningBrace=*/true);
698 
699   if (eof())
700     return;
701 
702   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
703                  : !FormatTok->is(tok::r_brace)) {
704     Line->Level = InitialLevel;
705     FormatTok->setBlockKind(BK_Block);
706     return;
707   }
708 
709   size_t PPEndHash = computePPHash();
710 
711   // Munch the closing brace.
712   nextToken(/*LevelDifference=*/-AddLevels);
713 
714   if (MacroBlock && FormatTok->is(tok::l_paren))
715     parseParens();
716 
717   if (FormatTok->is(tok::arrow)) {
718     // Following the } we can find a trailing return type arrow
719     // as part of an implicit conversion constraint.
720     nextToken();
721     parseStructuralElement();
722   }
723 
724   if (MunchSemi && FormatTok->Tok.is(tok::semi))
725     nextToken();
726 
727   Line->Level = InitialLevel;
728 
729   if (PPStartHash == PPEndHash) {
730     Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
731     if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
732       // Update the opening line to add the forward reference as well
733       (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
734           CurrentLines->size() - 1;
735     }
736   }
737 }
738 
739 static bool isGoogScope(const UnwrappedLine &Line) {
740   // FIXME: Closure-library specific stuff should not be hard-coded but be
741   // configurable.
742   if (Line.Tokens.size() < 4)
743     return false;
744   auto I = Line.Tokens.begin();
745   if (I->Tok->TokenText != "goog")
746     return false;
747   ++I;
748   if (I->Tok->isNot(tok::period))
749     return false;
750   ++I;
751   if (I->Tok->TokenText != "scope")
752     return false;
753   ++I;
754   return I->Tok->is(tok::l_paren);
755 }
756 
757 static bool isIIFE(const UnwrappedLine &Line,
758                    const AdditionalKeywords &Keywords) {
759   // Look for the start of an immediately invoked anonymous function.
760   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
761   // This is commonly done in JavaScript to create a new, anonymous scope.
762   // Example: (function() { ... })()
763   if (Line.Tokens.size() < 3)
764     return false;
765   auto I = Line.Tokens.begin();
766   if (I->Tok->isNot(tok::l_paren))
767     return false;
768   ++I;
769   if (I->Tok->isNot(Keywords.kw_function))
770     return false;
771   ++I;
772   return I->Tok->is(tok::l_paren);
773 }
774 
775 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
776                                    const FormatToken &InitialToken) {
777   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
778     return Style.BraceWrapping.AfterNamespace;
779   if (InitialToken.is(tok::kw_class))
780     return Style.BraceWrapping.AfterClass;
781   if (InitialToken.is(tok::kw_union))
782     return Style.BraceWrapping.AfterUnion;
783   if (InitialToken.is(tok::kw_struct))
784     return Style.BraceWrapping.AfterStruct;
785   return false;
786 }
787 
788 void UnwrappedLineParser::parseChildBlock() {
789   FormatTok->setBlockKind(BK_Block);
790   nextToken();
791   {
792     bool SkipIndent = (Style.isJavaScript() &&
793                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
794     ScopedLineState LineState(*this);
795     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
796                                             /*MustBeDeclaration=*/false);
797     Line->Level += SkipIndent ? 0 : 1;
798     parseLevel(/*HasOpeningBrace=*/true);
799     flushComments(isOnNewLine(*FormatTok));
800     Line->Level -= SkipIndent ? 0 : 1;
801   }
802   nextToken();
803 }
804 
805 void UnwrappedLineParser::parsePPDirective() {
806   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
807   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
808 
809   nextToken();
810 
811   if (!FormatTok->Tok.getIdentifierInfo()) {
812     parsePPUnknown();
813     return;
814   }
815 
816   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
817   case tok::pp_define:
818     parsePPDefine();
819     return;
820   case tok::pp_if:
821     parsePPIf(/*IfDef=*/false);
822     break;
823   case tok::pp_ifdef:
824   case tok::pp_ifndef:
825     parsePPIf(/*IfDef=*/true);
826     break;
827   case tok::pp_else:
828     parsePPElse();
829     break;
830   case tok::pp_elifdef:
831   case tok::pp_elifndef:
832   case tok::pp_elif:
833     parsePPElIf();
834     break;
835   case tok::pp_endif:
836     parsePPEndIf();
837     break;
838   default:
839     parsePPUnknown();
840     break;
841   }
842 }
843 
844 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
845   size_t Line = CurrentLines->size();
846   if (CurrentLines == &PreprocessorDirectives)
847     Line += Lines.size();
848 
849   if (Unreachable ||
850       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
851     PPStack.push_back({PP_Unreachable, Line});
852   else
853     PPStack.push_back({PP_Conditional, Line});
854 }
855 
856 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
857   ++PPBranchLevel;
858   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
859   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
860     PPLevelBranchIndex.push_back(0);
861     PPLevelBranchCount.push_back(0);
862   }
863   PPChainBranchIndex.push(0);
864   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
865   conditionalCompilationCondition(Unreachable || Skip);
866 }
867 
868 void UnwrappedLineParser::conditionalCompilationAlternative() {
869   if (!PPStack.empty())
870     PPStack.pop_back();
871   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
872   if (!PPChainBranchIndex.empty())
873     ++PPChainBranchIndex.top();
874   conditionalCompilationCondition(
875       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
876       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
877 }
878 
879 void UnwrappedLineParser::conditionalCompilationEnd() {
880   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
881   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
882     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
883       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
884     }
885   }
886   // Guard against #endif's without #if.
887   if (PPBranchLevel > -1)
888     --PPBranchLevel;
889   if (!PPChainBranchIndex.empty())
890     PPChainBranchIndex.pop();
891   if (!PPStack.empty())
892     PPStack.pop_back();
893 }
894 
895 void UnwrappedLineParser::parsePPIf(bool IfDef) {
896   bool IfNDef = FormatTok->is(tok::pp_ifndef);
897   nextToken();
898   bool Unreachable = false;
899   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
900     Unreachable = true;
901   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
902     Unreachable = true;
903   conditionalCompilationStart(Unreachable);
904   FormatToken *IfCondition = FormatTok;
905   // If there's a #ifndef on the first line, and the only lines before it are
906   // comments, it could be an include guard.
907   bool MaybeIncludeGuard = IfNDef;
908   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
909     for (auto &Line : Lines) {
910       if (!Line.Tokens.front().Tok->is(tok::comment)) {
911         MaybeIncludeGuard = false;
912         IncludeGuard = IG_Rejected;
913         break;
914       }
915     }
916   --PPBranchLevel;
917   parsePPUnknown();
918   ++PPBranchLevel;
919   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
920     IncludeGuard = IG_IfNdefed;
921     IncludeGuardToken = IfCondition;
922   }
923 }
924 
925 void UnwrappedLineParser::parsePPElse() {
926   // If a potential include guard has an #else, it's not an include guard.
927   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
928     IncludeGuard = IG_Rejected;
929   conditionalCompilationAlternative();
930   if (PPBranchLevel > -1)
931     --PPBranchLevel;
932   parsePPUnknown();
933   ++PPBranchLevel;
934 }
935 
936 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
937 
938 void UnwrappedLineParser::parsePPEndIf() {
939   conditionalCompilationEnd();
940   parsePPUnknown();
941   // If the #endif of a potential include guard is the last thing in the file,
942   // then we found an include guard.
943   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
944       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
945     IncludeGuard = IG_Found;
946 }
947 
948 void UnwrappedLineParser::parsePPDefine() {
949   nextToken();
950 
951   if (!FormatTok->Tok.getIdentifierInfo()) {
952     IncludeGuard = IG_Rejected;
953     IncludeGuardToken = nullptr;
954     parsePPUnknown();
955     return;
956   }
957 
958   if (IncludeGuard == IG_IfNdefed &&
959       IncludeGuardToken->TokenText == FormatTok->TokenText) {
960     IncludeGuard = IG_Defined;
961     IncludeGuardToken = nullptr;
962     for (auto &Line : Lines) {
963       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
964         IncludeGuard = IG_Rejected;
965         break;
966       }
967     }
968   }
969 
970   nextToken();
971   if (FormatTok->Tok.getKind() == tok::l_paren &&
972       FormatTok->WhitespaceRange.getBegin() ==
973           FormatTok->WhitespaceRange.getEnd()) {
974     parseParens();
975   }
976   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
977     Line->Level += PPBranchLevel + 1;
978   addUnwrappedLine();
979   ++Line->Level;
980 
981   // Errors during a preprocessor directive can only affect the layout of the
982   // preprocessor directive, and thus we ignore them. An alternative approach
983   // would be to use the same approach we use on the file level (no
984   // re-indentation if there was a structural error) within the macro
985   // definition.
986   parseFile();
987 }
988 
989 void UnwrappedLineParser::parsePPUnknown() {
990   do {
991     nextToken();
992   } while (!eof());
993   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
994     Line->Level += PPBranchLevel + 1;
995   addUnwrappedLine();
996 }
997 
998 // Here we exclude certain tokens that are not usually the first token in an
999 // unwrapped line. This is used in attempt to distinguish macro calls without
1000 // trailing semicolons from other constructs split to several lines.
1001 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1002   // Semicolon can be a null-statement, l_square can be a start of a macro or
1003   // a C++11 attribute, but this doesn't seem to be common.
1004   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1005          Tok.isNot(TT_AttributeSquare) &&
1006          // Tokens that can only be used as binary operators and a part of
1007          // overloaded operator names.
1008          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1009          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1010          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1011          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1012          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1013          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1014          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1015          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1016          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1017          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1018          Tok.isNot(tok::lesslessequal) &&
1019          // Colon is used in labels, base class lists, initializer lists,
1020          // range-based for loops, ternary operator, but should never be the
1021          // first token in an unwrapped line.
1022          Tok.isNot(tok::colon) &&
1023          // 'noexcept' is a trailing annotation.
1024          Tok.isNot(tok::kw_noexcept);
1025 }
1026 
1027 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1028                           const FormatToken *FormatTok) {
1029   // FIXME: This returns true for C/C++ keywords like 'struct'.
1030   return FormatTok->is(tok::identifier) &&
1031          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1032           !FormatTok->isOneOf(
1033               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1034               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1035               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1036               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1037               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1038               Keywords.kw_instanceof, Keywords.kw_interface,
1039               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1040 }
1041 
1042 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1043                                  const FormatToken *FormatTok) {
1044   return FormatTok->Tok.isLiteral() ||
1045          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1046          mustBeJSIdent(Keywords, FormatTok);
1047 }
1048 
1049 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1050 // when encountered after a value (see mustBeJSIdentOrValue).
1051 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1052                            const FormatToken *FormatTok) {
1053   return FormatTok->isOneOf(
1054       tok::kw_return, Keywords.kw_yield,
1055       // conditionals
1056       tok::kw_if, tok::kw_else,
1057       // loops
1058       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1059       // switch/case
1060       tok::kw_switch, tok::kw_case,
1061       // exceptions
1062       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1063       // declaration
1064       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1065       Keywords.kw_async, Keywords.kw_function,
1066       // import/export
1067       Keywords.kw_import, tok::kw_export);
1068 }
1069 
1070 // Checks whether a token is a type in K&R C (aka C78).
1071 static bool isC78Type(const FormatToken &Tok) {
1072   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1073                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1074                      tok::identifier);
1075 }
1076 
1077 // This function checks whether a token starts the first parameter declaration
1078 // in a K&R C (aka C78) function definition, e.g.:
1079 //   int f(a, b)
1080 //   short a, b;
1081 //   {
1082 //      return a + b;
1083 //   }
1084 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1085                                const FormatToken *FuncName) {
1086   assert(Tok);
1087   assert(Next);
1088   assert(FuncName);
1089 
1090   if (FuncName->isNot(tok::identifier))
1091     return false;
1092 
1093   const FormatToken *Prev = FuncName->Previous;
1094   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1095     return false;
1096 
1097   if (!isC78Type(*Tok) &&
1098       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1099     return false;
1100 
1101   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1102     return false;
1103 
1104   Tok = Tok->Previous;
1105   if (!Tok || Tok->isNot(tok::r_paren))
1106     return false;
1107 
1108   Tok = Tok->Previous;
1109   if (!Tok || Tok->isNot(tok::identifier))
1110     return false;
1111 
1112   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1113 }
1114 
1115 void UnwrappedLineParser::parseModuleImport() {
1116   nextToken();
1117   while (!eof()) {
1118     if (FormatTok->is(tok::colon)) {
1119       FormatTok->setType(TT_ModulePartitionColon);
1120     }
1121     // Handle import <foo/bar.h> as we would an include statement.
1122     else if (FormatTok->is(tok::less)) {
1123       nextToken();
1124       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1125         // Mark tokens up to the trailing line comments as implicit string
1126         // literals.
1127         if (FormatTok->isNot(tok::comment) &&
1128             !FormatTok->TokenText.startswith("//"))
1129           FormatTok->setType(TT_ImplicitStringLiteral);
1130         nextToken();
1131       }
1132     }
1133     if (FormatTok->is(tok::semi)) {
1134       nextToken();
1135       break;
1136     }
1137     nextToken();
1138   }
1139 
1140   addUnwrappedLine();
1141   return;
1142 }
1143 
1144 // readTokenWithJavaScriptASI reads the next token and terminates the current
1145 // line if JavaScript Automatic Semicolon Insertion must
1146 // happen between the current token and the next token.
1147 //
1148 // This method is conservative - it cannot cover all edge cases of JavaScript,
1149 // but only aims to correctly handle certain well known cases. It *must not*
1150 // return true in speculative cases.
1151 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1152   FormatToken *Previous = FormatTok;
1153   readToken();
1154   FormatToken *Next = FormatTok;
1155 
1156   bool IsOnSameLine =
1157       CommentsBeforeNextToken.empty()
1158           ? Next->NewlinesBefore == 0
1159           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1160   if (IsOnSameLine)
1161     return;
1162 
1163   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1164   bool PreviousStartsTemplateExpr =
1165       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1166   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1167     // If the line contains an '@' sign, the previous token might be an
1168     // annotation, which can precede another identifier/value.
1169     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1170       return LineNode.Tok->is(tok::at);
1171     });
1172     if (HasAt)
1173       return;
1174   }
1175   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1176     return addUnwrappedLine();
1177   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1178   bool NextEndsTemplateExpr =
1179       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1180   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1181       (PreviousMustBeValue ||
1182        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1183                          tok::minusminus)))
1184     return addUnwrappedLine();
1185   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1186       isJSDeclOrStmt(Keywords, Next))
1187     return addUnwrappedLine();
1188 }
1189 
// Parses one structural element starting at the current token.
//
// The function works in two phases:
//  1. A switch on the kind of the first token hands constructs that are
//     recognizable from their leading keyword (namespace, if, loops, switch,
//     case labels, try, extern blocks, imports, ...) to dedicated parsers.
//     Cases that `return` have fully handled the element; cases that `break`
//     fall through to phase 2.
//  2. A loop that consumes tokens one construct at a time until one of the
//     cases emits the unwrapped line and returns, or until eof() is reached.
//
// \p IsTopLevel is only consulted after a parenthesized list, to decide
// whether a K&R C style parameter declaration may follow.
void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
  // TableGen: an include directive optionally followed by a string literal
  // forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the leading token.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Every token inside the asm braces is marked as Finalized; the closing
      // brace ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are modifiers that are simply
    // consumed; otherwise they are parsed as access specifiers.
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "..." {' is handled here; anything else continues as a
    // regular declaration.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        // Proto: 'import' ['public'] string-literal [';'].
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    // 'signals:' / 'slots:' style sections (and their Q_ variants): the
    // keyword followed by a colon is emitted as a line of its own.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: consume the rest of the element token by token.
  do {
    // Token preceding the current one; captured before any case advances.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // The fall-through above only applies to C++; for every other language
      // the enum ends the structural element here.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef' followed by one of the NS_/CF_ enum macros is parsed as an
      // enum.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here; only the pending line is
      // emitted.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      // Consume the operator symbol so it is not interpreted on its own.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type or identifier, a parenthesized
      // list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // 'interface' in the remaining languages is parsed like struct/class.
      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // Only the first identifier of a line (optionally preceded by a single
      // comment) is considered as a label or a macro invocation.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: the identifier is all upper case, is either at least
        // five characters long or function-like, is followed by a line
        // break, and the next token can start a line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_FatArrow. They always start an expression or a child block if
      // followed by a curly brace.
      if (FormatTok->is(TT_FatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          // C# may break after => if the next character is a newline.
          if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
            // calling `addUnwrappedLine()` here causes odd parsing errors.
            FormatTok->MustBreakBefore = true;
          }
          parseChildBlock();
        }
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        // Proto: '= <' starts a list that is closed by '>'.
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1684 
// Tries to parse a C# property accessor block starting at the current '{'.
//
// Returns false, leaving the token stream position unchanged, when the
// language is not C#, when the token before the '{' is not an identifier, or
// when the lookahead finds no 'get'/'set'. Otherwise the accessor block is
// consumed, the resulting unwrapped line(s) are emitted, and true is
// returned.
bool UnwrappedLineParser::tryToParsePropertyAccessor() {
  assert(FormatTok->is(tok::l_brace));
  if (!Style.isCSharp())
    return false;
  // See if it's a property accessor.
  // NOTE(review): assumes FormatTok->Previous is non-null here -- confirm
  // that a '{' can never be the first token when this is reached.
  if (FormatTok->Previous->isNot(tok::identifier))
    return false;

  // See if we are inside a property accessor.
  //
  // Record the current tokenPosition so that we can advance and
  // reset the current token. `Next` is not set yet so we need
  // another way to advance along the token stream.
  unsigned int StoredPosition = Tokens->getPosition();
  FormatToken *Tok = Tokens->getNextToken();

  // A trivial property accessor is of the form:
  // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
  // Track these as they do not require line breaks to be introduced.
  bool HasGetOrSet = false;
  bool IsTrivialPropertyAccessor = true;
  // Lookahead: skip over ';', access specifiers and get/set. The first other
  // token ends the scan; unless it is the closing '}', the accessor is not
  // trivial.
  // NOTE(review): the loop condition calls eof() while the loop advances the
  // local `Tok` through Tokens->getNextToken() -- confirm that eof() reflects
  // the lookahead position.
  while (!eof()) {
    if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
                     tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
                     Keywords.kw_set)) {
      if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
        HasGetOrSet = true;
      Tok = Tokens->getNextToken();
      continue;
    }
    if (Tok->isNot(tok::r_brace))
      IsTrivialPropertyAccessor = false;
    break;
  }

  // Without get/set this is not a property accessor: rewind and give up.
  if (!HasGetOrSet) {
    Tokens->setPosition(StoredPosition);
    return false;
  }

  // Try to parse the property accessor:
  // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
  // Rewind the lookahead before parsing for real.
  Tokens->setPosition(StoredPosition);
  if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
    addUnwrappedLine();
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::r_brace:
      // End of the accessor block. If an '=' follows, consume everything up
      // to and including the next ';' as part of the same line.
      nextToken();
      if (FormatTok->is(tok::equal)) {
        while (!eof() && FormatTok->isNot(tok::semi))
          nextToken();
        nextToken();
      }
      addUnwrappedLine();
      return true;
    case tok::l_brace:
      // Accessor body in braces: parsed one level deeper.
      ++Line->Level;
      parseBlock(/*MustBeDeclaration=*/true);
      addUnwrappedLine();
      --Line->Level;
      break;
    case tok::equal:
      if (FormatTok->is(TT_FatArrow)) {
        // Expression-bodied accessor ('=> expr;'): consume up to and
        // including the ';' and emit it one level deeper.
        ++Line->Level;
        do {
          nextToken();
        } while (!eof() && FormatTok->isNot(tok::semi));
        nextToken();
        addUnwrappedLine();
        --Line->Level;
        break;
      }
      nextToken();
      break;
    default:
      if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
          !IsTrivialPropertyAccessor) {
        // Non-trivial get/set needs to be on its own line.
        addUnwrappedLine();
      }
      nextToken();
    }
  } while (!eof());

  // Unreachable for well-formed code (paired '{' and '}').
  return true;
}
1774 
1775 bool UnwrappedLineParser::tryToParseLambda() {
1776   if (!Style.isCpp()) {
1777     nextToken();
1778     return false;
1779   }
1780   assert(FormatTok->is(tok::l_square));
1781   FormatToken &LSquare = *FormatTok;
1782   if (!tryToParseLambdaIntroducer())
1783     return false;
1784 
1785   bool SeenArrow = false;
1786 
1787   while (FormatTok->isNot(tok::l_brace)) {
1788     if (FormatTok->isSimpleTypeSpecifier()) {
1789       nextToken();
1790       continue;
1791     }
1792     switch (FormatTok->Tok.getKind()) {
1793     case tok::l_brace:
1794       break;
1795     case tok::l_paren:
1796       parseParens();
1797       break;
1798     case tok::amp:
1799     case tok::star:
1800     case tok::kw_const:
1801     case tok::comma:
1802     case tok::less:
1803     case tok::greater:
1804     case tok::identifier:
1805     case tok::numeric_constant:
1806     case tok::coloncolon:
1807     case tok::kw_class:
1808     case tok::kw_mutable:
1809     case tok::kw_noexcept:
1810     case tok::kw_template:
1811     case tok::kw_typename:
1812       nextToken();
1813       break;
1814     // Specialization of a template with an integer parameter can contain
1815     // arithmetic, logical, comparison and ternary operators.
1816     //
1817     // FIXME: This also accepts sequences of operators that are not in the scope
1818     // of a template argument list.
1819     //
1820     // In a C++ lambda a template type can only occur after an arrow. We use
1821     // this as an heuristic to distinguish between Objective-C expressions
1822     // followed by an `a->b` expression, such as:
1823     // ([obj func:arg] + a->b)
1824     // Otherwise the code below would parse as a lambda.
1825     //
1826     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1827     // explicit template lists: []<bool b = true && false>(U &&u){}
1828     case tok::plus:
1829     case tok::minus:
1830     case tok::exclaim:
1831     case tok::tilde:
1832     case tok::slash:
1833     case tok::percent:
1834     case tok::lessless:
1835     case tok::pipe:
1836     case tok::pipepipe:
1837     case tok::ampamp:
1838     case tok::caret:
1839     case tok::equalequal:
1840     case tok::exclaimequal:
1841     case tok::greaterequal:
1842     case tok::lessequal:
1843     case tok::question:
1844     case tok::colon:
1845     case tok::ellipsis:
1846     case tok::kw_true:
1847     case tok::kw_false:
1848       if (SeenArrow) {
1849         nextToken();
1850         break;
1851       }
1852       return true;
1853     case tok::arrow:
1854       // This might or might not actually be a lambda arrow (this could be an
1855       // ObjC method invocation followed by a dereferencing arrow). We might
1856       // reset this back to TT_Unknown in TokenAnnotator.
1857       FormatTok->setType(TT_LambdaArrow);
1858       SeenArrow = true;
1859       nextToken();
1860       break;
1861     default:
1862       return true;
1863     }
1864   }
1865   FormatTok->setType(TT_LambdaLBrace);
1866   LSquare.setType(TT_LambdaLSquare);
1867   parseChildBlock();
1868   return true;
1869 }
1870 
1871 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1872   const FormatToken *Previous = FormatTok->Previous;
1873   if (Previous &&
1874       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1875                          tok::kw_delete, tok::l_square) ||
1876        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1877        Previous->isSimpleTypeSpecifier())) {
1878     nextToken();
1879     return false;
1880   }
1881   nextToken();
1882   if (FormatTok->is(tok::l_square)) {
1883     return false;
1884   }
1885   parseSquare(/*LambdaIntroducer=*/true);
1886   return true;
1887 }
1888 
1889 void UnwrappedLineParser::tryToParseJSFunction() {
1890   assert(FormatTok->is(Keywords.kw_function) ||
1891          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1892   if (FormatTok->is(Keywords.kw_async))
1893     nextToken();
1894   // Consume "function".
1895   nextToken();
1896 
1897   // Consume * (generator function). Treat it like C++'s overloaded operators.
1898   if (FormatTok->is(tok::star)) {
1899     FormatTok->setType(TT_OverloadedOperator);
1900     nextToken();
1901   }
1902 
1903   // Consume function name.
1904   if (FormatTok->is(tok::identifier))
1905     nextToken();
1906 
1907   if (FormatTok->isNot(tok::l_paren))
1908     return;
1909 
1910   // Parse formal parameter list.
1911   parseParens();
1912 
1913   if (FormatTok->is(tok::colon)) {
1914     // Parse a type definition.
1915     nextToken();
1916 
1917     // Eat the type declaration. For braced inline object types, balance braces,
1918     // otherwise just parse until finding an l_brace for the function body.
1919     if (FormatTok->is(tok::l_brace))
1920       tryToParseBracedList();
1921     else
1922       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1923         nextToken();
1924   }
1925 
1926   if (FormatTok->is(tok::semi))
1927     return;
1928 
1929   parseChildBlock();
1930 }
1931 
1932 bool UnwrappedLineParser::tryToParseBracedList() {
1933   if (FormatTok->is(BK_Unknown))
1934     calculateBraceTypes();
1935   assert(FormatTok->isNot(BK_Unknown));
1936   if (FormatTok->is(BK_Block))
1937     return false;
1938   nextToken();
1939   parseBracedList();
1940   return true;
1941 }
1942 
1943 bool UnwrappedLineParser::tryToParseCSharpLambda() {
1944   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1945   // TT_FatArrow. They always start an expression or a child block if
1946   // followed by a curly brace.
1947   nextToken();
1948   if (FormatTok->isNot(tok::l_brace))
1949     return false;
1950   // C# may break after => if the next character is a newline.
1951   if (Style.BraceWrapping.AfterFunction) {
1952     // calling `addUnwrappedLine()` here causes odd parsing errors.
1953     FormatTok->MustBreakBefore = true;
1954   }
1955   parseChildBlock();
1956   return true;
1957 }
1958 
1959 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1960                                           bool IsEnum,
1961                                           tok::TokenKind ClosingBraceKind) {
1962   bool HasError = false;
1963 
1964   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1965   // replace this by using parseAssignmentExpression() inside.
1966   do {
1967     if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
1968       if (tryToParseCSharpLambda())
1969         continue;
1970     if (Style.isJavaScript()) {
1971       if (FormatTok->is(Keywords.kw_function) ||
1972           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1973         tryToParseJSFunction();
1974         continue;
1975       }
1976       if (FormatTok->is(TT_FatArrow)) {
1977         nextToken();
1978         // Fat arrows can be followed by simple expressions or by child blocks
1979         // in curly braces.
1980         if (FormatTok->is(tok::l_brace)) {
1981           parseChildBlock();
1982           continue;
1983         }
1984       }
1985       if (FormatTok->is(tok::l_brace)) {
1986         // Could be a method inside of a braced list `{a() { return 1; }}`.
1987         if (tryToParseBracedList())
1988           continue;
1989         parseChildBlock();
1990       }
1991     }
1992     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1993       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1994         addUnwrappedLine();
1995       nextToken();
1996       return !HasError;
1997     }
1998     switch (FormatTok->Tok.getKind()) {
1999     case tok::caret:
2000       nextToken();
2001       if (FormatTok->is(tok::l_brace)) {
2002         parseChildBlock();
2003       }
2004       break;
2005     case tok::l_square:
2006       if (Style.isCSharp())
2007         parseSquare();
2008       else
2009         tryToParseLambda();
2010       break;
2011     case tok::l_paren:
2012       parseParens();
2013       // JavaScript can just have free standing methods and getters/setters in
2014       // object literals. Detect them by a "{" following ")".
2015       if (Style.isJavaScript()) {
2016         if (FormatTok->is(tok::l_brace))
2017           parseChildBlock();
2018         break;
2019       }
2020       break;
2021     case tok::l_brace:
2022       // Assume there are no blocks inside a braced init list apart
2023       // from the ones we explicitly parse out (like lambdas).
2024       FormatTok->setBlockKind(BK_BracedInit);
2025       nextToken();
2026       parseBracedList();
2027       break;
2028     case tok::less:
2029       if (Style.Language == FormatStyle::LK_Proto) {
2030         nextToken();
2031         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2032                         /*ClosingBraceKind=*/tok::greater);
2033       } else {
2034         nextToken();
2035       }
2036       break;
2037     case tok::semi:
2038       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2039       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2040       // used for error recovery if we have otherwise determined that this is
2041       // a braced list.
2042       if (Style.isJavaScript()) {
2043         nextToken();
2044         break;
2045       }
2046       HasError = true;
2047       if (!ContinueOnSemicolons)
2048         return !HasError;
2049       nextToken();
2050       break;
2051     case tok::comma:
2052       nextToken();
2053       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2054         addUnwrappedLine();
2055       break;
2056     default:
2057       nextToken();
2058       break;
2059     }
2060   } while (!eof());
2061   return false;
2062 }
2063 
2064 void UnwrappedLineParser::parseParens() {
2065   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2066   nextToken();
2067   do {
2068     switch (FormatTok->Tok.getKind()) {
2069     case tok::l_paren:
2070       parseParens();
2071       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2072         parseChildBlock();
2073       break;
2074     case tok::r_paren:
2075       nextToken();
2076       return;
2077     case tok::r_brace:
2078       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2079       return;
2080     case tok::l_square:
2081       tryToParseLambda();
2082       break;
2083     case tok::l_brace:
2084       if (!tryToParseBracedList())
2085         parseChildBlock();
2086       break;
2087     case tok::at:
2088       nextToken();
2089       if (FormatTok->Tok.is(tok::l_brace)) {
2090         nextToken();
2091         parseBracedList();
2092       }
2093       break;
2094     case tok::equal:
2095       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2096         tryToParseCSharpLambda();
2097       else
2098         nextToken();
2099       break;
2100     case tok::kw_class:
2101       if (Style.isJavaScript())
2102         parseRecord(/*ParseAsExpr=*/true);
2103       else
2104         nextToken();
2105       break;
2106     case tok::identifier:
2107       if (Style.isJavaScript() &&
2108           (FormatTok->is(Keywords.kw_function) ||
2109            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2110         tryToParseJSFunction();
2111       else
2112         nextToken();
2113       break;
2114     default:
2115       nextToken();
2116       break;
2117     }
2118   } while (!eof());
2119 }
2120 
2121 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2122   if (!LambdaIntroducer) {
2123     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2124     if (tryToParseLambda())
2125       return;
2126   }
2127   do {
2128     switch (FormatTok->Tok.getKind()) {
2129     case tok::l_paren:
2130       parseParens();
2131       break;
2132     case tok::r_square:
2133       nextToken();
2134       return;
2135     case tok::r_brace:
2136       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2137       return;
2138     case tok::l_square:
2139       parseSquare();
2140       break;
2141     case tok::l_brace: {
2142       if (!tryToParseBracedList())
2143         parseChildBlock();
2144       break;
2145     }
2146     case tok::at:
2147       nextToken();
2148       if (FormatTok->Tok.is(tok::l_brace)) {
2149         nextToken();
2150         parseBracedList();
2151       }
2152       break;
2153     default:
2154       nextToken();
2155       break;
2156     }
2157   } while (!eof());
2158 }
2159 
2160 void UnwrappedLineParser::parseIfThenElse() {
2161   auto HandleAttributes = [this]() {
2162     // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2163     if (FormatTok->is(TT_AttributeMacro))
2164       nextToken();
2165     // Handle [[likely]] / [[unlikely]] attributes.
2166     if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2167       parseSquare();
2168   };
2169 
2170   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2171   nextToken();
2172   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2173     nextToken();
2174   if (FormatTok->Tok.is(tok::l_paren))
2175     parseParens();
2176   HandleAttributes();
2177   bool NeedsUnwrappedLine = false;
2178   if (FormatTok->Tok.is(tok::l_brace)) {
2179     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2180     parseBlock();
2181     if (Style.BraceWrapping.BeforeElse)
2182       addUnwrappedLine();
2183     else
2184       NeedsUnwrappedLine = true;
2185   } else {
2186     addUnwrappedLine();
2187     ++Line->Level;
2188     parseStructuralElement();
2189     --Line->Level;
2190   }
2191   if (FormatTok->Tok.is(tok::kw_else)) {
2192     nextToken();
2193     HandleAttributes();
2194     if (FormatTok->Tok.is(tok::l_brace)) {
2195       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2196       parseBlock();
2197       addUnwrappedLine();
2198     } else if (FormatTok->Tok.is(tok::kw_if)) {
2199       FormatToken *Previous = Tokens->getPreviousToken();
2200       bool PrecededByComment = Previous && Previous->is(tok::comment);
2201       if (PrecededByComment) {
2202         addUnwrappedLine();
2203         ++Line->Level;
2204       }
2205       parseIfThenElse();
2206       if (PrecededByComment)
2207         --Line->Level;
2208     } else {
2209       addUnwrappedLine();
2210       ++Line->Level;
2211       parseStructuralElement();
2212       if (FormatTok->is(tok::eof))
2213         addUnwrappedLine();
2214       --Line->Level;
2215     }
2216   } else if (NeedsUnwrappedLine) {
2217     addUnwrappedLine();
2218   }
2219 }
2220 
2221 void UnwrappedLineParser::parseTryCatch() {
2222   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2223   nextToken();
2224   bool NeedsUnwrappedLine = false;
2225   if (FormatTok->is(tok::colon)) {
2226     // We are in a function try block, what comes is an initializer list.
2227     nextToken();
2228 
2229     // In case identifiers were removed by clang-tidy, what might follow is
2230     // multiple commas in sequence - before the first identifier.
2231     while (FormatTok->is(tok::comma))
2232       nextToken();
2233 
2234     while (FormatTok->is(tok::identifier)) {
2235       nextToken();
2236       if (FormatTok->is(tok::l_paren))
2237         parseParens();
2238       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2239           FormatTok->is(tok::l_brace)) {
2240         do {
2241           nextToken();
2242         } while (!FormatTok->is(tok::r_brace));
2243         nextToken();
2244       }
2245 
2246       // In case identifiers were removed by clang-tidy, what might follow is
2247       // multiple commas in sequence - after the first identifier.
2248       while (FormatTok->is(tok::comma))
2249         nextToken();
2250     }
2251   }
2252   // Parse try with resource.
2253   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2254     parseParens();
2255   }
2256   if (FormatTok->is(tok::l_brace)) {
2257     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2258     parseBlock();
2259     if (Style.BraceWrapping.BeforeCatch) {
2260       addUnwrappedLine();
2261     } else {
2262       NeedsUnwrappedLine = true;
2263     }
2264   } else if (!FormatTok->is(tok::kw_catch)) {
2265     // The C++ standard requires a compound-statement after a try.
2266     // If there's none, we try to assume there's a structuralElement
2267     // and try to continue.
2268     addUnwrappedLine();
2269     ++Line->Level;
2270     parseStructuralElement();
2271     --Line->Level;
2272   }
2273   while (1) {
2274     if (FormatTok->is(tok::at))
2275       nextToken();
2276     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2277                              tok::kw___finally) ||
2278           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2279            FormatTok->is(Keywords.kw_finally)) ||
2280           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2281            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2282       break;
2283     nextToken();
2284     while (FormatTok->isNot(tok::l_brace)) {
2285       if (FormatTok->is(tok::l_paren)) {
2286         parseParens();
2287         continue;
2288       }
2289       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2290         return;
2291       nextToken();
2292     }
2293     NeedsUnwrappedLine = false;
2294     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2295     parseBlock();
2296     if (Style.BraceWrapping.BeforeCatch)
2297       addUnwrappedLine();
2298     else
2299       NeedsUnwrappedLine = true;
2300   }
2301   if (NeedsUnwrappedLine)
2302     addUnwrappedLine();
2303 }
2304 
2305 void UnwrappedLineParser::parseNamespace() {
2306   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2307          "'namespace' expected");
2308 
2309   const FormatToken &InitialToken = *FormatTok;
2310   nextToken();
2311   if (InitialToken.is(TT_NamespaceMacro)) {
2312     parseParens();
2313   } else {
2314     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2315                               tok::l_square, tok::period)) {
2316       if (FormatTok->is(tok::l_square))
2317         parseSquare();
2318       else
2319         nextToken();
2320     }
2321   }
2322   if (FormatTok->Tok.is(tok::l_brace)) {
2323     if (ShouldBreakBeforeBrace(Style, InitialToken))
2324       addUnwrappedLine();
2325 
2326     unsigned AddLevels =
2327         Style.NamespaceIndentation == FormatStyle::NI_All ||
2328                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2329                  DeclarationScopeStack.size() > 1)
2330             ? 1u
2331             : 0u;
2332     bool ManageWhitesmithsBraces =
2333         AddLevels == 0u &&
2334         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2335 
2336     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2337     // the whole block.
2338     if (ManageWhitesmithsBraces)
2339       ++Line->Level;
2340 
2341     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2342                /*MunchSemi=*/true,
2343                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2344 
2345     // Munch the semicolon after a namespace. This is more common than one would
2346     // think. Putting the semicolon into its own line is very ugly.
2347     if (FormatTok->Tok.is(tok::semi))
2348       nextToken();
2349 
2350     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2351 
2352     if (ManageWhitesmithsBraces)
2353       --Line->Level;
2354   }
2355   // FIXME: Add error handling.
2356 }
2357 
2358 void UnwrappedLineParser::parseNew() {
2359   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2360   nextToken();
2361 
2362   if (Style.isCSharp()) {
2363     do {
2364       if (FormatTok->is(tok::l_brace))
2365         parseBracedList();
2366 
2367       if (FormatTok->isOneOf(tok::semi, tok::comma))
2368         return;
2369 
2370       nextToken();
2371     } while (!eof());
2372   }
2373 
2374   if (Style.Language != FormatStyle::LK_Java)
2375     return;
2376 
2377   // In Java, we can parse everything up to the parens, which aren't optional.
2378   do {
2379     // There should not be a ;, { or } before the new's open paren.
2380     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2381       return;
2382 
2383     // Consume the parens.
2384     if (FormatTok->is(tok::l_paren)) {
2385       parseParens();
2386 
2387       // If there is a class body of an anonymous class, consume that as child.
2388       if (FormatTok->is(tok::l_brace))
2389         parseChildBlock();
2390       return;
2391     }
2392     nextToken();
2393   } while (!eof());
2394 }
2395 
2396 void UnwrappedLineParser::parseForOrWhileLoop() {
2397   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2398          "'for', 'while' or foreach macro expected");
2399   nextToken();
2400   // JS' for await ( ...
2401   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2402     nextToken();
2403   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2404     nextToken();
2405   if (FormatTok->Tok.is(tok::l_paren))
2406     parseParens();
2407   if (FormatTok->Tok.is(tok::l_brace)) {
2408     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2409     parseBlock();
2410     addUnwrappedLine();
2411   } else {
2412     addUnwrappedLine();
2413     ++Line->Level;
2414     parseStructuralElement();
2415     --Line->Level;
2416   }
2417 }
2418 
2419 void UnwrappedLineParser::parseDoWhile() {
2420   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2421   nextToken();
2422   if (FormatTok->Tok.is(tok::l_brace)) {
2423     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2424     parseBlock();
2425     if (Style.BraceWrapping.BeforeWhile)
2426       addUnwrappedLine();
2427   } else {
2428     addUnwrappedLine();
2429     ++Line->Level;
2430     parseStructuralElement();
2431     --Line->Level;
2432   }
2433 
2434   // FIXME: Add error handling.
2435   if (!FormatTok->Tok.is(tok::kw_while)) {
2436     addUnwrappedLine();
2437     return;
2438   }
2439 
2440   // If in Whitesmiths mode, the line with the while() needs to be indented
2441   // to the same level as the block.
2442   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2443     ++Line->Level;
2444 
2445   nextToken();
2446   parseStructuralElement();
2447 }
2448 
2449 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2450   nextToken();
2451   unsigned OldLineLevel = Line->Level;
2452   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2453     --Line->Level;
2454   if (LeftAlignLabel)
2455     Line->Level = 0;
2456 
2457   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2458       FormatTok->Tok.is(tok::l_brace)) {
2459 
2460     CompoundStatementIndenter Indenter(this, Line->Level,
2461                                        Style.BraceWrapping.AfterCaseLabel,
2462                                        Style.BraceWrapping.IndentBraces);
2463     parseBlock();
2464     if (FormatTok->Tok.is(tok::kw_break)) {
2465       if (Style.BraceWrapping.AfterControlStatement ==
2466           FormatStyle::BWACS_Always) {
2467         addUnwrappedLine();
2468         if (!Style.IndentCaseBlocks &&
2469             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2470           Line->Level++;
2471         }
2472       }
2473       parseStructuralElement();
2474     }
2475     addUnwrappedLine();
2476   } else {
2477     if (FormatTok->is(tok::semi))
2478       nextToken();
2479     addUnwrappedLine();
2480   }
2481   Line->Level = OldLineLevel;
2482   if (FormatTok->isNot(tok::l_brace)) {
2483     parseStructuralElement();
2484     addUnwrappedLine();
2485   }
2486 }
2487 
2488 void UnwrappedLineParser::parseCaseLabel() {
2489   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2490 
2491   // FIXME: fix handling of complex expressions here.
2492   do {
2493     nextToken();
2494   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2495   parseLabel();
2496 }
2497 
2498 void UnwrappedLineParser::parseSwitch() {
2499   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2500   nextToken();
2501   if (FormatTok->Tok.is(tok::l_paren))
2502     parseParens();
2503   if (FormatTok->Tok.is(tok::l_brace)) {
2504     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2505     parseBlock();
2506     addUnwrappedLine();
2507   } else {
2508     addUnwrappedLine();
2509     ++Line->Level;
2510     parseStructuralElement();
2511     --Line->Level;
2512   }
2513 }
2514 
2515 void UnwrappedLineParser::parseAccessSpecifier() {
2516   nextToken();
2517   // Understand Qt's slots.
2518   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2519     nextToken();
2520   // Otherwise, we don't know what it is, and we'd better keep the next token.
2521   if (FormatTok->Tok.is(tok::colon))
2522     nextToken();
2523   addUnwrappedLine();
2524 }
2525 
2526 void UnwrappedLineParser::parseConcept() {
2527   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2528   nextToken();
2529   if (!FormatTok->Tok.is(tok::identifier))
2530     return;
2531   nextToken();
2532   if (!FormatTok->Tok.is(tok::equal))
2533     return;
2534   nextToken();
2535   if (FormatTok->Tok.is(tok::kw_requires)) {
2536     nextToken();
2537     parseRequiresExpression(Line->Level);
2538   } else {
2539     parseConstraintExpression(Line->Level);
2540   }
2541 }
2542 
2543 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2544   // requires (R range)
2545   if (FormatTok->Tok.is(tok::l_paren)) {
2546     parseParens();
2547     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2548       addUnwrappedLine();
2549       --Line->Level;
2550     }
2551   }
2552 
2553   if (FormatTok->Tok.is(tok::l_brace)) {
2554     if (Style.BraceWrapping.AfterFunction)
2555       addUnwrappedLine();
2556     FormatTok->setType(TT_FunctionLBrace);
2557     parseBlock();
2558     addUnwrappedLine();
2559   } else {
2560     parseConstraintExpression(OriginalLevel);
2561   }
2562 }
2563 
2564 void UnwrappedLineParser::parseConstraintExpression(
2565     unsigned int OriginalLevel) {
2566   // requires Id<T> && Id<T> || Id<T>
2567   while (
2568       FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
2569     nextToken();
2570     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
2571                               tok::greater, tok::comma, tok::ellipsis)) {
2572       if (FormatTok->Tok.is(tok::less)) {
2573         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2574                         /*ClosingBraceKind=*/tok::greater);
2575         continue;
2576       }
2577       nextToken();
2578     }
2579     if (FormatTok->Tok.is(tok::kw_requires)) {
2580       parseRequiresExpression(OriginalLevel);
2581     }
2582     if (FormatTok->Tok.is(tok::less)) {
2583       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2584                       /*ClosingBraceKind=*/tok::greater);
2585     }
2586 
2587     if (FormatTok->Tok.is(tok::l_paren)) {
2588       parseParens();
2589     }
2590     if (FormatTok->Tok.is(tok::l_brace)) {
2591       if (Style.BraceWrapping.AfterFunction)
2592         addUnwrappedLine();
2593       FormatTok->setType(TT_FunctionLBrace);
2594       parseBlock();
2595     }
2596     if (FormatTok->Tok.is(tok::semi)) {
2597       // Eat any trailing semi.
2598       nextToken();
2599       addUnwrappedLine();
2600     }
2601     if (FormatTok->Tok.is(tok::colon)) {
2602       return;
2603     }
2604     if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
2605       if (FormatTok->Previous &&
2606           !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
2607                                         tok::coloncolon)) {
2608         addUnwrappedLine();
2609       }
2610       if (Style.IndentRequires && OriginalLevel != Line->Level) {
2611         --Line->Level;
2612       }
2613       break;
2614     } else {
2615       FormatTok->setType(TT_ConstraintJunctions);
2616     }
2617 
2618     nextToken();
2619   }
2620 }
2621 
2622 void UnwrappedLineParser::parseRequires() {
2623   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2624 
2625   unsigned OriginalLevel = Line->Level;
2626   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2627     addUnwrappedLine();
2628     if (Style.IndentRequires) {
2629       Line->Level++;
2630     }
2631   }
2632   nextToken();
2633 
2634   parseRequiresExpression(OriginalLevel);
2635 }
2636 
2637 bool UnwrappedLineParser::parseEnum() {
2638   // Won't be 'enum' for NS_ENUMs.
2639   if (FormatTok->Tok.is(tok::kw_enum))
2640     nextToken();
2641 
2642   const FormatToken &InitialToken = *FormatTok;
2643 
2644   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2645   // declarations. An "enum" keyword followed by a colon would be a syntax
2646   // error and thus assume it is just an identifier.
2647   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
2648     return false;
2649 
2650   // In protobuf, "enum" can be used as a field name.
2651   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2652     return false;
2653 
2654   // Eat up enum class ...
2655   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2656     nextToken();
2657 
2658   while (FormatTok->Tok.getIdentifierInfo() ||
2659          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2660                             tok::greater, tok::comma, tok::question)) {
2661     nextToken();
2662     // We can have macros or attributes in between 'enum' and the enum name.
2663     if (FormatTok->is(tok::l_paren))
2664       parseParens();
2665     if (FormatTok->is(tok::identifier)) {
2666       nextToken();
2667       // If there are two identifiers in a row, this is likely an elaborate
2668       // return type. In Java, this can be "implements", etc.
2669       if (Style.isCpp() && FormatTok->is(tok::identifier))
2670         return false;
2671     }
2672   }
2673 
2674   // Just a declaration or something is wrong.
2675   if (FormatTok->isNot(tok::l_brace))
2676     return true;
2677   FormatTok->setBlockKind(BK_Block);
2678 
2679   if (Style.Language == FormatStyle::LK_Java) {
2680     // Java enums are different.
2681     parseJavaEnumBody();
2682     return true;
2683   }
2684   if (Style.Language == FormatStyle::LK_Proto) {
2685     parseBlock(/*MustBeDeclaration=*/true);
2686     return true;
2687   }
2688 
2689   if (!Style.AllowShortEnumsOnASingleLine &&
2690       ShouldBreakBeforeBrace(Style, InitialToken))
2691     addUnwrappedLine();
2692   // Parse enum body.
2693   nextToken();
2694   if (!Style.AllowShortEnumsOnASingleLine) {
2695     addUnwrappedLine();
2696     Line->Level += 1;
2697   }
2698   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2699                                    /*IsEnum=*/true);
2700   if (!Style.AllowShortEnumsOnASingleLine)
2701     Line->Level -= 1;
2702   if (HasError) {
2703     if (FormatTok->is(tok::semi))
2704       nextToken();
2705     addUnwrappedLine();
2706   }
2707   return true;
2708 
2709   // There is no addUnwrappedLine() here so that we fall through to parsing a
2710   // structural element afterwards. Thus, in "enum A {} n, m;",
2711   // "} n, m;" will end up in one unwrapped line.
2712 }
2713 
2714 bool UnwrappedLineParser::parseStructLike() {
2715   // parseRecord falls through and does not yet add an unwrapped line as a
2716   // record declaration or definition can start a structural element.
2717   parseRecord();
2718   // This does not apply to Java, JavaScript and C#.
2719   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
2720       Style.isCSharp()) {
2721     if (FormatTok->is(tok::semi))
2722       nextToken();
2723     addUnwrappedLine();
2724     return true;
2725   }
2726   return false;
2727 }
2728 
2729 namespace {
2730 // A class used to set and restore the Token position when peeking
2731 // ahead in the token source.
2732 class ScopedTokenPosition {
2733   unsigned StoredPosition;
2734   FormatTokenSource *Tokens;
2735 
2736 public:
2737   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2738     assert(Tokens && "Tokens expected to not be null");
2739     StoredPosition = Tokens->getPosition();
2740   }
2741 
2742   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2743 };
2744 } // namespace
2745 
2746 // Look to see if we have [[ by looking ahead, if
2747 // its not then rewind to the original position.
2748 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2749   ScopedTokenPosition AutoPosition(Tokens);
2750   FormatToken *Tok = Tokens->getNextToken();
2751   // We already read the first [ check for the second.
2752   if (!Tok->is(tok::l_square)) {
2753     return false;
2754   }
2755   // Double check that the attribute is just something
2756   // fairly simple.
2757   while (Tok->isNot(tok::eof)) {
2758     if (Tok->is(tok::r_square)) {
2759       break;
2760     }
2761     Tok = Tokens->getNextToken();
2762   }
2763   if (Tok->is(tok::eof))
2764     return false;
2765   Tok = Tokens->getNextToken();
2766   if (!Tok->is(tok::r_square)) {
2767     return false;
2768   }
2769   Tok = Tokens->getNextToken();
2770   if (Tok->is(tok::semi)) {
2771     return false;
2772   }
2773   return true;
2774 }
2775 
2776 void UnwrappedLineParser::parseJavaEnumBody() {
2777   // Determine whether the enum is simple, i.e. does not have a semicolon or
2778   // constants with class bodies. Simple enums can be formatted like braced
2779   // lists, contracted to a single line, etc.
2780   unsigned StoredPosition = Tokens->getPosition();
2781   bool IsSimple = true;
2782   FormatToken *Tok = Tokens->getNextToken();
2783   while (!Tok->is(tok::eof)) {
2784     if (Tok->is(tok::r_brace))
2785       break;
2786     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2787       IsSimple = false;
2788       break;
2789     }
2790     // FIXME: This will also mark enums with braces in the arguments to enum
2791     // constants as "not simple". This is probably fine in practice, though.
2792     Tok = Tokens->getNextToken();
2793   }
2794   FormatTok = Tokens->setPosition(StoredPosition);
2795 
2796   if (IsSimple) {
2797     nextToken();
2798     parseBracedList();
2799     addUnwrappedLine();
2800     return;
2801   }
2802 
2803   // Parse the body of a more complex enum.
2804   // First add a line for everything up to the "{".
2805   nextToken();
2806   addUnwrappedLine();
2807   ++Line->Level;
2808 
2809   // Parse the enum constants.
2810   while (FormatTok) {
2811     if (FormatTok->is(tok::l_brace)) {
2812       // Parse the constant's class body.
2813       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2814                  /*MunchSemi=*/false);
2815     } else if (FormatTok->is(tok::l_paren)) {
2816       parseParens();
2817     } else if (FormatTok->is(tok::comma)) {
2818       nextToken();
2819       addUnwrappedLine();
2820     } else if (FormatTok->is(tok::semi)) {
2821       nextToken();
2822       addUnwrappedLine();
2823       break;
2824     } else if (FormatTok->is(tok::r_brace)) {
2825       addUnwrappedLine();
2826       break;
2827     } else {
2828       nextToken();
2829     }
2830   }
2831 
2832   // Parse the class body after the enum's ";" if any.
2833   parseLevel(/*HasOpeningBrace=*/true);
2834   nextToken();
2835   --Line->Level;
2836   addUnwrappedLine();
2837 }
2838 
// Parses a record (the token introducing it is the current token on entry):
// the name with any attributes or macros around it, an optional base-clause
// or template argument part, and the body if there is one.
//
// \param ParseAsExpr if true, a '{' body is parsed with parseChildBlock();
//        otherwise it is parsed as a declaration block with parseBlock().
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Remember the introducing token; it is passed to ShouldBreakBeforeBrace()
  // below to decide on brace wrapping.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier that differs from its upper-cased spelling is taken to be
    // a real name rather than a macro.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip forward until the record body, a ';' or the end of input.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // A '{' that is not a braced list is treated as the start of the
        // record body, handled after this loop.
        if (!tryToParseBracedList())
          break;
      }
      // A ';' here means there is no body to parse.
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // A C# 'where' clause starts a new unwrapped line.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // With Style.IndentAccessModifiers the body is indented two levels
      // instead of one.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
2922 
2923 void UnwrappedLineParser::parseObjCMethod() {
2924   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2925          "'(' or identifier expected.");
2926   do {
2927     if (FormatTok->Tok.is(tok::semi)) {
2928       nextToken();
2929       addUnwrappedLine();
2930       return;
2931     } else if (FormatTok->Tok.is(tok::l_brace)) {
2932       if (Style.BraceWrapping.AfterFunction)
2933         addUnwrappedLine();
2934       parseBlock();
2935       addUnwrappedLine();
2936       return;
2937     } else {
2938       nextToken();
2939     }
2940   } while (!eof());
2941 }
2942 
2943 void UnwrappedLineParser::parseObjCProtocolList() {
2944   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2945   do {
2946     nextToken();
2947     // Early exit in case someone forgot a close angle.
2948     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2949         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2950       return;
2951   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2952   nextToken(); // Skip '>'.
2953 }
2954 
2955 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2956   do {
2957     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2958       nextToken();
2959       addUnwrappedLine();
2960       break;
2961     }
2962     if (FormatTok->is(tok::l_brace)) {
2963       parseBlock();
2964       // In ObjC interfaces, nothing should be following the "}".
2965       addUnwrappedLine();
2966     } else if (FormatTok->is(tok::r_brace)) {
2967       // Ignore stray "}". parseStructuralElement doesn't consume them.
2968       nextToken();
2969       addUnwrappedLine();
2970     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2971       nextToken();
2972       parseObjCMethod();
2973     } else {
2974       parseStructuralElement();
2975     }
2976   } while (!eof());
2977 }
2978 
2979 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2980   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2981          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2982   nextToken();
2983   nextToken(); // interface name
2984 
2985   // @interface can be followed by a lightweight generic
2986   // specialization list, then either a base class or a category.
2987   if (FormatTok->Tok.is(tok::less)) {
2988     parseObjCLightweightGenerics();
2989   }
2990   if (FormatTok->Tok.is(tok::colon)) {
2991     nextToken();
2992     nextToken(); // base class name
2993     // The base class can also have lightweight generics applied to it.
2994     if (FormatTok->Tok.is(tok::less)) {
2995       parseObjCLightweightGenerics();
2996     }
2997   } else if (FormatTok->Tok.is(tok::l_paren))
2998     // Skip category, if present.
2999     parseParens();
3000 
3001   if (FormatTok->Tok.is(tok::less))
3002     parseObjCProtocolList();
3003 
3004   if (FormatTok->Tok.is(tok::l_brace)) {
3005     if (Style.BraceWrapping.AfterObjCDeclaration)
3006       addUnwrappedLine();
3007     parseBlock(/*MustBeDeclaration=*/true);
3008   }
3009 
3010   // With instance variables, this puts '}' on its own line.  Without instance
3011   // variables, this ends the @interface line.
3012   addUnwrappedLine();
3013 
3014   parseObjCUntilAtEnd();
3015 }
3016 
3017 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3018   assert(FormatTok->Tok.is(tok::less));
3019   // Unlike protocol lists, generic parameterizations support
3020   // nested angles:
3021   //
3022   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3023   //     NSObject <NSCopying, NSSecureCoding>
3024   //
3025   // so we need to count how many open angles we have left.
3026   unsigned NumOpenAngles = 1;
3027   do {
3028     nextToken();
3029     // Early exit in case someone forgot a close angle.
3030     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3031         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3032       break;
3033     if (FormatTok->Tok.is(tok::less))
3034       ++NumOpenAngles;
3035     else if (FormatTok->Tok.is(tok::greater)) {
3036       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3037       --NumOpenAngles;
3038     }
3039   } while (!eof() && NumOpenAngles != 0);
3040   nextToken(); // Skip '>'.
3041 }
3042 
3043 // Returns true for the declaration/definition form of @protocol,
3044 // false for the expression form.
3045 bool UnwrappedLineParser::parseObjCProtocol() {
3046   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3047   nextToken();
3048 
3049   if (FormatTok->is(tok::l_paren))
3050     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3051     return false;
3052 
3053   // The definition/declaration form,
3054   // @protocol Foo
3055   // - (int)someMethod;
3056   // @end
3057 
3058   nextToken(); // protocol name
3059 
3060   if (FormatTok->Tok.is(tok::less))
3061     parseObjCProtocolList();
3062 
3063   // Check for protocol declaration.
3064   if (FormatTok->Tok.is(tok::semi)) {
3065     nextToken();
3066     addUnwrappedLine();
3067     return true;
3068   }
3069 
3070   addUnwrappedLine();
3071   parseObjCUntilAtEnd();
3072   return true;
3073 }
3074 
3075 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3076   bool IsImport = FormatTok->is(Keywords.kw_import);
3077   assert(IsImport || FormatTok->is(tok::kw_export));
3078   nextToken();
3079 
3080   // Consume the "default" in "export default class/function".
3081   if (FormatTok->is(tok::kw_default))
3082     nextToken();
3083 
3084   // Consume "async function", "function" and "default function", so that these
3085   // get parsed as free-standing JS functions, i.e. do not require a trailing
3086   // semicolon.
3087   if (FormatTok->is(Keywords.kw_async))
3088     nextToken();
3089   if (FormatTok->is(Keywords.kw_function)) {
3090     nextToken();
3091     return;
3092   }
3093 
3094   // For imports, `export *`, `export {...}`, consume the rest of the line up
3095   // to the terminating `;`. For everything else, just return and continue
3096   // parsing the structural element, i.e. the declaration or expression for
3097   // `export default`.
3098   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3099       !FormatTok->isStringLiteral())
3100     return;
3101 
3102   while (!eof()) {
3103     if (FormatTok->is(tok::semi))
3104       return;
3105     if (Line->Tokens.empty()) {
3106       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3107       // import statement should terminate.
3108       return;
3109     }
3110     if (FormatTok->is(tok::l_brace)) {
3111       FormatTok->setBlockKind(BK_Block);
3112       nextToken();
3113       parseBracedList();
3114     } else {
3115       nextToken();
3116     }
3117   }
3118 }
3119 
3120 void UnwrappedLineParser::parseStatementMacro() {
3121   nextToken();
3122   if (FormatTok->is(tok::l_paren))
3123     parseParens();
3124   if (FormatTok->is(tok::semi))
3125     nextToken();
3126   addUnwrappedLine();
3127 }
3128 
3129 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3130                                                  StringRef Prefix = "") {
3131   llvm::dbgs() << Prefix << "Line(" << Line.Level
3132                << ", FSC=" << Line.FirstStartColumn << ")"
3133                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3134   for (const auto &Node : Line.Tokens) {
3135     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3136                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3137                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3138   }
3139   for (const auto &Node : Line.Tokens)
3140     for (const auto &ChildNode : Node.Children)
3141       printDebugInfo(ChildNode, "\nChild: ");
3142 
3143   llvm::dbgs() << "\n";
3144 }
3145 
3146 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3147   if (Line->Tokens.empty())
3148     return;
3149   LLVM_DEBUG({
3150     if (CurrentLines == &Lines)
3151       printDebugInfo(*Line);
3152   });
3153 
3154   // If this line closes a block when in Whitesmiths mode, remember that
3155   // information so that the level can be decreased after the line is added.
3156   // This has to happen after the addition of the line since the line itself
3157   // needs to be indented.
3158   bool ClosesWhitesmithsBlock =
3159       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3160       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3161 
3162   CurrentLines->push_back(std::move(*Line));
3163   Line->Tokens.clear();
3164   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3165   Line->FirstStartColumn = 0;
3166 
3167   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3168     --Line->Level;
3169   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3170     CurrentLines->append(
3171         std::make_move_iterator(PreprocessorDirectives.begin()),
3172         std::make_move_iterator(PreprocessorDirectives.end()));
3173     PreprocessorDirectives.clear();
3174   }
3175   // Disconnect the current token from the last token on the previous line.
3176   FormatTok->Previous = nullptr;
3177 }
3178 
3179 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3180 
3181 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3182   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3183          FormatTok.NewlinesBefore > 0;
3184 }
3185 
3186 // Checks if \p FormatTok is a line comment that continues the line comment
3187 // section on \p Line.
3188 static bool
3189 continuesLineCommentSection(const FormatToken &FormatTok,
3190                             const UnwrappedLine &Line,
3191                             const llvm::Regex &CommentPragmasRegex) {
3192   if (Line.Tokens.empty())
3193     return false;
3194 
3195   StringRef IndentContent = FormatTok.TokenText;
3196   if (FormatTok.TokenText.startswith("//") ||
3197       FormatTok.TokenText.startswith("/*"))
3198     IndentContent = FormatTok.TokenText.substr(2);
3199   if (CommentPragmasRegex.match(IndentContent))
3200     return false;
3201 
3202   // If Line starts with a line comment, then FormatTok continues the comment
3203   // section if its original column is greater or equal to the original start
3204   // column of the line.
3205   //
3206   // Define the min column token of a line as follows: if a line ends in '{' or
3207   // contains a '{' followed by a line comment, then the min column token is
3208   // that '{'. Otherwise, the min column token of the line is the first token of
3209   // the line.
3210   //
3211   // If Line starts with a token other than a line comment, then FormatTok
3212   // continues the comment section if its original column is greater than the
3213   // original start column of the min column token of the line.
3214   //
3215   // For example, the second line comment continues the first in these cases:
3216   //
3217   // // first line
3218   // // second line
3219   //
3220   // and:
3221   //
3222   // // first line
3223   //  // second line
3224   //
3225   // and:
3226   //
3227   // int i; // first line
3228   //  // second line
3229   //
3230   // and:
3231   //
3232   // do { // first line
3233   //      // second line
3234   //   int i;
3235   // } while (true);
3236   //
3237   // and:
3238   //
3239   // enum {
3240   //   a, // first line
3241   //    // second line
3242   //   b
3243   // };
3244   //
3245   // The second line comment doesn't continue the first in these cases:
3246   //
3247   //   // first line
3248   //  // second line
3249   //
3250   // and:
3251   //
3252   // int i; // first line
3253   // // second line
3254   //
3255   // and:
3256   //
3257   // do { // first line
3258   //   // second line
3259   //   int i;
3260   // } while (true);
3261   //
3262   // and:
3263   //
3264   // enum {
3265   //   a, // first line
3266   //   // second line
3267   // };
3268   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3269 
3270   // Scan for '{//'. If found, use the column of '{' as a min column for line
3271   // comment section continuation.
3272   const FormatToken *PreviousToken = nullptr;
3273   for (const UnwrappedLineNode &Node : Line.Tokens) {
3274     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3275         isLineComment(*Node.Tok)) {
3276       MinColumnToken = PreviousToken;
3277       break;
3278     }
3279     PreviousToken = Node.Tok;
3280 
3281     // Grab the last newline preceding a token in this unwrapped line.
3282     if (Node.Tok->NewlinesBefore > 0) {
3283       MinColumnToken = Node.Tok;
3284     }
3285   }
3286   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3287     MinColumnToken = PreviousToken;
3288   }
3289 
3290   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3291                               MinColumnToken);
3292 }
3293 
3294 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3295   bool JustComments = Line->Tokens.empty();
3296   for (SmallVectorImpl<FormatToken *>::const_iterator
3297            I = CommentsBeforeNextToken.begin(),
3298            E = CommentsBeforeNextToken.end();
3299        I != E; ++I) {
3300     // Line comments that belong to the same line comment section are put on the
3301     // same line since later we might want to reflow content between them.
3302     // Additional fine-grained breaking of line comment sections is controlled
3303     // by the class BreakableLineCommentSection in case it is desirable to keep
3304     // several line comment sections in the same unwrapped line.
3305     //
3306     // FIXME: Consider putting separate line comment sections as children to the
3307     // unwrapped line instead.
3308     (*I)->ContinuesLineCommentSection =
3309         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3310     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3311       addUnwrappedLine();
3312     pushToken(*I);
3313   }
3314   if (NewlineBeforeNext && JustComments)
3315     addUnwrappedLine();
3316   CommentsBeforeNextToken.clear();
3317 }
3318 
3319 void UnwrappedLineParser::nextToken(int LevelDifference) {
3320   if (eof())
3321     return;
3322   flushComments(isOnNewLine(*FormatTok));
3323   pushToken(FormatTok);
3324   FormatToken *Previous = FormatTok;
3325   if (!Style.isJavaScript())
3326     readToken(LevelDifference);
3327   else
3328     readTokenWithJavaScriptASI();
3329   FormatTok->Previous = Previous;
3330 }
3331 
3332 void UnwrappedLineParser::distributeComments(
3333     const SmallVectorImpl<FormatToken *> &Comments,
3334     const FormatToken *NextTok) {
3335   // Whether or not a line comment token continues a line is controlled by
3336   // the method continuesLineCommentSection, with the following caveat:
3337   //
3338   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3339   // that each comment line from the trail is aligned with the next token, if
3340   // the next token exists. If a trail exists, the beginning of the maximal
3341   // trail is marked as a start of a new comment section.
3342   //
3343   // For example in this code:
3344   //
3345   // int a; // line about a
3346   //   // line 1 about b
3347   //   // line 2 about b
3348   //   int b;
3349   //
3350   // the two lines about b form a maximal trail, so there are two sections, the
3351   // first one consisting of the single comment "// line about a" and the
3352   // second one consisting of the next two comments.
3353   if (Comments.empty())
3354     return;
3355   bool ShouldPushCommentsInCurrentLine = true;
3356   bool HasTrailAlignedWithNextToken = false;
3357   unsigned StartOfTrailAlignedWithNextToken = 0;
3358   if (NextTok) {
3359     // We are skipping the first element intentionally.
3360     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3361       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3362         HasTrailAlignedWithNextToken = true;
3363         StartOfTrailAlignedWithNextToken = i;
3364       }
3365     }
3366   }
3367   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3368     FormatToken *FormatTok = Comments[i];
3369     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3370       FormatTok->ContinuesLineCommentSection = false;
3371     } else {
3372       FormatTok->ContinuesLineCommentSection =
3373           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3374     }
3375     if (!FormatTok->ContinuesLineCommentSection &&
3376         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3377       ShouldPushCommentsInCurrentLine = false;
3378     }
3379     if (ShouldPushCommentsInCurrentLine) {
3380       pushToken(FormatTok);
3381     } else {
3382       CommentsBeforeNextToken.push_back(FormatTok);
3383     }
3384   }
3385 }
3386 
// Reads tokens from the token source into FormatTok until a non-comment token
// is reached (or the input ends). Along the way it handles conflict marker
// tokens, parses preprocessor directives that start a line, skips tokens in
// branches marked PP_Unreachable, and collects comments, which are passed to
// distributeComments().
//
// \param LevelDifference added to Line->Level for the lines of a
//        preprocessor directive encountered while reading.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments seen since the last non-comment token.
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // Conflict marker tokens are handled through the conditional compilation
    // callbacks; the token following a marker must start a new line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->getType() == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->getType() == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // A '#' that starts a line (or the file) outside of a directive begins a
    // preprocessor directive. This is a loop, so several directives in a row
    // are handled here (presumably parsePPDirective() advances FormatTok).
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }

    // Skip tokens inside a branch marked unreachable. The 'continue' jumps to
    // the loop condition, so reading stops if the end of input was reached.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // A non-comment token ends the read; distribute the collected comments
    // relative to it.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // The loop ended without returning a token: there is no next token for the
  // remaining comments to be aligned with.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
3445 
3446 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3447   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3448   if (MustBreakBeforeNextToken) {
3449     Line->Tokens.back().Tok->MustBreakBefore = true;
3450     MustBreakBeforeNextToken = false;
3451   }
3452 }
3453 
3454 } // end namespace format
3455 } // end namespace clang
3456