1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #include <algorithm>
22 
23 #define DEBUG_TYPE "format-parser"
24 
25 namespace clang {
26 namespace format {
27 
28 class FormatTokenSource {
29 public:
30   virtual ~FormatTokenSource() {}
31 
32   // Returns the next token in the token stream.
33   virtual FormatToken *getNextToken() = 0;
34 
35   // Returns the token precedint the token returned by the last call to
36   // getNextToken() in the token stream, or nullptr if no such token exists.
37   virtual FormatToken *getPreviousToken() = 0;
38 
39   // Returns the token that would be returned by the next call to
40   // getNextToken().
41   virtual FormatToken *peekNextToken() = 0;
42 
43   // Returns whether we are at the end of the file.
44   // This can be different from whether getNextToken() returned an eof token
45   // when the FormatTokenSource is a view on a part of the token stream.
46   virtual bool isEOF() = 0;
47 
48   // Gets the current position in the token stream, to be used by setPosition().
49   virtual unsigned getPosition() = 0;
50 
51   // Resets the token stream to the state it was in when getPosition() returned
52   // Position, and return the token at that position in the stream.
53   virtual FormatToken *setPosition(unsigned Position) = 0;
54 };
55 
56 namespace {
57 
58 class ScopedDeclarationState {
59 public:
60   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
61                          bool MustBeDeclaration)
62       : Line(Line), Stack(Stack) {
63     Line.MustBeDeclaration = MustBeDeclaration;
64     Stack.push_back(MustBeDeclaration);
65   }
66   ~ScopedDeclarationState() {
67     Stack.pop_back();
68     if (!Stack.empty())
69       Line.MustBeDeclaration = Stack.back();
70     else
71       Line.MustBeDeclaration = true;
72   }
73 
74 private:
75   UnwrappedLine &Line;
76   std::vector<bool> &Stack;
77 };
78 
79 static bool isLineComment(const FormatToken &FormatTok) {
80   return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
81 }
82 
83 // Checks if \p FormatTok is a line comment that continues the line comment
84 // \p Previous. The original column of \p MinColumnToken is used to determine
85 // whether \p FormatTok is indented enough to the right to continue \p Previous.
86 static bool continuesLineComment(const FormatToken &FormatTok,
87                                  const FormatToken *Previous,
88                                  const FormatToken *MinColumnToken) {
89   if (!Previous || !MinColumnToken)
90     return false;
91   unsigned MinContinueColumn =
92       MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
93   return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
94          isLineComment(*Previous) &&
95          FormatTok.OriginalColumn >= MinContinueColumn;
96 }
97 
98 class ScopedMacroState : public FormatTokenSource {
99 public:
100   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
101                    FormatToken *&ResetToken)
102       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
103         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
104         Token(nullptr), PreviousToken(nullptr) {
105     FakeEOF.Tok.startToken();
106     FakeEOF.Tok.setKind(tok::eof);
107     TokenSource = this;
108     Line.Level = 0;
109     Line.InPPDirective = true;
110   }
111 
112   ~ScopedMacroState() override {
113     TokenSource = PreviousTokenSource;
114     ResetToken = Token;
115     Line.InPPDirective = false;
116     Line.Level = PreviousLineLevel;
117   }
118 
119   FormatToken *getNextToken() override {
120     // The \c UnwrappedLineParser guards against this by never calling
121     // \c getNextToken() after it has encountered the first eof token.
122     assert(!eof());
123     PreviousToken = Token;
124     Token = PreviousTokenSource->getNextToken();
125     if (eof())
126       return &FakeEOF;
127     return Token;
128   }
129 
130   FormatToken *getPreviousToken() override {
131     return PreviousTokenSource->getPreviousToken();
132   }
133 
134   FormatToken *peekNextToken() override {
135     if (eof())
136       return &FakeEOF;
137     return PreviousTokenSource->peekNextToken();
138   }
139 
140   bool isEOF() override { return PreviousTokenSource->isEOF(); }
141 
142   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
143 
144   FormatToken *setPosition(unsigned Position) override {
145     PreviousToken = nullptr;
146     Token = PreviousTokenSource->setPosition(Position);
147     return Token;
148   }
149 
150 private:
151   bool eof() {
152     return Token && Token->HasUnescapedNewline &&
153            !continuesLineComment(*Token, PreviousToken,
154                                  /*MinColumnToken=*/PreviousToken);
155   }
156 
157   FormatToken FakeEOF;
158   UnwrappedLine &Line;
159   FormatTokenSource *&TokenSource;
160   FormatToken *&ResetToken;
161   unsigned PreviousLineLevel;
162   FormatTokenSource *PreviousTokenSource;
163 
164   FormatToken *Token;
165   FormatToken *PreviousToken;
166 };
167 
168 } // end anonymous namespace
169 
170 class ScopedLineState {
171 public:
172   ScopedLineState(UnwrappedLineParser &Parser,
173                   bool SwitchToPreprocessorLines = false)
174       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
175     if (SwitchToPreprocessorLines)
176       Parser.CurrentLines = &Parser.PreprocessorDirectives;
177     else if (!Parser.Line->Tokens.empty())
178       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
179     PreBlockLine = std::move(Parser.Line);
180     Parser.Line = std::make_unique<UnwrappedLine>();
181     Parser.Line->Level = PreBlockLine->Level;
182     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
183   }
184 
185   ~ScopedLineState() {
186     if (!Parser.Line->Tokens.empty()) {
187       Parser.addUnwrappedLine();
188     }
189     assert(Parser.Line->Tokens.empty());
190     Parser.Line = std::move(PreBlockLine);
191     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
192       Parser.MustBreakBeforeNextToken = true;
193     Parser.CurrentLines = OriginalLines;
194   }
195 
196 private:
197   UnwrappedLineParser &Parser;
198 
199   std::unique_ptr<UnwrappedLine> PreBlockLine;
200   SmallVectorImpl<UnwrappedLine> *OriginalLines;
201 };
202 
203 class CompoundStatementIndenter {
204 public:
205   CompoundStatementIndenter(UnwrappedLineParser *Parser,
206                             const FormatStyle &Style, unsigned &LineLevel)
207       : CompoundStatementIndenter(Parser, LineLevel,
208                                   Style.BraceWrapping.AfterControlStatement,
209                                   Style.BraceWrapping.IndentBraces) {}
210   CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
211                             bool WrapBrace, bool IndentBrace)
212       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
213     if (WrapBrace)
214       Parser->addUnwrappedLine();
215     if (IndentBrace)
216       ++LineLevel;
217   }
218   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
219 
220 private:
221   unsigned &LineLevel;
222   unsigned OldLineLevel;
223 };
224 
225 namespace {
226 
227 class IndexedTokenSource : public FormatTokenSource {
228 public:
229   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
230       : Tokens(Tokens), Position(-1) {}
231 
232   FormatToken *getNextToken() override {
233     if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
234       LLVM_DEBUG({
235         llvm::dbgs() << "Next ";
236         dbgToken(Position);
237       });
238       return Tokens[Position];
239     }
240     ++Position;
241     LLVM_DEBUG({
242       llvm::dbgs() << "Next ";
243       dbgToken(Position);
244     });
245     return Tokens[Position];
246   }
247 
248   FormatToken *getPreviousToken() override {
249     assert(Position > 0);
250     return Tokens[Position - 1];
251   }
252 
253   FormatToken *peekNextToken() override {
254     int Next = Position + 1;
255     LLVM_DEBUG({
256       llvm::dbgs() << "Peeking ";
257       dbgToken(Next);
258     });
259     return Tokens[Next];
260   }
261 
262   bool isEOF() override { return Tokens[Position]->is(tok::eof); }
263 
264   unsigned getPosition() override {
265     LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
266     assert(Position >= 0);
267     return Position;
268   }
269 
270   FormatToken *setPosition(unsigned P) override {
271     LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
272     Position = P;
273     return Tokens[Position];
274   }
275 
276   void reset() { Position = -1; }
277 
278 private:
279   void dbgToken(int Position, llvm::StringRef Indent = "") {
280     FormatToken *Tok = Tokens[Position];
281     llvm::dbgs() << Indent << "[" << Position
282                  << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
283                  << ", Macro: " << !!Tok->MacroCtx << "\n";
284   }
285 
286   ArrayRef<FormatToken *> Tokens;
287   int Position;
288 };
289 
290 } // end anonymous namespace
291 
/// Constructs a parser over \p Tokens. No token source is attached yet
/// (\c Tokens member is null); parse() installs one. Finished lines are
/// collected in \c Lines and handed to \p Callback by parse().
///
/// Include-guard detection starts out rejected when
/// \c Style.IndentPPDirectives is \c PPDIS_None and in the initial state
/// otherwise.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         const AdditionalKeywords &Keywords,
                                         unsigned FirstStartColumn,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), Style(Style), Keywords(Keywords),
      CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
      Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
      IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
                       ? IG_Rejected
                       : IG_Inited),
      IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
305 
306 void UnwrappedLineParser::reset() {
307   PPBranchLevel = -1;
308   IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
309                      ? IG_Rejected
310                      : IG_Inited;
311   IncludeGuardToken = nullptr;
312   Line.reset(new UnwrappedLine);
313   CommentsBeforeNextToken.clear();
314   FormatTok = nullptr;
315   MustBreakBeforeNextToken = false;
316   PreprocessorDirectives.clear();
317   CurrentLines = &Lines;
318   DeclarationScopeStack.clear();
319   PPStack.clear();
320   Line->FirstStartColumn = FirstStartColumn;
321 }
322 
323 void UnwrappedLineParser::parse() {
324   IndexedTokenSource TokenSource(AllTokens);
325   Line->FirstStartColumn = FirstStartColumn;
326   do {
327     LLVM_DEBUG(llvm::dbgs() << "----\n");
328     reset();
329     Tokens = &TokenSource;
330     TokenSource.reset();
331 
332     readToken();
333     parseFile();
334 
335     // If we found an include guard then all preprocessor directives (other than
336     // the guard) are over-indented by one.
337     if (IncludeGuard == IG_Found)
338       for (auto &Line : Lines)
339         if (Line.InPPDirective && Line.Level > 0)
340           --Line.Level;
341 
342     // Create line with eof token.
343     pushToken(FormatTok);
344     addUnwrappedLine();
345 
346     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
347                                                   E = Lines.end();
348          I != E; ++I) {
349       Callback.consumeUnwrappedLine(*I);
350     }
351     Callback.finishRun();
352     Lines.clear();
353     while (!PPLevelBranchIndex.empty() &&
354            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
355       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
356       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
357     }
358     if (!PPLevelBranchIndex.empty()) {
359       ++PPLevelBranchIndex.back();
360       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
361       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
362     }
363   } while (!PPLevelBranchIndex.empty());
364 }
365 
366 void UnwrappedLineParser::parseFile() {
367   // The top-level context in a file always has declarations, except for pre-
368   // processor directives and JavaScript files.
369   bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
370   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
371                                           MustBeDeclaration);
372   if (Style.Language == FormatStyle::LK_TextProto)
373     parseBracedList();
374   else
375     parseLevel(/*HasOpeningBrace=*/false);
376   // Make sure to format the remaining tokens.
377   //
378   // LK_TextProto is special since its top-level is parsed as the body of a
379   // braced list, which does not necessarily have natural line separators such
380   // as a semicolon. Comments after the last entry that have been determined to
381   // not belong to that line, as in:
382   //   key: value
383   //   // endfile comment
384   // do not have a chance to be put on a line of their own until this point.
385   // Here we add this newline before end-of-file comments.
386   if (Style.Language == FormatStyle::LK_TextProto &&
387       !CommentsBeforeNextToken.empty())
388     addUnwrappedLine();
389   flushComments(true);
390   addUnwrappedLine();
391 }
392 
393 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
394   do {
395     switch (FormatTok->Tok.getKind()) {
396     case tok::l_brace:
397       return;
398     default:
399       if (FormatTok->is(Keywords.kw_where)) {
400         addUnwrappedLine();
401         nextToken();
402         parseCSharpGenericTypeConstraint();
403         break;
404       }
405       nextToken();
406       break;
407     }
408   } while (!eof());
409 }
410 
411 void UnwrappedLineParser::parseCSharpAttribute() {
412   int UnpairedSquareBrackets = 1;
413   do {
414     switch (FormatTok->Tok.getKind()) {
415     case tok::r_square:
416       nextToken();
417       --UnpairedSquareBrackets;
418       if (UnpairedSquareBrackets == 0) {
419         addUnwrappedLine();
420         return;
421       }
422       break;
423     case tok::l_square:
424       ++UnpairedSquareBrackets;
425       nextToken();
426       break;
427     default:
428       nextToken();
429       break;
430     }
431   } while (!eof());
432 }
433 
/// Parses a sequence of structural elements, dispatching on the kind of the
/// current token, until eof is reached or, when \p HasOpeningBrace is set,
/// until a closing brace (or a TT_MacroBlockEnd token) is found. In the latter
/// case the closing token is left unconsumed for the caller.
void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
  // Ensures the level is bumped at most once for the first switch label seen.
  bool SwitchLabelEncountered = false;
  do {
    // Macro block begin/end tokens are dispatched like braces.
    tok::TokenKind kind = FormatTok->Tok.getKind();
    if (FormatTok->getType() == TT_MacroBlockBegin) {
      kind = tok::l_brace;
    } else if (FormatTok->getType() == TT_MacroBlockEnd) {
      kind = tok::r_brace;
    }

    switch (kind) {
    case tok::comment:
      nextToken();
      addUnwrappedLine();
      break;
    case tok::l_brace:
      // FIXME: Add parameter whether this can happen - if this happens, we must
      // be in a non-declaration context.
      // `continue` jumps to the loop condition without emitting a line here.
      if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
        continue;
      parseBlock();
      addUnwrappedLine();
      break;
    case tok::r_brace:
      // With an opening brace, the closing one belongs to the caller.
      if (HasOpeningBrace)
        return;
      // Otherwise it is an unmatched '}' that gets a line of its own.
      nextToken();
      addUnwrappedLine();
      break;
    case tok::kw_default: {
      // Look ahead past comments for the token following 'default', then
      // rewind the token source to where it was.
      unsigned StoredPosition = Tokens->getPosition();
      FormatToken *Next;
      do {
        Next = Tokens->getNextToken();
      } while (Next->is(tok::comment));
      FormatTok = Tokens->setPosition(StoredPosition);
      if (Next && Next->isNot(tok::colon)) {
        // default not followed by ':' is not a case label; treat it like
        // an identifier.
        parseStructuralElement();
        break;
      }
      // Else, if it is 'default:', fall through to the case handling.
      LLVM_FALLTHROUGH;
    }
    case tok::kw_case:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // A 'case: string' style field declaration.
        parseStructuralElement();
        break;
      }
      // Indent only once per level, for the first label encountered.
      if (!SwitchLabelEncountered &&
          (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
        ++Line->Level;
      SwitchLabelEncountered = true;
      parseStructuralElement();
      break;
    case tok::l_square:
      if (Style.isCSharp()) {
        nextToken();
        parseCSharpAttribute();
        break;
      }
      LLVM_FALLTHROUGH;
    default:
      parseStructuralElement(!HasOpeningBrace);
      break;
    }
  } while (!eof());
}
504 
/// Starting at the current '{' token, scans ahead through the token stream and
/// assigns a block kind (BK_Block or BK_BracedInit) to every brace pair it can
/// classify, based mostly on the token that follows each closing brace. The
/// scan ends when the initial brace is closed or eof is reached; braces still
/// open and unknown at that point become BK_Block. The token source position is
/// restored before returning.
///
/// \param ExpectClassBody when set, a ';' after the closing brace of the
///        outermost brace pair is not taken as evidence for a braced list.
void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  FormatToken *Tok = FormatTok;
  const FormatToken *PrevTok = Tok->Previous;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<FormatToken *, 8> LBraceStack;
  assert(Tok->Tok.is(tok::l_brace));
  do {
    // Get next non-comment token.
    FormatToken *NextTok;
    // NOTE(review): ReadTokens is only ever incremented within this function;
    // its value is not read here.
    unsigned ReadTokens = 0;
    do {
      NextTok = Tokens->getNextToken();
      ++ReadTokens;
    } while (NextTok->is(tok::comment));

    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      if (Style.isJavaScript() && PrevTok) {
        if (PrevTok->isOneOf(tok::colon, tok::less))
          // A ':' indicates this code is in a type, or a braced list
          // following a label in an object literal ({a: {b: 1}}).
          // A '<' could be an object used in a comparison, but that is nonsense
          // code (can never return true), so more likely it is a generic type
          // argument (`X<{a: string; b: number}>`).
          // The code below could be confused by semicolons between the
          // individual members in a type member list, which would normally
          // trigger BK_Block. In both cases, this must be parsed as an inline
          // braced init.
          Tok->setBlockKind(BK_BracedInit);
        else if (PrevTok->is(tok::r_paren))
          // `) { }` can only occur in function or method declarations in JS.
          Tok->setBlockKind(BK_Block);
        // Any other predecessor leaves the token's block kind untouched.
      } else {
        Tok->setBlockKind(BK_Unknown);
      }
      LBraceStack.push_back(Tok);
      break;
    case tok::r_brace:
      // A closing brace without a tracked opening brace is ignored.
      if (LBraceStack.empty())
        break;
      if (LBraceStack.back()->is(BK_Unknown)) {
        bool ProbablyBracedList = false;
        if (Style.Language == FormatStyle::LK_Proto) {
          ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
        } else {
          // Skip NextTok over preprocessor lines, otherwise we may not
          // properly diagnose the block as a braced initializer
          // if the comma separator appears after the pp directive.
          while (NextTok->is(tok::hash)) {
            ScopedMacroState MacroState(*Line, Tokens, NextTok);
            do {
              NextTok = Tokens->getNextToken();
              ++ReadTokens;
            } while (NextTok->isNot(tok::eof));
          }

          // Using OriginalColumn to distinguish between ObjC methods and
          // binary operators is a bit hacky.
          bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
                                  NextTok->OriginalColumn == 0;

          // If there is a comma, semicolon or right paren after the closing
          // brace, we assume this is a braced initializer list.  Note that
          // regardless how we mark inner braces here, we will overwrite the
          // BlockKind later if we parse a braced list (where all blocks
          // inside are by default braced lists), or when we explicitly detect
          // blocks (for example while parsing lambdas).
          // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
          // braced list in JS.
          ProbablyBracedList =
              (Style.isJavaScript() &&
               NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
                                Keywords.kw_as)) ||
              (Style.isCpp() && NextTok->is(tok::l_paren)) ||
              NextTok->isOneOf(tok::comma, tok::period, tok::colon,
                               tok::r_paren, tok::r_square, tok::l_brace,
                               tok::ellipsis) ||
              (NextTok->is(tok::identifier) &&
               !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
              (NextTok->is(tok::semi) &&
               (!ExpectClassBody || LBraceStack.size() != 1)) ||
              (NextTok->isBinaryOperator() && !NextIsObjCMethod);
          if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
            // We can have an array subscript after a braced init
            // list, but C++11 attributes are expected after blocks.
            NextTok = Tokens->getNextToken();
            ++ReadTokens;
            ProbablyBracedList = NextTok->isNot(tok::l_square);
          }
        }
        // Record the verdict on both the closing and the opening brace.
        if (ProbablyBracedList) {
          Tok->setBlockKind(BK_BracedInit);
          LBraceStack.back()->setBlockKind(BK_BracedInit);
        } else {
          Tok->setBlockKind(BK_Block);
          LBraceStack.back()->setBlockKind(BK_Block);
        }
      }
      LBraceStack.pop_back();
      break;
    case tok::identifier:
      // Only statement macros count as evidence for a block.
      if (!Tok->is(TT_StatementMacro))
        break;
      LLVM_FALLTHROUGH;
    case tok::at:
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
    case tok::kw___try:
      // Any of these tokens inside a still-unknown brace marks it as a block.
      if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
        LBraceStack.back()->setBlockKind(BK_Block);
      break;
    default:
      break;
    }
    PrevTok = Tok;
    Tok = NextTok;
  } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());

  // Assume other blocks for all unclosed opening braces.
  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
    if (LBraceStack[i]->is(BK_Unknown))
      LBraceStack[i]->setBlockKind(BK_Block);
  }

  // Rewind so that parsing resumes at the original '{'.
  FormatTok = Tokens->setPosition(StoredPosition);
}
642 
/// Folds the std::hash of \p v into the running hash value \p seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
648 
649 size_t UnwrappedLineParser::computePPHash() const {
650   size_t h = 0;
651   for (const auto &i : PPStack) {
652     hash_combine(h, size_t(i.Kind));
653     hash_combine(h, i.Line);
654   }
655   return h;
656 }
657 
/// Parses a block that starts at the current '{' (or TT_MacroBlockBegin)
/// token, including its contents and, if present, the matching closing token.
///
/// \param MustBeDeclaration declaration flag installed for the block contents.
/// \param AddLevels number of levels added to the line level inside the block.
/// \param MunchSemi if set, a ';' directly after the closing brace is consumed.
/// \param UnindentWhitesmithsBraces if set, the line level is decremented by
///        one after the opening line has been emitted.
///
/// When the block is properly closed and the preprocessor stack hash is the
/// same at both ends, the opening and closing lines are cross-linked through
/// their Matching{Opening,Closing}BlockLineIndex fields.
void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
                                     bool MunchSemi,
                                     bool UnindentWhitesmithsBraces) {
  assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
         "'{' or macro block token expected");
  const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
  FormatTok->setBlockKind(BK_Block);

  // For Whitesmiths mode, jump to the next level prior to skipping over the
  // braces.
  if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
    ++Line->Level;

  // Snapshot of the preprocessor stack at the opening brace; compared against
  // the state at the closing brace below.
  size_t PPStartHash = computePPHash();

  unsigned InitialLevel = Line->Level;
  nextToken(/*LevelDifference=*/AddLevels);

  // A macro block opener may be followed by an argument list.
  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  // Compute the index of the opening line within CurrentLines, discounting
  // the preprocessor directive count when collecting into the main Lines.
  size_t NbPreprocessorDirectives =
      CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
  addUnwrappedLine();
  size_t OpeningLineIndex =
      CurrentLines->empty()
          ? (UnwrappedLine::kInvalidIndex)
          : (CurrentLines->size() - 1 - NbPreprocessorDirectives);

  // Whitesmiths is weird here. The brace needs to be indented for the namespace
  // block, but the block itself may not be indented depending on the style
  // settings. This allows the format to back up one level in those cases.
  if (UnindentWhitesmithsBraces)
    --Line->Level;

  ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                          MustBeDeclaration);
  if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
    Line->Level += AddLevels;
  parseLevel(/*HasOpeningBrace=*/true);

  // Unterminated block: nothing left to match.
  if (eof())
    return;

  // parseLevel stopped on something other than the expected closing token;
  // restore the level and leave the token for the caller.
  if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
                 : !FormatTok->is(tok::r_brace)) {
    Line->Level = InitialLevel;
    FormatTok->setBlockKind(BK_Block);
    return;
  }

  size_t PPEndHash = computePPHash();

  // Munch the closing brace.
  nextToken(/*LevelDifference=*/-AddLevels);

  if (MacroBlock && FormatTok->is(tok::l_paren))
    parseParens();

  if (FormatTok->is(tok::arrow)) {
    // Following the } we can find a trailing return type arrow
    // as part of an implicit conversion constraint.
    nextToken();
    parseStructuralElement();
  }

  if (MunchSemi && FormatTok->Tok.is(tok::semi))
    nextToken();

  Line->Level = InitialLevel;

  // Only link the lines if the preprocessor stack was identical at the
  // opening and the closing brace.
  if (PPStartHash == PPEndHash) {
    Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
    if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
      // Update the opening line to add the forward reference as well
      (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
          CurrentLines->size() - 1;
    }
  }
}
738 
739 static bool isGoogScope(const UnwrappedLine &Line) {
740   // FIXME: Closure-library specific stuff should not be hard-coded but be
741   // configurable.
742   if (Line.Tokens.size() < 4)
743     return false;
744   auto I = Line.Tokens.begin();
745   if (I->Tok->TokenText != "goog")
746     return false;
747   ++I;
748   if (I->Tok->isNot(tok::period))
749     return false;
750   ++I;
751   if (I->Tok->TokenText != "scope")
752     return false;
753   ++I;
754   return I->Tok->is(tok::l_paren);
755 }
756 
757 static bool isIIFE(const UnwrappedLine &Line,
758                    const AdditionalKeywords &Keywords) {
759   // Look for the start of an immediately invoked anonymous function.
760   // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
761   // This is commonly done in JavaScript to create a new, anonymous scope.
762   // Example: (function() { ... })()
763   if (Line.Tokens.size() < 3)
764     return false;
765   auto I = Line.Tokens.begin();
766   if (I->Tok->isNot(tok::l_paren))
767     return false;
768   ++I;
769   if (I->Tok->isNot(Keywords.kw_function))
770     return false;
771   ++I;
772   return I->Tok->is(tok::l_paren);
773 }
774 
775 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
776                                    const FormatToken &InitialToken) {
777   if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
778     return Style.BraceWrapping.AfterNamespace;
779   if (InitialToken.is(tok::kw_class))
780     return Style.BraceWrapping.AfterClass;
781   if (InitialToken.is(tok::kw_union))
782     return Style.BraceWrapping.AfterUnion;
783   if (InitialToken.is(tok::kw_struct))
784     return Style.BraceWrapping.AfterStruct;
785   if (InitialToken.is(tok::kw_enum))
786     return Style.BraceWrapping.AfterEnum;
787   return false;
788 }
789 
790 void UnwrappedLineParser::parseChildBlock() {
791   FormatTok->setBlockKind(BK_Block);
792   nextToken();
793   {
794     bool SkipIndent = (Style.isJavaScript() &&
795                        (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
796     ScopedLineState LineState(*this);
797     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
798                                             /*MustBeDeclaration=*/false);
799     Line->Level += SkipIndent ? 0 : 1;
800     parseLevel(/*HasOpeningBrace=*/true);
801     flushComments(isOnNewLine(*FormatTok));
802     Line->Level -= SkipIndent ? 0 : 1;
803   }
804   nextToken();
805 }
806 
807 void UnwrappedLineParser::parsePPDirective() {
808   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
809   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
810 
811   nextToken();
812 
813   if (!FormatTok->Tok.getIdentifierInfo()) {
814     parsePPUnknown();
815     return;
816   }
817 
818   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
819   case tok::pp_define:
820     parsePPDefine();
821     return;
822   case tok::pp_if:
823     parsePPIf(/*IfDef=*/false);
824     break;
825   case tok::pp_ifdef:
826   case tok::pp_ifndef:
827     parsePPIf(/*IfDef=*/true);
828     break;
829   case tok::pp_else:
830     parsePPElse();
831     break;
832   case tok::pp_elifdef:
833   case tok::pp_elifndef:
834   case tok::pp_elif:
835     parsePPElIf();
836     break;
837   case tok::pp_endif:
838     parsePPEndIf();
839     break;
840   default:
841     parsePPUnknown();
842     break;
843   }
844 }
845 
846 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
847   size_t Line = CurrentLines->size();
848   if (CurrentLines == &PreprocessorDirectives)
849     Line += Lines.size();
850 
851   if (Unreachable ||
852       (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
853     PPStack.push_back({PP_Unreachable, Line});
854   else
855     PPStack.push_back({PP_Conditional, Line});
856 }
857 
858 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
859   ++PPBranchLevel;
860   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
861   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
862     PPLevelBranchIndex.push_back(0);
863     PPLevelBranchCount.push_back(0);
864   }
865   PPChainBranchIndex.push(0);
866   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
867   conditionalCompilationCondition(Unreachable || Skip);
868 }
869 
870 void UnwrappedLineParser::conditionalCompilationAlternative() {
871   if (!PPStack.empty())
872     PPStack.pop_back();
873   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
874   if (!PPChainBranchIndex.empty())
875     ++PPChainBranchIndex.top();
876   conditionalCompilationCondition(
877       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
878       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
879 }
880 
881 void UnwrappedLineParser::conditionalCompilationEnd() {
882   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
883   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
884     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
885       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
886     }
887   }
888   // Guard against #endif's without #if.
889   if (PPBranchLevel > -1)
890     --PPBranchLevel;
891   if (!PPChainBranchIndex.empty())
892     PPChainBranchIndex.pop();
893   if (!PPStack.empty())
894     PPStack.pop_back();
895 }
896 
897 void UnwrappedLineParser::parsePPIf(bool IfDef) {
898   bool IfNDef = FormatTok->is(tok::pp_ifndef);
899   nextToken();
900   bool Unreachable = false;
901   if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
902     Unreachable = true;
903   if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
904     Unreachable = true;
905   conditionalCompilationStart(Unreachable);
906   FormatToken *IfCondition = FormatTok;
907   // If there's a #ifndef on the first line, and the only lines before it are
908   // comments, it could be an include guard.
909   bool MaybeIncludeGuard = IfNDef;
910   if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
911     for (auto &Line : Lines) {
912       if (!Line.Tokens.front().Tok->is(tok::comment)) {
913         MaybeIncludeGuard = false;
914         IncludeGuard = IG_Rejected;
915         break;
916       }
917     }
918   --PPBranchLevel;
919   parsePPUnknown();
920   ++PPBranchLevel;
921   if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
922     IncludeGuard = IG_IfNdefed;
923     IncludeGuardToken = IfCondition;
924   }
925 }
926 
927 void UnwrappedLineParser::parsePPElse() {
928   // If a potential include guard has an #else, it's not an include guard.
929   if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
930     IncludeGuard = IG_Rejected;
931   conditionalCompilationAlternative();
932   if (PPBranchLevel > -1)
933     --PPBranchLevel;
934   parsePPUnknown();
935   ++PPBranchLevel;
936 }
937 
938 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
939 
940 void UnwrappedLineParser::parsePPEndIf() {
941   conditionalCompilationEnd();
942   parsePPUnknown();
943   // If the #endif of a potential include guard is the last thing in the file,
944   // then we found an include guard.
945   if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
946       Style.IndentPPDirectives != FormatStyle::PPDIS_None)
947     IncludeGuard = IG_Found;
948 }
949 
950 void UnwrappedLineParser::parsePPDefine() {
951   nextToken();
952 
953   if (!FormatTok->Tok.getIdentifierInfo()) {
954     IncludeGuard = IG_Rejected;
955     IncludeGuardToken = nullptr;
956     parsePPUnknown();
957     return;
958   }
959 
960   if (IncludeGuard == IG_IfNdefed &&
961       IncludeGuardToken->TokenText == FormatTok->TokenText) {
962     IncludeGuard = IG_Defined;
963     IncludeGuardToken = nullptr;
964     for (auto &Line : Lines) {
965       if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
966         IncludeGuard = IG_Rejected;
967         break;
968       }
969     }
970   }
971 
972   nextToken();
973   if (FormatTok->Tok.getKind() == tok::l_paren &&
974       FormatTok->WhitespaceRange.getBegin() ==
975           FormatTok->WhitespaceRange.getEnd()) {
976     parseParens();
977   }
978   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
979     Line->Level += PPBranchLevel + 1;
980   addUnwrappedLine();
981   ++Line->Level;
982 
983   // Errors during a preprocessor directive can only affect the layout of the
984   // preprocessor directive, and thus we ignore them. An alternative approach
985   // would be to use the same approach we use on the file level (no
986   // re-indentation if there was a structural error) within the macro
987   // definition.
988   parseFile();
989 }
990 
991 void UnwrappedLineParser::parsePPUnknown() {
992   do {
993     nextToken();
994   } while (!eof());
995   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
996     Line->Level += PPBranchLevel + 1;
997   addUnwrappedLine();
998 }
999 
1000 // Here we exclude certain tokens that are not usually the first token in an
1001 // unwrapped line. This is used in attempt to distinguish macro calls without
1002 // trailing semicolons from other constructs split to several lines.
1003 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1004   // Semicolon can be a null-statement, l_square can be a start of a macro or
1005   // a C++11 attribute, but this doesn't seem to be common.
1006   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1007          Tok.isNot(TT_AttributeSquare) &&
1008          // Tokens that can only be used as binary operators and a part of
1009          // overloaded operator names.
1010          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1011          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1012          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1013          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1014          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1015          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1016          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1017          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1018          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1019          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1020          Tok.isNot(tok::lesslessequal) &&
1021          // Colon is used in labels, base class lists, initializer lists,
1022          // range-based for loops, ternary operator, but should never be the
1023          // first token in an unwrapped line.
1024          Tok.isNot(tok::colon) &&
1025          // 'noexcept' is a trailing annotation.
1026          Tok.isNot(tok::kw_noexcept);
1027 }
1028 
1029 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1030                           const FormatToken *FormatTok) {
1031   // FIXME: This returns true for C/C++ keywords like 'struct'.
1032   return FormatTok->is(tok::identifier) &&
1033          (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1034           !FormatTok->isOneOf(
1035               Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1036               Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1037               Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1038               Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1039               Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1040               Keywords.kw_instanceof, Keywords.kw_interface,
1041               Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1042 }
1043 
1044 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1045                                  const FormatToken *FormatTok) {
1046   return FormatTok->Tok.isLiteral() ||
1047          FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1048          mustBeJSIdent(Keywords, FormatTok);
1049 }
1050 
1051 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1052 // when encountered after a value (see mustBeJSIdentOrValue).
1053 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1054                            const FormatToken *FormatTok) {
1055   return FormatTok->isOneOf(
1056       tok::kw_return, Keywords.kw_yield,
1057       // conditionals
1058       tok::kw_if, tok::kw_else,
1059       // loops
1060       tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1061       // switch/case
1062       tok::kw_switch, tok::kw_case,
1063       // exceptions
1064       tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1065       // declaration
1066       tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1067       Keywords.kw_async, Keywords.kw_function,
1068       // import/export
1069       Keywords.kw_import, tok::kw_export);
1070 }
1071 
1072 // Checks whether a token is a type in K&R C (aka C78).
1073 static bool isC78Type(const FormatToken &Tok) {
1074   return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1075                      tok::kw_unsigned, tok::kw_float, tok::kw_double,
1076                      tok::identifier);
1077 }
1078 
1079 // This function checks whether a token starts the first parameter declaration
1080 // in a K&R C (aka C78) function definition, e.g.:
1081 //   int f(a, b)
1082 //   short a, b;
1083 //   {
1084 //      return a + b;
1085 //   }
1086 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1087                                const FormatToken *FuncName) {
1088   assert(Tok);
1089   assert(Next);
1090   assert(FuncName);
1091 
1092   if (FuncName->isNot(tok::identifier))
1093     return false;
1094 
1095   const FormatToken *Prev = FuncName->Previous;
1096   if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1097     return false;
1098 
1099   if (!isC78Type(*Tok) &&
1100       !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1101     return false;
1102 
1103   if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1104     return false;
1105 
1106   Tok = Tok->Previous;
1107   if (!Tok || Tok->isNot(tok::r_paren))
1108     return false;
1109 
1110   Tok = Tok->Previous;
1111   if (!Tok || Tok->isNot(tok::identifier))
1112     return false;
1113 
1114   return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1115 }
1116 
1117 void UnwrappedLineParser::parseModuleImport() {
1118   nextToken();
1119   while (!eof()) {
1120     if (FormatTok->is(tok::colon)) {
1121       FormatTok->setType(TT_ModulePartitionColon);
1122     }
1123     // Handle import <foo/bar.h> as we would an include statement.
1124     else if (FormatTok->is(tok::less)) {
1125       nextToken();
1126       while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1127         // Mark tokens up to the trailing line comments as implicit string
1128         // literals.
1129         if (FormatTok->isNot(tok::comment) &&
1130             !FormatTok->TokenText.startswith("//"))
1131           FormatTok->setType(TT_ImplicitStringLiteral);
1132         nextToken();
1133       }
1134     }
1135     if (FormatTok->is(tok::semi)) {
1136       nextToken();
1137       break;
1138     }
1139     nextToken();
1140   }
1141 
1142   addUnwrappedLine();
1143 }
1144 
1145 // readTokenWithJavaScriptASI reads the next token and terminates the current
1146 // line if JavaScript Automatic Semicolon Insertion must
1147 // happen between the current token and the next token.
1148 //
1149 // This method is conservative - it cannot cover all edge cases of JavaScript,
1150 // but only aims to correctly handle certain well known cases. It *must not*
1151 // return true in speculative cases.
1152 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1153   FormatToken *Previous = FormatTok;
1154   readToken();
1155   FormatToken *Next = FormatTok;
1156 
1157   bool IsOnSameLine =
1158       CommentsBeforeNextToken.empty()
1159           ? Next->NewlinesBefore == 0
1160           : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1161   if (IsOnSameLine)
1162     return;
1163 
1164   bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1165   bool PreviousStartsTemplateExpr =
1166       Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1167   if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1168     // If the line contains an '@' sign, the previous token might be an
1169     // annotation, which can precede another identifier/value.
1170     bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1171       return LineNode.Tok->is(tok::at);
1172     });
1173     if (HasAt)
1174       return;
1175   }
1176   if (Next->is(tok::exclaim) && PreviousMustBeValue)
1177     return addUnwrappedLine();
1178   bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1179   bool NextEndsTemplateExpr =
1180       Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1181   if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1182       (PreviousMustBeValue ||
1183        Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1184                          tok::minusminus)))
1185     return addUnwrappedLine();
1186   if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1187       isJSDeclOrStmt(Keywords, Next))
1188     return addUnwrappedLine();
1189 }
1190 
// Parses one structural element starting at FormatTok.
//
// The first switch dispatches on the leading token: constructs with a
// dedicated parser (namespaces, control statements, labels, extern blocks,
// imports, ...) are handed off and the function returns. Everything else
// falls through to the token loop below, which consumes tokens until a
// construct terminates the element (usually by adding an unwrapped line and
// returning) or until eof() is reached.
//
// \p IsTopLevel is only consulted when deciding whether a parenthesized list
// may be the parameter list of a K&R C function definition.
void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Mark every token inside the asm block as finalized, up to the
      // closing brace.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
        Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.isJavaScript() && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        if (Style.BraceWrapping.AfterExternBlock)
          addUnwrappedLine();
        // Either we indent or for backwards compatibility we follow the
        // AfterExternBlock style.
        unsigned AddLevels =
            (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
                    (Style.BraceWrapping.AfterExternBlock &&
                     Style.IndentExternBlock ==
                         FormatStyle::IEBS_AfterExternBlock)
                ? 1u
                : 0u;
        parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.isJavaScript()) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.isJavaScript()) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
      if (Style.isCpp()) {
        parseModuleImport();
        return;
      }
    }
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic token loop: keep consuming tokens of the current element until
  // one of the cases below ends it.
  do {
    // May be null; cases that use it check before dereferencing.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        nextToken();
        break;
      }
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock();
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above).  But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      return;
    case tok::kw_requires:
      parseRequires();
      return;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.isJavaScript() && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock();
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.isJavaScript() && Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.isJavaScript()) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the following token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (!mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.isJavaScript())
        break;

      // Only an identifier that is the first token of the line (optionally
      // after a leading comment) is considered as a label or macro.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Heuristic: an all-uppercase name that is either function-like or at
        // least five characters long, followed by a line break and a token
        // that may start a line, is treated as a macro invocation.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      if ((Style.isJavaScript() || Style.isCSharp()) &&
          FormatTok->is(TT_FatArrow)) {
        tryToParseChildBlock();
        break;
      }

      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
1675 
1676 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1677   assert(FormatTok->is(tok::l_brace));
1678   if (!Style.isCSharp())
1679     return false;
1680   // See if it's a property accessor.
1681   if (FormatTok->Previous->isNot(tok::identifier))
1682     return false;
1683 
1684   // See if we are inside a property accessor.
1685   //
1686   // Record the current tokenPosition so that we can advance and
1687   // reset the current token. `Next` is not set yet so we need
1688   // another way to advance along the token stream.
1689   unsigned int StoredPosition = Tokens->getPosition();
1690   FormatToken *Tok = Tokens->getNextToken();
1691 
1692   // A trivial property accessor is of the form:
1693   // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1694   // Track these as they do not require line breaks to be introduced.
1695   bool HasGetOrSet = false;
1696   bool IsTrivialPropertyAccessor = true;
1697   while (!eof()) {
1698     if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1699                      tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1700                      Keywords.kw_set)) {
1701       if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1702         HasGetOrSet = true;
1703       Tok = Tokens->getNextToken();
1704       continue;
1705     }
1706     if (Tok->isNot(tok::r_brace))
1707       IsTrivialPropertyAccessor = false;
1708     break;
1709   }
1710 
1711   if (!HasGetOrSet) {
1712     Tokens->setPosition(StoredPosition);
1713     return false;
1714   }
1715 
1716   // Try to parse the property accessor:
1717   // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1718   Tokens->setPosition(StoredPosition);
1719   if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
1720     addUnwrappedLine();
1721   nextToken();
1722   do {
1723     switch (FormatTok->Tok.getKind()) {
1724     case tok::r_brace:
1725       nextToken();
1726       if (FormatTok->is(tok::equal)) {
1727         while (!eof() && FormatTok->isNot(tok::semi))
1728           nextToken();
1729         nextToken();
1730       }
1731       addUnwrappedLine();
1732       return true;
1733     case tok::l_brace:
1734       ++Line->Level;
1735       parseBlock(/*MustBeDeclaration=*/true);
1736       addUnwrappedLine();
1737       --Line->Level;
1738       break;
1739     case tok::equal:
1740       if (FormatTok->is(TT_FatArrow)) {
1741         ++Line->Level;
1742         do {
1743           nextToken();
1744         } while (!eof() && FormatTok->isNot(tok::semi));
1745         nextToken();
1746         addUnwrappedLine();
1747         --Line->Level;
1748         break;
1749       }
1750       nextToken();
1751       break;
1752     default:
1753       if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1754           !IsTrivialPropertyAccessor) {
1755         // Non-trivial get/set needs to be on its own line.
1756         addUnwrappedLine();
1757       }
1758       nextToken();
1759     }
1760   } while (!eof());
1761 
1762   // Unreachable for well-formed code (paired '{' and '}').
1763   return true;
1764 }
1765 
1766 bool UnwrappedLineParser::tryToParseLambda() {
1767   if (!Style.isCpp()) {
1768     nextToken();
1769     return false;
1770   }
1771   assert(FormatTok->is(tok::l_square));
1772   FormatToken &LSquare = *FormatTok;
1773   if (!tryToParseLambdaIntroducer())
1774     return false;
1775 
1776   bool SeenArrow = false;
1777 
1778   while (FormatTok->isNot(tok::l_brace)) {
1779     if (FormatTok->isSimpleTypeSpecifier()) {
1780       nextToken();
1781       continue;
1782     }
1783     switch (FormatTok->Tok.getKind()) {
1784     case tok::l_brace:
1785       break;
1786     case tok::l_paren:
1787       parseParens();
1788       break;
1789     case tok::l_square:
1790       parseSquare();
1791       break;
1792     case tok::amp:
1793     case tok::star:
1794     case tok::kw_const:
1795     case tok::comma:
1796     case tok::less:
1797     case tok::greater:
1798     case tok::identifier:
1799     case tok::numeric_constant:
1800     case tok::coloncolon:
1801     case tok::kw_class:
1802     case tok::kw_mutable:
1803     case tok::kw_noexcept:
1804     case tok::kw_template:
1805     case tok::kw_typename:
1806       nextToken();
1807       break;
1808     // Specialization of a template with an integer parameter can contain
1809     // arithmetic, logical, comparison and ternary operators.
1810     //
1811     // FIXME: This also accepts sequences of operators that are not in the scope
1812     // of a template argument list.
1813     //
1814     // In a C++ lambda a template type can only occur after an arrow. We use
1815     // this as an heuristic to distinguish between Objective-C expressions
1816     // followed by an `a->b` expression, such as:
1817     // ([obj func:arg] + a->b)
1818     // Otherwise the code below would parse as a lambda.
1819     //
1820     // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1821     // explicit template lists: []<bool b = true && false>(U &&u){}
1822     case tok::plus:
1823     case tok::minus:
1824     case tok::exclaim:
1825     case tok::tilde:
1826     case tok::slash:
1827     case tok::percent:
1828     case tok::lessless:
1829     case tok::pipe:
1830     case tok::pipepipe:
1831     case tok::ampamp:
1832     case tok::caret:
1833     case tok::equalequal:
1834     case tok::exclaimequal:
1835     case tok::greaterequal:
1836     case tok::lessequal:
1837     case tok::question:
1838     case tok::colon:
1839     case tok::ellipsis:
1840     case tok::kw_true:
1841     case tok::kw_false:
1842       if (SeenArrow) {
1843         nextToken();
1844         break;
1845       }
1846       return true;
1847     case tok::arrow:
1848       // This might or might not actually be a lambda arrow (this could be an
1849       // ObjC method invocation followed by a dereferencing arrow). We might
1850       // reset this back to TT_Unknown in TokenAnnotator.
1851       FormatTok->setType(TT_LambdaArrow);
1852       SeenArrow = true;
1853       nextToken();
1854       break;
1855     default:
1856       return true;
1857     }
1858   }
1859   FormatTok->setType(TT_LambdaLBrace);
1860   LSquare.setType(TT_LambdaLSquare);
1861   parseChildBlock();
1862   return true;
1863 }
1864 
1865 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1866   const FormatToken *Previous = FormatTok->Previous;
1867   if (Previous &&
1868       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1869                          tok::kw_delete, tok::l_square) ||
1870        FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1871        Previous->isSimpleTypeSpecifier())) {
1872     nextToken();
1873     return false;
1874   }
1875   nextToken();
1876   if (FormatTok->is(tok::l_square)) {
1877     return false;
1878   }
1879   parseSquare(/*LambdaIntroducer=*/true);
1880   return true;
1881 }
1882 
1883 void UnwrappedLineParser::tryToParseJSFunction() {
1884   assert(FormatTok->is(Keywords.kw_function) ||
1885          FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1886   if (FormatTok->is(Keywords.kw_async))
1887     nextToken();
1888   // Consume "function".
1889   nextToken();
1890 
1891   // Consume * (generator function). Treat it like C++'s overloaded operators.
1892   if (FormatTok->is(tok::star)) {
1893     FormatTok->setType(TT_OverloadedOperator);
1894     nextToken();
1895   }
1896 
1897   // Consume function name.
1898   if (FormatTok->is(tok::identifier))
1899     nextToken();
1900 
1901   if (FormatTok->isNot(tok::l_paren))
1902     return;
1903 
1904   // Parse formal parameter list.
1905   parseParens();
1906 
1907   if (FormatTok->is(tok::colon)) {
1908     // Parse a type definition.
1909     nextToken();
1910 
1911     // Eat the type declaration. For braced inline object types, balance braces,
1912     // otherwise just parse until finding an l_brace for the function body.
1913     if (FormatTok->is(tok::l_brace))
1914       tryToParseBracedList();
1915     else
1916       while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1917         nextToken();
1918   }
1919 
1920   if (FormatTok->is(tok::semi))
1921     return;
1922 
1923   parseChildBlock();
1924 }
1925 
1926 bool UnwrappedLineParser::tryToParseBracedList() {
1927   if (FormatTok->is(BK_Unknown))
1928     calculateBraceTypes();
1929   assert(FormatTok->isNot(BK_Unknown));
1930   if (FormatTok->is(BK_Block))
1931     return false;
1932   nextToken();
1933   parseBracedList();
1934   return true;
1935 }
1936 
1937 bool UnwrappedLineParser::tryToParseChildBlock() {
1938   assert(Style.isJavaScript() || Style.isCSharp());
1939   assert(FormatTok->is(TT_FatArrow));
1940   // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
1941   // They always start an expression or a child block if followed by a curly
1942   // brace.
1943   nextToken();
1944   if (FormatTok->isNot(tok::l_brace))
1945     return false;
1946   parseChildBlock();
1947   return true;
1948 }
1949 
1950 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1951                                           bool IsEnum,
1952                                           tok::TokenKind ClosingBraceKind) {
1953   bool HasError = false;
1954 
1955   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1956   // replace this by using parseAssignmentExpression() inside.
1957   do {
1958     if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
1959         tryToParseChildBlock())
1960       continue;
1961     if (Style.isJavaScript()) {
1962       if (FormatTok->is(Keywords.kw_function) ||
1963           FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1964         tryToParseJSFunction();
1965         continue;
1966       }
1967       if (FormatTok->is(tok::l_brace)) {
1968         // Could be a method inside of a braced list `{a() { return 1; }}`.
1969         if (tryToParseBracedList())
1970           continue;
1971         parseChildBlock();
1972       }
1973     }
1974     if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1975       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1976         addUnwrappedLine();
1977       nextToken();
1978       return !HasError;
1979     }
1980     switch (FormatTok->Tok.getKind()) {
1981     case tok::l_square:
1982       if (Style.isCSharp())
1983         parseSquare();
1984       else
1985         tryToParseLambda();
1986       break;
1987     case tok::l_paren:
1988       parseParens();
1989       // JavaScript can just have free standing methods and getters/setters in
1990       // object literals. Detect them by a "{" following ")".
1991       if (Style.isJavaScript()) {
1992         if (FormatTok->is(tok::l_brace))
1993           parseChildBlock();
1994         break;
1995       }
1996       break;
1997     case tok::l_brace:
1998       // Assume there are no blocks inside a braced init list apart
1999       // from the ones we explicitly parse out (like lambdas).
2000       FormatTok->setBlockKind(BK_BracedInit);
2001       nextToken();
2002       parseBracedList();
2003       break;
2004     case tok::less:
2005       if (Style.Language == FormatStyle::LK_Proto) {
2006         nextToken();
2007         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2008                         /*ClosingBraceKind=*/tok::greater);
2009       } else {
2010         nextToken();
2011       }
2012       break;
2013     case tok::semi:
2014       // JavaScript (or more precisely TypeScript) can have semicolons in braced
2015       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2016       // used for error recovery if we have otherwise determined that this is
2017       // a braced list.
2018       if (Style.isJavaScript()) {
2019         nextToken();
2020         break;
2021       }
2022       HasError = true;
2023       if (!ContinueOnSemicolons)
2024         return !HasError;
2025       nextToken();
2026       break;
2027     case tok::comma:
2028       nextToken();
2029       if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2030         addUnwrappedLine();
2031       break;
2032     default:
2033       nextToken();
2034       break;
2035     }
2036   } while (!eof());
2037   return false;
2038 }
2039 
2040 void UnwrappedLineParser::parseParens() {
2041   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
2042   nextToken();
2043   do {
2044     switch (FormatTok->Tok.getKind()) {
2045     case tok::l_paren:
2046       parseParens();
2047       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2048         parseChildBlock();
2049       break;
2050     case tok::r_paren:
2051       nextToken();
2052       return;
2053     case tok::r_brace:
2054       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2055       return;
2056     case tok::l_square:
2057       tryToParseLambda();
2058       break;
2059     case tok::l_brace:
2060       if (!tryToParseBracedList())
2061         parseChildBlock();
2062       break;
2063     case tok::at:
2064       nextToken();
2065       if (FormatTok->Tok.is(tok::l_brace)) {
2066         nextToken();
2067         parseBracedList();
2068       }
2069       break;
2070     case tok::equal:
2071       if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2072         tryToParseChildBlock();
2073       else
2074         nextToken();
2075       break;
2076     case tok::kw_class:
2077       if (Style.isJavaScript())
2078         parseRecord(/*ParseAsExpr=*/true);
2079       else
2080         nextToken();
2081       break;
2082     case tok::identifier:
2083       if (Style.isJavaScript() &&
2084           (FormatTok->is(Keywords.kw_function) ||
2085            FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2086         tryToParseJSFunction();
2087       else
2088         nextToken();
2089       break;
2090     default:
2091       nextToken();
2092       break;
2093     }
2094   } while (!eof());
2095 }
2096 
2097 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2098   if (!LambdaIntroducer) {
2099     assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2100     if (tryToParseLambda())
2101       return;
2102   }
2103   do {
2104     switch (FormatTok->Tok.getKind()) {
2105     case tok::l_paren:
2106       parseParens();
2107       break;
2108     case tok::r_square:
2109       nextToken();
2110       return;
2111     case tok::r_brace:
2112       // A "}" inside parenthesis is an error if there wasn't a matching "{".
2113       return;
2114     case tok::l_square:
2115       parseSquare();
2116       break;
2117     case tok::l_brace: {
2118       if (!tryToParseBracedList())
2119         parseChildBlock();
2120       break;
2121     }
2122     case tok::at:
2123       nextToken();
2124       if (FormatTok->Tok.is(tok::l_brace)) {
2125         nextToken();
2126         parseBracedList();
2127       }
2128       break;
2129     default:
2130       nextToken();
2131       break;
2132     }
2133   } while (!eof());
2134 }
2135 
2136 void UnwrappedLineParser::parseIfThenElse() {
2137   auto HandleAttributes = [this]() {
2138     // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2139     if (FormatTok->is(TT_AttributeMacro))
2140       nextToken();
2141     // Handle [[likely]] / [[unlikely]] attributes.
2142     if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2143       parseSquare();
2144   };
2145 
2146   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2147   nextToken();
2148   if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2149     nextToken();
2150   if (FormatTok->Tok.is(tok::l_paren))
2151     parseParens();
2152   HandleAttributes();
2153   bool NeedsUnwrappedLine = false;
2154   if (FormatTok->Tok.is(tok::l_brace)) {
2155     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2156     parseBlock();
2157     if (Style.BraceWrapping.BeforeElse)
2158       addUnwrappedLine();
2159     else
2160       NeedsUnwrappedLine = true;
2161   } else {
2162     addUnwrappedLine();
2163     ++Line->Level;
2164     parseStructuralElement();
2165     --Line->Level;
2166   }
2167   if (FormatTok->Tok.is(tok::kw_else)) {
2168     nextToken();
2169     HandleAttributes();
2170     if (FormatTok->Tok.is(tok::l_brace)) {
2171       CompoundStatementIndenter Indenter(this, Style, Line->Level);
2172       parseBlock();
2173       addUnwrappedLine();
2174     } else if (FormatTok->Tok.is(tok::kw_if)) {
2175       FormatToken *Previous = Tokens->getPreviousToken();
2176       bool PrecededByComment = Previous && Previous->is(tok::comment);
2177       if (PrecededByComment) {
2178         addUnwrappedLine();
2179         ++Line->Level;
2180       }
2181       parseIfThenElse();
2182       if (PrecededByComment)
2183         --Line->Level;
2184     } else {
2185       addUnwrappedLine();
2186       ++Line->Level;
2187       parseStructuralElement();
2188       if (FormatTok->is(tok::eof))
2189         addUnwrappedLine();
2190       --Line->Level;
2191     }
2192   } else if (NeedsUnwrappedLine) {
2193     addUnwrappedLine();
2194   }
2195 }
2196 
2197 void UnwrappedLineParser::parseTryCatch() {
2198   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2199   nextToken();
2200   bool NeedsUnwrappedLine = false;
2201   if (FormatTok->is(tok::colon)) {
2202     // We are in a function try block, what comes is an initializer list.
2203     nextToken();
2204 
2205     // In case identifiers were removed by clang-tidy, what might follow is
2206     // multiple commas in sequence - before the first identifier.
2207     while (FormatTok->is(tok::comma))
2208       nextToken();
2209 
2210     while (FormatTok->is(tok::identifier)) {
2211       nextToken();
2212       if (FormatTok->is(tok::l_paren))
2213         parseParens();
2214       if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2215           FormatTok->is(tok::l_brace)) {
2216         do {
2217           nextToken();
2218         } while (!FormatTok->is(tok::r_brace));
2219         nextToken();
2220       }
2221 
2222       // In case identifiers were removed by clang-tidy, what might follow is
2223       // multiple commas in sequence - after the first identifier.
2224       while (FormatTok->is(tok::comma))
2225         nextToken();
2226     }
2227   }
2228   // Parse try with resource.
2229   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2230     parseParens();
2231   }
2232   if (FormatTok->is(tok::l_brace)) {
2233     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2234     parseBlock();
2235     if (Style.BraceWrapping.BeforeCatch) {
2236       addUnwrappedLine();
2237     } else {
2238       NeedsUnwrappedLine = true;
2239     }
2240   } else if (!FormatTok->is(tok::kw_catch)) {
2241     // The C++ standard requires a compound-statement after a try.
2242     // If there's none, we try to assume there's a structuralElement
2243     // and try to continue.
2244     addUnwrappedLine();
2245     ++Line->Level;
2246     parseStructuralElement();
2247     --Line->Level;
2248   }
2249   while (1) {
2250     if (FormatTok->is(tok::at))
2251       nextToken();
2252     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2253                              tok::kw___finally) ||
2254           ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2255            FormatTok->is(Keywords.kw_finally)) ||
2256           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2257            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2258       break;
2259     nextToken();
2260     while (FormatTok->isNot(tok::l_brace)) {
2261       if (FormatTok->is(tok::l_paren)) {
2262         parseParens();
2263         continue;
2264       }
2265       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2266         return;
2267       nextToken();
2268     }
2269     NeedsUnwrappedLine = false;
2270     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2271     parseBlock();
2272     if (Style.BraceWrapping.BeforeCatch)
2273       addUnwrappedLine();
2274     else
2275       NeedsUnwrappedLine = true;
2276   }
2277   if (NeedsUnwrappedLine)
2278     addUnwrappedLine();
2279 }
2280 
2281 void UnwrappedLineParser::parseNamespace() {
2282   assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2283          "'namespace' expected");
2284 
2285   const FormatToken &InitialToken = *FormatTok;
2286   nextToken();
2287   if (InitialToken.is(TT_NamespaceMacro)) {
2288     parseParens();
2289   } else {
2290     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2291                               tok::l_square, tok::period)) {
2292       if (FormatTok->is(tok::l_square))
2293         parseSquare();
2294       else
2295         nextToken();
2296     }
2297   }
2298   if (FormatTok->Tok.is(tok::l_brace)) {
2299     if (ShouldBreakBeforeBrace(Style, InitialToken))
2300       addUnwrappedLine();
2301 
2302     unsigned AddLevels =
2303         Style.NamespaceIndentation == FormatStyle::NI_All ||
2304                 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2305                  DeclarationScopeStack.size() > 1)
2306             ? 1u
2307             : 0u;
2308     bool ManageWhitesmithsBraces =
2309         AddLevels == 0u &&
2310         Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2311 
2312     // If we're in Whitesmiths mode, indent the brace if we're not indenting
2313     // the whole block.
2314     if (ManageWhitesmithsBraces)
2315       ++Line->Level;
2316 
2317     parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2318                /*MunchSemi=*/true,
2319                /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2320 
2321     // Munch the semicolon after a namespace. This is more common than one would
2322     // think. Putting the semicolon into its own line is very ugly.
2323     if (FormatTok->Tok.is(tok::semi))
2324       nextToken();
2325 
2326     addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2327 
2328     if (ManageWhitesmithsBraces)
2329       --Line->Level;
2330   }
2331   // FIXME: Add error handling.
2332 }
2333 
2334 void UnwrappedLineParser::parseNew() {
2335   assert(FormatTok->is(tok::kw_new) && "'new' expected");
2336   nextToken();
2337 
2338   if (Style.isCSharp()) {
2339     do {
2340       if (FormatTok->is(tok::l_brace))
2341         parseBracedList();
2342 
2343       if (FormatTok->isOneOf(tok::semi, tok::comma))
2344         return;
2345 
2346       nextToken();
2347     } while (!eof());
2348   }
2349 
2350   if (Style.Language != FormatStyle::LK_Java)
2351     return;
2352 
2353   // In Java, we can parse everything up to the parens, which aren't optional.
2354   do {
2355     // There should not be a ;, { or } before the new's open paren.
2356     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2357       return;
2358 
2359     // Consume the parens.
2360     if (FormatTok->is(tok::l_paren)) {
2361       parseParens();
2362 
2363       // If there is a class body of an anonymous class, consume that as child.
2364       if (FormatTok->is(tok::l_brace))
2365         parseChildBlock();
2366       return;
2367     }
2368     nextToken();
2369   } while (!eof());
2370 }
2371 
2372 void UnwrappedLineParser::parseForOrWhileLoop() {
2373   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2374          "'for', 'while' or foreach macro expected");
2375   nextToken();
2376   // JS' for await ( ...
2377   if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2378     nextToken();
2379   if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2380     nextToken();
2381   if (FormatTok->Tok.is(tok::l_paren))
2382     parseParens();
2383   if (FormatTok->Tok.is(tok::l_brace)) {
2384     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2385     parseBlock();
2386     addUnwrappedLine();
2387   } else {
2388     addUnwrappedLine();
2389     ++Line->Level;
2390     parseStructuralElement();
2391     --Line->Level;
2392   }
2393 }
2394 
2395 void UnwrappedLineParser::parseDoWhile() {
2396   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2397   nextToken();
2398   if (FormatTok->Tok.is(tok::l_brace)) {
2399     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2400     parseBlock();
2401     if (Style.BraceWrapping.BeforeWhile)
2402       addUnwrappedLine();
2403   } else {
2404     addUnwrappedLine();
2405     ++Line->Level;
2406     parseStructuralElement();
2407     --Line->Level;
2408   }
2409 
2410   // FIXME: Add error handling.
2411   if (!FormatTok->Tok.is(tok::kw_while)) {
2412     addUnwrappedLine();
2413     return;
2414   }
2415 
2416   // If in Whitesmiths mode, the line with the while() needs to be indented
2417   // to the same level as the block.
2418   if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2419     ++Line->Level;
2420 
2421   nextToken();
2422   parseStructuralElement();
2423 }
2424 
2425 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2426   nextToken();
2427   unsigned OldLineLevel = Line->Level;
2428   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2429     --Line->Level;
2430   if (LeftAlignLabel)
2431     Line->Level = 0;
2432 
2433   if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2434       FormatTok->Tok.is(tok::l_brace)) {
2435 
2436     CompoundStatementIndenter Indenter(this, Line->Level,
2437                                        Style.BraceWrapping.AfterCaseLabel,
2438                                        Style.BraceWrapping.IndentBraces);
2439     parseBlock();
2440     if (FormatTok->Tok.is(tok::kw_break)) {
2441       if (Style.BraceWrapping.AfterControlStatement ==
2442           FormatStyle::BWACS_Always) {
2443         addUnwrappedLine();
2444         if (!Style.IndentCaseBlocks &&
2445             Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2446           Line->Level++;
2447         }
2448       }
2449       parseStructuralElement();
2450     }
2451     addUnwrappedLine();
2452   } else {
2453     if (FormatTok->is(tok::semi))
2454       nextToken();
2455     addUnwrappedLine();
2456   }
2457   Line->Level = OldLineLevel;
2458   if (FormatTok->isNot(tok::l_brace)) {
2459     parseStructuralElement();
2460     addUnwrappedLine();
2461   }
2462 }
2463 
2464 void UnwrappedLineParser::parseCaseLabel() {
2465   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2466 
2467   // FIXME: fix handling of complex expressions here.
2468   do {
2469     nextToken();
2470   } while (!eof() && !FormatTok->Tok.is(tok::colon));
2471   parseLabel();
2472 }
2473 
2474 void UnwrappedLineParser::parseSwitch() {
2475   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2476   nextToken();
2477   if (FormatTok->Tok.is(tok::l_paren))
2478     parseParens();
2479   if (FormatTok->Tok.is(tok::l_brace)) {
2480     CompoundStatementIndenter Indenter(this, Style, Line->Level);
2481     parseBlock();
2482     addUnwrappedLine();
2483   } else {
2484     addUnwrappedLine();
2485     ++Line->Level;
2486     parseStructuralElement();
2487     --Line->Level;
2488   }
2489 }
2490 
2491 void UnwrappedLineParser::parseAccessSpecifier() {
2492   nextToken();
2493   // Understand Qt's slots.
2494   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2495     nextToken();
2496   // Otherwise, we don't know what it is, and we'd better keep the next token.
2497   if (FormatTok->Tok.is(tok::colon))
2498     nextToken();
2499   addUnwrappedLine();
2500 }
2501 
2502 void UnwrappedLineParser::parseConcept() {
2503   assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2504   nextToken();
2505   if (!FormatTok->Tok.is(tok::identifier))
2506     return;
2507   nextToken();
2508   if (!FormatTok->Tok.is(tok::equal))
2509     return;
2510   nextToken();
2511   if (FormatTok->Tok.is(tok::kw_requires)) {
2512     nextToken();
2513     parseRequiresExpression(Line->Level);
2514   } else {
2515     parseConstraintExpression(Line->Level);
2516   }
2517 }
2518 
2519 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2520   // requires (R range)
2521   if (FormatTok->Tok.is(tok::l_paren)) {
2522     parseParens();
2523     if (Style.IndentRequires && OriginalLevel != Line->Level) {
2524       addUnwrappedLine();
2525       --Line->Level;
2526     }
2527   }
2528 
2529   if (FormatTok->Tok.is(tok::l_brace)) {
2530     if (Style.BraceWrapping.AfterFunction)
2531       addUnwrappedLine();
2532     FormatTok->setType(TT_FunctionLBrace);
2533     parseBlock();
2534     addUnwrappedLine();
2535   } else {
2536     parseConstraintExpression(OriginalLevel);
2537   }
2538 }
2539 
2540 void UnwrappedLineParser::parseConstraintExpression(
2541     unsigned int OriginalLevel) {
2542   // requires Id<T> && Id<T> || Id<T>
2543   while (
2544       FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
2545     nextToken();
2546     while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
2547                               tok::greater, tok::comma, tok::ellipsis)) {
2548       if (FormatTok->Tok.is(tok::less)) {
2549         parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2550                         /*ClosingBraceKind=*/tok::greater);
2551         continue;
2552       }
2553       nextToken();
2554     }
2555     if (FormatTok->Tok.is(tok::kw_requires)) {
2556       parseRequiresExpression(OriginalLevel);
2557     }
2558     if (FormatTok->Tok.is(tok::less)) {
2559       parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2560                       /*ClosingBraceKind=*/tok::greater);
2561     }
2562 
2563     if (FormatTok->Tok.is(tok::l_paren)) {
2564       parseParens();
2565     }
2566     if (FormatTok->Tok.is(tok::l_brace)) {
2567       if (Style.BraceWrapping.AfterFunction)
2568         addUnwrappedLine();
2569       FormatTok->setType(TT_FunctionLBrace);
2570       parseBlock();
2571     }
2572     if (FormatTok->Tok.is(tok::semi)) {
2573       // Eat any trailing semi.
2574       nextToken();
2575       addUnwrappedLine();
2576     }
2577     if (FormatTok->Tok.is(tok::colon)) {
2578       return;
2579     }
2580     if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
2581       if (FormatTok->Previous &&
2582           !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
2583                                         tok::coloncolon)) {
2584         addUnwrappedLine();
2585       }
2586       if (Style.IndentRequires && OriginalLevel != Line->Level) {
2587         --Line->Level;
2588       }
2589       break;
2590     } else {
2591       FormatTok->setType(TT_ConstraintJunctions);
2592     }
2593 
2594     nextToken();
2595   }
2596 }
2597 
2598 void UnwrappedLineParser::parseRequires() {
2599   assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2600 
2601   unsigned OriginalLevel = Line->Level;
2602   if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2603     addUnwrappedLine();
2604     if (Style.IndentRequires) {
2605       Line->Level++;
2606     }
2607   }
2608   nextToken();
2609 
2610   parseRequiresExpression(OriginalLevel);
2611 }
2612 
2613 bool UnwrappedLineParser::parseEnum() {
2614   const FormatToken &InitialToken = *FormatTok;
2615 
2616   // Won't be 'enum' for NS_ENUMs.
2617   if (FormatTok->Tok.is(tok::kw_enum))
2618     nextToken();
2619 
2620   // In TypeScript, "enum" can also be used as property name, e.g. in interface
2621   // declarations. An "enum" keyword followed by a colon would be a syntax
2622   // error and thus assume it is just an identifier.
2623   if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
2624     return false;
2625 
2626   // In protobuf, "enum" can be used as a field name.
2627   if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2628     return false;
2629 
2630   // Eat up enum class ...
2631   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2632     nextToken();
2633 
2634   while (FormatTok->Tok.getIdentifierInfo() ||
2635          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2636                             tok::greater, tok::comma, tok::question)) {
2637     nextToken();
2638     // We can have macros or attributes in between 'enum' and the enum name.
2639     if (FormatTok->is(tok::l_paren))
2640       parseParens();
2641     if (FormatTok->is(tok::identifier)) {
2642       nextToken();
2643       // If there are two identifiers in a row, this is likely an elaborate
2644       // return type. In Java, this can be "implements", etc.
2645       if (Style.isCpp() && FormatTok->is(tok::identifier))
2646         return false;
2647     }
2648   }
2649 
2650   // Just a declaration or something is wrong.
2651   if (FormatTok->isNot(tok::l_brace))
2652     return true;
2653   FormatTok->setBlockKind(BK_Block);
2654 
2655   if (Style.Language == FormatStyle::LK_Java) {
2656     // Java enums are different.
2657     parseJavaEnumBody();
2658     return true;
2659   }
2660   if (Style.Language == FormatStyle::LK_Proto) {
2661     parseBlock(/*MustBeDeclaration=*/true);
2662     return true;
2663   }
2664 
2665   if (!Style.AllowShortEnumsOnASingleLine &&
2666       ShouldBreakBeforeBrace(Style, InitialToken))
2667     addUnwrappedLine();
2668   // Parse enum body.
2669   nextToken();
2670   if (!Style.AllowShortEnumsOnASingleLine) {
2671     addUnwrappedLine();
2672     Line->Level += 1;
2673   }
2674   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2675                                    /*IsEnum=*/true);
2676   if (!Style.AllowShortEnumsOnASingleLine)
2677     Line->Level -= 1;
2678   if (HasError) {
2679     if (FormatTok->is(tok::semi))
2680       nextToken();
2681     addUnwrappedLine();
2682   }
2683   return true;
2684 
2685   // There is no addUnwrappedLine() here so that we fall through to parsing a
2686   // structural element afterwards. Thus, in "enum A {} n, m;",
2687   // "} n, m;" will end up in one unwrapped line.
2688 }
2689 
2690 bool UnwrappedLineParser::parseStructLike() {
2691   // parseRecord falls through and does not yet add an unwrapped line as a
2692   // record declaration or definition can start a structural element.
2693   parseRecord();
2694   // This does not apply to Java, JavaScript and C#.
2695   if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
2696       Style.isCSharp()) {
2697     if (FormatTok->is(tok::semi))
2698       nextToken();
2699     addUnwrappedLine();
2700     return true;
2701   }
2702   return false;
2703 }
2704 
2705 namespace {
2706 // A class used to set and restore the Token position when peeking
2707 // ahead in the token source.
2708 class ScopedTokenPosition {
2709   unsigned StoredPosition;
2710   FormatTokenSource *Tokens;
2711 
2712 public:
2713   ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2714     assert(Tokens && "Tokens expected to not be null");
2715     StoredPosition = Tokens->getPosition();
2716   }
2717 
2718   ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2719 };
2720 } // namespace
2721 
2722 // Look to see if we have [[ by looking ahead, if
2723 // its not then rewind to the original position.
2724 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2725   ScopedTokenPosition AutoPosition(Tokens);
2726   FormatToken *Tok = Tokens->getNextToken();
2727   // We already read the first [ check for the second.
2728   if (!Tok->is(tok::l_square)) {
2729     return false;
2730   }
2731   // Double check that the attribute is just something
2732   // fairly simple.
2733   while (Tok->isNot(tok::eof)) {
2734     if (Tok->is(tok::r_square)) {
2735       break;
2736     }
2737     Tok = Tokens->getNextToken();
2738   }
2739   if (Tok->is(tok::eof))
2740     return false;
2741   Tok = Tokens->getNextToken();
2742   if (!Tok->is(tok::r_square)) {
2743     return false;
2744   }
2745   Tok = Tokens->getNextToken();
2746   if (Tok->is(tok::semi)) {
2747     return false;
2748   }
2749   return true;
2750 }
2751 
2752 void UnwrappedLineParser::parseJavaEnumBody() {
2753   // Determine whether the enum is simple, i.e. does not have a semicolon or
2754   // constants with class bodies. Simple enums can be formatted like braced
2755   // lists, contracted to a single line, etc.
2756   unsigned StoredPosition = Tokens->getPosition();
2757   bool IsSimple = true;
2758   FormatToken *Tok = Tokens->getNextToken();
2759   while (!Tok->is(tok::eof)) {
2760     if (Tok->is(tok::r_brace))
2761       break;
2762     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2763       IsSimple = false;
2764       break;
2765     }
2766     // FIXME: This will also mark enums with braces in the arguments to enum
2767     // constants as "not simple". This is probably fine in practice, though.
2768     Tok = Tokens->getNextToken();
2769   }
2770   FormatTok = Tokens->setPosition(StoredPosition);
2771 
2772   if (IsSimple) {
2773     nextToken();
2774     parseBracedList();
2775     addUnwrappedLine();
2776     return;
2777   }
2778 
2779   // Parse the body of a more complex enum.
2780   // First add a line for everything up to the "{".
2781   nextToken();
2782   addUnwrappedLine();
2783   ++Line->Level;
2784 
2785   // Parse the enum constants.
2786   while (FormatTok) {
2787     if (FormatTok->is(tok::l_brace)) {
2788       // Parse the constant's class body.
2789       parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2790                  /*MunchSemi=*/false);
2791     } else if (FormatTok->is(tok::l_paren)) {
2792       parseParens();
2793     } else if (FormatTok->is(tok::comma)) {
2794       nextToken();
2795       addUnwrappedLine();
2796     } else if (FormatTok->is(tok::semi)) {
2797       nextToken();
2798       addUnwrappedLine();
2799       break;
2800     } else if (FormatTok->is(tok::r_brace)) {
2801       addUnwrappedLine();
2802       break;
2803     } else {
2804       nextToken();
2805     }
2806   }
2807 
2808   // Parse the class body after the enum's ";" if any.
2809   parseLevel(/*HasOpeningBrace=*/true);
2810   nextToken();
2811   --Line->Level;
2812   addUnwrappedLine();
2813 }
2814 
// Parses a record. The current token on entry is consumed first; after it an
// optional (possibly qualified or token-pasted) name, macros and attributes,
// an optional section introduced by ':' or '<', and finally the body, if one
// follows, are parsed.
//
// If \p ParseAsExpr is true, the body is parsed as a child block. Otherwise it
// is parsed as a declaration block, preceded by a line break if
// ShouldBreakBeforeBrace() asks for one.
//
// No unwrapped line is added for the end of the record; see the comment at
// the end of the function.
void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
  // Remember the introducing token; it is consulted below when deciding
  // whether to break before the opening brace.
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  // An [[attribute]] can be before the identifier.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas, tok::l_square, tok::r_square) ||
         ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    if (Style.isJavaScript() &&
        FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
      // JavaScript/TypeScript supports inline object types in
      // extends/implements positions:
      //     class Foo implements {bar: number} { }
      nextToken();
      if (FormatTok->is(tok::l_brace)) {
        tryToParseBracedList();
        continue;
      }
    }
    // An identifier that is not spelled entirely in upper case is taken to be
    // a regular name rather than a macro.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier) {
      if (FormatTok->Tok.is(tok::l_paren)) {
        parseParens();
      } else if (FormatTok->is(TT_AttributeSquare)) {
        parseSquare();
        // Consume the closing TT_AttributeSquare.
        if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
          nextToken();
      }
    }
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    // Skip ahead until the record body, a ';' or the end of the input.
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        calculateBraceTypes(/*ExpectClassBody=*/true);
        // If the '{' was not parsed as a braced list, stop here so that it is
        // handled as the record body below.
        if (!tryToParseBracedList())
          break;
      }
      // A ';' ends the record without a body.
      if (FormatTok->Tok.is(tok::semi))
        return;
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
        // A C# generic type constraint starts on a line of its own.
        addUnwrappedLine();
        nextToken();
        parseCSharpGenericTypeConstraint();
        break;
      }
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ParseAsExpr) {
      parseChildBlock();
    } else {
      if (ShouldBreakBeforeBrace(Style, InitialToken))
        addUnwrappedLine();

      // Two indentation levels are added when access modifiers are indented,
      // one otherwise.
      unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
      parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
    }
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
2898 
2899 void UnwrappedLineParser::parseObjCMethod() {
2900   assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2901          "'(' or identifier expected.");
2902   do {
2903     if (FormatTok->Tok.is(tok::semi)) {
2904       nextToken();
2905       addUnwrappedLine();
2906       return;
2907     } else if (FormatTok->Tok.is(tok::l_brace)) {
2908       if (Style.BraceWrapping.AfterFunction)
2909         addUnwrappedLine();
2910       parseBlock();
2911       addUnwrappedLine();
2912       return;
2913     } else {
2914       nextToken();
2915     }
2916   } while (!eof());
2917 }
2918 
2919 void UnwrappedLineParser::parseObjCProtocolList() {
2920   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2921   do {
2922     nextToken();
2923     // Early exit in case someone forgot a close angle.
2924     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2925         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2926       return;
2927   } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2928   nextToken(); // Skip '>'.
2929 }
2930 
2931 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2932   do {
2933     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2934       nextToken();
2935       addUnwrappedLine();
2936       break;
2937     }
2938     if (FormatTok->is(tok::l_brace)) {
2939       parseBlock();
2940       // In ObjC interfaces, nothing should be following the "}".
2941       addUnwrappedLine();
2942     } else if (FormatTok->is(tok::r_brace)) {
2943       // Ignore stray "}". parseStructuralElement doesn't consume them.
2944       nextToken();
2945       addUnwrappedLine();
2946     } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2947       nextToken();
2948       parseObjCMethod();
2949     } else {
2950       parseStructuralElement();
2951     }
2952   } while (!eof());
2953 }
2954 
2955 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2956   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2957          FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2958   nextToken();
2959   nextToken(); // interface name
2960 
2961   // @interface can be followed by a lightweight generic
2962   // specialization list, then either a base class or a category.
2963   if (FormatTok->Tok.is(tok::less)) {
2964     parseObjCLightweightGenerics();
2965   }
2966   if (FormatTok->Tok.is(tok::colon)) {
2967     nextToken();
2968     nextToken(); // base class name
2969     // The base class can also have lightweight generics applied to it.
2970     if (FormatTok->Tok.is(tok::less)) {
2971       parseObjCLightweightGenerics();
2972     }
2973   } else if (FormatTok->Tok.is(tok::l_paren))
2974     // Skip category, if present.
2975     parseParens();
2976 
2977   if (FormatTok->Tok.is(tok::less))
2978     parseObjCProtocolList();
2979 
2980   if (FormatTok->Tok.is(tok::l_brace)) {
2981     if (Style.BraceWrapping.AfterObjCDeclaration)
2982       addUnwrappedLine();
2983     parseBlock(/*MustBeDeclaration=*/true);
2984   }
2985 
2986   // With instance variables, this puts '}' on its own line.  Without instance
2987   // variables, this ends the @interface line.
2988   addUnwrappedLine();
2989 
2990   parseObjCUntilAtEnd();
2991 }
2992 
2993 void UnwrappedLineParser::parseObjCLightweightGenerics() {
2994   assert(FormatTok->Tok.is(tok::less));
2995   // Unlike protocol lists, generic parameterizations support
2996   // nested angles:
2997   //
2998   // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2999   //     NSObject <NSCopying, NSSecureCoding>
3000   //
3001   // so we need to count how many open angles we have left.
3002   unsigned NumOpenAngles = 1;
3003   do {
3004     nextToken();
3005     // Early exit in case someone forgot a close angle.
3006     if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3007         FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
3008       break;
3009     if (FormatTok->Tok.is(tok::less))
3010       ++NumOpenAngles;
3011     else if (FormatTok->Tok.is(tok::greater)) {
3012       assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3013       --NumOpenAngles;
3014     }
3015   } while (!eof() && NumOpenAngles != 0);
3016   nextToken(); // Skip '>'.
3017 }
3018 
3019 // Returns true for the declaration/definition form of @protocol,
3020 // false for the expression form.
3021 bool UnwrappedLineParser::parseObjCProtocol() {
3022   assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3023   nextToken();
3024 
3025   if (FormatTok->is(tok::l_paren))
3026     // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3027     return false;
3028 
3029   // The definition/declaration form,
3030   // @protocol Foo
3031   // - (int)someMethod;
3032   // @end
3033 
3034   nextToken(); // protocol name
3035 
3036   if (FormatTok->Tok.is(tok::less))
3037     parseObjCProtocolList();
3038 
3039   // Check for protocol declaration.
3040   if (FormatTok->Tok.is(tok::semi)) {
3041     nextToken();
3042     addUnwrappedLine();
3043     return true;
3044   }
3045 
3046   addUnwrappedLine();
3047   parseObjCUntilAtEnd();
3048   return true;
3049 }
3050 
3051 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3052   bool IsImport = FormatTok->is(Keywords.kw_import);
3053   assert(IsImport || FormatTok->is(tok::kw_export));
3054   nextToken();
3055 
3056   // Consume the "default" in "export default class/function".
3057   if (FormatTok->is(tok::kw_default))
3058     nextToken();
3059 
3060   // Consume "async function", "function" and "default function", so that these
3061   // get parsed as free-standing JS functions, i.e. do not require a trailing
3062   // semicolon.
3063   if (FormatTok->is(Keywords.kw_async))
3064     nextToken();
3065   if (FormatTok->is(Keywords.kw_function)) {
3066     nextToken();
3067     return;
3068   }
3069 
3070   // For imports, `export *`, `export {...}`, consume the rest of the line up
3071   // to the terminating `;`. For everything else, just return and continue
3072   // parsing the structural element, i.e. the declaration or expression for
3073   // `export default`.
3074   if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3075       !FormatTok->isStringLiteral())
3076     return;
3077 
3078   while (!eof()) {
3079     if (FormatTok->is(tok::semi))
3080       return;
3081     if (Line->Tokens.empty()) {
3082       // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3083       // import statement should terminate.
3084       return;
3085     }
3086     if (FormatTok->is(tok::l_brace)) {
3087       FormatTok->setBlockKind(BK_Block);
3088       nextToken();
3089       parseBracedList();
3090     } else {
3091       nextToken();
3092     }
3093   }
3094 }
3095 
3096 void UnwrappedLineParser::parseStatementMacro() {
3097   nextToken();
3098   if (FormatTok->is(tok::l_paren))
3099     parseParens();
3100   if (FormatTok->is(tok::semi))
3101     nextToken();
3102   addUnwrappedLine();
3103 }
3104 
3105 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3106                                                  StringRef Prefix = "") {
3107   llvm::dbgs() << Prefix << "Line(" << Line.Level
3108                << ", FSC=" << Line.FirstStartColumn << ")"
3109                << (Line.InPPDirective ? " MACRO" : "") << ": ";
3110   for (const auto &Node : Line.Tokens) {
3111     llvm::dbgs() << Node.Tok->Tok.getName() << "["
3112                  << "T=" << static_cast<unsigned>(Node.Tok->getType())
3113                  << ", OC=" << Node.Tok->OriginalColumn << "] ";
3114   }
3115   for (const auto &Node : Line.Tokens)
3116     for (const auto &ChildNode : Node.Children)
3117       printDebugInfo(ChildNode, "\nChild: ");
3118 
3119   llvm::dbgs() << "\n";
3120 }
3121 
3122 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3123   if (Line->Tokens.empty())
3124     return;
3125   LLVM_DEBUG({
3126     if (CurrentLines == &Lines)
3127       printDebugInfo(*Line);
3128   });
3129 
3130   // If this line closes a block when in Whitesmiths mode, remember that
3131   // information so that the level can be decreased after the line is added.
3132   // This has to happen after the addition of the line since the line itself
3133   // needs to be indented.
3134   bool ClosesWhitesmithsBlock =
3135       Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3136       Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3137 
3138   CurrentLines->push_back(std::move(*Line));
3139   Line->Tokens.clear();
3140   Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3141   Line->FirstStartColumn = 0;
3142 
3143   if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3144     --Line->Level;
3145   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3146     CurrentLines->append(
3147         std::make_move_iterator(PreprocessorDirectives.begin()),
3148         std::make_move_iterator(PreprocessorDirectives.end()));
3149     PreprocessorDirectives.clear();
3150   }
3151   // Disconnect the current token from the last token on the previous line.
3152   FormatTok->Previous = nullptr;
3153 }
3154 
3155 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3156 
3157 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3158   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3159          FormatTok.NewlinesBefore > 0;
3160 }
3161 
3162 // Checks if \p FormatTok is a line comment that continues the line comment
3163 // section on \p Line.
3164 static bool
3165 continuesLineCommentSection(const FormatToken &FormatTok,
3166                             const UnwrappedLine &Line,
3167                             const llvm::Regex &CommentPragmasRegex) {
3168   if (Line.Tokens.empty())
3169     return false;
3170 
3171   StringRef IndentContent = FormatTok.TokenText;
3172   if (FormatTok.TokenText.startswith("//") ||
3173       FormatTok.TokenText.startswith("/*"))
3174     IndentContent = FormatTok.TokenText.substr(2);
3175   if (CommentPragmasRegex.match(IndentContent))
3176     return false;
3177 
3178   // If Line starts with a line comment, then FormatTok continues the comment
3179   // section if its original column is greater or equal to the original start
3180   // column of the line.
3181   //
3182   // Define the min column token of a line as follows: if a line ends in '{' or
3183   // contains a '{' followed by a line comment, then the min column token is
3184   // that '{'. Otherwise, the min column token of the line is the first token of
3185   // the line.
3186   //
3187   // If Line starts with a token other than a line comment, then FormatTok
3188   // continues the comment section if its original column is greater than the
3189   // original start column of the min column token of the line.
3190   //
3191   // For example, the second line comment continues the first in these cases:
3192   //
3193   // // first line
3194   // // second line
3195   //
3196   // and:
3197   //
3198   // // first line
3199   //  // second line
3200   //
3201   // and:
3202   //
3203   // int i; // first line
3204   //  // second line
3205   //
3206   // and:
3207   //
3208   // do { // first line
3209   //      // second line
3210   //   int i;
3211   // } while (true);
3212   //
3213   // and:
3214   //
3215   // enum {
3216   //   a, // first line
3217   //    // second line
3218   //   b
3219   // };
3220   //
3221   // The second line comment doesn't continue the first in these cases:
3222   //
3223   //   // first line
3224   //  // second line
3225   //
3226   // and:
3227   //
3228   // int i; // first line
3229   // // second line
3230   //
3231   // and:
3232   //
3233   // do { // first line
3234   //   // second line
3235   //   int i;
3236   // } while (true);
3237   //
3238   // and:
3239   //
3240   // enum {
3241   //   a, // first line
3242   //   // second line
3243   // };
3244   const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3245 
3246   // Scan for '{//'. If found, use the column of '{' as a min column for line
3247   // comment section continuation.
3248   const FormatToken *PreviousToken = nullptr;
3249   for (const UnwrappedLineNode &Node : Line.Tokens) {
3250     if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3251         isLineComment(*Node.Tok)) {
3252       MinColumnToken = PreviousToken;
3253       break;
3254     }
3255     PreviousToken = Node.Tok;
3256 
3257     // Grab the last newline preceding a token in this unwrapped line.
3258     if (Node.Tok->NewlinesBefore > 0) {
3259       MinColumnToken = Node.Tok;
3260     }
3261   }
3262   if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3263     MinColumnToken = PreviousToken;
3264   }
3265 
3266   return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3267                               MinColumnToken);
3268 }
3269 
3270 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3271   bool JustComments = Line->Tokens.empty();
3272   for (SmallVectorImpl<FormatToken *>::const_iterator
3273            I = CommentsBeforeNextToken.begin(),
3274            E = CommentsBeforeNextToken.end();
3275        I != E; ++I) {
3276     // Line comments that belong to the same line comment section are put on the
3277     // same line since later we might want to reflow content between them.
3278     // Additional fine-grained breaking of line comment sections is controlled
3279     // by the class BreakableLineCommentSection in case it is desirable to keep
3280     // several line comment sections in the same unwrapped line.
3281     //
3282     // FIXME: Consider putting separate line comment sections as children to the
3283     // unwrapped line instead.
3284     (*I)->ContinuesLineCommentSection =
3285         continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3286     if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3287       addUnwrappedLine();
3288     pushToken(*I);
3289   }
3290   if (NewlineBeforeNext && JustComments)
3291     addUnwrappedLine();
3292   CommentsBeforeNextToken.clear();
3293 }
3294 
3295 void UnwrappedLineParser::nextToken(int LevelDifference) {
3296   if (eof())
3297     return;
3298   flushComments(isOnNewLine(*FormatTok));
3299   pushToken(FormatTok);
3300   FormatToken *Previous = FormatTok;
3301   if (!Style.isJavaScript())
3302     readToken(LevelDifference);
3303   else
3304     readTokenWithJavaScriptASI();
3305   FormatTok->Previous = Previous;
3306 }
3307 
3308 void UnwrappedLineParser::distributeComments(
3309     const SmallVectorImpl<FormatToken *> &Comments,
3310     const FormatToken *NextTok) {
3311   // Whether or not a line comment token continues a line is controlled by
3312   // the method continuesLineCommentSection, with the following caveat:
3313   //
3314   // Define a trail of Comments to be a nonempty proper postfix of Comments such
3315   // that each comment line from the trail is aligned with the next token, if
3316   // the next token exists. If a trail exists, the beginning of the maximal
3317   // trail is marked as a start of a new comment section.
3318   //
3319   // For example in this code:
3320   //
3321   // int a; // line about a
3322   //   // line 1 about b
3323   //   // line 2 about b
3324   //   int b;
3325   //
3326   // the two lines about b form a maximal trail, so there are two sections, the
3327   // first one consisting of the single comment "// line about a" and the
3328   // second one consisting of the next two comments.
3329   if (Comments.empty())
3330     return;
3331   bool ShouldPushCommentsInCurrentLine = true;
3332   bool HasTrailAlignedWithNextToken = false;
3333   unsigned StartOfTrailAlignedWithNextToken = 0;
3334   if (NextTok) {
3335     // We are skipping the first element intentionally.
3336     for (unsigned i = Comments.size() - 1; i > 0; --i) {
3337       if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3338         HasTrailAlignedWithNextToken = true;
3339         StartOfTrailAlignedWithNextToken = i;
3340       }
3341     }
3342   }
3343   for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3344     FormatToken *FormatTok = Comments[i];
3345     if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3346       FormatTok->ContinuesLineCommentSection = false;
3347     } else {
3348       FormatTok->ContinuesLineCommentSection =
3349           continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3350     }
3351     if (!FormatTok->ContinuesLineCommentSection &&
3352         (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3353       ShouldPushCommentsInCurrentLine = false;
3354     }
3355     if (ShouldPushCommentsInCurrentLine) {
3356       pushToken(FormatTok);
3357     } else {
3358       CommentsBeforeNextToken.push_back(FormatTok);
3359     }
3360   }
3361 }
3362 
// Reads tokens from the token source into FormatTok until a token that is not
// a comment has been read or the end of the input is reached.
//
// Comments read on the way are collected and handed to distributeComments().
// Conflict marker tokens and preprocessor directives that are encountered are
// processed here as well, and tokens inside unreachable preprocessor branches
// are skipped.
//
// \p LevelDifference is added to the level of the lines used for
// preprocessor directives parsed from here.
void UnwrappedLineParser::readToken(int LevelDifference) {
  // Comments seen since the last token that was not a comment.
  SmallVector<FormatToken *, 1> Comments;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // Conflict markers are handled like conditional compilation: they are
    // consumed here, and the token following a marker always starts a new
    // line.
    while (FormatTok->getType() == TT_ConflictStart ||
           FormatTok->getType() == TT_ConflictEnd ||
           FormatTok->getType() == TT_ConflictAlternative) {
      if (FormatTok->getType() == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->getType() == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->getType() == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // A '#' that follows an unescaped newline or is the first token starts a
    // preprocessor directive, unless a directive is being parsed already.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      assert((LevelDifference >= 0 ||
              static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
             "LevelDifference makes Line->Level negative");
      Line->Level += LevelDifference;
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
          PPBranchLevel > 0)
        Line->Level += PPBranchLevel;
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }

    // Skip tokens in unreachable preprocessor branches, except while a
    // preprocessor directive itself is being parsed.
    if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // The first token that is not a comment ends the search; the comments
    // collected so far are distributed relative to it.
    if (!FormatTok->Tok.is(tok::comment)) {
      distributeComments(Comments, FormatTok);
      Comments.clear();
      return;
    }

    Comments.push_back(FormatTok);
  } while (!eof());

  // The end of the input was reached: distribute the remaining comments
  // without a next token.
  distributeComments(Comments, nullptr);
  Comments.clear();
}
3421 
3422 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3423   Line->Tokens.push_back(UnwrappedLineNode(Tok));
3424   if (MustBreakBeforeNextToken) {
3425     Line->Tokens.back().Tok->MustBreakBefore = true;
3426     MustBreakBeforeNextToken = false;
3427   }
3428 }
3429 
3430 } // end namespace format
3431 } // end namespace clang
3432