1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         Token(nullptr) {
65     TokenSource = this;
66     Line.Level = 0;
67     Line.InPPDirective = true;
68   }
69 
70   ~ScopedMacroState() override {
71     TokenSource = PreviousTokenSource;
72     ResetToken = Token;
73     Line.InPPDirective = false;
74     Line.Level = PreviousLineLevel;
75   }
76 
77   FormatToken *getNextToken() override {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89   FormatToken *setPosition(unsigned Position) override {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113 
114   FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
119 class ScopedLineState {
120 public:
121   ScopedLineState(UnwrappedLineParser &Parser,
122                   bool SwitchToPreprocessorLines = false)
123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124     if (SwitchToPreprocessorLines)
125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
126     else if (!Parser.Line->Tokens.empty())
127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128     PreBlockLine = std::move(Parser.Line);
129     Parser.Line = llvm::make_unique<UnwrappedLine>();
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line = std::move(PreBlockLine);
140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141       Parser.MustBreakBeforeNextToken = true;
142     Parser.CurrentLines = OriginalLines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147 
148   std::unique_ptr<UnwrappedLine> PreBlockLine;
149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
152 class CompoundStatementIndenter {
153 public:
154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
155                             const FormatStyle &Style, unsigned &LineLevel)
156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
157     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) {
158       Parser->addUnwrappedLine();
159     } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
160       Parser->addUnwrappedLine();
161       ++LineLevel;
162     }
163   }
164   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
165 
166 private:
167   unsigned &LineLevel;
168   unsigned OldLineLevel;
169 };
170 
171 namespace {
172 
173 class IndexedTokenSource : public FormatTokenSource {
174 public:
175   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
176       : Tokens(Tokens), Position(-1) {}
177 
178   FormatToken *getNextToken() override {
179     ++Position;
180     return Tokens[Position];
181   }
182 
183   unsigned getPosition() override {
184     assert(Position >= 0);
185     return Position;
186   }
187 
188   FormatToken *setPosition(unsigned P) override {
189     Position = P;
190     return Tokens[Position];
191   }
192 
193   void reset() { Position = -1; }
194 
195 private:
196   ArrayRef<FormatToken *> Tokens;
197   int Position;
198 };
199 
200 } // end anonymous namespace
201 
202 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
203                                          const AdditionalKeywords &Keywords,
204                                          ArrayRef<FormatToken *> Tokens,
205                                          UnwrappedLineConsumer &Callback)
206     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
207       CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
208       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
209 
210 void UnwrappedLineParser::reset() {
211   PPBranchLevel = -1;
212   Line.reset(new UnwrappedLine);
213   CommentsBeforeNextToken.clear();
214   FormatTok = nullptr;
215   MustBreakBeforeNextToken = false;
216   PreprocessorDirectives.clear();
217   CurrentLines = &Lines;
218   DeclarationScopeStack.clear();
219   PPStack.clear();
220 }
221 
222 void UnwrappedLineParser::parse() {
223   IndexedTokenSource TokenSource(AllTokens);
224   do {
225     DEBUG(llvm::dbgs() << "----\n");
226     reset();
227     Tokens = &TokenSource;
228     TokenSource.reset();
229 
230     readToken();
231     parseFile();
232     // Create line with eof token.
233     pushToken(FormatTok);
234     addUnwrappedLine();
235 
236     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
237                                                   E = Lines.end();
238          I != E; ++I) {
239       Callback.consumeUnwrappedLine(*I);
240     }
241     Callback.finishRun();
242     Lines.clear();
243     while (!PPLevelBranchIndex.empty() &&
244            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
245       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
246       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
247     }
248     if (!PPLevelBranchIndex.empty()) {
249       ++PPLevelBranchIndex.back();
250       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
251       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
252     }
253   } while (!PPLevelBranchIndex.empty());
254 }
255 
256 void UnwrappedLineParser::parseFile() {
257   // The top-level context in a file always has declarations, except for pre-
258   // processor directives and JavaScript files.
259   bool MustBeDeclaration =
260       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
261   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
262                                           MustBeDeclaration);
263   parseLevel(/*HasOpeningBrace=*/false);
264   // Make sure to format the remaining tokens.
265   flushComments(true);
266   addUnwrappedLine();
267 }
268 
269 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
270   bool SwitchLabelEncountered = false;
271   do {
272     tok::TokenKind kind = FormatTok->Tok.getKind();
273     if (FormatTok->Type == TT_MacroBlockBegin) {
274       kind = tok::l_brace;
275     } else if (FormatTok->Type == TT_MacroBlockEnd) {
276       kind = tok::r_brace;
277     }
278 
279     switch (kind) {
280     case tok::comment:
281       nextToken();
282       addUnwrappedLine();
283       break;
284     case tok::l_brace:
285       // FIXME: Add parameter whether this can happen - if this happens, we must
286       // be in a non-declaration context.
287       parseBlock(/*MustBeDeclaration=*/false);
288       addUnwrappedLine();
289       break;
290     case tok::r_brace:
291       if (HasOpeningBrace)
292         return;
293       nextToken();
294       addUnwrappedLine();
295       break;
296     case tok::kw_default:
297     case tok::kw_case:
298       if (!SwitchLabelEncountered &&
299           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
300         ++Line->Level;
301       SwitchLabelEncountered = true;
302       parseStructuralElement();
303       break;
304     default:
305       parseStructuralElement();
306       break;
307     }
308   } while (!eof());
309 }
310 
311 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
312   // We'll parse forward through the tokens until we hit
313   // a closing brace or eof - note that getNextToken() will
314   // parse macros, so this will magically work inside macro
315   // definitions, too.
316   unsigned StoredPosition = Tokens->getPosition();
317   FormatToken *Tok = FormatTok;
318   // Keep a stack of positions of lbrace tokens. We will
319   // update information about whether an lbrace starts a
320   // braced init list or a different block during the loop.
321   SmallVector<FormatToken *, 8> LBraceStack;
322   assert(Tok->Tok.is(tok::l_brace));
323   do {
324     // Get next none-comment token.
325     FormatToken *NextTok;
326     unsigned ReadTokens = 0;
327     do {
328       NextTok = Tokens->getNextToken();
329       ++ReadTokens;
330     } while (NextTok->is(tok::comment));
331 
332     switch (Tok->Tok.getKind()) {
333     case tok::l_brace:
334       Tok->BlockKind = BK_Unknown;
335       LBraceStack.push_back(Tok);
336       break;
337     case tok::r_brace:
338       if (!LBraceStack.empty()) {
339         if (LBraceStack.back()->BlockKind == BK_Unknown) {
340           bool ProbablyBracedList = false;
341           if (Style.Language == FormatStyle::LK_Proto) {
342             ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
343           } else {
344             // Using OriginalColumn to distinguish between ObjC methods and
345             // binary operators is a bit hacky.
346             bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
347                                     NextTok->OriginalColumn == 0;
348 
349             // If there is a comma, semicolon or right paren after the closing
350             // brace, we assume this is a braced initializer list.  Note that
351             // regardless how we mark inner braces here, we will overwrite the
352             // BlockKind later if we parse a braced list (where all blocks
353             // inside are by default braced lists), or when we explicitly detect
354             // blocks (for example while parsing lambdas).
355             //
356             // We exclude + and - as they can be ObjC visibility modifiers.
357             ProbablyBracedList =
358                 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
359                                  tok::r_paren, tok::r_square, tok::l_brace,
360                                  tok::l_paren, tok::ellipsis) ||
361                 (NextTok->is(tok::semi) &&
362                  (!ExpectClassBody || LBraceStack.size() != 1)) ||
363                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
364           }
365           if (ProbablyBracedList) {
366             Tok->BlockKind = BK_BracedInit;
367             LBraceStack.back()->BlockKind = BK_BracedInit;
368           } else {
369             Tok->BlockKind = BK_Block;
370             LBraceStack.back()->BlockKind = BK_Block;
371           }
372         }
373         LBraceStack.pop_back();
374       }
375       break;
376     case tok::at:
377     case tok::semi:
378     case tok::kw_if:
379     case tok::kw_while:
380     case tok::kw_for:
381     case tok::kw_switch:
382     case tok::kw_try:
383     case tok::kw___try:
384       if (!LBraceStack.empty())
385         LBraceStack.back()->BlockKind = BK_Block;
386       break;
387     default:
388       break;
389     }
390     Tok = NextTok;
391   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
392   // Assume other blocks for all unclosed opening braces.
393   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
394     if (LBraceStack[i]->BlockKind == BK_Unknown)
395       LBraceStack[i]->BlockKind = BK_Block;
396   }
397 
398   FormatTok = Tokens->setPosition(StoredPosition);
399 }
400 
401 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
402                                      bool MunchSemi) {
403   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
404          "'{' or macro block token expected");
405   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
406 
407   unsigned InitialLevel = Line->Level;
408   nextToken();
409 
410   if (MacroBlock && FormatTok->is(tok::l_paren))
411     parseParens();
412 
413   addUnwrappedLine();
414 
415   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
416                                           MustBeDeclaration);
417   if (AddLevel)
418     ++Line->Level;
419   parseLevel(/*HasOpeningBrace=*/true);
420 
421   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
422                  : !FormatTok->is(tok::r_brace)) {
423     Line->Level = InitialLevel;
424     return;
425   }
426 
427   nextToken(); // Munch the closing brace.
428 
429   if (MacroBlock && FormatTok->is(tok::l_paren))
430     parseParens();
431 
432   if (MunchSemi && FormatTok->Tok.is(tok::semi))
433     nextToken();
434   Line->Level = InitialLevel;
435 }
436 
437 static bool isGoogScope(const UnwrappedLine &Line) {
438   // FIXME: Closure-library specific stuff should not be hard-coded but be
439   // configurable.
440   if (Line.Tokens.size() < 4)
441     return false;
442   auto I = Line.Tokens.begin();
443   if (I->Tok->TokenText != "goog")
444     return false;
445   ++I;
446   if (I->Tok->isNot(tok::period))
447     return false;
448   ++I;
449   if (I->Tok->TokenText != "scope")
450     return false;
451   ++I;
452   return I->Tok->is(tok::l_paren);
453 }
454 
455 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
456                                    const FormatToken &InitialToken) {
457   switch (Style.BreakBeforeBraces) {
458   case FormatStyle::BS_Linux:
459     return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class);
460   case FormatStyle::BS_Allman:
461   case FormatStyle::BS_GNU:
462     return true;
463   default:
464     return false;
465   }
466 }
467 
468 void UnwrappedLineParser::parseChildBlock() {
469   FormatTok->BlockKind = BK_Block;
470   nextToken();
471   {
472     bool GoogScope =
473         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
474     ScopedLineState LineState(*this);
475     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
476                                             /*MustBeDeclaration=*/false);
477     Line->Level += GoogScope ? 0 : 1;
478     parseLevel(/*HasOpeningBrace=*/true);
479     flushComments(isOnNewLine(*FormatTok));
480     Line->Level -= GoogScope ? 0 : 1;
481   }
482   nextToken();
483 }
484 
485 void UnwrappedLineParser::parsePPDirective() {
486   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
487   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
488   nextToken();
489 
490   if (!FormatTok->Tok.getIdentifierInfo()) {
491     parsePPUnknown();
492     return;
493   }
494 
495   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
496   case tok::pp_define:
497     parsePPDefine();
498     return;
499   case tok::pp_if:
500     parsePPIf(/*IfDef=*/false);
501     break;
502   case tok::pp_ifdef:
503   case tok::pp_ifndef:
504     parsePPIf(/*IfDef=*/true);
505     break;
506   case tok::pp_else:
507     parsePPElse();
508     break;
509   case tok::pp_elif:
510     parsePPElIf();
511     break;
512   case tok::pp_endif:
513     parsePPEndIf();
514     break;
515   default:
516     parsePPUnknown();
517     break;
518   }
519 }
520 
521 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
522   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
523     PPStack.push_back(PP_Unreachable);
524   else
525     PPStack.push_back(PP_Conditional);
526 }
527 
528 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
529   ++PPBranchLevel;
530   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
531   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
532     PPLevelBranchIndex.push_back(0);
533     PPLevelBranchCount.push_back(0);
534   }
535   PPChainBranchIndex.push(0);
536   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
537   conditionalCompilationCondition(Unreachable || Skip);
538 }
539 
540 void UnwrappedLineParser::conditionalCompilationAlternative() {
541   if (!PPStack.empty())
542     PPStack.pop_back();
543   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
544   if (!PPChainBranchIndex.empty())
545     ++PPChainBranchIndex.top();
546   conditionalCompilationCondition(
547       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
548       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
549 }
550 
551 void UnwrappedLineParser::conditionalCompilationEnd() {
552   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
553   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
554     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
555       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
556     }
557   }
558   // Guard against #endif's without #if.
559   if (PPBranchLevel > 0)
560     --PPBranchLevel;
561   if (!PPChainBranchIndex.empty())
562     PPChainBranchIndex.pop();
563   if (!PPStack.empty())
564     PPStack.pop_back();
565 }
566 
567 void UnwrappedLineParser::parsePPIf(bool IfDef) {
568   nextToken();
569   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
570                          FormatTok->Tok.getLiteralData() != nullptr &&
571                          StringRef(FormatTok->Tok.getLiteralData(),
572                                    FormatTok->Tok.getLength()) == "0") ||
573                         FormatTok->Tok.is(tok::kw_false);
574   conditionalCompilationStart(!IfDef && IsLiteralFalse);
575   parsePPUnknown();
576 }
577 
578 void UnwrappedLineParser::parsePPElse() {
579   conditionalCompilationAlternative();
580   parsePPUnknown();
581 }
582 
583 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
584 
585 void UnwrappedLineParser::parsePPEndIf() {
586   conditionalCompilationEnd();
587   parsePPUnknown();
588 }
589 
590 void UnwrappedLineParser::parsePPDefine() {
591   nextToken();
592 
593   if (FormatTok->Tok.getKind() != tok::identifier) {
594     parsePPUnknown();
595     return;
596   }
597   nextToken();
598   if (FormatTok->Tok.getKind() == tok::l_paren &&
599       FormatTok->WhitespaceRange.getBegin() ==
600           FormatTok->WhitespaceRange.getEnd()) {
601     parseParens();
602   }
603   addUnwrappedLine();
604   Line->Level = 1;
605 
606   // Errors during a preprocessor directive can only affect the layout of the
607   // preprocessor directive, and thus we ignore them. An alternative approach
608   // would be to use the same approach we use on the file level (no
609   // re-indentation if there was a structural error) within the macro
610   // definition.
611   parseFile();
612 }
613 
614 void UnwrappedLineParser::parsePPUnknown() {
615   do {
616     nextToken();
617   } while (!eof());
618   addUnwrappedLine();
619 }
620 
621 // Here we blacklist certain tokens that are not usually the first token in an
622 // unwrapped line. This is used in attempt to distinguish macro calls without
623 // trailing semicolons from other constructs split to several lines.
624 static bool tokenCanStartNewLine(const clang::Token &Tok) {
625   // Semicolon can be a null-statement, l_square can be a start of a macro or
626   // a C++11 attribute, but this doesn't seem to be common.
627   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
628          Tok.isNot(tok::l_square) &&
629          // Tokens that can only be used as binary operators and a part of
630          // overloaded operator names.
631          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
632          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
633          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
634          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
635          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
636          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
637          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
638          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
639          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
640          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
641          Tok.isNot(tok::lesslessequal) &&
642          // Colon is used in labels, base class lists, initializer lists,
643          // range-based for loops, ternary operator, but should never be the
644          // first token in an unwrapped line.
645          Tok.isNot(tok::colon) &&
646          // 'noexcept' is a trailing annotation.
647          Tok.isNot(tok::kw_noexcept);
648 }
649 
650 void UnwrappedLineParser::parseStructuralElement() {
651   assert(!FormatTok->Tok.is(tok::l_brace));
652   switch (FormatTok->Tok.getKind()) {
653   case tok::at:
654     nextToken();
655     if (FormatTok->Tok.is(tok::l_brace)) {
656       parseBracedList();
657       break;
658     }
659     switch (FormatTok->Tok.getObjCKeywordID()) {
660     case tok::objc_public:
661     case tok::objc_protected:
662     case tok::objc_package:
663     case tok::objc_private:
664       return parseAccessSpecifier();
665     case tok::objc_interface:
666     case tok::objc_implementation:
667       return parseObjCInterfaceOrImplementation();
668     case tok::objc_protocol:
669       return parseObjCProtocol();
670     case tok::objc_end:
671       return; // Handled by the caller.
672     case tok::objc_optional:
673     case tok::objc_required:
674       nextToken();
675       addUnwrappedLine();
676       return;
677     case tok::objc_autoreleasepool:
678       nextToken();
679       if (FormatTok->Tok.is(tok::l_brace)) {
680         if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
681             Style.BreakBeforeBraces == FormatStyle::BS_GNU)
682           addUnwrappedLine();
683         parseBlock(/*MustBeDeclaration=*/false);
684       }
685       addUnwrappedLine();
686       return;
687     case tok::objc_try:
688       // This branch isn't strictly necessary (the kw_try case below would
689       // do this too after the tok::at is parsed above).  But be explicit.
690       parseTryCatch();
691       return;
692     default:
693       break;
694     }
695     break;
696   case tok::kw_asm:
697     nextToken();
698     if (FormatTok->is(tok::l_brace)) {
699       FormatTok->Type = TT_InlineASMBrace;
700       nextToken();
701       while (FormatTok && FormatTok->isNot(tok::eof)) {
702         if (FormatTok->is(tok::r_brace)) {
703           FormatTok->Type = TT_InlineASMBrace;
704           nextToken();
705           addUnwrappedLine();
706           break;
707         }
708         FormatTok->Finalized = true;
709         nextToken();
710       }
711     }
712     break;
713   case tok::kw_namespace:
714     parseNamespace();
715     return;
716   case tok::kw_inline:
717     nextToken();
718     if (FormatTok->Tok.is(tok::kw_namespace)) {
719       parseNamespace();
720       return;
721     }
722     break;
723   case tok::kw_public:
724   case tok::kw_protected:
725   case tok::kw_private:
726     if (Style.Language == FormatStyle::LK_Java ||
727         Style.Language == FormatStyle::LK_JavaScript)
728       nextToken();
729     else
730       parseAccessSpecifier();
731     return;
732   case tok::kw_if:
733     parseIfThenElse();
734     return;
735   case tok::kw_for:
736   case tok::kw_while:
737     parseForOrWhileLoop();
738     return;
739   case tok::kw_do:
740     parseDoWhile();
741     return;
742   case tok::kw_switch:
743     parseSwitch();
744     return;
745   case tok::kw_default:
746     nextToken();
747     parseLabel();
748     return;
749   case tok::kw_case:
750     parseCaseLabel();
751     return;
752   case tok::kw_try:
753   case tok::kw___try:
754     parseTryCatch();
755     return;
756   case tok::kw_extern:
757     nextToken();
758     if (FormatTok->Tok.is(tok::string_literal)) {
759       nextToken();
760       if (FormatTok->Tok.is(tok::l_brace)) {
761         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
762         addUnwrappedLine();
763         return;
764       }
765     }
766     break;
767   case tok::kw_export:
768     if (Style.Language == FormatStyle::LK_JavaScript) {
769       parseJavaScriptEs6ImportExport();
770       return;
771     }
772     break;
773   case tok::identifier:
774     if (FormatTok->is(TT_ForEachMacro)) {
775       parseForOrWhileLoop();
776       return;
777     }
778     if (FormatTok->is(TT_MacroBlockBegin)) {
779       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
780                  /*MunchSemi=*/false);
781       return;
782     }
783     if (Style.Language == FormatStyle::LK_JavaScript &&
784         FormatTok->is(Keywords.kw_import)) {
785       parseJavaScriptEs6ImportExport();
786       return;
787     }
788     if (FormatTok->is(Keywords.kw_signals)) {
789       nextToken();
790       if (FormatTok->is(tok::colon)) {
791         nextToken();
792         addUnwrappedLine();
793       }
794       return;
795     }
796     // In all other cases, parse the declaration.
797     break;
798   default:
799     break;
800   }
801   do {
802     switch (FormatTok->Tok.getKind()) {
803     case tok::at:
804       nextToken();
805       if (FormatTok->Tok.is(tok::l_brace))
806         parseBracedList();
807       break;
808     case tok::kw_enum:
809       // parseEnum falls through and does not yet add an unwrapped line as an
810       // enum definition can start a structural element.
811       parseEnum();
812       // This does not apply for Java and JavaScript.
813       if (Style.Language == FormatStyle::LK_Java ||
814           Style.Language == FormatStyle::LK_JavaScript) {
815         addUnwrappedLine();
816         return;
817       }
818       break;
819     case tok::kw_typedef:
820       nextToken();
821       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
822                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
823         parseEnum();
824       break;
825     case tok::kw_struct:
826     case tok::kw_union:
827     case tok::kw_class:
828       // parseRecord falls through and does not yet add an unwrapped line as a
829       // record declaration or definition can start a structural element.
830       parseRecord();
831       // This does not apply for Java and JavaScript.
832       if (Style.Language == FormatStyle::LK_Java ||
833           Style.Language == FormatStyle::LK_JavaScript) {
834         addUnwrappedLine();
835         return;
836       }
837       break;
838     case tok::period:
839       nextToken();
840       // In Java, classes have an implicit static member "class".
841       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
842           FormatTok->is(tok::kw_class))
843         nextToken();
844       break;
845     case tok::semi:
846       nextToken();
847       addUnwrappedLine();
848       return;
849     case tok::r_brace:
850       addUnwrappedLine();
851       return;
852     case tok::l_paren:
853       parseParens();
854       break;
855     case tok::caret:
856       nextToken();
857       if (FormatTok->Tok.isAnyIdentifier() ||
858           FormatTok->isSimpleTypeSpecifier())
859         nextToken();
860       if (FormatTok->is(tok::l_paren))
861         parseParens();
862       if (FormatTok->is(tok::l_brace))
863         parseChildBlock();
864       break;
865     case tok::l_brace:
866       if (!tryToParseBracedList()) {
867         // A block outside of parentheses must be the last part of a
868         // structural element.
869         // FIXME: Figure out cases where this is not true, and add projections
870         // for them (the one we know is missing are lambdas).
871         if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
872           addUnwrappedLine();
873         FormatTok->Type = TT_FunctionLBrace;
874         parseBlock(/*MustBeDeclaration=*/false);
875         addUnwrappedLine();
876         return;
877       }
878       // Otherwise this was a braced init list, and the structural
879       // element continues.
880       break;
881     case tok::kw_try:
882       // We arrive here when parsing function-try blocks.
883       parseTryCatch();
884       return;
885     case tok::identifier: {
886       if (FormatTok->is(TT_MacroBlockEnd)) {
887         addUnwrappedLine();
888         return;
889       }
890 
891       // Parse function literal unless 'function' is the first token in a line
892       // in which case this should be treated as a free-standing function.
893       if (Style.Language == FormatStyle::LK_JavaScript &&
894           FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
895         tryToParseJSFunction();
896         break;
897       }
898       if ((Style.Language == FormatStyle::LK_JavaScript ||
899            Style.Language == FormatStyle::LK_Java) &&
900           FormatTok->is(Keywords.kw_interface)) {
901         parseRecord();
902         addUnwrappedLine();
903         return;
904       }
905 
906       StringRef Text = FormatTok->TokenText;
907       nextToken();
908       if (Line->Tokens.size() == 1 &&
909           // JS doesn't have macros, and within classes colons indicate fields,
910           // not labels.
911           Style.Language != FormatStyle::LK_JavaScript) {
912         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
913           parseLabel();
914           return;
915         }
916         // Recognize function-like macro usages without trailing semicolon as
917         // well as free-standing macros like Q_OBJECT.
918         bool FunctionLike = FormatTok->is(tok::l_paren);
919         if (FunctionLike)
920           parseParens();
921 
922         bool FollowedByNewline =
923             CommentsBeforeNextToken.empty()
924                 ? FormatTok->NewlinesBefore > 0
925                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
926 
927         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
928             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
929           addUnwrappedLine();
930           return;
931         }
932       }
933       break;
934     }
935     case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
939       if (FormatTok->is(TT_JsFatArrow)) {
940         nextToken();
941         if (FormatTok->is(tok::l_brace))
942           parseChildBlock();
943         break;
944       }
945 
946       nextToken();
947       if (FormatTok->Tok.is(tok::l_brace)) {
948         parseBracedList();
949       }
950       break;
951     case tok::l_square:
952       parseSquare();
953       break;
954     case tok::kw_new:
955       parseNew();
956       break;
957     default:
958       nextToken();
959       break;
960     }
961   } while (!eof());
962 }
963 
964 bool UnwrappedLineParser::tryToParseLambda() {
965   if (Style.Language != FormatStyle::LK_Cpp) {
966     nextToken();
967     return false;
968   }
969   // FIXME: This is a dirty way to access the previous token. Find a better
970   // solution.
971   if (!Line->Tokens.empty() &&
972       (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
973                                         tok::kw_new, tok::kw_delete) ||
974        Line->Tokens.back().Tok->closesScope() ||
975        Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
976     nextToken();
977     return false;
978   }
979   assert(FormatTok->is(tok::l_square));
980   FormatToken &LSquare = *FormatTok;
981   if (!tryToParseLambdaIntroducer())
982     return false;
983 
984   while (FormatTok->isNot(tok::l_brace)) {
985     if (FormatTok->isSimpleTypeSpecifier()) {
986       nextToken();
987       continue;
988     }
989     switch (FormatTok->Tok.getKind()) {
990     case tok::l_brace:
991       break;
992     case tok::l_paren:
993       parseParens();
994       break;
995     case tok::amp:
996     case tok::star:
997     case tok::kw_const:
998     case tok::comma:
999     case tok::less:
1000     case tok::greater:
1001     case tok::identifier:
1002     case tok::coloncolon:
1003     case tok::kw_mutable:
1004       nextToken();
1005       break;
1006     case tok::arrow:
1007       FormatTok->Type = TT_LambdaArrow;
1008       nextToken();
1009       break;
1010     default:
1011       return true;
1012     }
1013   }
1014   LSquare.Type = TT_LambdaLSquare;
1015   parseChildBlock();
1016   return true;
1017 }
1018 
1019 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1020   nextToken();
1021   if (FormatTok->is(tok::equal)) {
1022     nextToken();
1023     if (FormatTok->is(tok::r_square)) {
1024       nextToken();
1025       return true;
1026     }
1027     if (FormatTok->isNot(tok::comma))
1028       return false;
1029     nextToken();
1030   } else if (FormatTok->is(tok::amp)) {
1031     nextToken();
1032     if (FormatTok->is(tok::r_square)) {
1033       nextToken();
1034       return true;
1035     }
1036     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1037       return false;
1038     }
1039     if (FormatTok->is(tok::comma))
1040       nextToken();
1041   } else if (FormatTok->is(tok::r_square)) {
1042     nextToken();
1043     return true;
1044   }
1045   do {
1046     if (FormatTok->is(tok::amp))
1047       nextToken();
1048     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1049       return false;
1050     nextToken();
1051     if (FormatTok->is(tok::ellipsis))
1052       nextToken();
1053     if (FormatTok->is(tok::comma)) {
1054       nextToken();
1055     } else if (FormatTok->is(tok::r_square)) {
1056       nextToken();
1057       return true;
1058     } else {
1059       return false;
1060     }
1061   } while (!eof());
1062   return false;
1063 }
1064 
1065 void UnwrappedLineParser::tryToParseJSFunction() {
1066   nextToken();
1067 
1068   // Consume function name.
1069   if (FormatTok->is(tok::identifier))
1070     nextToken();
1071 
1072   if (FormatTok->isNot(tok::l_paren))
1073     return;
1074 
1075   // Parse formal parameter list.
1076   parseParens();
1077 
1078   if (FormatTok->is(tok::colon)) {
1079     // Parse a type definition.
1080     nextToken();
1081 
1082     // Eat the type declaration. For braced inline object types, balance braces,
1083     // otherwise just parse until finding an l_brace for the function body.
1084     if (FormatTok->is(tok::l_brace))
1085       tryToParseBracedList();
1086     else
1087       while (FormatTok->isNot(tok::l_brace) && !eof())
1088         nextToken();
1089   }
1090 
1091   parseChildBlock();
1092 }
1093 
1094 bool UnwrappedLineParser::tryToParseBracedList() {
1095   if (FormatTok->BlockKind == BK_Unknown)
1096     calculateBraceTypes();
1097   assert(FormatTok->BlockKind != BK_Unknown);
1098   if (FormatTok->BlockKind == BK_Block)
1099     return false;
1100   parseBracedList();
1101   return true;
1102 }
1103 
1104 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1105   bool HasError = false;
1106   nextToken();
1107 
1108   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1109   // replace this by using parseAssigmentExpression() inside.
1110   do {
1111     if (Style.Language == FormatStyle::LK_JavaScript) {
1112       if (FormatTok->is(Keywords.kw_function)) {
1113         tryToParseJSFunction();
1114         continue;
1115       }
1116       if (FormatTok->is(TT_JsFatArrow)) {
1117         nextToken();
1118         // Fat arrows can be followed by simple expressions or by child blocks
1119         // in curly braces.
1120         if (FormatTok->is(tok::l_brace)) {
1121           parseChildBlock();
1122           continue;
1123         }
1124       }
1125     }
1126     switch (FormatTok->Tok.getKind()) {
1127     case tok::caret:
1128       nextToken();
1129       if (FormatTok->is(tok::l_brace)) {
1130         parseChildBlock();
1131       }
1132       break;
1133     case tok::l_square:
1134       tryToParseLambda();
1135       break;
1136     case tok::l_brace:
1137       // Assume there are no blocks inside a braced init list apart
1138       // from the ones we explicitly parse out (like lambdas).
1139       FormatTok->BlockKind = BK_BracedInit;
1140       parseBracedList();
1141       break;
1142     case tok::l_paren:
1143       parseParens();
1144       // JavaScript can just have free standing methods and getters/setters in
1145       // object literals. Detect them by a "{" following ")".
1146       if (Style.Language == FormatStyle::LK_JavaScript) {
1147         if (FormatTok->is(tok::l_brace))
1148           parseChildBlock();
1149         break;
1150       }
1151       break;
1152     case tok::r_brace:
1153       nextToken();
1154       return !HasError;
1155     case tok::semi:
1156       HasError = true;
1157       if (!ContinueOnSemicolons)
1158         return !HasError;
1159       nextToken();
1160       break;
1161     case tok::comma:
1162       nextToken();
1163       break;
1164     default:
1165       nextToken();
1166       break;
1167     }
1168   } while (!eof());
1169   return false;
1170 }
1171 
1172 void UnwrappedLineParser::parseParens() {
1173   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1174   nextToken();
1175   do {
1176     switch (FormatTok->Tok.getKind()) {
1177     case tok::l_paren:
1178       parseParens();
1179       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1180         parseChildBlock();
1181       break;
1182     case tok::r_paren:
1183       nextToken();
1184       return;
1185     case tok::r_brace:
1186       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1187       return;
1188     case tok::l_square:
1189       tryToParseLambda();
1190       break;
1191     case tok::l_brace:
1192       if (!tryToParseBracedList())
1193         parseChildBlock();
1194       break;
1195     case tok::at:
1196       nextToken();
1197       if (FormatTok->Tok.is(tok::l_brace))
1198         parseBracedList();
1199       break;
1200     case tok::identifier:
1201       if (Style.Language == FormatStyle::LK_JavaScript &&
1202           FormatTok->is(Keywords.kw_function))
1203         tryToParseJSFunction();
1204       else
1205         nextToken();
1206       break;
1207     default:
1208       nextToken();
1209       break;
1210     }
1211   } while (!eof());
1212 }
1213 
1214 void UnwrappedLineParser::parseSquare() {
1215   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1216   if (tryToParseLambda())
1217     return;
1218   do {
1219     switch (FormatTok->Tok.getKind()) {
1220     case tok::l_paren:
1221       parseParens();
1222       break;
1223     case tok::r_square:
1224       nextToken();
1225       return;
1226     case tok::r_brace:
1227       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1228       return;
1229     case tok::l_square:
1230       parseSquare();
1231       break;
1232     case tok::l_brace: {
1233       if (!tryToParseBracedList())
1234         parseChildBlock();
1235       break;
1236     }
1237     case tok::at:
1238       nextToken();
1239       if (FormatTok->Tok.is(tok::l_brace))
1240         parseBracedList();
1241       break;
1242     default:
1243       nextToken();
1244       break;
1245     }
1246   } while (!eof());
1247 }
1248 
1249 void UnwrappedLineParser::parseIfThenElse() {
1250   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1251   nextToken();
1252   if (FormatTok->Tok.is(tok::l_paren))
1253     parseParens();
1254   bool NeedsUnwrappedLine = false;
1255   if (FormatTok->Tok.is(tok::l_brace)) {
1256     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1257     parseBlock(/*MustBeDeclaration=*/false);
1258     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1259         Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1260       addUnwrappedLine();
1261     } else {
1262       NeedsUnwrappedLine = true;
1263     }
1264   } else {
1265     addUnwrappedLine();
1266     ++Line->Level;
1267     parseStructuralElement();
1268     --Line->Level;
1269   }
1270   if (FormatTok->Tok.is(tok::kw_else)) {
1271     if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
1272       addUnwrappedLine();
1273     nextToken();
1274     if (FormatTok->Tok.is(tok::l_brace)) {
1275       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1276       parseBlock(/*MustBeDeclaration=*/false);
1277       addUnwrappedLine();
1278     } else if (FormatTok->Tok.is(tok::kw_if)) {
1279       parseIfThenElse();
1280     } else {
1281       addUnwrappedLine();
1282       ++Line->Level;
1283       parseStructuralElement();
1284       --Line->Level;
1285     }
1286   } else if (NeedsUnwrappedLine) {
1287     addUnwrappedLine();
1288   }
1289 }
1290 
1291 void UnwrappedLineParser::parseTryCatch() {
1292   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1293   nextToken();
1294   bool NeedsUnwrappedLine = false;
1295   if (FormatTok->is(tok::colon)) {
1296     // We are in a function try block, what comes is an initializer list.
1297     nextToken();
1298     while (FormatTok->is(tok::identifier)) {
1299       nextToken();
1300       if (FormatTok->is(tok::l_paren))
1301         parseParens();
1302       if (FormatTok->is(tok::comma))
1303         nextToken();
1304     }
1305   }
1306   // Parse try with resource.
1307   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1308     parseParens();
1309   }
1310   if (FormatTok->is(tok::l_brace)) {
1311     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1312     parseBlock(/*MustBeDeclaration=*/false);
1313     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1314         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1315         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1316       addUnwrappedLine();
1317     } else {
1318       NeedsUnwrappedLine = true;
1319     }
1320   } else if (!FormatTok->is(tok::kw_catch)) {
1321     // The C++ standard requires a compound-statement after a try.
1322     // If there's none, we try to assume there's a structuralElement
1323     // and try to continue.
1324     addUnwrappedLine();
1325     ++Line->Level;
1326     parseStructuralElement();
1327     --Line->Level;
1328   }
1329   while (1) {
1330     if (FormatTok->is(tok::at))
1331       nextToken();
1332     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1333                              tok::kw___finally) ||
1334           ((Style.Language == FormatStyle::LK_Java ||
1335             Style.Language == FormatStyle::LK_JavaScript) &&
1336            FormatTok->is(Keywords.kw_finally)) ||
1337           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1338            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1339       break;
1340     nextToken();
1341     while (FormatTok->isNot(tok::l_brace)) {
1342       if (FormatTok->is(tok::l_paren)) {
1343         parseParens();
1344         continue;
1345       }
1346       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1347         return;
1348       nextToken();
1349     }
1350     NeedsUnwrappedLine = false;
1351     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1352     parseBlock(/*MustBeDeclaration=*/false);
1353     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1354         Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
1355         Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
1356       addUnwrappedLine();
1357     } else {
1358       NeedsUnwrappedLine = true;
1359     }
1360   }
1361   if (NeedsUnwrappedLine) {
1362     addUnwrappedLine();
1363   }
1364 }
1365 
1366 void UnwrappedLineParser::parseNamespace() {
1367   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1368 
1369   const FormatToken &InitialToken = *FormatTok;
1370   nextToken();
1371   if (FormatTok->Tok.is(tok::identifier))
1372     nextToken();
1373   if (FormatTok->Tok.is(tok::l_brace)) {
1374     if (ShouldBreakBeforeBrace(Style, InitialToken))
1375       addUnwrappedLine();
1376 
1377     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1378                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1379                      DeclarationScopeStack.size() > 1);
1380     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1381     // Munch the semicolon after a namespace. This is more common than one would
1382     // think. Puttin the semicolon into its own line is very ugly.
1383     if (FormatTok->Tok.is(tok::semi))
1384       nextToken();
1385     addUnwrappedLine();
1386   }
1387   // FIXME: Add error handling.
1388 }
1389 
1390 void UnwrappedLineParser::parseNew() {
1391   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1392   nextToken();
1393   if (Style.Language != FormatStyle::LK_Java)
1394     return;
1395 
1396   // In Java, we can parse everything up to the parens, which aren't optional.
1397   do {
1398     // There should not be a ;, { or } before the new's open paren.
1399     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1400       return;
1401 
1402     // Consume the parens.
1403     if (FormatTok->is(tok::l_paren)) {
1404       parseParens();
1405 
1406       // If there is a class body of an anonymous class, consume that as child.
1407       if (FormatTok->is(tok::l_brace))
1408         parseChildBlock();
1409       return;
1410     }
1411     nextToken();
1412   } while (!eof());
1413 }
1414 
1415 void UnwrappedLineParser::parseForOrWhileLoop() {
1416   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1417          "'for', 'while' or foreach macro expected");
1418   nextToken();
1419   if (FormatTok->Tok.is(tok::l_paren))
1420     parseParens();
1421   if (FormatTok->Tok.is(tok::l_brace)) {
1422     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1423     parseBlock(/*MustBeDeclaration=*/false);
1424     addUnwrappedLine();
1425   } else {
1426     addUnwrappedLine();
1427     ++Line->Level;
1428     parseStructuralElement();
1429     --Line->Level;
1430   }
1431 }
1432 
1433 void UnwrappedLineParser::parseDoWhile() {
1434   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1435   nextToken();
1436   if (FormatTok->Tok.is(tok::l_brace)) {
1437     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1438     parseBlock(/*MustBeDeclaration=*/false);
1439     if (Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1440       addUnwrappedLine();
1441   } else {
1442     addUnwrappedLine();
1443     ++Line->Level;
1444     parseStructuralElement();
1445     --Line->Level;
1446   }
1447 
1448   // FIXME: Add error handling.
1449   if (!FormatTok->Tok.is(tok::kw_while)) {
1450     addUnwrappedLine();
1451     return;
1452   }
1453 
1454   nextToken();
1455   parseStructuralElement();
1456 }
1457 
1458 void UnwrappedLineParser::parseLabel() {
1459   nextToken();
1460   unsigned OldLineLevel = Line->Level;
1461   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1462     --Line->Level;
1463   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1464     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1465     parseBlock(/*MustBeDeclaration=*/false);
1466     if (FormatTok->Tok.is(tok::kw_break)) {
1467       // "break;" after "}" on its own line only for BS_Allman and BS_GNU
1468       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1469           Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1470         addUnwrappedLine();
1471       }
1472       parseStructuralElement();
1473     }
1474     addUnwrappedLine();
1475   } else {
1476     if (FormatTok->is(tok::semi))
1477       nextToken();
1478     addUnwrappedLine();
1479   }
1480   Line->Level = OldLineLevel;
1481 }
1482 
1483 void UnwrappedLineParser::parseCaseLabel() {
1484   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1485   // FIXME: fix handling of complex expressions here.
1486   do {
1487     nextToken();
1488   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1489   parseLabel();
1490 }
1491 
1492 void UnwrappedLineParser::parseSwitch() {
1493   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1494   nextToken();
1495   if (FormatTok->Tok.is(tok::l_paren))
1496     parseParens();
1497   if (FormatTok->Tok.is(tok::l_brace)) {
1498     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1499     parseBlock(/*MustBeDeclaration=*/false);
1500     addUnwrappedLine();
1501   } else {
1502     addUnwrappedLine();
1503     ++Line->Level;
1504     parseStructuralElement();
1505     --Line->Level;
1506   }
1507 }
1508 
1509 void UnwrappedLineParser::parseAccessSpecifier() {
1510   nextToken();
1511   // Understand Qt's slots.
1512   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1513     nextToken();
1514   // Otherwise, we don't know what it is, and we'd better keep the next token.
1515   if (FormatTok->Tok.is(tok::colon))
1516     nextToken();
1517   addUnwrappedLine();
1518 }
1519 
1520 void UnwrappedLineParser::parseEnum() {
1521   // Won't be 'enum' for NS_ENUMs.
1522   if (FormatTok->Tok.is(tok::kw_enum))
1523     nextToken();
1524 
1525   // Eat up enum class ...
1526   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1527     nextToken();
1528 
1529   while (FormatTok->Tok.getIdentifierInfo() ||
1530          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1531                             tok::greater, tok::comma, tok::question)) {
1532     nextToken();
1533     // We can have macros or attributes in between 'enum' and the enum name.
1534     if (FormatTok->is(tok::l_paren))
1535       parseParens();
1536     if (FormatTok->is(tok::identifier)) {
1537       nextToken();
1538       // If there are two identifiers in a row, this is likely an elaborate
1539       // return type. In Java, this can be "implements", etc.
1540       if (Style.Language == FormatStyle::LK_Cpp &&
1541           FormatTok->is(tok::identifier))
1542         return;
1543     }
1544   }
1545 
1546   // Just a declaration or something is wrong.
1547   if (FormatTok->isNot(tok::l_brace))
1548     return;
1549   FormatTok->BlockKind = BK_Block;
1550 
1551   if (Style.Language == FormatStyle::LK_Java) {
1552     // Java enums are different.
1553     parseJavaEnumBody();
1554     return;
1555   }
1556 
1557   // Parse enum body.
1558   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1559   if (HasError) {
1560     if (FormatTok->is(tok::semi))
1561       nextToken();
1562     addUnwrappedLine();
1563   }
1564 
1565   // There is no addUnwrappedLine() here so that we fall through to parsing a
1566   // structural element afterwards. Thus, in "enum A {} n, m;",
1567   // "} n, m;" will end up in one unwrapped line.
1568 }
1569 
1570 void UnwrappedLineParser::parseJavaEnumBody() {
1571   // Determine whether the enum is simple, i.e. does not have a semicolon or
1572   // constants with class bodies. Simple enums can be formatted like braced
1573   // lists, contracted to a single line, etc.
1574   unsigned StoredPosition = Tokens->getPosition();
1575   bool IsSimple = true;
1576   FormatToken *Tok = Tokens->getNextToken();
1577   while (Tok) {
1578     if (Tok->is(tok::r_brace))
1579       break;
1580     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1581       IsSimple = false;
1582       break;
1583     }
1584     // FIXME: This will also mark enums with braces in the arguments to enum
1585     // constants as "not simple". This is probably fine in practice, though.
1586     Tok = Tokens->getNextToken();
1587   }
1588   FormatTok = Tokens->setPosition(StoredPosition);
1589 
1590   if (IsSimple) {
1591     parseBracedList();
1592     addUnwrappedLine();
1593     return;
1594   }
1595 
1596   // Parse the body of a more complex enum.
1597   // First add a line for everything up to the "{".
1598   nextToken();
1599   addUnwrappedLine();
1600   ++Line->Level;
1601 
1602   // Parse the enum constants.
1603   while (FormatTok) {
1604     if (FormatTok->is(tok::l_brace)) {
1605       // Parse the constant's class body.
1606       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1607                  /*MunchSemi=*/false);
1608     } else if (FormatTok->is(tok::l_paren)) {
1609       parseParens();
1610     } else if (FormatTok->is(tok::comma)) {
1611       nextToken();
1612       addUnwrappedLine();
1613     } else if (FormatTok->is(tok::semi)) {
1614       nextToken();
1615       addUnwrappedLine();
1616       break;
1617     } else if (FormatTok->is(tok::r_brace)) {
1618       addUnwrappedLine();
1619       break;
1620     } else {
1621       nextToken();
1622     }
1623   }
1624 
1625   // Parse the class body after the enum's ";" if any.
1626   parseLevel(/*HasOpeningBrace=*/true);
1627   nextToken();
1628   --Line->Level;
1629   addUnwrappedLine();
1630 }
1631 
1632 void UnwrappedLineParser::parseRecord() {
1633   const FormatToken &InitialToken = *FormatTok;
1634   nextToken();
1635 
1636   // The actual identifier can be a nested name specifier, and in macros
1637   // it is often token-pasted.
1638   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1639                             tok::kw___attribute, tok::kw___declspec,
1640                             tok::kw_alignas) ||
1641          ((Style.Language == FormatStyle::LK_Java ||
1642            Style.Language == FormatStyle::LK_JavaScript) &&
1643           FormatTok->isOneOf(tok::period, tok::comma))) {
1644     bool IsNonMacroIdentifier =
1645         FormatTok->is(tok::identifier) &&
1646         FormatTok->TokenText != FormatTok->TokenText.upper();
1647     nextToken();
1648     // We can have macros or attributes in between 'class' and the class name.
1649     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1650       parseParens();
1651   }
1652 
1653   // Note that parsing away template declarations here leads to incorrectly
1654   // accepting function declarations as record declarations.
1655   // In general, we cannot solve this problem. Consider:
1656   // class A<int> B() {}
1657   // which can be a function definition or a class definition when B() is a
1658   // macro. If we find enough real-world cases where this is a problem, we
1659   // can parse for the 'template' keyword in the beginning of the statement,
1660   // and thus rule out the record production in case there is no template
1661   // (this would still leave us with an ambiguity between template function
1662   // and class declarations).
1663   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1664     while (!eof()) {
1665       if (FormatTok->is(tok::l_brace)) {
1666         calculateBraceTypes(/*ExpectClassBody=*/true);
1667         if (!tryToParseBracedList())
1668           break;
1669       }
1670       if (FormatTok->Tok.is(tok::semi))
1671         return;
1672       nextToken();
1673     }
1674   }
1675   if (FormatTok->Tok.is(tok::l_brace)) {
1676     if (ShouldBreakBeforeBrace(Style, InitialToken))
1677       addUnwrappedLine();
1678 
1679     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1680                /*MunchSemi=*/false);
1681   }
1682   // There is no addUnwrappedLine() here so that we fall through to parsing a
1683   // structural element afterwards. Thus, in "class A {} n, m;",
1684   // "} n, m;" will end up in one unwrapped line.
1685 }
1686 
1687 void UnwrappedLineParser::parseObjCProtocolList() {
1688   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1689   do
1690     nextToken();
1691   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1692   nextToken(); // Skip '>'.
1693 }
1694 
1695 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1696   do {
1697     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1698       nextToken();
1699       addUnwrappedLine();
1700       break;
1701     }
1702     if (FormatTok->is(tok::l_brace)) {
1703       parseBlock(/*MustBeDeclaration=*/false);
1704       // In ObjC interfaces, nothing should be following the "}".
1705       addUnwrappedLine();
1706     } else if (FormatTok->is(tok::r_brace)) {
1707       // Ignore stray "}". parseStructuralElement doesn't consume them.
1708       nextToken();
1709       addUnwrappedLine();
1710     } else {
1711       parseStructuralElement();
1712     }
1713   } while (!eof());
1714 }
1715 
1716 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1717   nextToken();
1718   nextToken(); // interface name
1719 
1720   // @interface can be followed by either a base class, or a category.
1721   if (FormatTok->Tok.is(tok::colon)) {
1722     nextToken();
1723     nextToken(); // base class name
1724   } else if (FormatTok->Tok.is(tok::l_paren))
1725     // Skip category, if present.
1726     parseParens();
1727 
1728   if (FormatTok->Tok.is(tok::less))
1729     parseObjCProtocolList();
1730 
1731   if (FormatTok->Tok.is(tok::l_brace)) {
1732     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1733         Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1734       addUnwrappedLine();
1735     parseBlock(/*MustBeDeclaration=*/true);
1736   }
1737 
1738   // With instance variables, this puts '}' on its own line.  Without instance
1739   // variables, this ends the @interface line.
1740   addUnwrappedLine();
1741 
1742   parseObjCUntilAtEnd();
1743 }
1744 
1745 void UnwrappedLineParser::parseObjCProtocol() {
1746   nextToken();
1747   nextToken(); // protocol name
1748 
1749   if (FormatTok->Tok.is(tok::less))
1750     parseObjCProtocolList();
1751 
1752   // Check for protocol declaration.
1753   if (FormatTok->Tok.is(tok::semi)) {
1754     nextToken();
1755     return addUnwrappedLine();
1756   }
1757 
1758   addUnwrappedLine();
1759   parseObjCUntilAtEnd();
1760 }
1761 
1762 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1763   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1764   nextToken();
1765 
1766   // Consume the "default" in "export default class/function".
1767   if (FormatTok->is(tok::kw_default))
1768     nextToken();
1769 
1770   // Consume "function" and "default function", so that these get parsed as
1771   // free-standing JS functions, i.e. do not require a trailing semicolon.
1772   if (FormatTok->is(Keywords.kw_function)) {
1773     nextToken();
1774     return;
1775   }
1776 
1777   if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
1778                          Keywords.kw_var))
1779     return; // Fall through to parsing the corresponding structure.
1780 
1781   if (FormatTok->is(tok::l_brace)) {
1782     FormatTok->BlockKind = BK_Block;
1783     parseBracedList();
1784   }
1785 
1786   while (!eof() && FormatTok->isNot(tok::semi) &&
1787          FormatTok->isNot(tok::l_brace)) {
1788     nextToken();
1789   }
1790 }
1791 
1792 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1793                                                  StringRef Prefix = "") {
1794   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1795                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1796   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1797                                                     E = Line.Tokens.end();
1798        I != E; ++I) {
1799     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1800   }
1801   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1802                                                     E = Line.Tokens.end();
1803        I != E; ++I) {
1804     const UnwrappedLineNode &Node = *I;
1805     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1806              I = Node.Children.begin(),
1807              E = Node.Children.end();
1808          I != E; ++I) {
1809       printDebugInfo(*I, "\nChild: ");
1810     }
1811   }
1812   llvm::dbgs() << "\n";
1813 }
1814 
1815 void UnwrappedLineParser::addUnwrappedLine() {
1816   if (Line->Tokens.empty())
1817     return;
1818   DEBUG({
1819     if (CurrentLines == &Lines)
1820       printDebugInfo(*Line);
1821   });
1822   CurrentLines->push_back(std::move(*Line));
1823   Line->Tokens.clear();
1824   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1825     CurrentLines->append(
1826         std::make_move_iterator(PreprocessorDirectives.begin()),
1827         std::make_move_iterator(PreprocessorDirectives.end()));
1828     PreprocessorDirectives.clear();
1829   }
1830 }
1831 
1832 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1833 
1834 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1835   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1836          FormatTok.NewlinesBefore > 0;
1837 }
1838 
1839 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1840   bool JustComments = Line->Tokens.empty();
1841   for (SmallVectorImpl<FormatToken *>::const_iterator
1842            I = CommentsBeforeNextToken.begin(),
1843            E = CommentsBeforeNextToken.end();
1844        I != E; ++I) {
1845     if (isOnNewLine(**I) && JustComments)
1846       addUnwrappedLine();
1847     pushToken(*I);
1848   }
1849   if (NewlineBeforeNext && JustComments)
1850     addUnwrappedLine();
1851   CommentsBeforeNextToken.clear();
1852 }
1853 
1854 void UnwrappedLineParser::nextToken() {
1855   if (eof())
1856     return;
1857   flushComments(isOnNewLine(*FormatTok));
1858   pushToken(FormatTok);
1859   readToken();
1860 }
1861 
1862 void UnwrappedLineParser::readToken() {
1863   bool CommentsInCurrentLine = true;
1864   do {
1865     FormatTok = Tokens->getNextToken();
1866     assert(FormatTok);
1867     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1868            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1869       // If there is an unfinished unwrapped line, we flush the preprocessor
1870       // directives only after that unwrapped line was finished later.
1871       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
1872       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1873       // Comments stored before the preprocessor directive need to be output
1874       // before the preprocessor directive, at the same level as the
1875       // preprocessor directive, as we consider them to apply to the directive.
1876       flushComments(isOnNewLine(*FormatTok));
1877       parsePPDirective();
1878     }
1879     while (FormatTok->Type == TT_ConflictStart ||
1880            FormatTok->Type == TT_ConflictEnd ||
1881            FormatTok->Type == TT_ConflictAlternative) {
1882       if (FormatTok->Type == TT_ConflictStart) {
1883         conditionalCompilationStart(/*Unreachable=*/false);
1884       } else if (FormatTok->Type == TT_ConflictAlternative) {
1885         conditionalCompilationAlternative();
1886       } else if (FormatTok->Type == TT_ConflictEnd) {
1887         conditionalCompilationEnd();
1888       }
1889       FormatTok = Tokens->getNextToken();
1890       FormatTok->MustBreakBefore = true;
1891     }
1892 
1893     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1894         !Line->InPPDirective) {
1895       continue;
1896     }
1897 
1898     if (!FormatTok->Tok.is(tok::comment))
1899       return;
1900     if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
1901       CommentsInCurrentLine = false;
1902     }
1903     if (CommentsInCurrentLine) {
1904       pushToken(FormatTok);
1905     } else {
1906       CommentsBeforeNextToken.push_back(FormatTok);
1907     }
1908   } while (!eof());
1909 }
1910 
1911 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1912   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1913   if (MustBreakBeforeNextToken) {
1914     Line->Tokens.back().Tok->MustBreakBefore = true;
1915     MustBreakBeforeNextToken = false;
1916   }
1917 }
1918 
1919 } // end namespace format
1920 } // end namespace clang
1921