1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         Token(nullptr) {
65     TokenSource = this;
66     Line.Level = 0;
67     Line.InPPDirective = true;
68   }
69 
70   ~ScopedMacroState() override {
71     TokenSource = PreviousTokenSource;
72     ResetToken = Token;
73     Line.InPPDirective = false;
74     Line.Level = PreviousLineLevel;
75   }
76 
77   FormatToken *getNextToken() override {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89   FormatToken *setPosition(unsigned Position) override {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113 
114   FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
119 class ScopedLineState {
120 public:
121   ScopedLineState(UnwrappedLineParser &Parser,
122                   bool SwitchToPreprocessorLines = false)
123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124     if (SwitchToPreprocessorLines)
125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
126     else if (!Parser.Line->Tokens.empty())
127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128     PreBlockLine = std::move(Parser.Line);
129     Parser.Line = llvm::make_unique<UnwrappedLine>();
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line = std::move(PreBlockLine);
140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141       Parser.MustBreakBeforeNextToken = true;
142     Parser.CurrentLines = OriginalLines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147 
148   std::unique_ptr<UnwrappedLine> PreBlockLine;
149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
152 class CompoundStatementIndenter {
153 public:
154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
155                             const FormatStyle &Style, unsigned &LineLevel)
156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
157     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman) {
158       Parser->addUnwrappedLine();
159     } else if (Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
160       Parser->addUnwrappedLine();
161       ++LineLevel;
162     }
163   }
164   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
165 
166 private:
167   unsigned &LineLevel;
168   unsigned OldLineLevel;
169 };
170 
171 namespace {
172 
173 class IndexedTokenSource : public FormatTokenSource {
174 public:
175   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
176       : Tokens(Tokens), Position(-1) {}
177 
178   FormatToken *getNextToken() override {
179     ++Position;
180     return Tokens[Position];
181   }
182 
183   unsigned getPosition() override {
184     assert(Position >= 0);
185     return Position;
186   }
187 
188   FormatToken *setPosition(unsigned P) override {
189     Position = P;
190     return Tokens[Position];
191   }
192 
193   void reset() { Position = -1; }
194 
195 private:
196   ArrayRef<FormatToken *> Tokens;
197   int Position;
198 };
199 
200 } // end anonymous namespace
201 
202 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
203                                          const AdditionalKeywords &Keywords,
204                                          ArrayRef<FormatToken *> Tokens,
205                                          UnwrappedLineConsumer &Callback)
206     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
207       CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
208       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
209 
210 void UnwrappedLineParser::reset() {
211   PPBranchLevel = -1;
212   Line.reset(new UnwrappedLine);
213   CommentsBeforeNextToken.clear();
214   FormatTok = nullptr;
215   MustBreakBeforeNextToken = false;
216   PreprocessorDirectives.clear();
217   CurrentLines = &Lines;
218   DeclarationScopeStack.clear();
219   PPStack.clear();
220 }
221 
222 void UnwrappedLineParser::parse() {
223   IndexedTokenSource TokenSource(AllTokens);
224   do {
225     DEBUG(llvm::dbgs() << "----\n");
226     reset();
227     Tokens = &TokenSource;
228     TokenSource.reset();
229 
230     readToken();
231     parseFile();
232     // Create line with eof token.
233     pushToken(FormatTok);
234     addUnwrappedLine();
235 
236     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
237                                                   E = Lines.end();
238          I != E; ++I) {
239       Callback.consumeUnwrappedLine(*I);
240     }
241     Callback.finishRun();
242     Lines.clear();
243     while (!PPLevelBranchIndex.empty() &&
244            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
245       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
246       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
247     }
248     if (!PPLevelBranchIndex.empty()) {
249       ++PPLevelBranchIndex.back();
250       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
251       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
252     }
253   } while (!PPLevelBranchIndex.empty());
254 }
255 
256 void UnwrappedLineParser::parseFile() {
257   // The top-level context in a file always has declarations, except for pre-
258   // processor directives and JavaScript files.
259   bool MustBeDeclaration =
260       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
261   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
262                                           MustBeDeclaration);
263   parseLevel(/*HasOpeningBrace=*/false);
264   // Make sure to format the remaining tokens.
265   flushComments(true);
266   addUnwrappedLine();
267 }
268 
269 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
270   bool SwitchLabelEncountered = false;
271   do {
272     tok::TokenKind kind = FormatTok->Tok.getKind();
273     if (FormatTok->Type == TT_MacroBlockBegin) {
274       kind = tok::l_brace;
275     } else if (FormatTok->Type == TT_MacroBlockEnd) {
276       kind = tok::r_brace;
277     }
278 
279     switch (kind) {
280     case tok::comment:
281       nextToken();
282       addUnwrappedLine();
283       break;
284     case tok::l_brace:
285       // FIXME: Add parameter whether this can happen - if this happens, we must
286       // be in a non-declaration context.
287       parseBlock(/*MustBeDeclaration=*/false);
288       addUnwrappedLine();
289       break;
290     case tok::r_brace:
291       if (HasOpeningBrace)
292         return;
293       nextToken();
294       addUnwrappedLine();
295       break;
296     case tok::kw_default:
297     case tok::kw_case:
298       if (!SwitchLabelEncountered &&
299           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
300         ++Line->Level;
301       SwitchLabelEncountered = true;
302       parseStructuralElement();
303       break;
304     default:
305       parseStructuralElement();
306       break;
307     }
308   } while (!eof());
309 }
310 
311 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
312   // We'll parse forward through the tokens until we hit
313   // a closing brace or eof - note that getNextToken() will
314   // parse macros, so this will magically work inside macro
315   // definitions, too.
316   unsigned StoredPosition = Tokens->getPosition();
317   FormatToken *Tok = FormatTok;
318   // Keep a stack of positions of lbrace tokens. We will
319   // update information about whether an lbrace starts a
320   // braced init list or a different block during the loop.
321   SmallVector<FormatToken *, 8> LBraceStack;
322   assert(Tok->Tok.is(tok::l_brace));
323   do {
324     // Get next none-comment token.
325     FormatToken *NextTok;
326     unsigned ReadTokens = 0;
327     do {
328       NextTok = Tokens->getNextToken();
329       ++ReadTokens;
330     } while (NextTok->is(tok::comment));
331 
332     switch (Tok->Tok.getKind()) {
333     case tok::l_brace:
334       Tok->BlockKind = BK_Unknown;
335       LBraceStack.push_back(Tok);
336       break;
337     case tok::r_brace:
338       if (!LBraceStack.empty()) {
339         if (LBraceStack.back()->BlockKind == BK_Unknown) {
340           bool ProbablyBracedList = false;
341           if (Style.Language == FormatStyle::LK_Proto) {
342             ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
343           } else {
344             // Using OriginalColumn to distinguish between ObjC methods and
345             // binary operators is a bit hacky.
346             bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
347                                     NextTok->OriginalColumn == 0;
348 
349             // If there is a comma, semicolon or right paren after the closing
350             // brace, we assume this is a braced initializer list.  Note that
351             // regardless how we mark inner braces here, we will overwrite the
352             // BlockKind later if we parse a braced list (where all blocks
353             // inside are by default braced lists), or when we explicitly detect
354             // blocks (for example while parsing lambdas).
355             //
356             // We exclude + and - as they can be ObjC visibility modifiers.
357             ProbablyBracedList =
358                 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
359                                  tok::r_paren, tok::r_square, tok::l_brace,
360                                  tok::l_paren, tok::ellipsis) ||
361                 (NextTok->is(tok::semi) &&
362                  (!ExpectClassBody || LBraceStack.size() != 1)) ||
363                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
364           }
365           if (ProbablyBracedList) {
366             Tok->BlockKind = BK_BracedInit;
367             LBraceStack.back()->BlockKind = BK_BracedInit;
368           } else {
369             Tok->BlockKind = BK_Block;
370             LBraceStack.back()->BlockKind = BK_Block;
371           }
372         }
373         LBraceStack.pop_back();
374       }
375       break;
376     case tok::at:
377     case tok::semi:
378     case tok::kw_if:
379     case tok::kw_while:
380     case tok::kw_for:
381     case tok::kw_switch:
382     case tok::kw_try:
383     case tok::kw___try:
384       if (!LBraceStack.empty())
385         LBraceStack.back()->BlockKind = BK_Block;
386       break;
387     default:
388       break;
389     }
390     Tok = NextTok;
391   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
392   // Assume other blocks for all unclosed opening braces.
393   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
394     if (LBraceStack[i]->BlockKind == BK_Unknown)
395       LBraceStack[i]->BlockKind = BK_Block;
396   }
397 
398   FormatTok = Tokens->setPosition(StoredPosition);
399 }
400 
401 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
402                                      bool MunchSemi) {
403   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
404          "'{' or macro block token expected");
405   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
406 
407   unsigned InitialLevel = Line->Level;
408   nextToken();
409 
410   if (MacroBlock && FormatTok->is(tok::l_paren))
411     parseParens();
412 
413   addUnwrappedLine();
414 
415   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
416                                           MustBeDeclaration);
417   if (AddLevel)
418     ++Line->Level;
419   parseLevel(/*HasOpeningBrace=*/true);
420 
421   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
422                  : !FormatTok->is(tok::r_brace)) {
423     Line->Level = InitialLevel;
424     return;
425   }
426 
427   nextToken(); // Munch the closing brace.
428 
429   if (MacroBlock && FormatTok->is(tok::l_paren))
430     parseParens();
431 
432   if (MunchSemi && FormatTok->Tok.is(tok::semi))
433     nextToken();
434   Line->Level = InitialLevel;
435 }
436 
437 static bool isGoogScope(const UnwrappedLine &Line) {
438   // FIXME: Closure-library specific stuff should not be hard-coded but be
439   // configurable.
440   if (Line.Tokens.size() < 4)
441     return false;
442   auto I = Line.Tokens.begin();
443   if (I->Tok->TokenText != "goog")
444     return false;
445   ++I;
446   if (I->Tok->isNot(tok::period))
447     return false;
448   ++I;
449   if (I->Tok->TokenText != "scope")
450     return false;
451   ++I;
452   return I->Tok->is(tok::l_paren);
453 }
454 
455 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
456                                    const FormatToken &InitialToken) {
457   switch (Style.BreakBeforeBraces) {
458   case FormatStyle::BS_Linux:
459     return InitialToken.isOneOf(tok::kw_namespace, tok::kw_class);
460   case FormatStyle::BS_Mozilla:
461     return InitialToken.isOneOf(tok::kw_class, tok::kw_struct, tok::kw_union);
462   case FormatStyle::BS_Allman:
463   case FormatStyle::BS_GNU:
464     return true;
465   default:
466     return false;
467   }
468 }
469 
470 void UnwrappedLineParser::parseChildBlock() {
471   FormatTok->BlockKind = BK_Block;
472   nextToken();
473   {
474     bool GoogScope =
475         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
476     ScopedLineState LineState(*this);
477     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
478                                             /*MustBeDeclaration=*/false);
479     Line->Level += GoogScope ? 0 : 1;
480     parseLevel(/*HasOpeningBrace=*/true);
481     flushComments(isOnNewLine(*FormatTok));
482     Line->Level -= GoogScope ? 0 : 1;
483   }
484   nextToken();
485 }
486 
487 void UnwrappedLineParser::parsePPDirective() {
488   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
489   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
490   nextToken();
491 
492   if (!FormatTok->Tok.getIdentifierInfo()) {
493     parsePPUnknown();
494     return;
495   }
496 
497   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
498   case tok::pp_define:
499     parsePPDefine();
500     return;
501   case tok::pp_if:
502     parsePPIf(/*IfDef=*/false);
503     break;
504   case tok::pp_ifdef:
505   case tok::pp_ifndef:
506     parsePPIf(/*IfDef=*/true);
507     break;
508   case tok::pp_else:
509     parsePPElse();
510     break;
511   case tok::pp_elif:
512     parsePPElIf();
513     break;
514   case tok::pp_endif:
515     parsePPEndIf();
516     break;
517   default:
518     parsePPUnknown();
519     break;
520   }
521 }
522 
523 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
524   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
525     PPStack.push_back(PP_Unreachable);
526   else
527     PPStack.push_back(PP_Conditional);
528 }
529 
530 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
531   ++PPBranchLevel;
532   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
533   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
534     PPLevelBranchIndex.push_back(0);
535     PPLevelBranchCount.push_back(0);
536   }
537   PPChainBranchIndex.push(0);
538   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
539   conditionalCompilationCondition(Unreachable || Skip);
540 }
541 
542 void UnwrappedLineParser::conditionalCompilationAlternative() {
543   if (!PPStack.empty())
544     PPStack.pop_back();
545   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
546   if (!PPChainBranchIndex.empty())
547     ++PPChainBranchIndex.top();
548   conditionalCompilationCondition(
549       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
550       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
551 }
552 
553 void UnwrappedLineParser::conditionalCompilationEnd() {
554   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
555   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
556     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
557       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
558     }
559   }
560   // Guard against #endif's without #if.
561   if (PPBranchLevel > 0)
562     --PPBranchLevel;
563   if (!PPChainBranchIndex.empty())
564     PPChainBranchIndex.pop();
565   if (!PPStack.empty())
566     PPStack.pop_back();
567 }
568 
569 void UnwrappedLineParser::parsePPIf(bool IfDef) {
570   nextToken();
571   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
572                          FormatTok->Tok.getLiteralData() != nullptr &&
573                          StringRef(FormatTok->Tok.getLiteralData(),
574                                    FormatTok->Tok.getLength()) == "0") ||
575                         FormatTok->Tok.is(tok::kw_false);
576   conditionalCompilationStart(!IfDef && IsLiteralFalse);
577   parsePPUnknown();
578 }
579 
580 void UnwrappedLineParser::parsePPElse() {
581   conditionalCompilationAlternative();
582   parsePPUnknown();
583 }
584 
585 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
586 
587 void UnwrappedLineParser::parsePPEndIf() {
588   conditionalCompilationEnd();
589   parsePPUnknown();
590 }
591 
592 void UnwrappedLineParser::parsePPDefine() {
593   nextToken();
594 
595   if (FormatTok->Tok.getKind() != tok::identifier) {
596     parsePPUnknown();
597     return;
598   }
599   nextToken();
600   if (FormatTok->Tok.getKind() == tok::l_paren &&
601       FormatTok->WhitespaceRange.getBegin() ==
602           FormatTok->WhitespaceRange.getEnd()) {
603     parseParens();
604   }
605   addUnwrappedLine();
606   Line->Level = 1;
607 
608   // Errors during a preprocessor directive can only affect the layout of the
609   // preprocessor directive, and thus we ignore them. An alternative approach
610   // would be to use the same approach we use on the file level (no
611   // re-indentation if there was a structural error) within the macro
612   // definition.
613   parseFile();
614 }
615 
616 void UnwrappedLineParser::parsePPUnknown() {
617   do {
618     nextToken();
619   } while (!eof());
620   addUnwrappedLine();
621 }
622 
623 // Here we blacklist certain tokens that are not usually the first token in an
624 // unwrapped line. This is used in attempt to distinguish macro calls without
625 // trailing semicolons from other constructs split to several lines.
626 static bool tokenCanStartNewLine(const clang::Token &Tok) {
627   // Semicolon can be a null-statement, l_square can be a start of a macro or
628   // a C++11 attribute, but this doesn't seem to be common.
629   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
630          Tok.isNot(tok::l_square) &&
631          // Tokens that can only be used as binary operators and a part of
632          // overloaded operator names.
633          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
634          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
635          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
636          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
637          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
638          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
639          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
640          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
641          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
642          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
643          Tok.isNot(tok::lesslessequal) &&
644          // Colon is used in labels, base class lists, initializer lists,
645          // range-based for loops, ternary operator, but should never be the
646          // first token in an unwrapped line.
647          Tok.isNot(tok::colon) &&
648          // 'noexcept' is a trailing annotation.
649          Tok.isNot(tok::kw_noexcept);
650 }
651 
652 void UnwrappedLineParser::parseStructuralElement() {
653   assert(!FormatTok->Tok.is(tok::l_brace));
654   switch (FormatTok->Tok.getKind()) {
655   case tok::at:
656     nextToken();
657     if (FormatTok->Tok.is(tok::l_brace)) {
658       parseBracedList();
659       break;
660     }
661     switch (FormatTok->Tok.getObjCKeywordID()) {
662     case tok::objc_public:
663     case tok::objc_protected:
664     case tok::objc_package:
665     case tok::objc_private:
666       return parseAccessSpecifier();
667     case tok::objc_interface:
668     case tok::objc_implementation:
669       return parseObjCInterfaceOrImplementation();
670     case tok::objc_protocol:
671       return parseObjCProtocol();
672     case tok::objc_end:
673       return; // Handled by the caller.
674     case tok::objc_optional:
675     case tok::objc_required:
676       nextToken();
677       addUnwrappedLine();
678       return;
679     case tok::objc_autoreleasepool:
680       nextToken();
681       if (FormatTok->Tok.is(tok::l_brace)) {
682         if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
683             Style.BreakBeforeBraces == FormatStyle::BS_GNU)
684           addUnwrappedLine();
685         parseBlock(/*MustBeDeclaration=*/false);
686       }
687       addUnwrappedLine();
688       return;
689     case tok::objc_try:
690       // This branch isn't strictly necessary (the kw_try case below would
691       // do this too after the tok::at is parsed above).  But be explicit.
692       parseTryCatch();
693       return;
694     default:
695       break;
696     }
697     break;
698   case tok::kw_asm:
699     nextToken();
700     if (FormatTok->is(tok::l_brace)) {
701       FormatTok->Type = TT_InlineASMBrace;
702       nextToken();
703       while (FormatTok && FormatTok->isNot(tok::eof)) {
704         if (FormatTok->is(tok::r_brace)) {
705           FormatTok->Type = TT_InlineASMBrace;
706           nextToken();
707           addUnwrappedLine();
708           break;
709         }
710         FormatTok->Finalized = true;
711         nextToken();
712       }
713     }
714     break;
715   case tok::kw_namespace:
716     parseNamespace();
717     return;
718   case tok::kw_inline:
719     nextToken();
720     if (FormatTok->Tok.is(tok::kw_namespace)) {
721       parseNamespace();
722       return;
723     }
724     break;
725   case tok::kw_public:
726   case tok::kw_protected:
727   case tok::kw_private:
728     if (Style.Language == FormatStyle::LK_Java ||
729         Style.Language == FormatStyle::LK_JavaScript)
730       nextToken();
731     else
732       parseAccessSpecifier();
733     return;
734   case tok::kw_if:
735     parseIfThenElse();
736     return;
737   case tok::kw_for:
738   case tok::kw_while:
739     parseForOrWhileLoop();
740     return;
741   case tok::kw_do:
742     parseDoWhile();
743     return;
744   case tok::kw_switch:
745     parseSwitch();
746     return;
747   case tok::kw_default:
748     nextToken();
749     parseLabel();
750     return;
751   case tok::kw_case:
752     parseCaseLabel();
753     return;
754   case tok::kw_try:
755   case tok::kw___try:
756     parseTryCatch();
757     return;
758   case tok::kw_extern:
759     nextToken();
760     if (FormatTok->Tok.is(tok::string_literal)) {
761       nextToken();
762       if (FormatTok->Tok.is(tok::l_brace)) {
763         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
764         addUnwrappedLine();
765         return;
766       }
767     }
768     break;
769   case tok::kw_export:
770     if (Style.Language == FormatStyle::LK_JavaScript) {
771       parseJavaScriptEs6ImportExport();
772       return;
773     }
774     break;
775   case tok::identifier:
776     if (FormatTok->is(TT_ForEachMacro)) {
777       parseForOrWhileLoop();
778       return;
779     }
780     if (FormatTok->is(TT_MacroBlockBegin)) {
781       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
782                  /*MunchSemi=*/false);
783       return;
784     }
785     if (Style.Language == FormatStyle::LK_JavaScript &&
786         FormatTok->is(Keywords.kw_import)) {
787       parseJavaScriptEs6ImportExport();
788       return;
789     }
790     if (FormatTok->is(Keywords.kw_signals)) {
791       nextToken();
792       if (FormatTok->is(tok::colon)) {
793         nextToken();
794         addUnwrappedLine();
795       }
796       return;
797     }
798     // In all other cases, parse the declaration.
799     break;
800   default:
801     break;
802   }
803   do {
804     switch (FormatTok->Tok.getKind()) {
805     case tok::at:
806       nextToken();
807       if (FormatTok->Tok.is(tok::l_brace))
808         parseBracedList();
809       break;
810     case tok::kw_enum:
811       // parseEnum falls through and does not yet add an unwrapped line as an
812       // enum definition can start a structural element.
813       parseEnum();
814       // This only applies for C++.
815       if (Style.Language != FormatStyle::LK_Cpp) {
816         addUnwrappedLine();
817         return;
818       }
819       break;
820     case tok::kw_typedef:
821       nextToken();
822       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
823                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
824         parseEnum();
825       break;
826     case tok::kw_struct:
827     case tok::kw_union:
828     case tok::kw_class:
829       // parseRecord falls through and does not yet add an unwrapped line as a
830       // record declaration or definition can start a structural element.
831       parseRecord();
832       // This does not apply for Java and JavaScript.
833       if (Style.Language == FormatStyle::LK_Java ||
834           Style.Language == FormatStyle::LK_JavaScript) {
835         addUnwrappedLine();
836         return;
837       }
838       break;
839     case tok::period:
840       nextToken();
841       // In Java, classes have an implicit static member "class".
842       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
843           FormatTok->is(tok::kw_class))
844         nextToken();
845       break;
846     case tok::semi:
847       nextToken();
848       addUnwrappedLine();
849       return;
850     case tok::r_brace:
851       addUnwrappedLine();
852       return;
853     case tok::l_paren:
854       parseParens();
855       break;
856     case tok::caret:
857       nextToken();
858       if (FormatTok->Tok.isAnyIdentifier() ||
859           FormatTok->isSimpleTypeSpecifier())
860         nextToken();
861       if (FormatTok->is(tok::l_paren))
862         parseParens();
863       if (FormatTok->is(tok::l_brace))
864         parseChildBlock();
865       break;
866     case tok::l_brace:
867       if (!tryToParseBracedList()) {
868         // A block outside of parentheses must be the last part of a
869         // structural element.
870         // FIXME: Figure out cases where this is not true, and add projections
871         // for them (the one we know is missing are lambdas).
872         if (Style.BreakBeforeBraces != FormatStyle::BS_Attach)
873           addUnwrappedLine();
874         FormatTok->Type = TT_FunctionLBrace;
875         parseBlock(/*MustBeDeclaration=*/false);
876         addUnwrappedLine();
877         return;
878       }
879       // Otherwise this was a braced init list, and the structural
880       // element continues.
881       break;
882     case tok::kw_try:
883       // We arrive here when parsing function-try blocks.
884       parseTryCatch();
885       return;
886     case tok::identifier: {
887       if (FormatTok->is(TT_MacroBlockEnd)) {
888         addUnwrappedLine();
889         return;
890       }
891 
892       // Parse function literal unless 'function' is the first token in a line
893       // in which case this should be treated as a free-standing function.
894       if (Style.Language == FormatStyle::LK_JavaScript &&
895           FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
896         tryToParseJSFunction();
897         break;
898       }
899       if ((Style.Language == FormatStyle::LK_JavaScript ||
900            Style.Language == FormatStyle::LK_Java) &&
901           FormatTok->is(Keywords.kw_interface)) {
902         parseRecord();
903         addUnwrappedLine();
904         return;
905       }
906 
907       StringRef Text = FormatTok->TokenText;
908       nextToken();
909       if (Line->Tokens.size() == 1 &&
910           // JS doesn't have macros, and within classes colons indicate fields,
911           // not labels.
912           Style.Language != FormatStyle::LK_JavaScript) {
913         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
914           parseLabel();
915           return;
916         }
917         // Recognize function-like macro usages without trailing semicolon as
918         // well as free-standing macros like Q_OBJECT.
919         bool FunctionLike = FormatTok->is(tok::l_paren);
920         if (FunctionLike)
921           parseParens();
922 
923         bool FollowedByNewline =
924             CommentsBeforeNextToken.empty()
925                 ? FormatTok->NewlinesBefore > 0
926                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
927 
928         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
929             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
930           addUnwrappedLine();
931           return;
932         }
933       }
934       break;
935     }
936     case tok::equal:
937       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
938       // TT_JsFatArrow. The always start an expression or a child block if
939       // followed by a curly.
940       if (FormatTok->is(TT_JsFatArrow)) {
941         nextToken();
942         if (FormatTok->is(tok::l_brace))
943           parseChildBlock();
944         break;
945       }
946 
947       nextToken();
948       if (FormatTok->Tok.is(tok::l_brace)) {
949         parseBracedList();
950       }
951       break;
952     case tok::l_square:
953       parseSquare();
954       break;
955     case tok::kw_new:
956       parseNew();
957       break;
958     default:
959       nextToken();
960       break;
961     }
962   } while (!eof());
963 }
964 
965 bool UnwrappedLineParser::tryToParseLambda() {
966   if (Style.Language != FormatStyle::LK_Cpp) {
967     nextToken();
968     return false;
969   }
970   // FIXME: This is a dirty way to access the previous token. Find a better
971   // solution.
972   if (!Line->Tokens.empty() &&
973       (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
974                                         tok::kw_new, tok::kw_delete) ||
975        Line->Tokens.back().Tok->closesScope() ||
976        Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
977     nextToken();
978     return false;
979   }
980   assert(FormatTok->is(tok::l_square));
981   FormatToken &LSquare = *FormatTok;
982   if (!tryToParseLambdaIntroducer())
983     return false;
984 
985   while (FormatTok->isNot(tok::l_brace)) {
986     if (FormatTok->isSimpleTypeSpecifier()) {
987       nextToken();
988       continue;
989     }
990     switch (FormatTok->Tok.getKind()) {
991     case tok::l_brace:
992       break;
993     case tok::l_paren:
994       parseParens();
995       break;
996     case tok::amp:
997     case tok::star:
998     case tok::kw_const:
999     case tok::comma:
1000     case tok::less:
1001     case tok::greater:
1002     case tok::identifier:
1003     case tok::coloncolon:
1004     case tok::kw_mutable:
1005       nextToken();
1006       break;
1007     case tok::arrow:
1008       FormatTok->Type = TT_LambdaArrow;
1009       nextToken();
1010       break;
1011     default:
1012       return true;
1013     }
1014   }
1015   LSquare.Type = TT_LambdaLSquare;
1016   parseChildBlock();
1017   return true;
1018 }
1019 
1020 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1021   nextToken();
1022   if (FormatTok->is(tok::equal)) {
1023     nextToken();
1024     if (FormatTok->is(tok::r_square)) {
1025       nextToken();
1026       return true;
1027     }
1028     if (FormatTok->isNot(tok::comma))
1029       return false;
1030     nextToken();
1031   } else if (FormatTok->is(tok::amp)) {
1032     nextToken();
1033     if (FormatTok->is(tok::r_square)) {
1034       nextToken();
1035       return true;
1036     }
1037     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1038       return false;
1039     }
1040     if (FormatTok->is(tok::comma))
1041       nextToken();
1042   } else if (FormatTok->is(tok::r_square)) {
1043     nextToken();
1044     return true;
1045   }
1046   do {
1047     if (FormatTok->is(tok::amp))
1048       nextToken();
1049     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1050       return false;
1051     nextToken();
1052     if (FormatTok->is(tok::ellipsis))
1053       nextToken();
1054     if (FormatTok->is(tok::comma)) {
1055       nextToken();
1056     } else if (FormatTok->is(tok::r_square)) {
1057       nextToken();
1058       return true;
1059     } else {
1060       return false;
1061     }
1062   } while (!eof());
1063   return false;
1064 }
1065 
1066 void UnwrappedLineParser::tryToParseJSFunction() {
1067   nextToken();
1068 
1069   // Consume function name.
1070   if (FormatTok->is(tok::identifier))
1071     nextToken();
1072 
1073   if (FormatTok->isNot(tok::l_paren))
1074     return;
1075 
1076   // Parse formal parameter list.
1077   parseParens();
1078 
1079   if (FormatTok->is(tok::colon)) {
1080     // Parse a type definition.
1081     nextToken();
1082 
1083     // Eat the type declaration. For braced inline object types, balance braces,
1084     // otherwise just parse until finding an l_brace for the function body.
1085     if (FormatTok->is(tok::l_brace))
1086       tryToParseBracedList();
1087     else
1088       while (FormatTok->isNot(tok::l_brace) && !eof())
1089         nextToken();
1090   }
1091 
1092   parseChildBlock();
1093 }
1094 
1095 bool UnwrappedLineParser::tryToParseBracedList() {
1096   if (FormatTok->BlockKind == BK_Unknown)
1097     calculateBraceTypes();
1098   assert(FormatTok->BlockKind != BK_Unknown);
1099   if (FormatTok->BlockKind == BK_Block)
1100     return false;
1101   parseBracedList();
1102   return true;
1103 }
1104 
1105 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1106   bool HasError = false;
1107   nextToken();
1108 
1109   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1110   // replace this by using parseAssigmentExpression() inside.
1111   do {
1112     if (Style.Language == FormatStyle::LK_JavaScript) {
1113       if (FormatTok->is(Keywords.kw_function)) {
1114         tryToParseJSFunction();
1115         continue;
1116       }
1117       if (FormatTok->is(TT_JsFatArrow)) {
1118         nextToken();
1119         // Fat arrows can be followed by simple expressions or by child blocks
1120         // in curly braces.
1121         if (FormatTok->is(tok::l_brace)) {
1122           parseChildBlock();
1123           continue;
1124         }
1125       }
1126     }
1127     switch (FormatTok->Tok.getKind()) {
1128     case tok::caret:
1129       nextToken();
1130       if (FormatTok->is(tok::l_brace)) {
1131         parseChildBlock();
1132       }
1133       break;
1134     case tok::l_square:
1135       tryToParseLambda();
1136       break;
1137     case tok::l_brace:
1138       // Assume there are no blocks inside a braced init list apart
1139       // from the ones we explicitly parse out (like lambdas).
1140       FormatTok->BlockKind = BK_BracedInit;
1141       parseBracedList();
1142       break;
1143     case tok::l_paren:
1144       parseParens();
1145       // JavaScript can just have free standing methods and getters/setters in
1146       // object literals. Detect them by a "{" following ")".
1147       if (Style.Language == FormatStyle::LK_JavaScript) {
1148         if (FormatTok->is(tok::l_brace))
1149           parseChildBlock();
1150         break;
1151       }
1152       break;
1153     case tok::r_brace:
1154       nextToken();
1155       return !HasError;
1156     case tok::semi:
1157       HasError = true;
1158       if (!ContinueOnSemicolons)
1159         return !HasError;
1160       nextToken();
1161       break;
1162     case tok::comma:
1163       nextToken();
1164       break;
1165     default:
1166       nextToken();
1167       break;
1168     }
1169   } while (!eof());
1170   return false;
1171 }
1172 
1173 void UnwrappedLineParser::parseParens() {
1174   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1175   nextToken();
1176   do {
1177     switch (FormatTok->Tok.getKind()) {
1178     case tok::l_paren:
1179       parseParens();
1180       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1181         parseChildBlock();
1182       break;
1183     case tok::r_paren:
1184       nextToken();
1185       return;
1186     case tok::r_brace:
1187       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1188       return;
1189     case tok::l_square:
1190       tryToParseLambda();
1191       break;
1192     case tok::l_brace:
1193       if (!tryToParseBracedList())
1194         parseChildBlock();
1195       break;
1196     case tok::at:
1197       nextToken();
1198       if (FormatTok->Tok.is(tok::l_brace))
1199         parseBracedList();
1200       break;
1201     case tok::identifier:
1202       if (Style.Language == FormatStyle::LK_JavaScript &&
1203           FormatTok->is(Keywords.kw_function))
1204         tryToParseJSFunction();
1205       else
1206         nextToken();
1207       break;
1208     default:
1209       nextToken();
1210       break;
1211     }
1212   } while (!eof());
1213 }
1214 
1215 void UnwrappedLineParser::parseSquare() {
1216   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1217   if (tryToParseLambda())
1218     return;
1219   do {
1220     switch (FormatTok->Tok.getKind()) {
1221     case tok::l_paren:
1222       parseParens();
1223       break;
1224     case tok::r_square:
1225       nextToken();
1226       return;
1227     case tok::r_brace:
1228       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1229       return;
1230     case tok::l_square:
1231       parseSquare();
1232       break;
1233     case tok::l_brace: {
1234       if (!tryToParseBracedList())
1235         parseChildBlock();
1236       break;
1237     }
1238     case tok::at:
1239       nextToken();
1240       if (FormatTok->Tok.is(tok::l_brace))
1241         parseBracedList();
1242       break;
1243     default:
1244       nextToken();
1245       break;
1246     }
1247   } while (!eof());
1248 }
1249 
1250 void UnwrappedLineParser::parseIfThenElse() {
1251   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1252   nextToken();
1253   if (FormatTok->Tok.is(tok::l_paren))
1254     parseParens();
1255   bool NeedsUnwrappedLine = false;
1256   if (FormatTok->Tok.is(tok::l_brace)) {
1257     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1258     parseBlock(/*MustBeDeclaration=*/false);
1259     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1260         Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1261       addUnwrappedLine();
1262     } else {
1263       NeedsUnwrappedLine = true;
1264     }
1265   } else {
1266     addUnwrappedLine();
1267     ++Line->Level;
1268     parseStructuralElement();
1269     --Line->Level;
1270   }
1271   if (FormatTok->Tok.is(tok::kw_else)) {
1272     if (Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
1273       addUnwrappedLine();
1274     nextToken();
1275     if (FormatTok->Tok.is(tok::l_brace)) {
1276       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1277       parseBlock(/*MustBeDeclaration=*/false);
1278       addUnwrappedLine();
1279     } else if (FormatTok->Tok.is(tok::kw_if)) {
1280       parseIfThenElse();
1281     } else {
1282       addUnwrappedLine();
1283       ++Line->Level;
1284       parseStructuralElement();
1285       --Line->Level;
1286     }
1287   } else if (NeedsUnwrappedLine) {
1288     addUnwrappedLine();
1289   }
1290 }
1291 
// Parses a 'try' / '__try' statement starting at that keyword, followed by
// any number of handler clauses: 'catch', '__except', '__finally', 'finally'
// (Java and JavaScript only) and the Objective-C '@catch' / '@finally'.
// Also handles the initializer list of a function-try-block and the
// parenthesized resource list of a Java try-with-resources.
void UnwrappedLineParser::parseTryCatch() {
  assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
  nextToken();
  // Set whenever a block has been parsed without finishing the current
  // unwrapped line afterwards; the line is then finished at the very end,
  // once no further handler clause follows.
  bool NeedsUnwrappedLine = false;
  if (FormatTok->is(tok::colon)) {
    // We are in a function try block, what comes is an initializer list.
    nextToken();
    // Each initializer is an identifier, optionally followed by a
    // parenthesized argument list and a separating comma.
    while (FormatTok->is(tok::identifier)) {
      nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::comma))
        nextToken();
    }
  }
  // Parse try with resource.
  if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
    parseParens();
  }
  if (FormatTok->is(tok::l_brace)) {
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    // Allman, GNU and Stroustrup styles finish the line right after the try
    // block; other styles leave it open for a following handler clause.
    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
        Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
        Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
      addUnwrappedLine();
    } else {
      NeedsUnwrappedLine = true;
    }
  } else if (!FormatTok->is(tok::kw_catch)) {
    // The C++ standard requires a compound-statement after a try.
    // If there's none, we try to assume there's a structuralElement
    // and try to continue.
    addUnwrappedLine();
    ++Line->Level;
    parseStructuralElement();
    --Line->Level;
  }
  // Parse handler clauses for as long as a handler keyword follows.
  while (1) {
    // Step over the '@' of an Objective-C '@catch' / '@finally'.
    if (FormatTok->is(tok::at))
      nextToken();
    if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
                             tok::kw___finally) ||
          ((Style.Language == FormatStyle::LK_Java ||
            Style.Language == FormatStyle::LK_JavaScript) &&
           FormatTok->is(Keywords.kw_finally)) ||
          (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
           FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
      break;
    nextToken();
    // Skip everything up to the handler's '{', parsing parenthesized parts
    // as a unit. Give up on the whole statement if a ';', a '}' or the end of
    // input shows up first.
    while (FormatTok->isNot(tok::l_brace)) {
      if (FormatTok->is(tok::l_paren)) {
        parseParens();
        continue;
      }
      if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
        return;
      nextToken();
    }
    NeedsUnwrappedLine = false;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false);
    // Same line-finishing rule as after the try block above.
    if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
        Style.BreakBeforeBraces == FormatStyle::BS_GNU ||
        Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup) {
      addUnwrappedLine();
    } else {
      NeedsUnwrappedLine = true;
    }
  }
  if (NeedsUnwrappedLine) {
    addUnwrappedLine();
  }
}
1366 
1367 void UnwrappedLineParser::parseNamespace() {
1368   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1369 
1370   const FormatToken &InitialToken = *FormatTok;
1371   nextToken();
1372   if (FormatTok->Tok.is(tok::identifier))
1373     nextToken();
1374   if (FormatTok->Tok.is(tok::l_brace)) {
1375     if (ShouldBreakBeforeBrace(Style, InitialToken))
1376       addUnwrappedLine();
1377 
1378     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1379                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1380                      DeclarationScopeStack.size() > 1);
1381     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1382     // Munch the semicolon after a namespace. This is more common than one would
1383     // think. Puttin the semicolon into its own line is very ugly.
1384     if (FormatTok->Tok.is(tok::semi))
1385       nextToken();
1386     addUnwrappedLine();
1387   }
1388   // FIXME: Add error handling.
1389 }
1390 
1391 void UnwrappedLineParser::parseNew() {
1392   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1393   nextToken();
1394   if (Style.Language != FormatStyle::LK_Java)
1395     return;
1396 
1397   // In Java, we can parse everything up to the parens, which aren't optional.
1398   do {
1399     // There should not be a ;, { or } before the new's open paren.
1400     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1401       return;
1402 
1403     // Consume the parens.
1404     if (FormatTok->is(tok::l_paren)) {
1405       parseParens();
1406 
1407       // If there is a class body of an anonymous class, consume that as child.
1408       if (FormatTok->is(tok::l_brace))
1409         parseChildBlock();
1410       return;
1411     }
1412     nextToken();
1413   } while (!eof());
1414 }
1415 
1416 void UnwrappedLineParser::parseForOrWhileLoop() {
1417   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1418          "'for', 'while' or foreach macro expected");
1419   nextToken();
1420   if (FormatTok->Tok.is(tok::l_paren))
1421     parseParens();
1422   if (FormatTok->Tok.is(tok::l_brace)) {
1423     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1424     parseBlock(/*MustBeDeclaration=*/false);
1425     addUnwrappedLine();
1426   } else {
1427     addUnwrappedLine();
1428     ++Line->Level;
1429     parseStructuralElement();
1430     --Line->Level;
1431   }
1432 }
1433 
1434 void UnwrappedLineParser::parseDoWhile() {
1435   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1436   nextToken();
1437   if (FormatTok->Tok.is(tok::l_brace)) {
1438     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1439     parseBlock(/*MustBeDeclaration=*/false);
1440     if (Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1441       addUnwrappedLine();
1442   } else {
1443     addUnwrappedLine();
1444     ++Line->Level;
1445     parseStructuralElement();
1446     --Line->Level;
1447   }
1448 
1449   // FIXME: Add error handling.
1450   if (!FormatTok->Tok.is(tok::kw_while)) {
1451     addUnwrappedLine();
1452     return;
1453   }
1454 
1455   nextToken();
1456   parseStructuralElement();
1457 }
1458 
1459 void UnwrappedLineParser::parseLabel() {
1460   nextToken();
1461   unsigned OldLineLevel = Line->Level;
1462   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1463     --Line->Level;
1464   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1465     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1466     parseBlock(/*MustBeDeclaration=*/false);
1467     if (FormatTok->Tok.is(tok::kw_break)) {
1468       // "break;" after "}" on its own line only for BS_Allman and BS_GNU
1469       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1470           Style.BreakBeforeBraces == FormatStyle::BS_GNU) {
1471         addUnwrappedLine();
1472       }
1473       parseStructuralElement();
1474     }
1475     addUnwrappedLine();
1476   } else {
1477     if (FormatTok->is(tok::semi))
1478       nextToken();
1479     addUnwrappedLine();
1480   }
1481   Line->Level = OldLineLevel;
1482 }
1483 
1484 void UnwrappedLineParser::parseCaseLabel() {
1485   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1486   // FIXME: fix handling of complex expressions here.
1487   do {
1488     nextToken();
1489   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1490   parseLabel();
1491 }
1492 
1493 void UnwrappedLineParser::parseSwitch() {
1494   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1495   nextToken();
1496   if (FormatTok->Tok.is(tok::l_paren))
1497     parseParens();
1498   if (FormatTok->Tok.is(tok::l_brace)) {
1499     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1500     parseBlock(/*MustBeDeclaration=*/false);
1501     addUnwrappedLine();
1502   } else {
1503     addUnwrappedLine();
1504     ++Line->Level;
1505     parseStructuralElement();
1506     --Line->Level;
1507   }
1508 }
1509 
1510 void UnwrappedLineParser::parseAccessSpecifier() {
1511   nextToken();
1512   // Understand Qt's slots.
1513   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1514     nextToken();
1515   // Otherwise, we don't know what it is, and we'd better keep the next token.
1516   if (FormatTok->Tok.is(tok::colon))
1517     nextToken();
1518   addUnwrappedLine();
1519 }
1520 
// Parses an enum, starting at the 'enum' keyword or, for NS_ENUM-style
// macros, at whatever token stands in its place. Everything up to the body's
// '{' is skipped; the body is then parsed by parseJavaEnumBody() for Java, as
// a block for Proto, and as a braced list otherwise. Returns early, without
// parsing a body, when no '{' is found or (in C++) when two identifiers in a
// row are seen.
void UnwrappedLineParser::parseEnum() {
  // Won't be 'enum' for NS_ENUMs.
  if (FormatTok->Tok.is(tok::kw_enum))
    nextToken();

  // Eat up enum class ...
  if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
    nextToken();

  // Skip tokens that carry identifier info as well as the punctuation listed
  // below.
  while (FormatTok->Tok.getIdentifierInfo() ||
         FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
                            tok::greater, tok::comma, tok::question)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->is(tok::l_paren))
      parseParens();
    if (FormatTok->is(tok::identifier)) {
      nextToken();
      // If there are two identifiers in a row, this is likely an elaborate
      // return type. In Java, this can be "implements", etc.
      if (Style.Language == FormatStyle::LK_Cpp &&
          FormatTok->is(tok::identifier))
        return;
    }
  }

  // Just a declaration or something is wrong.
  if (FormatTok->isNot(tok::l_brace))
    return;
  // Mark the brace as opening a block before dispatching on the language.
  FormatTok->BlockKind = BK_Block;

  if (Style.Language == FormatStyle::LK_Java) {
    // Java enums are different.
    parseJavaEnumBody();
    return;
  } else if (Style.Language == FormatStyle::LK_Proto) {
    parseBlock(/*MustBeDeclaration=*/true);
    return;
  }

  // Parse enum body.
  bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
  if (HasError) {
    // The braced list did not parse cleanly: consume a pending ';' and
    // finish the line here.
    if (FormatTok->is(tok::semi))
      nextToken();
    addUnwrappedLine();
  }

  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "enum A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
1573 
1574 void UnwrappedLineParser::parseJavaEnumBody() {
1575   // Determine whether the enum is simple, i.e. does not have a semicolon or
1576   // constants with class bodies. Simple enums can be formatted like braced
1577   // lists, contracted to a single line, etc.
1578   unsigned StoredPosition = Tokens->getPosition();
1579   bool IsSimple = true;
1580   FormatToken *Tok = Tokens->getNextToken();
1581   while (Tok) {
1582     if (Tok->is(tok::r_brace))
1583       break;
1584     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1585       IsSimple = false;
1586       break;
1587     }
1588     // FIXME: This will also mark enums with braces in the arguments to enum
1589     // constants as "not simple". This is probably fine in practice, though.
1590     Tok = Tokens->getNextToken();
1591   }
1592   FormatTok = Tokens->setPosition(StoredPosition);
1593 
1594   if (IsSimple) {
1595     parseBracedList();
1596     addUnwrappedLine();
1597     return;
1598   }
1599 
1600   // Parse the body of a more complex enum.
1601   // First add a line for everything up to the "{".
1602   nextToken();
1603   addUnwrappedLine();
1604   ++Line->Level;
1605 
1606   // Parse the enum constants.
1607   while (FormatTok) {
1608     if (FormatTok->is(tok::l_brace)) {
1609       // Parse the constant's class body.
1610       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1611                  /*MunchSemi=*/false);
1612     } else if (FormatTok->is(tok::l_paren)) {
1613       parseParens();
1614     } else if (FormatTok->is(tok::comma)) {
1615       nextToken();
1616       addUnwrappedLine();
1617     } else if (FormatTok->is(tok::semi)) {
1618       nextToken();
1619       addUnwrappedLine();
1620       break;
1621     } else if (FormatTok->is(tok::r_brace)) {
1622       addUnwrappedLine();
1623       break;
1624     } else {
1625       nextToken();
1626     }
1627   }
1628 
1629   // Parse the class body after the enum's ";" if any.
1630   parseLevel(/*HasOpeningBrace=*/true);
1631   nextToken();
1632   --Line->Level;
1633   addUnwrappedLine();
1634 }
1635 
// Parses a record starting at its introducing keyword (the current token,
// which is consumed first): the possibly qualified or token-pasted name with
// interleaved attributes and macros, an optional ':' / '<' section, and the
// body block if a '{' follows. Returns early when a ';' is found while
// skipping the ':' / '<' section.
void UnwrappedLineParser::parseRecord() {
  const FormatToken &InitialToken = *FormatTok;
  nextToken();

  // The actual identifier can be a nested name specifier, and in macros
  // it is often token-pasted.
  while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
                            tok::kw___attribute, tok::kw___declspec,
                            tok::kw_alignas) ||
         ((Style.Language == FormatStyle::LK_Java ||
           Style.Language == FormatStyle::LK_JavaScript) &&
          FormatTok->isOneOf(tok::period, tok::comma))) {
    // An identifier that is not spelled entirely in upper case is not
    // treated as a macro, so parens after it are left alone.
    bool IsNonMacroIdentifier =
        FormatTok->is(tok::identifier) &&
        FormatTok->TokenText != FormatTok->TokenText.upper();
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
      parseParens();
  }

  // Note that parsing away template declarations here leads to incorrectly
  // accepting function declarations as record declarations.
  // In general, we cannot solve this problem. Consider:
  // class A<int> B() {}
  // which can be a function definition or a class definition when B() is a
  // macro. If we find enough real-world cases where this is a problem, we
  // can parse for the 'template' keyword in the beginning of the statement,
  // and thus rule out the record production in case there is no template
  // (this would still leave us with an ambiguity between template function
  // and class declarations).
  if (FormatTok->isOneOf(tok::colon, tok::less)) {
    while (!eof()) {
      if (FormatTok->is(tok::l_brace)) {
        // Braces that parse as braced lists are skipped; the first brace
        // that does not ends the scan and is handled as the body below.
        calculateBraceTypes(/*ExpectClassBody=*/true);
        if (!tryToParseBracedList())
          break;
      }
      if (FormatTok->Tok.is(tok::semi))
        return;
      nextToken();
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (ShouldBreakBeforeBrace(Style, InitialToken))
      addUnwrappedLine();

    parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
               /*MunchSemi=*/false);
  }
  // There is no addUnwrappedLine() here so that we fall through to parsing a
  // structural element afterwards. Thus, in "class A {} n, m;",
  // "} n, m;" will end up in one unwrapped line.
}
1690 
1691 void UnwrappedLineParser::parseObjCProtocolList() {
1692   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1693   do
1694     nextToken();
1695   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1696   nextToken(); // Skip '>'.
1697 }
1698 
1699 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1700   do {
1701     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1702       nextToken();
1703       addUnwrappedLine();
1704       break;
1705     }
1706     if (FormatTok->is(tok::l_brace)) {
1707       parseBlock(/*MustBeDeclaration=*/false);
1708       // In ObjC interfaces, nothing should be following the "}".
1709       addUnwrappedLine();
1710     } else if (FormatTok->is(tok::r_brace)) {
1711       // Ignore stray "}". parseStructuralElement doesn't consume them.
1712       nextToken();
1713       addUnwrappedLine();
1714     } else {
1715       parseStructuralElement();
1716     }
1717   } while (!eof());
1718 }
1719 
1720 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1721   nextToken();
1722   nextToken(); // interface name
1723 
1724   // @interface can be followed by either a base class, or a category.
1725   if (FormatTok->Tok.is(tok::colon)) {
1726     nextToken();
1727     nextToken(); // base class name
1728   } else if (FormatTok->Tok.is(tok::l_paren))
1729     // Skip category, if present.
1730     parseParens();
1731 
1732   if (FormatTok->Tok.is(tok::less))
1733     parseObjCProtocolList();
1734 
1735   if (FormatTok->Tok.is(tok::l_brace)) {
1736     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman ||
1737         Style.BreakBeforeBraces == FormatStyle::BS_GNU)
1738       addUnwrappedLine();
1739     parseBlock(/*MustBeDeclaration=*/true);
1740   }
1741 
1742   // With instance variables, this puts '}' on its own line.  Without instance
1743   // variables, this ends the @interface line.
1744   addUnwrappedLine();
1745 
1746   parseObjCUntilAtEnd();
1747 }
1748 
1749 void UnwrappedLineParser::parseObjCProtocol() {
1750   nextToken();
1751   nextToken(); // protocol name
1752 
1753   if (FormatTok->Tok.is(tok::less))
1754     parseObjCProtocolList();
1755 
1756   // Check for protocol declaration.
1757   if (FormatTok->Tok.is(tok::semi)) {
1758     nextToken();
1759     return addUnwrappedLine();
1760   }
1761 
1762   addUnwrappedLine();
1763   parseObjCUntilAtEnd();
1764 }
1765 
1766 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1767   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1768   nextToken();
1769 
1770   // Consume the "default" in "export default class/function".
1771   if (FormatTok->is(tok::kw_default))
1772     nextToken();
1773 
1774   // Consume "function" and "default function", so that these get parsed as
1775   // free-standing JS functions, i.e. do not require a trailing semicolon.
1776   if (FormatTok->is(Keywords.kw_function)) {
1777     nextToken();
1778     return;
1779   }
1780 
1781   if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
1782                          Keywords.kw_var))
1783     return; // Fall through to parsing the corresponding structure.
1784 
1785   if (FormatTok->is(tok::l_brace)) {
1786     FormatTok->BlockKind = BK_Block;
1787     parseBracedList();
1788   }
1789 
1790   while (!eof() && FormatTok->isNot(tok::semi) &&
1791          FormatTok->isNot(tok::l_brace)) {
1792     nextToken();
1793   }
1794 }
1795 
1796 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1797                                                  StringRef Prefix = "") {
1798   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1799                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1800   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1801                                                     E = Line.Tokens.end();
1802        I != E; ++I) {
1803     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1804   }
1805   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1806                                                     E = Line.Tokens.end();
1807        I != E; ++I) {
1808     const UnwrappedLineNode &Node = *I;
1809     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1810              I = Node.Children.begin(),
1811              E = Node.Children.end();
1812          I != E; ++I) {
1813       printDebugInfo(*I, "\nChild: ");
1814     }
1815   }
1816   llvm::dbgs() << "\n";
1817 }
1818 
1819 void UnwrappedLineParser::addUnwrappedLine() {
1820   if (Line->Tokens.empty())
1821     return;
1822   DEBUG({
1823     if (CurrentLines == &Lines)
1824       printDebugInfo(*Line);
1825   });
1826   CurrentLines->push_back(std::move(*Line));
1827   Line->Tokens.clear();
1828   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1829     CurrentLines->append(
1830         std::make_move_iterator(PreprocessorDirectives.begin()),
1831         std::make_move_iterator(PreprocessorDirectives.end()));
1832     PreprocessorDirectives.clear();
1833   }
1834 }
1835 
1836 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1837 
1838 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1839   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1840          FormatTok.NewlinesBefore > 0;
1841 }
1842 
1843 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1844   bool JustComments = Line->Tokens.empty();
1845   for (SmallVectorImpl<FormatToken *>::const_iterator
1846            I = CommentsBeforeNextToken.begin(),
1847            E = CommentsBeforeNextToken.end();
1848        I != E; ++I) {
1849     if (isOnNewLine(**I) && JustComments)
1850       addUnwrappedLine();
1851     pushToken(*I);
1852   }
1853   if (NewlineBeforeNext && JustComments)
1854     addUnwrappedLine();
1855   CommentsBeforeNextToken.clear();
1856 }
1857 
1858 void UnwrappedLineParser::nextToken() {
1859   if (eof())
1860     return;
1861   flushComments(isOnNewLine(*FormatTok));
1862   pushToken(FormatTok);
1863   readToken();
1864 }
1865 
1866 void UnwrappedLineParser::readToken() {
1867   bool CommentsInCurrentLine = true;
1868   do {
1869     FormatTok = Tokens->getNextToken();
1870     assert(FormatTok);
1871     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1872            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1873       // If there is an unfinished unwrapped line, we flush the preprocessor
1874       // directives only after that unwrapped line was finished later.
1875       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
1876       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1877       // Comments stored before the preprocessor directive need to be output
1878       // before the preprocessor directive, at the same level as the
1879       // preprocessor directive, as we consider them to apply to the directive.
1880       flushComments(isOnNewLine(*FormatTok));
1881       parsePPDirective();
1882     }
1883     while (FormatTok->Type == TT_ConflictStart ||
1884            FormatTok->Type == TT_ConflictEnd ||
1885            FormatTok->Type == TT_ConflictAlternative) {
1886       if (FormatTok->Type == TT_ConflictStart) {
1887         conditionalCompilationStart(/*Unreachable=*/false);
1888       } else if (FormatTok->Type == TT_ConflictAlternative) {
1889         conditionalCompilationAlternative();
1890       } else if (FormatTok->Type == TT_ConflictEnd) {
1891         conditionalCompilationEnd();
1892       }
1893       FormatTok = Tokens->getNextToken();
1894       FormatTok->MustBreakBefore = true;
1895     }
1896 
1897     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1898         !Line->InPPDirective) {
1899       continue;
1900     }
1901 
1902     if (!FormatTok->Tok.is(tok::comment))
1903       return;
1904     if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
1905       CommentsInCurrentLine = false;
1906     }
1907     if (CommentsInCurrentLine) {
1908       pushToken(FormatTok);
1909     } else {
1910       CommentsBeforeNextToken.push_back(FormatTok);
1911     }
1912   } while (!eof());
1913 }
1914 
1915 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1916   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1917   if (MustBreakBeforeNextToken) {
1918     Line->Tokens.back().Tok->MustBreakBefore = true;
1919     MustBreakBeforeNextToken = false;
1920   }
1921 }
1922 
1923 } // end namespace format
1924 } // end namespace clang
1925