1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         Token(nullptr) {
65     TokenSource = this;
66     Line.Level = 0;
67     Line.InPPDirective = true;
68   }
69 
70   ~ScopedMacroState() override {
71     TokenSource = PreviousTokenSource;
72     ResetToken = Token;
73     Line.InPPDirective = false;
74     Line.Level = PreviousLineLevel;
75   }
76 
77   FormatToken *getNextToken() override {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89   FormatToken *setPosition(unsigned Position) override {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113 
114   FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
119 class ScopedLineState {
120 public:
121   ScopedLineState(UnwrappedLineParser &Parser,
122                   bool SwitchToPreprocessorLines = false)
123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124     if (SwitchToPreprocessorLines)
125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
126     else if (!Parser.Line->Tokens.empty())
127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128     PreBlockLine = std::move(Parser.Line);
129     Parser.Line = llvm::make_unique<UnwrappedLine>();
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line = std::move(PreBlockLine);
140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141       Parser.MustBreakBeforeNextToken = true;
142     Parser.CurrentLines = OriginalLines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147 
148   std::unique_ptr<UnwrappedLine> PreBlockLine;
149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
152 class CompoundStatementIndenter {
153 public:
154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
155                             const FormatStyle &Style, unsigned &LineLevel)
156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
157     if (Style.BraceWrapping.AfterControlStatement)
158       Parser->addUnwrappedLine();
159     if (Style.BraceWrapping.IndentBraces)
160       ++LineLevel;
161   }
162   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
163 
164 private:
165   unsigned &LineLevel;
166   unsigned OldLineLevel;
167 };
168 
169 namespace {
170 
171 class IndexedTokenSource : public FormatTokenSource {
172 public:
173   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
174       : Tokens(Tokens), Position(-1) {}
175 
176   FormatToken *getNextToken() override {
177     ++Position;
178     return Tokens[Position];
179   }
180 
181   unsigned getPosition() override {
182     assert(Position >= 0);
183     return Position;
184   }
185 
186   FormatToken *setPosition(unsigned P) override {
187     Position = P;
188     return Tokens[Position];
189   }
190 
191   void reset() { Position = -1; }
192 
193 private:
194   ArrayRef<FormatToken *> Tokens;
195   int Position;
196 };
197 
198 } // end anonymous namespace
199 
200 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
201                                          const AdditionalKeywords &Keywords,
202                                          ArrayRef<FormatToken *> Tokens,
203                                          UnwrappedLineConsumer &Callback)
204     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
205       CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
206       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
207 
208 void UnwrappedLineParser::reset() {
209   PPBranchLevel = -1;
210   Line.reset(new UnwrappedLine);
211   CommentsBeforeNextToken.clear();
212   FormatTok = nullptr;
213   MustBreakBeforeNextToken = false;
214   PreprocessorDirectives.clear();
215   CurrentLines = &Lines;
216   DeclarationScopeStack.clear();
217   PPStack.clear();
218 }
219 
220 void UnwrappedLineParser::parse() {
221   IndexedTokenSource TokenSource(AllTokens);
222   do {
223     DEBUG(llvm::dbgs() << "----\n");
224     reset();
225     Tokens = &TokenSource;
226     TokenSource.reset();
227 
228     readToken();
229     parseFile();
230     // Create line with eof token.
231     pushToken(FormatTok);
232     addUnwrappedLine();
233 
234     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
235                                                   E = Lines.end();
236          I != E; ++I) {
237       Callback.consumeUnwrappedLine(*I);
238     }
239     Callback.finishRun();
240     Lines.clear();
241     while (!PPLevelBranchIndex.empty() &&
242            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
243       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
244       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
245     }
246     if (!PPLevelBranchIndex.empty()) {
247       ++PPLevelBranchIndex.back();
248       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
249       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
250     }
251   } while (!PPLevelBranchIndex.empty());
252 }
253 
254 void UnwrappedLineParser::parseFile() {
255   // The top-level context in a file always has declarations, except for pre-
256   // processor directives and JavaScript files.
257   bool MustBeDeclaration =
258       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
259   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
260                                           MustBeDeclaration);
261   parseLevel(/*HasOpeningBrace=*/false);
262   // Make sure to format the remaining tokens.
263   flushComments(true);
264   addUnwrappedLine();
265 }
266 
267 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
268   bool SwitchLabelEncountered = false;
269   do {
270     tok::TokenKind kind = FormatTok->Tok.getKind();
271     if (FormatTok->Type == TT_MacroBlockBegin) {
272       kind = tok::l_brace;
273     } else if (FormatTok->Type == TT_MacroBlockEnd) {
274       kind = tok::r_brace;
275     }
276 
277     switch (kind) {
278     case tok::comment:
279       nextToken();
280       addUnwrappedLine();
281       break;
282     case tok::l_brace:
283       // FIXME: Add parameter whether this can happen - if this happens, we must
284       // be in a non-declaration context.
285       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
286         continue;
287       parseBlock(/*MustBeDeclaration=*/false);
288       addUnwrappedLine();
289       break;
290     case tok::r_brace:
291       if (HasOpeningBrace)
292         return;
293       nextToken();
294       addUnwrappedLine();
295       break;
296     case tok::kw_default:
297     case tok::kw_case:
298       if (!SwitchLabelEncountered &&
299           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
300         ++Line->Level;
301       SwitchLabelEncountered = true;
302       parseStructuralElement();
303       break;
304     default:
305       parseStructuralElement();
306       break;
307     }
308   } while (!eof());
309 }
310 
311 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
312   // We'll parse forward through the tokens until we hit
313   // a closing brace or eof - note that getNextToken() will
314   // parse macros, so this will magically work inside macro
315   // definitions, too.
316   unsigned StoredPosition = Tokens->getPosition();
317   FormatToken *Tok = FormatTok;
318   // Keep a stack of positions of lbrace tokens. We will
319   // update information about whether an lbrace starts a
320   // braced init list or a different block during the loop.
321   SmallVector<FormatToken *, 8> LBraceStack;
322   assert(Tok->Tok.is(tok::l_brace));
323   do {
324     // Get next none-comment token.
325     FormatToken *NextTok;
326     unsigned ReadTokens = 0;
327     do {
328       NextTok = Tokens->getNextToken();
329       ++ReadTokens;
330     } while (NextTok->is(tok::comment));
331 
332     switch (Tok->Tok.getKind()) {
333     case tok::l_brace:
334       Tok->BlockKind = BK_Unknown;
335       LBraceStack.push_back(Tok);
336       break;
337     case tok::r_brace:
338       if (!LBraceStack.empty()) {
339         if (LBraceStack.back()->BlockKind == BK_Unknown) {
340           bool ProbablyBracedList = false;
341           if (Style.Language == FormatStyle::LK_Proto) {
342             ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
343           } else {
344             // Using OriginalColumn to distinguish between ObjC methods and
345             // binary operators is a bit hacky.
346             bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
347                                     NextTok->OriginalColumn == 0;
348 
349             // If there is a comma, semicolon or right paren after the closing
350             // brace, we assume this is a braced initializer list.  Note that
351             // regardless how we mark inner braces here, we will overwrite the
352             // BlockKind later if we parse a braced list (where all blocks
353             // inside are by default braced lists), or when we explicitly detect
354             // blocks (for example while parsing lambdas).
355             //
356             // We exclude + and - as they can be ObjC visibility modifiers.
357             ProbablyBracedList =
358                 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
359                                  tok::r_paren, tok::r_square, tok::l_brace,
360                                  tok::l_paren, tok::ellipsis) ||
361                 (NextTok->is(tok::semi) &&
362                  (!ExpectClassBody || LBraceStack.size() != 1)) ||
363                 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
364           }
365           if (ProbablyBracedList) {
366             Tok->BlockKind = BK_BracedInit;
367             LBraceStack.back()->BlockKind = BK_BracedInit;
368           } else {
369             Tok->BlockKind = BK_Block;
370             LBraceStack.back()->BlockKind = BK_Block;
371           }
372         }
373         LBraceStack.pop_back();
374       }
375       break;
376     case tok::at:
377     case tok::semi:
378     case tok::kw_if:
379     case tok::kw_while:
380     case tok::kw_for:
381     case tok::kw_switch:
382     case tok::kw_try:
383     case tok::kw___try:
384       if (!LBraceStack.empty())
385         LBraceStack.back()->BlockKind = BK_Block;
386       break;
387     default:
388       break;
389     }
390     Tok = NextTok;
391   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
392   // Assume other blocks for all unclosed opening braces.
393   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
394     if (LBraceStack[i]->BlockKind == BK_Unknown)
395       LBraceStack[i]->BlockKind = BK_Block;
396   }
397 
398   FormatTok = Tokens->setPosition(StoredPosition);
399 }
400 
401 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
402                                      bool MunchSemi) {
403   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
404          "'{' or macro block token expected");
405   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
406 
407   unsigned InitialLevel = Line->Level;
408   nextToken();
409 
410   if (MacroBlock && FormatTok->is(tok::l_paren))
411     parseParens();
412 
413   addUnwrappedLine();
414 
415   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
416                                           MustBeDeclaration);
417   if (AddLevel)
418     ++Line->Level;
419   parseLevel(/*HasOpeningBrace=*/true);
420 
421   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
422                  : !FormatTok->is(tok::r_brace)) {
423     Line->Level = InitialLevel;
424     return;
425   }
426 
427   nextToken(); // Munch the closing brace.
428 
429   if (MacroBlock && FormatTok->is(tok::l_paren))
430     parseParens();
431 
432   if (MunchSemi && FormatTok->Tok.is(tok::semi))
433     nextToken();
434   Line->Level = InitialLevel;
435 }
436 
437 static bool isGoogScope(const UnwrappedLine &Line) {
438   // FIXME: Closure-library specific stuff should not be hard-coded but be
439   // configurable.
440   if (Line.Tokens.size() < 4)
441     return false;
442   auto I = Line.Tokens.begin();
443   if (I->Tok->TokenText != "goog")
444     return false;
445   ++I;
446   if (I->Tok->isNot(tok::period))
447     return false;
448   ++I;
449   if (I->Tok->TokenText != "scope")
450     return false;
451   ++I;
452   return I->Tok->is(tok::l_paren);
453 }
454 
455 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
456                                    const FormatToken &InitialToken) {
457   if (InitialToken.is(tok::kw_namespace))
458     return Style.BraceWrapping.AfterNamespace;
459   if (InitialToken.is(tok::kw_class))
460     return Style.BraceWrapping.AfterClass;
461   if (InitialToken.is(tok::kw_union))
462     return Style.BraceWrapping.AfterUnion;
463   if (InitialToken.is(tok::kw_struct))
464     return Style.BraceWrapping.AfterStruct;
465   return false;
466 }
467 
468 void UnwrappedLineParser::parseChildBlock() {
469   FormatTok->BlockKind = BK_Block;
470   nextToken();
471   {
472     bool GoogScope =
473         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
474     ScopedLineState LineState(*this);
475     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
476                                             /*MustBeDeclaration=*/false);
477     Line->Level += GoogScope ? 0 : 1;
478     parseLevel(/*HasOpeningBrace=*/true);
479     flushComments(isOnNewLine(*FormatTok));
480     Line->Level -= GoogScope ? 0 : 1;
481   }
482   nextToken();
483 }
484 
485 void UnwrappedLineParser::parsePPDirective() {
486   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
487   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
488   nextToken();
489 
490   if (!FormatTok->Tok.getIdentifierInfo()) {
491     parsePPUnknown();
492     return;
493   }
494 
495   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
496   case tok::pp_define:
497     parsePPDefine();
498     return;
499   case tok::pp_if:
500     parsePPIf(/*IfDef=*/false);
501     break;
502   case tok::pp_ifdef:
503   case tok::pp_ifndef:
504     parsePPIf(/*IfDef=*/true);
505     break;
506   case tok::pp_else:
507     parsePPElse();
508     break;
509   case tok::pp_elif:
510     parsePPElIf();
511     break;
512   case tok::pp_endif:
513     parsePPEndIf();
514     break;
515   default:
516     parsePPUnknown();
517     break;
518   }
519 }
520 
521 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
522   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
523     PPStack.push_back(PP_Unreachable);
524   else
525     PPStack.push_back(PP_Conditional);
526 }
527 
528 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
529   ++PPBranchLevel;
530   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
531   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
532     PPLevelBranchIndex.push_back(0);
533     PPLevelBranchCount.push_back(0);
534   }
535   PPChainBranchIndex.push(0);
536   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
537   conditionalCompilationCondition(Unreachable || Skip);
538 }
539 
540 void UnwrappedLineParser::conditionalCompilationAlternative() {
541   if (!PPStack.empty())
542     PPStack.pop_back();
543   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
544   if (!PPChainBranchIndex.empty())
545     ++PPChainBranchIndex.top();
546   conditionalCompilationCondition(
547       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
548       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
549 }
550 
551 void UnwrappedLineParser::conditionalCompilationEnd() {
552   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
553   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
554     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
555       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
556     }
557   }
558   // Guard against #endif's without #if.
559   if (PPBranchLevel > 0)
560     --PPBranchLevel;
561   if (!PPChainBranchIndex.empty())
562     PPChainBranchIndex.pop();
563   if (!PPStack.empty())
564     PPStack.pop_back();
565 }
566 
567 void UnwrappedLineParser::parsePPIf(bool IfDef) {
568   nextToken();
569   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
570                          FormatTok->Tok.getLiteralData() != nullptr &&
571                          StringRef(FormatTok->Tok.getLiteralData(),
572                                    FormatTok->Tok.getLength()) == "0") ||
573                         FormatTok->Tok.is(tok::kw_false);
574   conditionalCompilationStart(!IfDef && IsLiteralFalse);
575   parsePPUnknown();
576 }
577 
578 void UnwrappedLineParser::parsePPElse() {
579   conditionalCompilationAlternative();
580   parsePPUnknown();
581 }
582 
583 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
584 
585 void UnwrappedLineParser::parsePPEndIf() {
586   conditionalCompilationEnd();
587   parsePPUnknown();
588 }
589 
590 void UnwrappedLineParser::parsePPDefine() {
591   nextToken();
592 
593   if (FormatTok->Tok.getKind() != tok::identifier) {
594     parsePPUnknown();
595     return;
596   }
597   nextToken();
598   if (FormatTok->Tok.getKind() == tok::l_paren &&
599       FormatTok->WhitespaceRange.getBegin() ==
600           FormatTok->WhitespaceRange.getEnd()) {
601     parseParens();
602   }
603   addUnwrappedLine();
604   Line->Level = 1;
605 
606   // Errors during a preprocessor directive can only affect the layout of the
607   // preprocessor directive, and thus we ignore them. An alternative approach
608   // would be to use the same approach we use on the file level (no
609   // re-indentation if there was a structural error) within the macro
610   // definition.
611   parseFile();
612 }
613 
614 void UnwrappedLineParser::parsePPUnknown() {
615   do {
616     nextToken();
617   } while (!eof());
618   addUnwrappedLine();
619 }
620 
621 // Here we blacklist certain tokens that are not usually the first token in an
622 // unwrapped line. This is used in attempt to distinguish macro calls without
623 // trailing semicolons from other constructs split to several lines.
624 static bool tokenCanStartNewLine(const clang::Token &Tok) {
625   // Semicolon can be a null-statement, l_square can be a start of a macro or
626   // a C++11 attribute, but this doesn't seem to be common.
627   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
628          Tok.isNot(tok::l_square) &&
629          // Tokens that can only be used as binary operators and a part of
630          // overloaded operator names.
631          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
632          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
633          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
634          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
635          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
636          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
637          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
638          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
639          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
640          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
641          Tok.isNot(tok::lesslessequal) &&
642          // Colon is used in labels, base class lists, initializer lists,
643          // range-based for loops, ternary operator, but should never be the
644          // first token in an unwrapped line.
645          Tok.isNot(tok::colon) &&
646          // 'noexcept' is a trailing annotation.
647          Tok.isNot(tok::kw_noexcept);
648 }
649 
650 void UnwrappedLineParser::parseStructuralElement() {
651   assert(!FormatTok->Tok.is(tok::l_brace));
652   switch (FormatTok->Tok.getKind()) {
653   case tok::at:
654     nextToken();
655     if (FormatTok->Tok.is(tok::l_brace)) {
656       parseBracedList();
657       break;
658     }
659     switch (FormatTok->Tok.getObjCKeywordID()) {
660     case tok::objc_public:
661     case tok::objc_protected:
662     case tok::objc_package:
663     case tok::objc_private:
664       return parseAccessSpecifier();
665     case tok::objc_interface:
666     case tok::objc_implementation:
667       return parseObjCInterfaceOrImplementation();
668     case tok::objc_protocol:
669       return parseObjCProtocol();
670     case tok::objc_end:
671       return; // Handled by the caller.
672     case tok::objc_optional:
673     case tok::objc_required:
674       nextToken();
675       addUnwrappedLine();
676       return;
677     case tok::objc_autoreleasepool:
678       nextToken();
679       if (FormatTok->Tok.is(tok::l_brace)) {
680         if (Style.BraceWrapping.AfterObjCDeclaration)
681           addUnwrappedLine();
682         parseBlock(/*MustBeDeclaration=*/false);
683       }
684       addUnwrappedLine();
685       return;
686     case tok::objc_try:
687       // This branch isn't strictly necessary (the kw_try case below would
688       // do this too after the tok::at is parsed above).  But be explicit.
689       parseTryCatch();
690       return;
691     default:
692       break;
693     }
694     break;
695   case tok::kw_asm:
696     nextToken();
697     if (FormatTok->is(tok::l_brace)) {
698       FormatTok->Type = TT_InlineASMBrace;
699       nextToken();
700       while (FormatTok && FormatTok->isNot(tok::eof)) {
701         if (FormatTok->is(tok::r_brace)) {
702           FormatTok->Type = TT_InlineASMBrace;
703           nextToken();
704           addUnwrappedLine();
705           break;
706         }
707         FormatTok->Finalized = true;
708         nextToken();
709       }
710     }
711     break;
712   case tok::kw_namespace:
713     parseNamespace();
714     return;
715   case tok::kw_inline:
716     nextToken();
717     if (FormatTok->Tok.is(tok::kw_namespace)) {
718       parseNamespace();
719       return;
720     }
721     break;
722   case tok::kw_public:
723   case tok::kw_protected:
724   case tok::kw_private:
725     if (Style.Language == FormatStyle::LK_Java ||
726         Style.Language == FormatStyle::LK_JavaScript)
727       nextToken();
728     else
729       parseAccessSpecifier();
730     return;
731   case tok::kw_if:
732     parseIfThenElse();
733     return;
734   case tok::kw_for:
735   case tok::kw_while:
736     parseForOrWhileLoop();
737     return;
738   case tok::kw_do:
739     parseDoWhile();
740     return;
741   case tok::kw_switch:
742     parseSwitch();
743     return;
744   case tok::kw_default:
745     nextToken();
746     parseLabel();
747     return;
748   case tok::kw_case:
749     parseCaseLabel();
750     return;
751   case tok::kw_try:
752   case tok::kw___try:
753     parseTryCatch();
754     return;
755   case tok::kw_extern:
756     nextToken();
757     if (FormatTok->Tok.is(tok::string_literal)) {
758       nextToken();
759       if (FormatTok->Tok.is(tok::l_brace)) {
760         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
761         addUnwrappedLine();
762         return;
763       }
764     }
765     break;
766   case tok::kw_export:
767     if (Style.Language == FormatStyle::LK_JavaScript) {
768       parseJavaScriptEs6ImportExport();
769       return;
770     }
771     break;
772   case tok::identifier:
773     if (FormatTok->is(TT_ForEachMacro)) {
774       parseForOrWhileLoop();
775       return;
776     }
777     if (FormatTok->is(TT_MacroBlockBegin)) {
778       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
779                  /*MunchSemi=*/false);
780       return;
781     }
782     if (Style.Language == FormatStyle::LK_JavaScript &&
783         FormatTok->is(Keywords.kw_import)) {
784       parseJavaScriptEs6ImportExport();
785       return;
786     }
787     if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
788                            Keywords.kw_slots, Keywords.kw_qslots)) {
789       nextToken();
790       if (FormatTok->is(tok::colon)) {
791         nextToken();
792         addUnwrappedLine();
793       }
794       return;
795     }
796     // In all other cases, parse the declaration.
797     break;
798   default:
799     break;
800   }
801   do {
802     switch (FormatTok->Tok.getKind()) {
803     case tok::at:
804       nextToken();
805       if (FormatTok->Tok.is(tok::l_brace))
806         parseBracedList();
807       break;
808     case tok::kw_enum:
809       // parseEnum falls through and does not yet add an unwrapped line as an
810       // enum definition can start a structural element.
811       parseEnum();
812       // This only applies for C++.
813       if (Style.Language != FormatStyle::LK_Cpp) {
814         addUnwrappedLine();
815         return;
816       }
817       break;
818     case tok::kw_typedef:
819       nextToken();
820       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
821                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
822         parseEnum();
823       break;
824     case tok::kw_struct:
825     case tok::kw_union:
826     case tok::kw_class:
827       // parseRecord falls through and does not yet add an unwrapped line as a
828       // record declaration or definition can start a structural element.
829       parseRecord();
830       // This does not apply for Java and JavaScript.
831       if (Style.Language == FormatStyle::LK_Java ||
832           Style.Language == FormatStyle::LK_JavaScript) {
833         addUnwrappedLine();
834         return;
835       }
836       break;
837     case tok::period:
838       nextToken();
839       // In Java, classes have an implicit static member "class".
840       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
841           FormatTok->is(tok::kw_class))
842         nextToken();
843       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
844           FormatTok->Tok.getIdentifierInfo())
845         // JavaScript only has pseudo keywords, all keywords are allowed to
846         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
847         nextToken();
848       break;
849     case tok::semi:
850       nextToken();
851       addUnwrappedLine();
852       return;
853     case tok::r_brace:
854       addUnwrappedLine();
855       return;
856     case tok::l_paren:
857       parseParens();
858       break;
859     case tok::kw_operator:
860       nextToken();
861       if (FormatTok->isBinaryOperator())
862         nextToken();
863       break;
864     case tok::caret:
865       nextToken();
866       if (FormatTok->Tok.isAnyIdentifier() ||
867           FormatTok->isSimpleTypeSpecifier())
868         nextToken();
869       if (FormatTok->is(tok::l_paren))
870         parseParens();
871       if (FormatTok->is(tok::l_brace))
872         parseChildBlock();
873       break;
874     case tok::l_brace:
875       if (!tryToParseBracedList()) {
876         // A block outside of parentheses must be the last part of a
877         // structural element.
878         // FIXME: Figure out cases where this is not true, and add projections
879         // for them (the one we know is missing are lambdas).
880         if (Style.BraceWrapping.AfterFunction)
881           addUnwrappedLine();
882         FormatTok->Type = TT_FunctionLBrace;
883         parseBlock(/*MustBeDeclaration=*/false);
884         addUnwrappedLine();
885         return;
886       }
887       // Otherwise this was a braced init list, and the structural
888       // element continues.
889       break;
890     case tok::kw_try:
891       // We arrive here when parsing function-try blocks.
892       parseTryCatch();
893       return;
894     case tok::identifier: {
895       if (FormatTok->is(TT_MacroBlockEnd)) {
896         addUnwrappedLine();
897         return;
898       }
899 
900       // Parse function literal unless 'function' is the first token in a line
901       // in which case this should be treated as a free-standing function.
902       if (Style.Language == FormatStyle::LK_JavaScript &&
903           FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
904         tryToParseJSFunction();
905         break;
906       }
907       if ((Style.Language == FormatStyle::LK_JavaScript ||
908            Style.Language == FormatStyle::LK_Java) &&
909           FormatTok->is(Keywords.kw_interface)) {
910         parseRecord();
911         addUnwrappedLine();
912         return;
913       }
914 
915       StringRef Text = FormatTok->TokenText;
916       nextToken();
917       if (Line->Tokens.size() == 1 &&
918           // JS doesn't have macros, and within classes colons indicate fields,
919           // not labels.
920           Style.Language != FormatStyle::LK_JavaScript) {
921         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
922           parseLabel();
923           return;
924         }
925         // Recognize function-like macro usages without trailing semicolon as
926         // well as free-standing macros like Q_OBJECT.
927         bool FunctionLike = FormatTok->is(tok::l_paren);
928         if (FunctionLike)
929           parseParens();
930 
931         bool FollowedByNewline =
932             CommentsBeforeNextToken.empty()
933                 ? FormatTok->NewlinesBefore > 0
934                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
935 
936         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
937             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
938           addUnwrappedLine();
939           return;
940         }
941       }
942       break;
943     }
944     case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_JsFatArrow. They always start an expression or a child block if
      // followed by a curly.
948       if (FormatTok->is(TT_JsFatArrow)) {
949         nextToken();
950         if (FormatTok->is(tok::l_brace))
951           parseChildBlock();
952         break;
953       }
954 
955       nextToken();
956       if (FormatTok->Tok.is(tok::l_brace)) {
957         parseBracedList();
958       }
959       break;
960     case tok::l_square:
961       parseSquare();
962       break;
963     case tok::kw_new:
964       parseNew();
965       break;
966     default:
967       nextToken();
968       break;
969     }
970   } while (!eof());
971 }
972 
973 bool UnwrappedLineParser::tryToParseLambda() {
974   if (Style.Language != FormatStyle::LK_Cpp) {
975     nextToken();
976     return false;
977   }
978   // FIXME: This is a dirty way to access the previous token. Find a better
979   // solution.
980   if (!Line->Tokens.empty() &&
981       (Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator,
982                                         tok::kw_new, tok::kw_delete) ||
983        Line->Tokens.back().Tok->closesScope() ||
984        Line->Tokens.back().Tok->isSimpleTypeSpecifier())) {
985     nextToken();
986     return false;
987   }
988   assert(FormatTok->is(tok::l_square));
989   FormatToken &LSquare = *FormatTok;
990   if (!tryToParseLambdaIntroducer())
991     return false;
992 
993   while (FormatTok->isNot(tok::l_brace)) {
994     if (FormatTok->isSimpleTypeSpecifier()) {
995       nextToken();
996       continue;
997     }
998     switch (FormatTok->Tok.getKind()) {
999     case tok::l_brace:
1000       break;
1001     case tok::l_paren:
1002       parseParens();
1003       break;
1004     case tok::amp:
1005     case tok::star:
1006     case tok::kw_const:
1007     case tok::comma:
1008     case tok::less:
1009     case tok::greater:
1010     case tok::identifier:
1011     case tok::numeric_constant:
1012     case tok::coloncolon:
1013     case tok::kw_mutable:
1014       nextToken();
1015       break;
1016     case tok::arrow:
1017       FormatTok->Type = TT_LambdaArrow;
1018       nextToken();
1019       break;
1020     default:
1021       return true;
1022     }
1023   }
1024   LSquare.Type = TT_LambdaLSquare;
1025   parseChildBlock();
1026   return true;
1027 }
1028 
1029 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1030   nextToken();
1031   if (FormatTok->is(tok::equal)) {
1032     nextToken();
1033     if (FormatTok->is(tok::r_square)) {
1034       nextToken();
1035       return true;
1036     }
1037     if (FormatTok->isNot(tok::comma))
1038       return false;
1039     nextToken();
1040   } else if (FormatTok->is(tok::amp)) {
1041     nextToken();
1042     if (FormatTok->is(tok::r_square)) {
1043       nextToken();
1044       return true;
1045     }
1046     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1047       return false;
1048     }
1049     if (FormatTok->is(tok::comma))
1050       nextToken();
1051   } else if (FormatTok->is(tok::r_square)) {
1052     nextToken();
1053     return true;
1054   }
1055   do {
1056     if (FormatTok->is(tok::amp))
1057       nextToken();
1058     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1059       return false;
1060     nextToken();
1061     if (FormatTok->is(tok::ellipsis))
1062       nextToken();
1063     if (FormatTok->is(tok::comma)) {
1064       nextToken();
1065     } else if (FormatTok->is(tok::r_square)) {
1066       nextToken();
1067       return true;
1068     } else {
1069       return false;
1070     }
1071   } while (!eof());
1072   return false;
1073 }
1074 
1075 void UnwrappedLineParser::tryToParseJSFunction() {
1076   nextToken();
1077 
1078   // Consume function name.
1079   if (FormatTok->is(tok::identifier))
1080     nextToken();
1081 
1082   if (FormatTok->isNot(tok::l_paren))
1083     return;
1084 
1085   // Parse formal parameter list.
1086   parseParens();
1087 
1088   if (FormatTok->is(tok::colon)) {
1089     // Parse a type definition.
1090     nextToken();
1091 
1092     // Eat the type declaration. For braced inline object types, balance braces,
1093     // otherwise just parse until finding an l_brace for the function body.
1094     if (FormatTok->is(tok::l_brace))
1095       tryToParseBracedList();
1096     else
1097       while (FormatTok->isNot(tok::l_brace) && !eof())
1098         nextToken();
1099   }
1100 
1101   parseChildBlock();
1102 }
1103 
1104 bool UnwrappedLineParser::tryToParseBracedList() {
1105   if (FormatTok->BlockKind == BK_Unknown)
1106     calculateBraceTypes();
1107   assert(FormatTok->BlockKind != BK_Unknown);
1108   if (FormatTok->BlockKind == BK_Block)
1109     return false;
1110   parseBracedList();
1111   return true;
1112 }
1113 
1114 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1115   bool HasError = false;
1116   nextToken();
1117 
1118   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1119   // replace this by using parseAssigmentExpression() inside.
1120   do {
1121     if (Style.Language == FormatStyle::LK_JavaScript) {
1122       if (FormatTok->is(Keywords.kw_function)) {
1123         tryToParseJSFunction();
1124         continue;
1125       }
1126       if (FormatTok->is(TT_JsFatArrow)) {
1127         nextToken();
1128         // Fat arrows can be followed by simple expressions or by child blocks
1129         // in curly braces.
1130         if (FormatTok->is(tok::l_brace)) {
1131           parseChildBlock();
1132           continue;
1133         }
1134       }
1135     }
1136     switch (FormatTok->Tok.getKind()) {
1137     case tok::caret:
1138       nextToken();
1139       if (FormatTok->is(tok::l_brace)) {
1140         parseChildBlock();
1141       }
1142       break;
1143     case tok::l_square:
1144       tryToParseLambda();
1145       break;
1146     case tok::l_brace:
1147       // Assume there are no blocks inside a braced init list apart
1148       // from the ones we explicitly parse out (like lambdas).
1149       FormatTok->BlockKind = BK_BracedInit;
1150       parseBracedList();
1151       break;
1152     case tok::l_paren:
1153       parseParens();
1154       // JavaScript can just have free standing methods and getters/setters in
1155       // object literals. Detect them by a "{" following ")".
1156       if (Style.Language == FormatStyle::LK_JavaScript) {
1157         if (FormatTok->is(tok::l_brace))
1158           parseChildBlock();
1159         break;
1160       }
1161       break;
1162     case tok::r_brace:
1163       nextToken();
1164       return !HasError;
1165     case tok::semi:
1166       HasError = true;
1167       if (!ContinueOnSemicolons)
1168         return !HasError;
1169       nextToken();
1170       break;
1171     case tok::comma:
1172       nextToken();
1173       break;
1174     default:
1175       nextToken();
1176       break;
1177     }
1178   } while (!eof());
1179   return false;
1180 }
1181 
1182 void UnwrappedLineParser::parseParens() {
1183   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1184   nextToken();
1185   do {
1186     switch (FormatTok->Tok.getKind()) {
1187     case tok::l_paren:
1188       parseParens();
1189       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1190         parseChildBlock();
1191       break;
1192     case tok::r_paren:
1193       nextToken();
1194       return;
1195     case tok::r_brace:
1196       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1197       return;
1198     case tok::l_square:
1199       tryToParseLambda();
1200       break;
1201     case tok::l_brace:
1202       if (!tryToParseBracedList())
1203         parseChildBlock();
1204       break;
1205     case tok::at:
1206       nextToken();
1207       if (FormatTok->Tok.is(tok::l_brace))
1208         parseBracedList();
1209       break;
1210     case tok::identifier:
1211       if (Style.Language == FormatStyle::LK_JavaScript &&
1212           FormatTok->is(Keywords.kw_function))
1213         tryToParseJSFunction();
1214       else
1215         nextToken();
1216       break;
1217     default:
1218       nextToken();
1219       break;
1220     }
1221   } while (!eof());
1222 }
1223 
1224 void UnwrappedLineParser::parseSquare() {
1225   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1226   if (tryToParseLambda())
1227     return;
1228   do {
1229     switch (FormatTok->Tok.getKind()) {
1230     case tok::l_paren:
1231       parseParens();
1232       break;
1233     case tok::r_square:
1234       nextToken();
1235       return;
1236     case tok::r_brace:
1237       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1238       return;
1239     case tok::l_square:
1240       parseSquare();
1241       break;
1242     case tok::l_brace: {
1243       if (!tryToParseBracedList())
1244         parseChildBlock();
1245       break;
1246     }
1247     case tok::at:
1248       nextToken();
1249       if (FormatTok->Tok.is(tok::l_brace))
1250         parseBracedList();
1251       break;
1252     default:
1253       nextToken();
1254       break;
1255     }
1256   } while (!eof());
1257 }
1258 
1259 void UnwrappedLineParser::parseIfThenElse() {
1260   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1261   nextToken();
1262   if (FormatTok->Tok.is(tok::l_paren))
1263     parseParens();
1264   bool NeedsUnwrappedLine = false;
1265   if (FormatTok->Tok.is(tok::l_brace)) {
1266     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1267     parseBlock(/*MustBeDeclaration=*/false);
1268     if (Style.BraceWrapping.BeforeElse)
1269       addUnwrappedLine();
1270     else
1271       NeedsUnwrappedLine = true;
1272   } else {
1273     addUnwrappedLine();
1274     ++Line->Level;
1275     parseStructuralElement();
1276     --Line->Level;
1277   }
1278   if (FormatTok->Tok.is(tok::kw_else)) {
1279     nextToken();
1280     if (FormatTok->Tok.is(tok::l_brace)) {
1281       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1282       parseBlock(/*MustBeDeclaration=*/false);
1283       addUnwrappedLine();
1284     } else if (FormatTok->Tok.is(tok::kw_if)) {
1285       parseIfThenElse();
1286     } else {
1287       addUnwrappedLine();
1288       ++Line->Level;
1289       parseStructuralElement();
1290       --Line->Level;
1291     }
1292   } else if (NeedsUnwrappedLine) {
1293     addUnwrappedLine();
1294   }
1295 }
1296 
1297 void UnwrappedLineParser::parseTryCatch() {
1298   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1299   nextToken();
1300   bool NeedsUnwrappedLine = false;
1301   if (FormatTok->is(tok::colon)) {
1302     // We are in a function try block, what comes is an initializer list.
1303     nextToken();
1304     while (FormatTok->is(tok::identifier)) {
1305       nextToken();
1306       if (FormatTok->is(tok::l_paren))
1307         parseParens();
1308       if (FormatTok->is(tok::comma))
1309         nextToken();
1310     }
1311   }
1312   // Parse try with resource.
1313   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1314     parseParens();
1315   }
1316   if (FormatTok->is(tok::l_brace)) {
1317     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1318     parseBlock(/*MustBeDeclaration=*/false);
1319     if (Style.BraceWrapping.BeforeCatch) {
1320       addUnwrappedLine();
1321     } else {
1322       NeedsUnwrappedLine = true;
1323     }
1324   } else if (!FormatTok->is(tok::kw_catch)) {
1325     // The C++ standard requires a compound-statement after a try.
1326     // If there's none, we try to assume there's a structuralElement
1327     // and try to continue.
1328     addUnwrappedLine();
1329     ++Line->Level;
1330     parseStructuralElement();
1331     --Line->Level;
1332   }
1333   while (1) {
1334     if (FormatTok->is(tok::at))
1335       nextToken();
1336     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1337                              tok::kw___finally) ||
1338           ((Style.Language == FormatStyle::LK_Java ||
1339             Style.Language == FormatStyle::LK_JavaScript) &&
1340            FormatTok->is(Keywords.kw_finally)) ||
1341           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1342            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1343       break;
1344     nextToken();
1345     while (FormatTok->isNot(tok::l_brace)) {
1346       if (FormatTok->is(tok::l_paren)) {
1347         parseParens();
1348         continue;
1349       }
1350       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1351         return;
1352       nextToken();
1353     }
1354     NeedsUnwrappedLine = false;
1355     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1356     parseBlock(/*MustBeDeclaration=*/false);
1357     if (Style.BraceWrapping.BeforeCatch)
1358       addUnwrappedLine();
1359     else
1360       NeedsUnwrappedLine = true;
1361   }
1362   if (NeedsUnwrappedLine)
1363     addUnwrappedLine();
1364 }
1365 
1366 void UnwrappedLineParser::parseNamespace() {
1367   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1368 
1369   const FormatToken &InitialToken = *FormatTok;
1370   nextToken();
1371   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1372     nextToken();
1373   if (FormatTok->Tok.is(tok::l_brace)) {
1374     if (ShouldBreakBeforeBrace(Style, InitialToken))
1375       addUnwrappedLine();
1376 
1377     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1378                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1379                      DeclarationScopeStack.size() > 1);
1380     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1381     // Munch the semicolon after a namespace. This is more common than one would
1382     // think. Puttin the semicolon into its own line is very ugly.
1383     if (FormatTok->Tok.is(tok::semi))
1384       nextToken();
1385     addUnwrappedLine();
1386   }
1387   // FIXME: Add error handling.
1388 }
1389 
1390 void UnwrappedLineParser::parseNew() {
1391   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1392   nextToken();
1393   if (Style.Language != FormatStyle::LK_Java)
1394     return;
1395 
1396   // In Java, we can parse everything up to the parens, which aren't optional.
1397   do {
1398     // There should not be a ;, { or } before the new's open paren.
1399     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1400       return;
1401 
1402     // Consume the parens.
1403     if (FormatTok->is(tok::l_paren)) {
1404       parseParens();
1405 
1406       // If there is a class body of an anonymous class, consume that as child.
1407       if (FormatTok->is(tok::l_brace))
1408         parseChildBlock();
1409       return;
1410     }
1411     nextToken();
1412   } while (!eof());
1413 }
1414 
1415 void UnwrappedLineParser::parseForOrWhileLoop() {
1416   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1417          "'for', 'while' or foreach macro expected");
1418   nextToken();
1419   if (FormatTok->Tok.is(tok::l_paren))
1420     parseParens();
1421   if (FormatTok->Tok.is(tok::l_brace)) {
1422     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1423     parseBlock(/*MustBeDeclaration=*/false);
1424     addUnwrappedLine();
1425   } else {
1426     addUnwrappedLine();
1427     ++Line->Level;
1428     parseStructuralElement();
1429     --Line->Level;
1430   }
1431 }
1432 
1433 void UnwrappedLineParser::parseDoWhile() {
1434   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1435   nextToken();
1436   if (FormatTok->Tok.is(tok::l_brace)) {
1437     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1438     parseBlock(/*MustBeDeclaration=*/false);
1439     if (Style.BraceWrapping.IndentBraces)
1440       addUnwrappedLine();
1441   } else {
1442     addUnwrappedLine();
1443     ++Line->Level;
1444     parseStructuralElement();
1445     --Line->Level;
1446   }
1447 
1448   // FIXME: Add error handling.
1449   if (!FormatTok->Tok.is(tok::kw_while)) {
1450     addUnwrappedLine();
1451     return;
1452   }
1453 
1454   nextToken();
1455   parseStructuralElement();
1456 }
1457 
1458 void UnwrappedLineParser::parseLabel() {
1459   nextToken();
1460   unsigned OldLineLevel = Line->Level;
1461   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1462     --Line->Level;
1463   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1464     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1465     parseBlock(/*MustBeDeclaration=*/false);
1466     if (FormatTok->Tok.is(tok::kw_break)) {
1467       if (Style.BraceWrapping.AfterControlStatement)
1468         addUnwrappedLine();
1469       parseStructuralElement();
1470     }
1471     addUnwrappedLine();
1472   } else {
1473     if (FormatTok->is(tok::semi))
1474       nextToken();
1475     addUnwrappedLine();
1476   }
1477   Line->Level = OldLineLevel;
1478 }
1479 
1480 void UnwrappedLineParser::parseCaseLabel() {
1481   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1482   // FIXME: fix handling of complex expressions here.
1483   do {
1484     nextToken();
1485   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1486   parseLabel();
1487 }
1488 
1489 void UnwrappedLineParser::parseSwitch() {
1490   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1491   nextToken();
1492   if (FormatTok->Tok.is(tok::l_paren))
1493     parseParens();
1494   if (FormatTok->Tok.is(tok::l_brace)) {
1495     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1496     parseBlock(/*MustBeDeclaration=*/false);
1497     addUnwrappedLine();
1498   } else {
1499     addUnwrappedLine();
1500     ++Line->Level;
1501     parseStructuralElement();
1502     --Line->Level;
1503   }
1504 }
1505 
1506 void UnwrappedLineParser::parseAccessSpecifier() {
1507   nextToken();
1508   // Understand Qt's slots.
1509   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1510     nextToken();
1511   // Otherwise, we don't know what it is, and we'd better keep the next token.
1512   if (FormatTok->Tok.is(tok::colon))
1513     nextToken();
1514   addUnwrappedLine();
1515 }
1516 
1517 void UnwrappedLineParser::parseEnum() {
1518   // Won't be 'enum' for NS_ENUMs.
1519   if (FormatTok->Tok.is(tok::kw_enum))
1520     nextToken();
1521 
1522   // Eat up enum class ...
1523   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1524     nextToken();
1525 
1526   while (FormatTok->Tok.getIdentifierInfo() ||
1527          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1528                             tok::greater, tok::comma, tok::question)) {
1529     nextToken();
1530     // We can have macros or attributes in between 'enum' and the enum name.
1531     if (FormatTok->is(tok::l_paren))
1532       parseParens();
1533     if (FormatTok->is(tok::identifier)) {
1534       nextToken();
1535       // If there are two identifiers in a row, this is likely an elaborate
1536       // return type. In Java, this can be "implements", etc.
1537       if (Style.Language == FormatStyle::LK_Cpp &&
1538           FormatTok->is(tok::identifier))
1539         return;
1540     }
1541   }
1542 
1543   // Just a declaration or something is wrong.
1544   if (FormatTok->isNot(tok::l_brace))
1545     return;
1546   FormatTok->BlockKind = BK_Block;
1547 
1548   if (Style.Language == FormatStyle::LK_Java) {
1549     // Java enums are different.
1550     parseJavaEnumBody();
1551     return;
1552   } else if (Style.Language == FormatStyle::LK_Proto) {
1553     parseBlock(/*MustBeDeclaration=*/true);
1554     return;
1555   }
1556 
1557   // Parse enum body.
1558   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1559   if (HasError) {
1560     if (FormatTok->is(tok::semi))
1561       nextToken();
1562     addUnwrappedLine();
1563   }
1564 
1565   // There is no addUnwrappedLine() here so that we fall through to parsing a
1566   // structural element afterwards. Thus, in "enum A {} n, m;",
1567   // "} n, m;" will end up in one unwrapped line.
1568 }
1569 
1570 void UnwrappedLineParser::parseJavaEnumBody() {
1571   // Determine whether the enum is simple, i.e. does not have a semicolon or
1572   // constants with class bodies. Simple enums can be formatted like braced
1573   // lists, contracted to a single line, etc.
1574   unsigned StoredPosition = Tokens->getPosition();
1575   bool IsSimple = true;
1576   FormatToken *Tok = Tokens->getNextToken();
1577   while (Tok) {
1578     if (Tok->is(tok::r_brace))
1579       break;
1580     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1581       IsSimple = false;
1582       break;
1583     }
1584     // FIXME: This will also mark enums with braces in the arguments to enum
1585     // constants as "not simple". This is probably fine in practice, though.
1586     Tok = Tokens->getNextToken();
1587   }
1588   FormatTok = Tokens->setPosition(StoredPosition);
1589 
1590   if (IsSimple) {
1591     parseBracedList();
1592     addUnwrappedLine();
1593     return;
1594   }
1595 
1596   // Parse the body of a more complex enum.
1597   // First add a line for everything up to the "{".
1598   nextToken();
1599   addUnwrappedLine();
1600   ++Line->Level;
1601 
1602   // Parse the enum constants.
1603   while (FormatTok) {
1604     if (FormatTok->is(tok::l_brace)) {
1605       // Parse the constant's class body.
1606       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1607                  /*MunchSemi=*/false);
1608     } else if (FormatTok->is(tok::l_paren)) {
1609       parseParens();
1610     } else if (FormatTok->is(tok::comma)) {
1611       nextToken();
1612       addUnwrappedLine();
1613     } else if (FormatTok->is(tok::semi)) {
1614       nextToken();
1615       addUnwrappedLine();
1616       break;
1617     } else if (FormatTok->is(tok::r_brace)) {
1618       addUnwrappedLine();
1619       break;
1620     } else {
1621       nextToken();
1622     }
1623   }
1624 
1625   // Parse the class body after the enum's ";" if any.
1626   parseLevel(/*HasOpeningBrace=*/true);
1627   nextToken();
1628   --Line->Level;
1629   addUnwrappedLine();
1630 }
1631 
1632 void UnwrappedLineParser::parseRecord() {
1633   const FormatToken &InitialToken = *FormatTok;
1634   nextToken();
1635 
1636   // The actual identifier can be a nested name specifier, and in macros
1637   // it is often token-pasted.
1638   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1639                             tok::kw___attribute, tok::kw___declspec,
1640                             tok::kw_alignas) ||
1641          ((Style.Language == FormatStyle::LK_Java ||
1642            Style.Language == FormatStyle::LK_JavaScript) &&
1643           FormatTok->isOneOf(tok::period, tok::comma))) {
1644     bool IsNonMacroIdentifier =
1645         FormatTok->is(tok::identifier) &&
1646         FormatTok->TokenText != FormatTok->TokenText.upper();
1647     nextToken();
1648     // We can have macros or attributes in between 'class' and the class name.
1649     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1650       parseParens();
1651   }
1652 
1653   // Note that parsing away template declarations here leads to incorrectly
1654   // accepting function declarations as record declarations.
1655   // In general, we cannot solve this problem. Consider:
1656   // class A<int> B() {}
1657   // which can be a function definition or a class definition when B() is a
1658   // macro. If we find enough real-world cases where this is a problem, we
1659   // can parse for the 'template' keyword in the beginning of the statement,
1660   // and thus rule out the record production in case there is no template
1661   // (this would still leave us with an ambiguity between template function
1662   // and class declarations).
1663   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1664     while (!eof()) {
1665       if (FormatTok->is(tok::l_brace)) {
1666         calculateBraceTypes(/*ExpectClassBody=*/true);
1667         if (!tryToParseBracedList())
1668           break;
1669       }
1670       if (FormatTok->Tok.is(tok::semi))
1671         return;
1672       nextToken();
1673     }
1674   }
1675   if (FormatTok->Tok.is(tok::l_brace)) {
1676     if (ShouldBreakBeforeBrace(Style, InitialToken))
1677       addUnwrappedLine();
1678 
1679     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1680                /*MunchSemi=*/false);
1681   }
1682   // There is no addUnwrappedLine() here so that we fall through to parsing a
1683   // structural element afterwards. Thus, in "class A {} n, m;",
1684   // "} n, m;" will end up in one unwrapped line.
1685 }
1686 
1687 void UnwrappedLineParser::parseObjCProtocolList() {
1688   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1689   do
1690     nextToken();
1691   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1692   nextToken(); // Skip '>'.
1693 }
1694 
1695 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1696   do {
1697     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1698       nextToken();
1699       addUnwrappedLine();
1700       break;
1701     }
1702     if (FormatTok->is(tok::l_brace)) {
1703       parseBlock(/*MustBeDeclaration=*/false);
1704       // In ObjC interfaces, nothing should be following the "}".
1705       addUnwrappedLine();
1706     } else if (FormatTok->is(tok::r_brace)) {
1707       // Ignore stray "}". parseStructuralElement doesn't consume them.
1708       nextToken();
1709       addUnwrappedLine();
1710     } else {
1711       parseStructuralElement();
1712     }
1713   } while (!eof());
1714 }
1715 
1716 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1717   nextToken();
1718   nextToken(); // interface name
1719 
1720   // @interface can be followed by either a base class, or a category.
1721   if (FormatTok->Tok.is(tok::colon)) {
1722     nextToken();
1723     nextToken(); // base class name
1724   } else if (FormatTok->Tok.is(tok::l_paren))
1725     // Skip category, if present.
1726     parseParens();
1727 
1728   if (FormatTok->Tok.is(tok::less))
1729     parseObjCProtocolList();
1730 
1731   if (FormatTok->Tok.is(tok::l_brace)) {
1732     if (Style.BraceWrapping.AfterObjCDeclaration)
1733       addUnwrappedLine();
1734     parseBlock(/*MustBeDeclaration=*/true);
1735   }
1736 
1737   // With instance variables, this puts '}' on its own line.  Without instance
1738   // variables, this ends the @interface line.
1739   addUnwrappedLine();
1740 
1741   parseObjCUntilAtEnd();
1742 }
1743 
1744 void UnwrappedLineParser::parseObjCProtocol() {
1745   nextToken();
1746   nextToken(); // protocol name
1747 
1748   if (FormatTok->Tok.is(tok::less))
1749     parseObjCProtocolList();
1750 
1751   // Check for protocol declaration.
1752   if (FormatTok->Tok.is(tok::semi)) {
1753     nextToken();
1754     return addUnwrappedLine();
1755   }
1756 
1757   addUnwrappedLine();
1758   parseObjCUntilAtEnd();
1759 }
1760 
1761 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1762   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1763   nextToken();
1764 
1765   // Consume the "default" in "export default class/function".
1766   if (FormatTok->is(tok::kw_default))
1767     nextToken();
1768 
1769   // Consume "function" and "default function", so that these get parsed as
1770   // free-standing JS functions, i.e. do not require a trailing semicolon.
1771   if (FormatTok->is(Keywords.kw_function)) {
1772     nextToken();
1773     return;
1774   }
1775 
1776   if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
1777                          Keywords.kw_let, Keywords.kw_var))
1778     return; // Fall through to parsing the corresponding structure.
1779 
1780   if (FormatTok->is(tok::l_brace)) {
1781     FormatTok->BlockKind = BK_Block;
1782     parseBracedList();
1783   }
1784 
1785   while (!eof() && FormatTok->isNot(tok::semi) &&
1786          FormatTok->isNot(tok::l_brace)) {
1787     nextToken();
1788   }
1789 }
1790 
1791 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1792                                                  StringRef Prefix = "") {
1793   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1794                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1795   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1796                                                     E = Line.Tokens.end();
1797        I != E; ++I) {
1798     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1799   }
1800   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1801                                                     E = Line.Tokens.end();
1802        I != E; ++I) {
1803     const UnwrappedLineNode &Node = *I;
1804     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1805              I = Node.Children.begin(),
1806              E = Node.Children.end();
1807          I != E; ++I) {
1808       printDebugInfo(*I, "\nChild: ");
1809     }
1810   }
1811   llvm::dbgs() << "\n";
1812 }
1813 
1814 void UnwrappedLineParser::addUnwrappedLine() {
1815   if (Line->Tokens.empty())
1816     return;
1817   DEBUG({
1818     if (CurrentLines == &Lines)
1819       printDebugInfo(*Line);
1820   });
1821   CurrentLines->push_back(std::move(*Line));
1822   Line->Tokens.clear();
1823   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1824     CurrentLines->append(
1825         std::make_move_iterator(PreprocessorDirectives.begin()),
1826         std::make_move_iterator(PreprocessorDirectives.end()));
1827     PreprocessorDirectives.clear();
1828   }
1829 }
1830 
1831 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1832 
1833 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1834   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1835          FormatTok.NewlinesBefore > 0;
1836 }
1837 
1838 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1839   bool JustComments = Line->Tokens.empty();
1840   for (SmallVectorImpl<FormatToken *>::const_iterator
1841            I = CommentsBeforeNextToken.begin(),
1842            E = CommentsBeforeNextToken.end();
1843        I != E; ++I) {
1844     if (isOnNewLine(**I) && JustComments)
1845       addUnwrappedLine();
1846     pushToken(*I);
1847   }
1848   if (NewlineBeforeNext && JustComments)
1849     addUnwrappedLine();
1850   CommentsBeforeNextToken.clear();
1851 }
1852 
1853 void UnwrappedLineParser::nextToken() {
1854   if (eof())
1855     return;
1856   flushComments(isOnNewLine(*FormatTok));
1857   pushToken(FormatTok);
1858   readToken();
1859 }
1860 
// Fetches tokens from the token source until FormatTok is a token the caller
// should see, i.e. a non-comment token outside of an unreachable
// conditional-compilation branch, or until the end of input is reached.
//
// On the way it:
//  - parses preprocessor directives that start at the fetched token,
//  - updates the conditional-compilation state for conflict marker tokens,
//  - drops tokens while the top of PPStack is PP_Unreachable (unless inside a
//    preprocessor directive),
//  - attaches comments either to the current line or to
//    CommentsBeforeNextToken.
void UnwrappedLineParser::readToken() {
  // Stays true only while every comment seen so far continues the current
  // line; once cleared, all further comments are deferred.
  bool CommentsInCurrentLine = true;
  do {
    FormatTok = Tokens->getNextToken();
    assert(FormatTok);
    // A '#' that follows an unescaped newline (or is the very first token)
    // while we are not already in a directive starts a preprocessor directive.
    // This is a loop because the token left in FormatTok after
    // parsePPDirective() may itself be such a '#'.
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines = !Line->Tokens.empty();
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(isOnNewLine(*FormatTok));
      parsePPDirective();
    }
    // Conflict marker tokens are handled through the conditional-compilation
    // callbacks (start / alternative / end). The marker itself is not added to
    // any line; the token following it is forced onto a new line.
    while (FormatTok->Type == TT_ConflictStart ||
           FormatTok->Type == TT_ConflictEnd ||
           FormatTok->Type == TT_ConflictAlternative) {
      if (FormatTok->Type == TT_ConflictStart) {
        conditionalCompilationStart(/*Unreachable=*/false);
      } else if (FormatTok->Type == TT_ConflictAlternative) {
        conditionalCompilationAlternative();
      } else if (FormatTok->Type == TT_ConflictEnd) {
        conditionalCompilationEnd();
      }
      FormatTok = Tokens->getNextToken();
      FormatTok->MustBreakBefore = true;
    }

    // Inside an unreachable branch the token is discarded. Note that
    // 'continue' in a do-while jumps to the '!eof()' check below, so the loop
    // still terminates at the end of the input.
    if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // Any non-comment token (including eof) is handed back to the caller.
    if (!FormatTok->Tok.is(tok::comment))
      return;
    // A comment on its own line (or the first token) ends the run of comments
    // that belong to the current line; the flag is never set back to true.
    if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
      CommentsInCurrentLine = false;
    }
    if (CommentsInCurrentLine) {
      // Trailing comment: it becomes part of the line being built.
      pushToken(FormatTok);
    } else {
      // Deferred comment: kept until flushComments() places it.
      CommentsBeforeNextToken.push_back(FormatTok);
    }
  } while (!eof());
}
1909 
1910 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1911   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1912   if (MustBreakBeforeNextToken) {
1913     Line->Tokens.back().Tok->MustBreakBefore = true;
1914     MustBreakBeforeNextToken = false;
1915   }
1916 }
1917 
1918 } // end namespace format
1919 } // end namespace clang
1920