1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-parser"
17 
18 #include "UnwrappedLineParser.h"
19 #include "llvm/Support/Debug.h"
20 
21 namespace clang {
22 namespace format {
23 
24 class FormatTokenSource {
25 public:
26   virtual ~FormatTokenSource() {}
27   virtual FormatToken *getNextToken() = 0;
28 
29   virtual unsigned getPosition() = 0;
30   virtual FormatToken *setPosition(unsigned Position) = 0;
31 };
32 
33 namespace {
34 
35 class ScopedDeclarationState {
36 public:
37   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
38                          bool MustBeDeclaration)
39       : Line(Line), Stack(Stack) {
40     Line.MustBeDeclaration = MustBeDeclaration;
41     Stack.push_back(MustBeDeclaration);
42   }
43   ~ScopedDeclarationState() {
44     Stack.pop_back();
45     if (!Stack.empty())
46       Line.MustBeDeclaration = Stack.back();
47     else
48       Line.MustBeDeclaration = true;
49   }
50 
51 private:
52   UnwrappedLine &Line;
53   std::vector<bool> &Stack;
54 };
55 
56 class ScopedMacroState : public FormatTokenSource {
57 public:
58   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
59                    FormatToken *&ResetToken, bool &StructuralError)
60       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
61         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
62         StructuralError(StructuralError),
63         PreviousStructuralError(StructuralError), Token(NULL) {
64     TokenSource = this;
65     Line.Level = 0;
66     Line.InPPDirective = true;
67   }
68 
69   ~ScopedMacroState() {
70     TokenSource = PreviousTokenSource;
71     ResetToken = Token;
72     Line.InPPDirective = false;
73     Line.Level = PreviousLineLevel;
74     StructuralError = PreviousStructuralError;
75   }
76 
77   virtual FormatToken *getNextToken() {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); }
88 
89   virtual FormatToken *setPosition(unsigned Position) {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113   bool &StructuralError;
114   bool PreviousStructuralError;
115 
116   FormatToken *Token;
117 };
118 
119 } // end anonymous namespace
120 
121 class ScopedLineState {
122 public:
123   ScopedLineState(UnwrappedLineParser &Parser,
124                   bool SwitchToPreprocessorLines = false)
125       : Parser(Parser) {
126     OriginalLines = Parser.CurrentLines;
127     if (SwitchToPreprocessorLines)
128       Parser.CurrentLines = &Parser.PreprocessorDirectives;
129     else if (!Parser.Line->Tokens.empty())
130       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
131     PreBlockLine = Parser.Line.take();
132     Parser.Line.reset(new UnwrappedLine());
133     Parser.Line->Level = PreBlockLine->Level;
134     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
135   }
136 
137   ~ScopedLineState() {
138     if (!Parser.Line->Tokens.empty()) {
139       Parser.addUnwrappedLine();
140     }
141     assert(Parser.Line->Tokens.empty());
142     Parser.Line.reset(PreBlockLine);
143     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
144       Parser.MustBreakBeforeNextToken = true;
145     Parser.CurrentLines = OriginalLines;
146   }
147 
148 private:
149   UnwrappedLineParser &Parser;
150 
151   UnwrappedLine *PreBlockLine;
152   SmallVectorImpl<UnwrappedLine> *OriginalLines;
153 };
154 
155 namespace {
156 
157 class IndexedTokenSource : public FormatTokenSource {
158 public:
159   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
160       : Tokens(Tokens), Position(-1) {}
161 
162   virtual FormatToken *getNextToken() {
163     ++Position;
164     return Tokens[Position];
165   }
166 
167   virtual unsigned getPosition() {
168     assert(Position >= 0);
169     return Position;
170   }
171 
172   virtual FormatToken *setPosition(unsigned P) {
173     Position = P;
174     return Tokens[Position];
175   }
176 
177 private:
178   ArrayRef<FormatToken *> Tokens;
179   int Position;
180 };
181 
182 } // end anonymous namespace
183 
184 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
185                                          ArrayRef<FormatToken *> Tokens,
186                                          UnwrappedLineConsumer &Callback)
187     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
188       CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL),
189       Callback(Callback), AllTokens(Tokens) {}
190 
191 bool UnwrappedLineParser::parse() {
192   DEBUG(llvm::dbgs() << "----\n");
193   IndexedTokenSource TokenSource(AllTokens);
194   Tokens = &TokenSource;
195   readToken();
196   parseFile();
197   for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
198                                                 E = Lines.end();
199        I != E; ++I) {
200     Callback.consumeUnwrappedLine(*I);
201   }
202 
203   // Create line with eof token.
204   pushToken(FormatTok);
205   Callback.consumeUnwrappedLine(*Line);
206   return StructuralError;
207 }
208 
209 void UnwrappedLineParser::parseFile() {
210   ScopedDeclarationState DeclarationState(
211       *Line, DeclarationScopeStack,
212       /*MustBeDeclaration=*/ !Line->InPPDirective);
213   parseLevel(/*HasOpeningBrace=*/false);
214   // Make sure to format the remaining tokens.
215   flushComments(true);
216   addUnwrappedLine();
217 }
218 
219 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
220   bool SwitchLabelEncountered = false;
221   do {
222     switch (FormatTok->Tok.getKind()) {
223     case tok::comment:
224       nextToken();
225       addUnwrappedLine();
226       break;
227     case tok::l_brace:
228       // FIXME: Add parameter whether this can happen - if this happens, we must
229       // be in a non-declaration context.
230       parseBlock(/*MustBeDeclaration=*/false);
231       addUnwrappedLine();
232       break;
233     case tok::r_brace:
234       if (HasOpeningBrace)
235         return;
236       StructuralError = true;
237       nextToken();
238       addUnwrappedLine();
239       break;
240     case tok::kw_default:
241     case tok::kw_case:
242       if (!SwitchLabelEncountered &&
243           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
244         ++Line->Level;
245       SwitchLabelEncountered = true;
246       parseStructuralElement();
247       break;
248     default:
249       parseStructuralElement();
250       break;
251     }
252   } while (!eof());
253 }
254 
255 void UnwrappedLineParser::calculateBraceTypes() {
256   // We'll parse forward through the tokens until we hit
257   // a closing brace or eof - note that getNextToken() will
258   // parse macros, so this will magically work inside macro
259   // definitions, too.
260   unsigned StoredPosition = Tokens->getPosition();
261   unsigned Position = StoredPosition;
262   FormatToken *Tok = FormatTok;
263   // Keep a stack of positions of lbrace tokens. We will
264   // update information about whether an lbrace starts a
265   // braced init list or a different block during the loop.
266   SmallVector<FormatToken *, 8> LBraceStack;
267   assert(Tok->Tok.is(tok::l_brace));
268   do {
269     // Get next none-comment token.
270     FormatToken *NextTok;
271     unsigned ReadTokens = 0;
272     do {
273       NextTok = Tokens->getNextToken();
274       ++ReadTokens;
275     } while (NextTok->is(tok::comment));
276 
277     switch (Tok->Tok.getKind()) {
278     case tok::l_brace:
279       LBraceStack.push_back(Tok);
280       break;
281     case tok::r_brace:
282       if (!LBraceStack.empty()) {
283         if (LBraceStack.back()->BlockKind == BK_Unknown) {
284           // If there is a comma, semicolon or right paren after the closing
285           // brace, we assume this is a braced initializer list.  Note that
286           // regardless how we mark inner braces here, we will overwrite the
287           // BlockKind later if we parse a braced list (where all blocks inside
288           // are by default braced lists), or when we explicitly detect blocks
289           // (for example while parsing lambdas).
290           //
291           // We exclude + and - as they can be ObjC visibility modifiers.
292           if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren,
293                                tok::r_square, tok::l_brace, tok::colon) ||
294               (NextTok->isBinaryOperator() &&
295                !NextTok->isOneOf(tok::plus, tok::minus))) {
296             Tok->BlockKind = BK_BracedInit;
297             LBraceStack.back()->BlockKind = BK_BracedInit;
298           } else {
299             Tok->BlockKind = BK_Block;
300             LBraceStack.back()->BlockKind = BK_Block;
301           }
302         }
303         LBraceStack.pop_back();
304       }
305       break;
306     case tok::semi:
307     case tok::kw_if:
308     case tok::kw_while:
309     case tok::kw_for:
310     case tok::kw_switch:
311     case tok::kw_try:
312       if (!LBraceStack.empty())
313         LBraceStack.back()->BlockKind = BK_Block;
314       break;
315     default:
316       break;
317     }
318     Tok = NextTok;
319     Position += ReadTokens;
320   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
321   // Assume other blocks for all unclosed opening braces.
322   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
323     if (LBraceStack[i]->BlockKind == BK_Unknown)
324       LBraceStack[i]->BlockKind = BK_Block;
325   }
326 
327   FormatTok = Tokens->setPosition(StoredPosition);
328 }
329 
330 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel) {
331   assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
332   unsigned InitialLevel = Line->Level;
333   nextToken();
334 
335   addUnwrappedLine();
336 
337   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
338                                           MustBeDeclaration);
339   if (AddLevel)
340     ++Line->Level;
341   parseLevel(/*HasOpeningBrace=*/true);
342 
343   if (!FormatTok->Tok.is(tok::r_brace)) {
344     Line->Level = InitialLevel;
345     StructuralError = true;
346     return;
347   }
348 
349   nextToken(); // Munch the closing brace.
350   Line->Level = InitialLevel;
351 }
352 
353 void UnwrappedLineParser::parseChildBlock() {
354   FormatTok->BlockKind = BK_Block;
355   nextToken();
356   {
357     ScopedLineState LineState(*this);
358     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
359                                             /*MustBeDeclaration=*/false);
360     Line->Level += 1;
361     parseLevel(/*HasOpeningBrace=*/true);
362     Line->Level -= 1;
363   }
364   nextToken();
365 }
366 
367 void UnwrappedLineParser::parsePPDirective() {
368   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
369   ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
370   nextToken();
371 
372   if (FormatTok->Tok.getIdentifierInfo() == NULL) {
373     parsePPUnknown();
374     return;
375   }
376 
377   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
378   case tok::pp_define:
379     parsePPDefine();
380     return;
381   case tok::pp_if:
382     parsePPIf();
383     break;
384   case tok::pp_ifdef:
385   case tok::pp_ifndef:
386     parsePPIfdef();
387     break;
388   case tok::pp_else:
389     parsePPElse();
390     break;
391   case tok::pp_elif:
392     parsePPElIf();
393     break;
394   case tok::pp_endif:
395     parsePPEndIf();
396     break;
397   default:
398     parsePPUnknown();
399     break;
400   }
401 }
402 
403 void UnwrappedLineParser::pushPPConditional() {
404   if (!PPStack.empty() && PPStack.back() == PP_Unreachable)
405     PPStack.push_back(PP_Unreachable);
406   else
407     PPStack.push_back(PP_Conditional);
408 }
409 
410 void UnwrappedLineParser::parsePPIf() {
411   nextToken();
412   if ((FormatTok->Tok.isLiteral() &&
413        StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) ==
414            "0") ||
415       FormatTok->Tok.is(tok::kw_false)) {
416     PPStack.push_back(PP_Unreachable);
417   } else {
418     pushPPConditional();
419   }
420   parsePPUnknown();
421 }
422 
423 void UnwrappedLineParser::parsePPIfdef() {
424   pushPPConditional();
425   parsePPUnknown();
426 }
427 
428 void UnwrappedLineParser::parsePPElse() {
429   if (!PPStack.empty())
430     PPStack.pop_back();
431   pushPPConditional();
432   parsePPUnknown();
433 }
434 
435 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
436 
437 void UnwrappedLineParser::parsePPEndIf() {
438   if (!PPStack.empty())
439     PPStack.pop_back();
440   parsePPUnknown();
441 }
442 
443 void UnwrappedLineParser::parsePPDefine() {
444   nextToken();
445 
446   if (FormatTok->Tok.getKind() != tok::identifier) {
447     parsePPUnknown();
448     return;
449   }
450   nextToken();
451   if (FormatTok->Tok.getKind() == tok::l_paren &&
452       FormatTok->WhitespaceRange.getBegin() ==
453           FormatTok->WhitespaceRange.getEnd()) {
454     parseParens();
455   }
456   addUnwrappedLine();
457   Line->Level = 1;
458 
459   // Errors during a preprocessor directive can only affect the layout of the
460   // preprocessor directive, and thus we ignore them. An alternative approach
461   // would be to use the same approach we use on the file level (no
462   // re-indentation if there was a structural error) within the macro
463   // definition.
464   parseFile();
465 }
466 
467 void UnwrappedLineParser::parsePPUnknown() {
468   do {
469     nextToken();
470   } while (!eof());
471   addUnwrappedLine();
472 }
473 
474 // Here we blacklist certain tokens that are not usually the first token in an
475 // unwrapped line. This is used in attempt to distinguish macro calls without
476 // trailing semicolons from other constructs split to several lines.
477 bool tokenCanStartNewLine(clang::Token Tok) {
478   // Semicolon can be a null-statement, l_square can be a start of a macro or
479   // a C++11 attribute, but this doesn't seem to be common.
480   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
481          Tok.isNot(tok::l_square) &&
482          // Tokens that can only be used as binary operators and a part of
483          // overloaded operator names.
484          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
485          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
486          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
487          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
488          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
489          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
490          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
491          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
492          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
493          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
494          Tok.isNot(tok::lesslessequal) &&
495          // Colon is used in labels, base class lists, initializer lists,
496          // range-based for loops, ternary operator, but should never be the
497          // first token in an unwrapped line.
498          Tok.isNot(tok::colon);
499 }
500 
501 void UnwrappedLineParser::parseStructuralElement() {
502   assert(!FormatTok->Tok.is(tok::l_brace));
503   switch (FormatTok->Tok.getKind()) {
504   case tok::at:
505     nextToken();
506     if (FormatTok->Tok.is(tok::l_brace)) {
507       parseBracedList();
508       break;
509     }
510     switch (FormatTok->Tok.getObjCKeywordID()) {
511     case tok::objc_public:
512     case tok::objc_protected:
513     case tok::objc_package:
514     case tok::objc_private:
515       return parseAccessSpecifier();
516     case tok::objc_interface:
517     case tok::objc_implementation:
518       return parseObjCInterfaceOrImplementation();
519     case tok::objc_protocol:
520       return parseObjCProtocol();
521     case tok::objc_end:
522       return; // Handled by the caller.
523     case tok::objc_optional:
524     case tok::objc_required:
525       nextToken();
526       addUnwrappedLine();
527       return;
528     default:
529       break;
530     }
531     break;
532   case tok::kw_namespace:
533     parseNamespace();
534     return;
535   case tok::kw_inline:
536     nextToken();
537     if (FormatTok->Tok.is(tok::kw_namespace)) {
538       parseNamespace();
539       return;
540     }
541     break;
542   case tok::kw_public:
543   case tok::kw_protected:
544   case tok::kw_private:
545     parseAccessSpecifier();
546     return;
547   case tok::kw_if:
548     parseIfThenElse();
549     return;
550   case tok::kw_for:
551   case tok::kw_while:
552     parseForOrWhileLoop();
553     return;
554   case tok::kw_do:
555     parseDoWhile();
556     return;
557   case tok::kw_switch:
558     parseSwitch();
559     return;
560   case tok::kw_default:
561     nextToken();
562     parseLabel();
563     return;
564   case tok::kw_case:
565     parseCaseLabel();
566     return;
567   case tok::kw_return:
568     parseReturn();
569     return;
570   case tok::kw_extern:
571     nextToken();
572     if (FormatTok->Tok.is(tok::string_literal)) {
573       nextToken();
574       if (FormatTok->Tok.is(tok::l_brace)) {
575         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
576         addUnwrappedLine();
577         return;
578       }
579     }
580     // In all other cases, parse the declaration.
581     break;
582   default:
583     break;
584   }
585   do {
586     switch (FormatTok->Tok.getKind()) {
587     case tok::at:
588       nextToken();
589       if (FormatTok->Tok.is(tok::l_brace))
590         parseBracedList();
591       break;
592     case tok::kw_enum:
593       parseEnum();
594       break;
595     case tok::kw_struct:
596     case tok::kw_union:
597     case tok::kw_class:
598       parseRecord();
599       // A record declaration or definition is always the start of a structural
600       // element.
601       break;
602     case tok::semi:
603       nextToken();
604       addUnwrappedLine();
605       return;
606     case tok::r_brace:
607       addUnwrappedLine();
608       return;
609     case tok::l_paren:
610       parseParens();
611       break;
612     case tok::caret:
613       nextToken();
614       if (FormatTok->is(tok::l_brace)) {
615         parseChildBlock();
616       }
617       break;
618     case tok::l_brace:
619       if (!tryToParseBracedList()) {
620         // A block outside of parentheses must be the last part of a
621         // structural element.
622         // FIXME: Figure out cases where this is not true, and add projections
623         // for them (the one we know is missing are lambdas).
624         if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
625             Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup ||
626             Style.BreakBeforeBraces == FormatStyle::BS_Allman)
627           addUnwrappedLine();
628         parseBlock(/*MustBeDeclaration=*/false);
629         addUnwrappedLine();
630         return;
631       }
632       // Otherwise this was a braced init list, and the structural
633       // element continues.
634       break;
635     case tok::identifier: {
636       StringRef Text = FormatTok->TokenText;
637       nextToken();
638       if (Line->Tokens.size() == 1) {
639         if (FormatTok->Tok.is(tok::colon)) {
640           parseLabel();
641           return;
642         }
643         // Recognize function-like macro usages without trailing semicolon.
644         if (FormatTok->Tok.is(tok::l_paren)) {
645           parseParens();
646           if (FormatTok->HasUnescapedNewline &&
647               tokenCanStartNewLine(FormatTok->Tok)) {
648             addUnwrappedLine();
649             return;
650           }
651         } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 &&
652                    Text == Text.upper()) {
653           // Recognize free-standing macros like Q_OBJECT.
654           addUnwrappedLine();
655           return;
656         }
657       }
658       break;
659     }
660     case tok::equal:
661       nextToken();
662       if (FormatTok->Tok.is(tok::l_brace)) {
663         parseBracedList();
664       }
665       break;
666     case tok::l_square:
667       tryToParseLambda();
668       break;
669     default:
670       nextToken();
671       break;
672     }
673   } while (!eof());
674 }
675 
676 void UnwrappedLineParser::tryToParseLambda() {
677   // FIXME: This is a dirty way to access the previous token. Find a better
678   // solution.
679   if (!Line->Tokens.empty() &&
680       Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator)) {
681     nextToken();
682     return;
683   }
684   assert(FormatTok->is(tok::l_square));
685   FormatToken &LSquare = *FormatTok;
686   if (!tryToParseLambdaIntroducer())
687     return;
688 
689   while (FormatTok->isNot(tok::l_brace)) {
690     switch (FormatTok->Tok.getKind()) {
691     case tok::l_brace:
692       break;
693     case tok::l_paren:
694       parseParens();
695       break;
696     case tok::identifier:
697     case tok::kw_mutable:
698       nextToken();
699       break;
700     default:
701       return;
702     }
703   }
704   LSquare.Type = TT_LambdaLSquare;
705   parseChildBlock();
706 }
707 
708 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
709   nextToken();
710   if (FormatTok->is(tok::equal)) {
711     nextToken();
712     if (FormatTok->is(tok::r_square)) {
713       nextToken();
714       return true;
715     }
716     if (FormatTok->isNot(tok::comma))
717       return false;
718     nextToken();
719   } else if (FormatTok->is(tok::amp)) {
720     nextToken();
721     if (FormatTok->is(tok::r_square)) {
722       nextToken();
723       return true;
724     }
725     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
726       return false;
727     }
728     if (FormatTok->is(tok::comma))
729       nextToken();
730   } else if (FormatTok->is(tok::r_square)) {
731     nextToken();
732     return true;
733   }
734   do {
735     if (FormatTok->is(tok::amp))
736       nextToken();
737     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
738       return false;
739     nextToken();
740     if (FormatTok->is(tok::comma)) {
741       nextToken();
742     } else if (FormatTok->is(tok::r_square)) {
743       nextToken();
744       return true;
745     } else {
746       return false;
747     }
748   } while (!eof());
749   return false;
750 }
751 
752 bool UnwrappedLineParser::tryToParseBracedList() {
753   if (FormatTok->BlockKind == BK_Unknown)
754     calculateBraceTypes();
755   assert(FormatTok->BlockKind != BK_Unknown);
756   if (FormatTok->BlockKind == BK_Block)
757     return false;
758   parseBracedList();
759   return true;
760 }
761 
762 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
763   bool HasError = false;
764   nextToken();
765 
766   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
767   // replace this by using parseAssigmentExpression() inside.
768   do {
769     // FIXME: When we start to support lambdas, we'll want to parse them away
770     // here, otherwise our bail-out scenarios below break. The better solution
771     // might be to just implement a more or less complete expression parser.
772     switch (FormatTok->Tok.getKind()) {
773     case tok::caret:
774       nextToken();
775       if (FormatTok->is(tok::l_brace)) {
776         parseChildBlock();
777       }
778       break;
779     case tok::l_square:
780       tryToParseLambda();
781       break;
782     case tok::l_brace:
783       // Assume there are no blocks inside a braced init list apart
784       // from the ones we explicitly parse out (like lambdas).
785       FormatTok->BlockKind = BK_BracedInit;
786       parseBracedList();
787       break;
788     case tok::r_brace:
789       nextToken();
790       return !HasError;
791     case tok::semi:
792       HasError = true;
793       if (!ContinueOnSemicolons)
794         return !HasError;
795       nextToken();
796       break;
797     case tok::comma:
798       nextToken();
799       break;
800     default:
801       nextToken();
802       break;
803     }
804   } while (!eof());
805   return false;
806 }
807 
808 void UnwrappedLineParser::parseReturn() {
809   nextToken();
810 
811   do {
812     switch (FormatTok->Tok.getKind()) {
813     case tok::l_brace:
814       parseBracedList();
815       if (FormatTok->Tok.isNot(tok::semi)) {
816         // Assume missing ';'.
817         addUnwrappedLine();
818         return;
819       }
820       break;
821     case tok::l_paren:
822       parseParens();
823       break;
824     case tok::r_brace:
825       // Assume missing ';'.
826       addUnwrappedLine();
827       return;
828     case tok::semi:
829       nextToken();
830       addUnwrappedLine();
831       return;
832     case tok::l_square:
833       tryToParseLambda();
834       break;
835     default:
836       nextToken();
837       break;
838     }
839   } while (!eof());
840 }
841 
842 void UnwrappedLineParser::parseParens() {
843   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
844   nextToken();
845   do {
846     switch (FormatTok->Tok.getKind()) {
847     case tok::l_paren:
848       parseParens();
849       break;
850     case tok::r_paren:
851       nextToken();
852       return;
853     case tok::r_brace:
854       // A "}" inside parenthesis is an error if there wasn't a matching "{".
855       return;
856     case tok::l_square:
857       tryToParseLambda();
858       break;
859     case tok::l_brace: {
860       if (!tryToParseBracedList()) {
861         parseChildBlock();
862       }
863       break;
864     }
865     case tok::at:
866       nextToken();
867       if (FormatTok->Tok.is(tok::l_brace))
868         parseBracedList();
869       break;
870     default:
871       nextToken();
872       break;
873     }
874   } while (!eof());
875 }
876 
877 void UnwrappedLineParser::parseIfThenElse() {
878   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
879   nextToken();
880   if (FormatTok->Tok.is(tok::l_paren))
881     parseParens();
882   bool NeedsUnwrappedLine = false;
883   if (FormatTok->Tok.is(tok::l_brace)) {
884     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
885       addUnwrappedLine();
886     parseBlock(/*MustBeDeclaration=*/false);
887     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
888       addUnwrappedLine();
889     else
890       NeedsUnwrappedLine = true;
891   } else {
892     addUnwrappedLine();
893     ++Line->Level;
894     parseStructuralElement();
895     --Line->Level;
896   }
897   if (FormatTok->Tok.is(tok::kw_else)) {
898     nextToken();
899     if (FormatTok->Tok.is(tok::l_brace)) {
900       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
901         addUnwrappedLine();
902       parseBlock(/*MustBeDeclaration=*/false);
903       addUnwrappedLine();
904     } else if (FormatTok->Tok.is(tok::kw_if)) {
905       parseIfThenElse();
906     } else {
907       addUnwrappedLine();
908       ++Line->Level;
909       parseStructuralElement();
910       --Line->Level;
911     }
912   } else if (NeedsUnwrappedLine) {
913     addUnwrappedLine();
914   }
915 }
916 
917 void UnwrappedLineParser::parseNamespace() {
918   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
919   nextToken();
920   if (FormatTok->Tok.is(tok::identifier))
921     nextToken();
922   if (FormatTok->Tok.is(tok::l_brace)) {
923     if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
924         Style.BreakBeforeBraces == FormatStyle::BS_Allman)
925       addUnwrappedLine();
926 
927     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
928                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
929                      DeclarationScopeStack.size() > 1);
930     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
931     // Munch the semicolon after a namespace. This is more common than one would
932     // think. Puttin the semicolon into its own line is very ugly.
933     if (FormatTok->Tok.is(tok::semi))
934       nextToken();
935     addUnwrappedLine();
936   }
937   // FIXME: Add error handling.
938 }
939 
940 void UnwrappedLineParser::parseForOrWhileLoop() {
941   assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) &&
942          "'for' or 'while' expected");
943   nextToken();
944   if (FormatTok->Tok.is(tok::l_paren))
945     parseParens();
946   if (FormatTok->Tok.is(tok::l_brace)) {
947     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
948       addUnwrappedLine();
949     parseBlock(/*MustBeDeclaration=*/false);
950     addUnwrappedLine();
951   } else {
952     addUnwrappedLine();
953     ++Line->Level;
954     parseStructuralElement();
955     --Line->Level;
956   }
957 }
958 
959 void UnwrappedLineParser::parseDoWhile() {
960   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
961   nextToken();
962   if (FormatTok->Tok.is(tok::l_brace)) {
963     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
964       addUnwrappedLine();
965     parseBlock(/*MustBeDeclaration=*/false);
966   } else {
967     addUnwrappedLine();
968     ++Line->Level;
969     parseStructuralElement();
970     --Line->Level;
971   }
972 
973   // FIXME: Add error handling.
974   if (!FormatTok->Tok.is(tok::kw_while)) {
975     addUnwrappedLine();
976     return;
977   }
978 
979   nextToken();
980   parseStructuralElement();
981 }
982 
983 void UnwrappedLineParser::parseLabel() {
984   nextToken();
985   unsigned OldLineLevel = Line->Level;
986   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
987     --Line->Level;
988   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
989     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
990       addUnwrappedLine();
991     parseBlock(/*MustBeDeclaration=*/false);
992     if (FormatTok->Tok.is(tok::kw_break)) {
993       // "break;" after "}" on its own line only for BS_Allman
994       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
995         addUnwrappedLine();
996       parseStructuralElement();
997     }
998   }
999   addUnwrappedLine();
1000   Line->Level = OldLineLevel;
1001 }
1002 
1003 void UnwrappedLineParser::parseCaseLabel() {
1004   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1005   // FIXME: fix handling of complex expressions here.
1006   do {
1007     nextToken();
1008   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1009   parseLabel();
1010 }
1011 
1012 void UnwrappedLineParser::parseSwitch() {
1013   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1014   nextToken();
1015   if (FormatTok->Tok.is(tok::l_paren))
1016     parseParens();
1017   if (FormatTok->Tok.is(tok::l_brace)) {
1018     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1019       addUnwrappedLine();
1020     parseBlock(/*MustBeDeclaration=*/false);
1021     addUnwrappedLine();
1022   } else {
1023     addUnwrappedLine();
1024     ++Line->Level;
1025     parseStructuralElement();
1026     --Line->Level;
1027   }
1028 }
1029 
1030 void UnwrappedLineParser::parseAccessSpecifier() {
1031   nextToken();
1032   // Otherwise, we don't know what it is, and we'd better keep the next token.
1033   if (FormatTok->Tok.is(tok::colon))
1034     nextToken();
1035   addUnwrappedLine();
1036 }
1037 
1038 void UnwrappedLineParser::parseEnum() {
1039   nextToken();
1040   // Eat up enum class ...
1041   if (FormatTok->Tok.is(tok::kw_class) ||
1042       FormatTok->Tok.is(tok::kw_struct))
1043       nextToken();
1044   while (FormatTok->Tok.getIdentifierInfo() ||
1045          FormatTok->isOneOf(tok::colon, tok::coloncolon)) {
1046     nextToken();
1047     // We can have macros or attributes in between 'enum' and the enum name.
1048     if (FormatTok->Tok.is(tok::l_paren)) {
1049       parseParens();
1050     }
1051     if (FormatTok->Tok.is(tok::identifier))
1052       nextToken();
1053   }
1054   if (FormatTok->Tok.is(tok::l_brace)) {
1055     FormatTok->BlockKind = BK_Block;
1056     bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1057     if (HasError) {
1058       if (FormatTok->is(tok::semi))
1059         nextToken();
1060       addUnwrappedLine();
1061     }
1062   }
1063   // We fall through to parsing a structural element afterwards, so that in
1064   // enum A {} n, m;
1065   // "} n, m;" will end up in one unwrapped line.
1066 }
1067 
1068 void UnwrappedLineParser::parseRecord() {
1069   nextToken();
1070   if (FormatTok->Tok.is(tok::identifier) ||
1071       FormatTok->Tok.is(tok::kw___attribute) ||
1072       FormatTok->Tok.is(tok::kw___declspec) ||
1073       FormatTok->Tok.is(tok::kw_alignas)) {
1074     nextToken();
1075     // We can have macros or attributes in between 'class' and the class name.
1076     if (FormatTok->Tok.is(tok::l_paren)) {
1077       parseParens();
1078     }
1079     // The actual identifier can be a nested name specifier, and in macros
1080     // it is often token-pasted.
1081     while (FormatTok->Tok.is(tok::identifier) ||
1082            FormatTok->Tok.is(tok::coloncolon) ||
1083            FormatTok->Tok.is(tok::hashhash))
1084       nextToken();
1085 
1086     // Note that parsing away template declarations here leads to incorrectly
1087     // accepting function declarations as record declarations.
1088     // In general, we cannot solve this problem. Consider:
1089     // class A<int> B() {}
1090     // which can be a function definition or a class definition when B() is a
1091     // macro. If we find enough real-world cases where this is a problem, we
1092     // can parse for the 'template' keyword in the beginning of the statement,
1093     // and thus rule out the record production in case there is no template
1094     // (this would still leave us with an ambiguity between template function
1095     // and class declarations).
1096     if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
1097       while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
1098         if (FormatTok->Tok.is(tok::semi))
1099           return;
1100         nextToken();
1101       }
1102     }
1103   }
1104   if (FormatTok->Tok.is(tok::l_brace)) {
1105     if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
1106         Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1107       addUnwrappedLine();
1108 
1109     parseBlock(/*MustBeDeclaration=*/true);
1110   }
1111   // We fall through to parsing a structural element afterwards, so
1112   // class A {} n, m;
1113   // will end up in one unwrapped line.
1114 }
1115 
1116 void UnwrappedLineParser::parseObjCProtocolList() {
1117   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1118   do
1119     nextToken();
1120   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1121   nextToken(); // Skip '>'.
1122 }
1123 
1124 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1125   do {
1126     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1127       nextToken();
1128       addUnwrappedLine();
1129       break;
1130     }
1131     if (FormatTok->is(tok::l_brace)) {
1132       parseBlock(/*MustBeDeclaration=*/false);
1133       // In ObjC interfaces, nothing should be following the "}".
1134       addUnwrappedLine();
1135     } else {
1136       parseStructuralElement();
1137     }
1138   } while (!eof());
1139 }
1140 
1141 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1142   nextToken();
1143   nextToken(); // interface name
1144 
1145   // @interface can be followed by either a base class, or a category.
1146   if (FormatTok->Tok.is(tok::colon)) {
1147     nextToken();
1148     nextToken(); // base class name
1149   } else if (FormatTok->Tok.is(tok::l_paren))
1150     // Skip category, if present.
1151     parseParens();
1152 
1153   if (FormatTok->Tok.is(tok::less))
1154     parseObjCProtocolList();
1155 
1156   // If instance variables are present, keep the '{' on the first line too.
1157   if (FormatTok->Tok.is(tok::l_brace))
1158     parseBlock(/*MustBeDeclaration=*/true);
1159 
1160   // With instance variables, this puts '}' on its own line.  Without instance
1161   // variables, this ends the @interface line.
1162   addUnwrappedLine();
1163 
1164   parseObjCUntilAtEnd();
1165 }
1166 
1167 void UnwrappedLineParser::parseObjCProtocol() {
1168   nextToken();
1169   nextToken(); // protocol name
1170 
1171   if (FormatTok->Tok.is(tok::less))
1172     parseObjCProtocolList();
1173 
1174   // Check for protocol declaration.
1175   if (FormatTok->Tok.is(tok::semi)) {
1176     nextToken();
1177     return addUnwrappedLine();
1178   }
1179 
1180   addUnwrappedLine();
1181   parseObjCUntilAtEnd();
1182 }
1183 
1184 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1185                                                  StringRef Prefix = "") {
1186   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1187                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1188   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1189                                                     E = Line.Tokens.end();
1190        I != E; ++I) {
1191     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1192   }
1193   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1194                                                     E = Line.Tokens.end();
1195        I != E; ++I) {
1196     const UnwrappedLineNode &Node = *I;
1197     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1198              I = Node.Children.begin(),
1199              E = Node.Children.end();
1200          I != E; ++I) {
1201       printDebugInfo(*I, "\nChild: ");
1202     }
1203   }
1204   llvm::dbgs() << "\n";
1205 }
1206 
1207 void UnwrappedLineParser::addUnwrappedLine() {
1208   if (Line->Tokens.empty())
1209     return;
1210   DEBUG({
1211     if (CurrentLines == &Lines)
1212       printDebugInfo(*Line);
1213   });
1214   CurrentLines->push_back(*Line);
1215   Line->Tokens.clear();
1216   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1217     for (SmallVectorImpl<UnwrappedLine>::iterator
1218              I = PreprocessorDirectives.begin(),
1219              E = PreprocessorDirectives.end();
1220          I != E; ++I) {
1221       CurrentLines->push_back(*I);
1222     }
1223     PreprocessorDirectives.clear();
1224   }
1225 }
1226 
1227 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1228 
1229 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1230   bool JustComments = Line->Tokens.empty();
1231   for (SmallVectorImpl<FormatToken *>::const_iterator
1232            I = CommentsBeforeNextToken.begin(),
1233            E = CommentsBeforeNextToken.end();
1234        I != E; ++I) {
1235     if ((*I)->NewlinesBefore && JustComments) {
1236       addUnwrappedLine();
1237     }
1238     pushToken(*I);
1239   }
1240   if (NewlineBeforeNext && JustComments) {
1241     addUnwrappedLine();
1242   }
1243   CommentsBeforeNextToken.clear();
1244 }
1245 
1246 void UnwrappedLineParser::nextToken() {
1247   if (eof())
1248     return;
1249   flushComments(FormatTok->NewlinesBefore > 0);
1250   pushToken(FormatTok);
1251   readToken();
1252 }
1253 
1254 void UnwrappedLineParser::readToken() {
1255   bool CommentsInCurrentLine = true;
1256   do {
1257     FormatTok = Tokens->getNextToken();
1258     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1259            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1260       // If there is an unfinished unwrapped line, we flush the preprocessor
1261       // directives only after that unwrapped line was finished later.
1262       bool SwitchToPreprocessorLines =
1263           !Line->Tokens.empty() && CurrentLines == &Lines;
1264       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1265       // Comments stored before the preprocessor directive need to be output
1266       // before the preprocessor directive, at the same level as the
1267       // preprocessor directive, as we consider them to apply to the directive.
1268       flushComments(FormatTok->NewlinesBefore > 0);
1269       parsePPDirective();
1270     }
1271 
1272     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1273         !Line->InPPDirective) {
1274       continue;
1275     }
1276 
1277     if (!FormatTok->Tok.is(tok::comment))
1278       return;
1279     if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) {
1280       CommentsInCurrentLine = false;
1281     }
1282     if (CommentsInCurrentLine) {
1283       pushToken(FormatTok);
1284     } else {
1285       CommentsBeforeNextToken.push_back(FormatTok);
1286     }
1287   } while (!eof());
1288 }
1289 
1290 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1291   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1292   if (MustBreakBeforeNextToken) {
1293     Line->Tokens.back().Tok->MustBreakBefore = true;
1294     MustBreakBeforeNextToken = false;
1295   }
1296 }
1297 
1298 } // end namespace format
1299 } // end namespace clang
1300