1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-parser"
17 
18 #include "UnwrappedLineParser.h"
19 #include "llvm/Support/Debug.h"
20 
21 namespace clang {
22 namespace format {
23 
24 class FormatTokenSource {
25 public:
26   virtual ~FormatTokenSource() {}
27   virtual FormatToken *getNextToken() = 0;
28 
29   virtual unsigned getPosition() = 0;
30   virtual FormatToken *setPosition(unsigned Position) = 0;
31 };
32 
33 namespace {
34 
35 class ScopedDeclarationState {
36 public:
37   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
38                          bool MustBeDeclaration)
39       : Line(Line), Stack(Stack) {
40     Line.MustBeDeclaration = MustBeDeclaration;
41     Stack.push_back(MustBeDeclaration);
42   }
43   ~ScopedDeclarationState() {
44     Stack.pop_back();
45     if (!Stack.empty())
46       Line.MustBeDeclaration = Stack.back();
47     else
48       Line.MustBeDeclaration = true;
49   }
50 
51 private:
52   UnwrappedLine &Line;
53   std::vector<bool> &Stack;
54 };
55 
56 class ScopedMacroState : public FormatTokenSource {
57 public:
58   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
59                    FormatToken *&ResetToken, bool &StructuralError)
60       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
61         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
62         StructuralError(StructuralError),
63         PreviousStructuralError(StructuralError), Token(NULL) {
64     TokenSource = this;
65     Line.Level = 0;
66     Line.InPPDirective = true;
67   }
68 
69   ~ScopedMacroState() {
70     TokenSource = PreviousTokenSource;
71     ResetToken = Token;
72     Line.InPPDirective = false;
73     Line.Level = PreviousLineLevel;
74     StructuralError = PreviousStructuralError;
75   }
76 
77   virtual FormatToken *getNextToken() {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); }
88 
89   virtual FormatToken *setPosition(unsigned Position) {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113   bool &StructuralError;
114   bool PreviousStructuralError;
115 
116   FormatToken *Token;
117 };
118 
119 } // end anonymous namespace
120 
121 class ScopedLineState {
122 public:
123   ScopedLineState(UnwrappedLineParser &Parser,
124                   bool SwitchToPreprocessorLines = false)
125       : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) {
126     if (SwitchToPreprocessorLines)
127       Parser.CurrentLines = &Parser.PreprocessorDirectives;
128     PreBlockLine = Parser.Line.take();
129     Parser.Line.reset(new UnwrappedLine());
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line.reset(PreBlockLine);
140     Parser.MustBreakBeforeNextToken = true;
141     if (SwitchToPreprocessorLines)
142       Parser.CurrentLines = &Parser.Lines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147   const bool SwitchToPreprocessorLines;
148 
149   UnwrappedLine *PreBlockLine;
150 };
151 
152 namespace {
153 
154 class IndexedTokenSource : public FormatTokenSource {
155 public:
156   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
157       : Tokens(Tokens), Position(-1) {}
158 
159   virtual FormatToken *getNextToken() {
160     ++Position;
161     return Tokens[Position];
162   }
163 
164   virtual unsigned getPosition() {
165     assert(Position >= 0);
166     return Position;
167   }
168 
169   virtual FormatToken *setPosition(unsigned P) {
170     Position = P;
171     return Tokens[Position];
172   }
173 
174 private:
175   ArrayRef<FormatToken *> Tokens;
176   int Position;
177 };
178 
179 } // end anonymous namespace
180 
181 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
182                                          ArrayRef<FormatToken *> Tokens,
183                                          UnwrappedLineConsumer &Callback)
184     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
185       CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL),
186       Callback(Callback), AllTokens(Tokens) {}
187 
188 bool UnwrappedLineParser::parse() {
189   DEBUG(llvm::dbgs() << "----\n");
190   IndexedTokenSource TokenSource(AllTokens);
191   Tokens = &TokenSource;
192   readToken();
193   parseFile();
194   for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end();
195        I != E; ++I) {
196     Callback.consumeUnwrappedLine(*I);
197   }
198 
199   // Create line with eof token.
200   pushToken(FormatTok);
201   Callback.consumeUnwrappedLine(*Line);
202   return StructuralError;
203 }
204 
205 void UnwrappedLineParser::parseFile() {
206   ScopedDeclarationState DeclarationState(
207       *Line, DeclarationScopeStack,
208       /*MustBeDeclaration=*/ !Line->InPPDirective);
209   parseLevel(/*HasOpeningBrace=*/false);
210   // Make sure to format the remaining tokens.
211   flushComments(true);
212   addUnwrappedLine();
213 }
214 
215 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
216   bool SwitchLabelEncountered = false;
217   do {
218     switch (FormatTok->Tok.getKind()) {
219     case tok::comment:
220       nextToken();
221       addUnwrappedLine();
222       break;
223     case tok::l_brace:
224       // FIXME: Add parameter whether this can happen - if this happens, we must
225       // be in a non-declaration context.
226       parseBlock(/*MustBeDeclaration=*/false);
227       addUnwrappedLine();
228       break;
229     case tok::r_brace:
230       if (HasOpeningBrace)
231         return;
232       StructuralError = true;
233       nextToken();
234       addUnwrappedLine();
235       break;
236     case tok::kw_default:
237     case tok::kw_case:
238       if (!SwitchLabelEncountered &&
239           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
240         ++Line->Level;
241       SwitchLabelEncountered = true;
242       parseStructuralElement();
243       break;
244     default:
245       parseStructuralElement();
246       break;
247     }
248   } while (!eof());
249 }
250 
251 void UnwrappedLineParser::calculateBraceTypes() {
252   // We'll parse forward through the tokens until we hit
253   // a closing brace or eof - note that getNextToken() will
254   // parse macros, so this will magically work inside macro
255   // definitions, too.
256   unsigned StoredPosition = Tokens->getPosition();
257   unsigned Position = StoredPosition;
258   FormatToken *Tok = FormatTok;
259   // Keep a stack of positions of lbrace tokens. We will
260   // update information about whether an lbrace starts a
261   // braced init list or a different block during the loop.
262   SmallVector<FormatToken *, 8> LBraceStack;
263   assert(Tok->Tok.is(tok::l_brace));
264   do {
265     // Get next none-comment token.
266     FormatToken *NextTok;
267     unsigned ReadTokens = 0;
268     do {
269       NextTok = Tokens->getNextToken();
270       ++ReadTokens;
271     } while (NextTok->is(tok::comment));
272 
273     switch (Tok->Tok.getKind()) {
274     case tok::l_brace:
275       LBraceStack.push_back(Tok);
276       break;
277     case tok::r_brace:
278       if (!LBraceStack.empty()) {
279         if (LBraceStack.back()->BlockKind == BK_Unknown) {
280           // If there is a comma, semicolon or right paren after the closing
281           // brace, we assume this is a braced initializer list.  Note that
282           // regardless how we mark inner braces here, we will overwrite the
283           // BlockKind later if we parse a braced list (where all blocks inside
284           // are by default braced lists), or when we explicitly detect blocks
285           // (for example while parsing lambdas).
286           //
287           // We exclude + and - as they can be ObjC visibility modifiers.
288           if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren,
289                                tok::l_brace, tok::colon) ||
290               (NextTok->isBinaryOperator() &&
291                !NextTok->isOneOf(tok::plus, tok::minus))) {
292             Tok->BlockKind = BK_BracedInit;
293             LBraceStack.back()->BlockKind = BK_BracedInit;
294           } else {
295             Tok->BlockKind = BK_Block;
296             LBraceStack.back()->BlockKind = BK_Block;
297           }
298         }
299         LBraceStack.pop_back();
300       }
301       break;
302     case tok::semi:
303     case tok::kw_if:
304     case tok::kw_while:
305     case tok::kw_for:
306     case tok::kw_switch:
307     case tok::kw_try:
308       if (!LBraceStack.empty())
309         LBraceStack.back()->BlockKind = BK_Block;
310       break;
311     default:
312       break;
313     }
314     Tok = NextTok;
315     Position += ReadTokens;
316   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
317   // Assume other blocks for all unclosed opening braces.
318   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
319     if (LBraceStack[i]->BlockKind == BK_Unknown)
320       LBraceStack[i]->BlockKind = BK_Block;
321   }
322 
323   FormatTok = Tokens->setPosition(StoredPosition);
324 }
325 
326 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel) {
327   assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
328   unsigned InitialLevel = Line->Level;
329   nextToken();
330 
331   addUnwrappedLine();
332 
333   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
334                                           MustBeDeclaration);
335   if (AddLevel)
336     ++Line->Level;
337   parseLevel(/*HasOpeningBrace=*/true);
338 
339   if (!FormatTok->Tok.is(tok::r_brace)) {
340     Line->Level = InitialLevel;
341     StructuralError = true;
342     return;
343   }
344 
345   nextToken(); // Munch the closing brace.
346   Line->Level = InitialLevel;
347 }
348 
349 void UnwrappedLineParser::parseChildBlock() {
350   FormatTok->BlockKind = BK_Block;
351   nextToken();
352   {
353     ScopedLineState LineState(*this);
354     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
355                                             /*MustBeDeclaration=*/false);
356     Line->Level += 1;
357     parseLevel(/*HasOpeningBrace=*/true);
358     Line->Level -= 1;
359   }
360   nextToken();
361 }
362 
363 void UnwrappedLineParser::parsePPDirective() {
364   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
365   ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
366   nextToken();
367 
368   if (FormatTok->Tok.getIdentifierInfo() == NULL) {
369     parsePPUnknown();
370     return;
371   }
372 
373   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
374   case tok::pp_define:
375     parsePPDefine();
376     return;
377   case tok::pp_if:
378     parsePPIf();
379     break;
380   case tok::pp_ifdef:
381   case tok::pp_ifndef:
382     parsePPIfdef();
383     break;
384   case tok::pp_else:
385     parsePPElse();
386     break;
387   case tok::pp_elif:
388     parsePPElIf();
389     break;
390   case tok::pp_endif:
391     parsePPEndIf();
392     break;
393   default:
394     parsePPUnknown();
395     break;
396   }
397 }
398 
399 void UnwrappedLineParser::pushPPConditional() {
400   if (!PPStack.empty() && PPStack.back() == PP_Unreachable)
401     PPStack.push_back(PP_Unreachable);
402   else
403     PPStack.push_back(PP_Conditional);
404 }
405 
406 void UnwrappedLineParser::parsePPIf() {
407   nextToken();
408   if ((FormatTok->Tok.isLiteral() &&
409        StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) ==
410            "0") ||
411       FormatTok->Tok.is(tok::kw_false)) {
412     PPStack.push_back(PP_Unreachable);
413   } else {
414     pushPPConditional();
415   }
416   parsePPUnknown();
417 }
418 
419 void UnwrappedLineParser::parsePPIfdef() {
420   pushPPConditional();
421   parsePPUnknown();
422 }
423 
424 void UnwrappedLineParser::parsePPElse() {
425   if (!PPStack.empty())
426     PPStack.pop_back();
427   pushPPConditional();
428   parsePPUnknown();
429 }
430 
431 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
432 
433 void UnwrappedLineParser::parsePPEndIf() {
434   if (!PPStack.empty())
435     PPStack.pop_back();
436   parsePPUnknown();
437 }
438 
439 void UnwrappedLineParser::parsePPDefine() {
440   nextToken();
441 
442   if (FormatTok->Tok.getKind() != tok::identifier) {
443     parsePPUnknown();
444     return;
445   }
446   nextToken();
447   if (FormatTok->Tok.getKind() == tok::l_paren &&
448       FormatTok->WhitespaceRange.getBegin() ==
449           FormatTok->WhitespaceRange.getEnd()) {
450     parseParens();
451   }
452   addUnwrappedLine();
453   Line->Level = 1;
454 
455   // Errors during a preprocessor directive can only affect the layout of the
456   // preprocessor directive, and thus we ignore them. An alternative approach
457   // would be to use the same approach we use on the file level (no
458   // re-indentation if there was a structural error) within the macro
459   // definition.
460   parseFile();
461 }
462 
463 void UnwrappedLineParser::parsePPUnknown() {
464   do {
465     nextToken();
466   } while (!eof());
467   addUnwrappedLine();
468 }
469 
470 // Here we blacklist certain tokens that are not usually the first token in an
471 // unwrapped line. This is used in attempt to distinguish macro calls without
472 // trailing semicolons from other constructs split to several lines.
473 bool tokenCanStartNewLine(clang::Token Tok) {
474   // Semicolon can be a null-statement, l_square can be a start of a macro or
475   // a C++11 attribute, but this doesn't seem to be common.
476   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
477          Tok.isNot(tok::l_square) &&
478          // Tokens that can only be used as binary operators and a part of
479          // overloaded operator names.
480          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
481          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
482          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
483          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
484          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
485          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
486          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
487          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
488          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
489          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
490          Tok.isNot(tok::lesslessequal) &&
491          // Colon is used in labels, base class lists, initializer lists,
492          // range-based for loops, ternary operator, but should never be the
493          // first token in an unwrapped line.
494          Tok.isNot(tok::colon);
495 }
496 
497 void UnwrappedLineParser::parseStructuralElement() {
498   assert(!FormatTok->Tok.is(tok::l_brace));
499   switch (FormatTok->Tok.getKind()) {
500   case tok::at:
501     nextToken();
502     if (FormatTok->Tok.is(tok::l_brace)) {
503       parseBracedList();
504       break;
505     }
506     switch (FormatTok->Tok.getObjCKeywordID()) {
507     case tok::objc_public:
508     case tok::objc_protected:
509     case tok::objc_package:
510     case tok::objc_private:
511       return parseAccessSpecifier();
512     case tok::objc_interface:
513     case tok::objc_implementation:
514       return parseObjCInterfaceOrImplementation();
515     case tok::objc_protocol:
516       return parseObjCProtocol();
517     case tok::objc_end:
518       return; // Handled by the caller.
519     case tok::objc_optional:
520     case tok::objc_required:
521       nextToken();
522       addUnwrappedLine();
523       return;
524     default:
525       break;
526     }
527     break;
528   case tok::kw_namespace:
529     parseNamespace();
530     return;
531   case tok::kw_inline:
532     nextToken();
533     if (FormatTok->Tok.is(tok::kw_namespace)) {
534       parseNamespace();
535       return;
536     }
537     break;
538   case tok::kw_public:
539   case tok::kw_protected:
540   case tok::kw_private:
541     parseAccessSpecifier();
542     return;
543   case tok::kw_if:
544     parseIfThenElse();
545     return;
546   case tok::kw_for:
547   case tok::kw_while:
548     parseForOrWhileLoop();
549     return;
550   case tok::kw_do:
551     parseDoWhile();
552     return;
553   case tok::kw_switch:
554     parseSwitch();
555     return;
556   case tok::kw_default:
557     nextToken();
558     parseLabel();
559     return;
560   case tok::kw_case:
561     parseCaseLabel();
562     return;
563   case tok::kw_return:
564     parseReturn();
565     return;
566   case tok::kw_extern:
567     nextToken();
568     if (FormatTok->Tok.is(tok::string_literal)) {
569       nextToken();
570       if (FormatTok->Tok.is(tok::l_brace)) {
571         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
572         addUnwrappedLine();
573         return;
574       }
575     }
576     // In all other cases, parse the declaration.
577     break;
578   default:
579     break;
580   }
581   do {
582     switch (FormatTok->Tok.getKind()) {
583     case tok::at:
584       nextToken();
585       if (FormatTok->Tok.is(tok::l_brace))
586         parseBracedList();
587       break;
588     case tok::kw_enum:
589       parseEnum();
590       break;
591     case tok::kw_struct:
592     case tok::kw_union:
593     case tok::kw_class:
594       parseRecord();
595       // A record declaration or definition is always the start of a structural
596       // element.
597       break;
598     case tok::semi:
599       nextToken();
600       addUnwrappedLine();
601       return;
602     case tok::r_brace:
603       addUnwrappedLine();
604       return;
605     case tok::l_paren:
606       parseParens();
607       break;
608     case tok::caret:
609       nextToken();
610       if (FormatTok->is(tok::l_brace)) {
611         parseChildBlock();
612       }
613       break;
614     case tok::l_brace:
615       if (!tryToParseBracedList()) {
616         // A block outside of parentheses must be the last part of a
617         // structural element.
618         // FIXME: Figure out cases where this is not true, and add projections
619         // for them (the one we know is missing are lambdas).
620         if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
621             Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup ||
622             Style.BreakBeforeBraces == FormatStyle::BS_Allman)
623           addUnwrappedLine();
624         parseBlock(/*MustBeDeclaration=*/false);
625         addUnwrappedLine();
626         return;
627       }
628       // Otherwise this was a braced init list, and the structural
629       // element continues.
630       break;
631     case tok::identifier: {
632       StringRef Text = FormatTok->TokenText;
633       nextToken();
634       if (Line->Tokens.size() == 1) {
635         if (FormatTok->Tok.is(tok::colon)) {
636           parseLabel();
637           return;
638         }
639         // Recognize function-like macro usages without trailing semicolon.
640         if (FormatTok->Tok.is(tok::l_paren)) {
641           parseParens();
642           if (FormatTok->HasUnescapedNewline &&
643               tokenCanStartNewLine(FormatTok->Tok)) {
644             addUnwrappedLine();
645             return;
646           }
647         } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 &&
648                    Text == Text.upper()) {
649           // Recognize free-standing macros like Q_OBJECT.
650           addUnwrappedLine();
651           return;
652         }
653       }
654       break;
655     }
656     case tok::equal:
657       nextToken();
658       if (FormatTok->Tok.is(tok::l_brace)) {
659         parseBracedList();
660       }
661       break;
662     case tok::l_square:
663       tryToParseLambda();
664       break;
665     default:
666       nextToken();
667       break;
668     }
669   } while (!eof());
670 }
671 
672 void UnwrappedLineParser::tryToParseLambda() {
673   if (!tryToParseLambdaIntroducer()) {
674     return;
675   }
676   if (FormatTok->is(tok::l_paren)) {
677     parseParens();
678   }
679 
680   while (FormatTok->isNot(tok::l_brace)) {
681     switch (FormatTok->Tok.getKind()) {
682       case tok::l_brace:
683         break;
684         return;
685       case tok::l_paren:
686         parseParens();
687         break;
688       case tok::semi:
689       case tok::equal:
690       case tok::eof:
691         return;
692       default:
693         nextToken();
694         break;
695     }
696   }
697   parseChildBlock();
698 }
699 
700 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
701   nextToken();
702   if (FormatTok->is(tok::equal)) {
703     nextToken();
704     if (FormatTok->is(tok::r_square)) return true;
705     if (FormatTok->isNot(tok::comma)) return false;
706     nextToken();
707   } else if (FormatTok->is(tok::amp)) {
708     nextToken();
709     if (FormatTok->is(tok::r_square)) return true;
710     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
711       return false;
712     }
713     if (FormatTok->is(tok::comma)) nextToken();
714   } else if (FormatTok->is(tok::r_square)) {
715     nextToken();
716     return true;
717   }
718   do {
719     if (FormatTok->is(tok::amp)) nextToken();
720     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this)) return false;
721     nextToken();
722     if (FormatTok->is(tok::comma)) {
723       nextToken();
724     } else if (FormatTok->is(tok::r_square)) {
725       nextToken();
726       return true;
727     } else {
728       return false;
729     }
730   } while (!eof());
731   return false;
732 }
733 
734 bool UnwrappedLineParser::tryToParseBracedList() {
735   if (FormatTok->BlockKind == BK_Unknown)
736     calculateBraceTypes();
737   assert(FormatTok->BlockKind != BK_Unknown);
738   if (FormatTok->BlockKind == BK_Block)
739     return false;
740   parseBracedList();
741   return true;
742 }
743 
744 void UnwrappedLineParser::parseBracedList() {
745   nextToken();
746 
747   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
748   // replace this by using parseAssigmentExpression() inside.
749   do {
750     // FIXME: When we start to support lambdas, we'll want to parse them away
751     // here, otherwise our bail-out scenarios below break. The better solution
752     // might be to just implement a more or less complete expression parser.
753     switch (FormatTok->Tok.getKind()) {
754     case tok::caret:
755       nextToken();
756       if (FormatTok->is(tok::l_brace)) {
757         parseChildBlock();
758       }
759       break;
760     case tok::l_square:
761       tryToParseLambda();
762       break;
763     case tok::l_brace:
764       // Assume there are no blocks inside a braced init list apart
765       // from the ones we explicitly parse out (like lambdas).
766       FormatTok->BlockKind = BK_BracedInit;
767       parseBracedList();
768       break;
769     case tok::r_brace:
770       nextToken();
771       return;
772     case tok::semi:
773       // Probably a missing closing brace. Bail out.
774       return;
775     case tok::comma:
776       nextToken();
777       break;
778     default:
779       nextToken();
780       break;
781     }
782   } while (!eof());
783 }
784 
785 void UnwrappedLineParser::parseReturn() {
786   nextToken();
787 
788   do {
789     switch (FormatTok->Tok.getKind()) {
790     case tok::l_brace:
791       parseBracedList();
792       if (FormatTok->Tok.isNot(tok::semi)) {
793         // Assume missing ';'.
794         addUnwrappedLine();
795         return;
796       }
797       break;
798     case tok::l_paren:
799       parseParens();
800       break;
801     case tok::r_brace:
802       // Assume missing ';'.
803       addUnwrappedLine();
804       return;
805     case tok::semi:
806       nextToken();
807       addUnwrappedLine();
808       return;
809     case tok::l_square:
810       tryToParseLambda();
811       break;
812     default:
813       nextToken();
814       break;
815     }
816   } while (!eof());
817 }
818 
819 void UnwrappedLineParser::parseParens() {
820   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
821   nextToken();
822   do {
823     switch (FormatTok->Tok.getKind()) {
824     case tok::l_paren:
825       parseParens();
826       break;
827     case tok::r_paren:
828       nextToken();
829       return;
830     case tok::r_brace:
831       // A "}" inside parenthesis is an error if there wasn't a matching "{".
832       return;
833     case tok::l_brace: {
834       if (!tryToParseBracedList()) {
835         parseChildBlock();
836       }
837       break;
838     }
839     case tok::at:
840       nextToken();
841       if (FormatTok->Tok.is(tok::l_brace))
842         parseBracedList();
843       break;
844     default:
845       nextToken();
846       break;
847     }
848   } while (!eof());
849 }
850 
851 void UnwrappedLineParser::parseIfThenElse() {
852   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
853   nextToken();
854   if (FormatTok->Tok.is(tok::l_paren))
855     parseParens();
856   bool NeedsUnwrappedLine = false;
857   if (FormatTok->Tok.is(tok::l_brace)) {
858     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
859       addUnwrappedLine();
860     parseBlock(/*MustBeDeclaration=*/false);
861     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
862       addUnwrappedLine();
863     else
864       NeedsUnwrappedLine = true;
865   } else {
866     addUnwrappedLine();
867     ++Line->Level;
868     parseStructuralElement();
869     --Line->Level;
870   }
871   if (FormatTok->Tok.is(tok::kw_else)) {
872     nextToken();
873     if (FormatTok->Tok.is(tok::l_brace)) {
874       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
875         addUnwrappedLine();
876       parseBlock(/*MustBeDeclaration=*/false);
877       addUnwrappedLine();
878     } else if (FormatTok->Tok.is(tok::kw_if)) {
879       parseIfThenElse();
880     } else {
881       addUnwrappedLine();
882       ++Line->Level;
883       parseStructuralElement();
884       --Line->Level;
885     }
886   } else if (NeedsUnwrappedLine) {
887     addUnwrappedLine();
888   }
889 }
890 
891 void UnwrappedLineParser::parseNamespace() {
892   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
893   nextToken();
894   if (FormatTok->Tok.is(tok::identifier))
895     nextToken();
896   if (FormatTok->Tok.is(tok::l_brace)) {
897     if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
898         Style.BreakBeforeBraces == FormatStyle::BS_Allman)
899       addUnwrappedLine();
900 
901     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
902                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
903                      DeclarationScopeStack.size() > 1);
904     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
905     // Munch the semicolon after a namespace. This is more common than one would
906     // think. Puttin the semicolon into its own line is very ugly.
907     if (FormatTok->Tok.is(tok::semi))
908       nextToken();
909     addUnwrappedLine();
910   }
911   // FIXME: Add error handling.
912 }
913 
914 void UnwrappedLineParser::parseForOrWhileLoop() {
915   assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) &&
916          "'for' or 'while' expected");
917   nextToken();
918   if (FormatTok->Tok.is(tok::l_paren))
919     parseParens();
920   if (FormatTok->Tok.is(tok::l_brace)) {
921     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
922       addUnwrappedLine();
923     parseBlock(/*MustBeDeclaration=*/false);
924     addUnwrappedLine();
925   } else {
926     addUnwrappedLine();
927     ++Line->Level;
928     parseStructuralElement();
929     --Line->Level;
930   }
931 }
932 
933 void UnwrappedLineParser::parseDoWhile() {
934   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
935   nextToken();
936   if (FormatTok->Tok.is(tok::l_brace)) {
937     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
938       addUnwrappedLine();
939     parseBlock(/*MustBeDeclaration=*/false);
940   } else {
941     addUnwrappedLine();
942     ++Line->Level;
943     parseStructuralElement();
944     --Line->Level;
945   }
946 
947   // FIXME: Add error handling.
948   if (!FormatTok->Tok.is(tok::kw_while)) {
949     addUnwrappedLine();
950     return;
951   }
952 
953   nextToken();
954   parseStructuralElement();
955 }
956 
957 void UnwrappedLineParser::parseLabel() {
958   nextToken();
959   unsigned OldLineLevel = Line->Level;
960   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
961     --Line->Level;
962   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
963     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
964       addUnwrappedLine();
965     parseBlock(/*MustBeDeclaration=*/false);
966     if (FormatTok->Tok.is(tok::kw_break)) {
967       // "break;" after "}" on its own line only for BS_Allman
968       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
969         addUnwrappedLine();
970       parseStructuralElement();
971     }
972   }
973   addUnwrappedLine();
974   Line->Level = OldLineLevel;
975 }
976 
977 void UnwrappedLineParser::parseCaseLabel() {
978   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
979   // FIXME: fix handling of complex expressions here.
980   do {
981     nextToken();
982   } while (!eof() && !FormatTok->Tok.is(tok::colon));
983   parseLabel();
984 }
985 
986 void UnwrappedLineParser::parseSwitch() {
987   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
988   nextToken();
989   if (FormatTok->Tok.is(tok::l_paren))
990     parseParens();
991   if (FormatTok->Tok.is(tok::l_brace)) {
992     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
993       addUnwrappedLine();
994     parseBlock(/*MustBeDeclaration=*/false);
995     addUnwrappedLine();
996   } else {
997     addUnwrappedLine();
998     ++Line->Level;
999     parseStructuralElement();
1000     --Line->Level;
1001   }
1002 }
1003 
1004 void UnwrappedLineParser::parseAccessSpecifier() {
1005   nextToken();
1006   // Otherwise, we don't know what it is, and we'd better keep the next token.
1007   if (FormatTok->Tok.is(tok::colon))
1008     nextToken();
1009   addUnwrappedLine();
1010 }
1011 
1012 void UnwrappedLineParser::parseEnum() {
1013   nextToken();
1014   // Eat up enum class ...
1015   if (FormatTok->Tok.is(tok::kw_class) ||
1016       FormatTok->Tok.is(tok::kw_struct))
1017       nextToken();
1018   if (FormatTok->Tok.is(tok::identifier) ||
1019       FormatTok->Tok.is(tok::kw___attribute) ||
1020       FormatTok->Tok.is(tok::kw___declspec)) {
1021     nextToken();
1022     // We can have macros or attributes in between 'enum' and the enum name.
1023     if (FormatTok->Tok.is(tok::l_paren)) {
1024       parseParens();
1025     }
1026     if (FormatTok->Tok.is(tok::identifier))
1027       nextToken();
1028   }
1029   bool HasError = false;
1030   if (FormatTok->Tok.is(tok::l_brace)) {
1031     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1032       addUnwrappedLine();
1033     nextToken();
1034     addUnwrappedLine();
1035     ++Line->Level;
1036     do {
1037       switch (FormatTok->Tok.getKind()) {
1038       case tok::l_paren:
1039         parseParens();
1040         break;
1041       case tok::r_brace:
1042         addUnwrappedLine();
1043         nextToken();
1044         --Line->Level;
1045         if (HasError) {
1046           if (FormatTok->is(tok::semi))
1047             nextToken();
1048           addUnwrappedLine();
1049         }
1050         return;
1051       case tok::semi:
1052         HasError = true;
1053         nextToken();
1054         addUnwrappedLine();
1055         break;
1056       case tok::comma:
1057         nextToken();
1058         addUnwrappedLine();
1059         break;
1060       default:
1061         nextToken();
1062         break;
1063       }
1064     } while (!eof());
1065   }
1066   // We fall through to parsing a structural element afterwards, so that in
1067   // enum A {} n, m;
1068   // "} n, m;" will end up in one unwrapped line.
1069 }
1070 
1071 void UnwrappedLineParser::parseRecord() {
1072   nextToken();
1073   if (FormatTok->Tok.is(tok::identifier) ||
1074       FormatTok->Tok.is(tok::kw___attribute) ||
1075       FormatTok->Tok.is(tok::kw___declspec)) {
1076     nextToken();
1077     // We can have macros or attributes in between 'class' and the class name.
1078     if (FormatTok->Tok.is(tok::l_paren)) {
1079       parseParens();
1080     }
1081     // The actual identifier can be a nested name specifier, and in macros
1082     // it is often token-pasted.
1083     while (FormatTok->Tok.is(tok::identifier) ||
1084            FormatTok->Tok.is(tok::coloncolon) ||
1085            FormatTok->Tok.is(tok::hashhash))
1086       nextToken();
1087 
1088     // Note that parsing away template declarations here leads to incorrectly
1089     // accepting function declarations as record declarations.
1090     // In general, we cannot solve this problem. Consider:
1091     // class A<int> B() {}
1092     // which can be a function definition or a class definition when B() is a
1093     // macro. If we find enough real-world cases where this is a problem, we
1094     // can parse for the 'template' keyword in the beginning of the statement,
1095     // and thus rule out the record production in case there is no template
1096     // (this would still leave us with an ambiguity between template function
1097     // and class declarations).
1098     if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
1099       while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
1100         if (FormatTok->Tok.is(tok::semi))
1101           return;
1102         nextToken();
1103       }
1104     }
1105   }
1106   if (FormatTok->Tok.is(tok::l_brace)) {
1107     if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
1108         Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1109       addUnwrappedLine();
1110 
1111     parseBlock(/*MustBeDeclaration=*/true);
1112   }
1113   // We fall through to parsing a structural element afterwards, so
1114   // class A {} n, m;
1115   // will end up in one unwrapped line.
1116 }
1117 
1118 void UnwrappedLineParser::parseObjCProtocolList() {
1119   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1120   do
1121     nextToken();
1122   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1123   nextToken(); // Skip '>'.
1124 }
1125 
1126 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1127   do {
1128     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1129       nextToken();
1130       addUnwrappedLine();
1131       break;
1132     }
1133     if (FormatTok->is(tok::l_brace)) {
1134       parseBlock(/*MustBeDeclaration=*/false);
1135       // In ObjC interfaces, nothing should be following the "}".
1136       addUnwrappedLine();
1137     } else {
1138       parseStructuralElement();
1139     }
1140   } while (!eof());
1141 }
1142 
1143 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1144   nextToken();
1145   nextToken(); // interface name
1146 
1147   // @interface can be followed by either a base class, or a category.
1148   if (FormatTok->Tok.is(tok::colon)) {
1149     nextToken();
1150     nextToken(); // base class name
1151   } else if (FormatTok->Tok.is(tok::l_paren))
1152     // Skip category, if present.
1153     parseParens();
1154 
1155   if (FormatTok->Tok.is(tok::less))
1156     parseObjCProtocolList();
1157 
1158   // If instance variables are present, keep the '{' on the first line too.
1159   if (FormatTok->Tok.is(tok::l_brace))
1160     parseBlock(/*MustBeDeclaration=*/true);
1161 
1162   // With instance variables, this puts '}' on its own line.  Without instance
1163   // variables, this ends the @interface line.
1164   addUnwrappedLine();
1165 
1166   parseObjCUntilAtEnd();
1167 }
1168 
1169 void UnwrappedLineParser::parseObjCProtocol() {
1170   nextToken();
1171   nextToken(); // protocol name
1172 
1173   if (FormatTok->Tok.is(tok::less))
1174     parseObjCProtocolList();
1175 
1176   // Check for protocol declaration.
1177   if (FormatTok->Tok.is(tok::semi)) {
1178     nextToken();
1179     return addUnwrappedLine();
1180   }
1181 
1182   addUnwrappedLine();
1183   parseObjCUntilAtEnd();
1184 }
1185 
1186 void UnwrappedLineParser::addUnwrappedLine() {
1187   if (Line->Tokens.empty())
1188     return;
1189   DEBUG({
1190     llvm::dbgs() << "Line(" << Line->Level << ")"
1191                  << (Line->InPPDirective ? " MACRO" : "") << ": ";
1192     for (std::list<FormatToken *>::iterator I = Line->Tokens.begin(),
1193                                             E = Line->Tokens.end();
1194          I != E; ++I) {
1195       llvm::dbgs() << (*I)->Tok.getName() << " ";
1196     }
1197     llvm::dbgs() << "\n";
1198   });
1199   CurrentLines->push_back(*Line);
1200   Line->Tokens.clear();
1201   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1202     for (std::vector<UnwrappedLine>::iterator
1203              I = PreprocessorDirectives.begin(),
1204              E = PreprocessorDirectives.end();
1205          I != E; ++I) {
1206       CurrentLines->push_back(*I);
1207     }
1208     PreprocessorDirectives.clear();
1209   }
1210 }
1211 
1212 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1213 
1214 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1215   bool JustComments = Line->Tokens.empty();
1216   for (SmallVectorImpl<FormatToken *>::const_iterator
1217            I = CommentsBeforeNextToken.begin(),
1218            E = CommentsBeforeNextToken.end();
1219        I != E; ++I) {
1220     if ((*I)->NewlinesBefore && JustComments) {
1221       addUnwrappedLine();
1222     }
1223     pushToken(*I);
1224   }
1225   if (NewlineBeforeNext && JustComments) {
1226     addUnwrappedLine();
1227   }
1228   CommentsBeforeNextToken.clear();
1229 }
1230 
1231 void UnwrappedLineParser::nextToken() {
1232   if (eof())
1233     return;
1234   flushComments(FormatTok->NewlinesBefore > 0);
1235   pushToken(FormatTok);
1236   readToken();
1237 }
1238 
/// \brief Fetches tokens from the token source into FormatTok until it holds
/// a token that is not a comment.
///
/// Preprocessor directives met on the way are parsed on the spot, tokens are
/// skipped while the top of PPStack is PP_Unreachable, and comments are
/// either added to the current line or stored in CommentsBeforeNextToken.
void UnwrappedLineParser::readToken() {
  // Stays true until the first comment that is preceded by a newline (or is
  // the first token); it is never set back to true within this call.
  bool CommentsInCurrentLine = true;
  do {
    FormatTok = Tokens->getNextToken();
    // A '#' begins a directive only outside of a directive, and only when it
    // has an unescaped newline before it or is the very first token. This is
    // a loop rather than an 'if' because the condition is checked again after
    // every parsePPDirective() call (presumably that call leaves FormatTok on
    // the token after the directive -- its body is not visible here).
    while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
           (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
      // If there is an unfinished unwrapped line, we flush the preprocessor
      // directives only after that unwrapped line was finished later.
      bool SwitchToPreprocessorLines =
          !Line->Tokens.empty() && CurrentLines == &Lines;
      // Scoped helper; it is destroyed at the end of each loop iteration.
      ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
      // Comments stored before the preprocessor directive need to be output
      // before the preprocessor directive, at the same level as the
      // preprocessor directive, as we consider them to apply to the directive.
      flushComments(FormatTok->NewlinesBefore > 0);
      parsePPDirective();
    }

    // Drop the token while the top of PPStack is PP_Unreachable. 'continue'
    // in a do-while jumps to the loop condition, so an EOF token still ends
    // the loop here.
    if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
        !Line->InPPDirective) {
      continue;
    }

    // Any non-comment token ends the search; it is left in FormatTok.
    if (!FormatTok->Tok.is(tok::comment))
      return;
    // From the first comment that has a newline before it (or is the first
    // token) onwards, all comments of this call are stored instead of being
    // added to the current line.
    if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) {
      CommentsInCurrentLine = false;
    }
    if (CommentsInCurrentLine) {
      pushToken(FormatTok);
    } else {
      CommentsBeforeNextToken.push_back(FormatTok);
    }
  } while (!eof());
}
1274 
1275 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1276   Line->Tokens.push_back(Tok);
1277   if (MustBreakBeforeNextToken) {
1278     Line->Tokens.back()->MustBreakBefore = true;
1279     MustBreakBeforeNextToken = false;
1280   }
1281 }
1282 
1283 } // end namespace format
1284 } // end namespace clang
1285