1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-parser"
17 
18 #include "UnwrappedLineParser.h"
19 #include "llvm/Support/Debug.h"
20 
21 namespace clang {
22 namespace format {
23 
/// \brief Abstract source of \c FormatToken pointers that can be read
/// sequentially and repositioned.
class FormatTokenSource {
public:
  virtual ~FormatTokenSource() {}
  /// \brief Advances the source by one token and returns that token.
  virtual FormatToken *getNextToken() = 0;

  /// \brief Returns the current position; the value can later be handed to
  /// \c setPosition() to rewind the source.
  virtual unsigned getPosition() = 0;
  /// \brief Moves the source to \p Position and returns the token found there.
  virtual FormatToken *setPosition(unsigned Position) = 0;
};
32 
33 class ScopedDeclarationState {
34 public:
35   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
36                          bool MustBeDeclaration)
37       : Line(Line), Stack(Stack) {
38     Line.MustBeDeclaration = MustBeDeclaration;
39     Stack.push_back(MustBeDeclaration);
40   }
41   ~ScopedDeclarationState() {
42     Stack.pop_back();
43     if (!Stack.empty())
44       Line.MustBeDeclaration = Stack.back();
45     else
46       Line.MustBeDeclaration = true;
47   }
48 
49 private:
50   UnwrappedLine &Line;
51   std::vector<bool> &Stack;
52 };
53 
54 class ScopedMacroState : public FormatTokenSource {
55 public:
56   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
57                    FormatToken *&ResetToken, bool &StructuralError)
58       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
59         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
60         StructuralError(StructuralError),
61         PreviousStructuralError(StructuralError), Token(NULL) {
62     TokenSource = this;
63     Line.Level = 0;
64     Line.InPPDirective = true;
65   }
66 
67   ~ScopedMacroState() {
68     TokenSource = PreviousTokenSource;
69     ResetToken = Token;
70     Line.InPPDirective = false;
71     Line.Level = PreviousLineLevel;
72     StructuralError = PreviousStructuralError;
73   }
74 
75   virtual FormatToken *getNextToken() {
76     // The \c UnwrappedLineParser guards against this by never calling
77     // \c getNextToken() after it has encountered the first eof token.
78     assert(!eof());
79     Token = PreviousTokenSource->getNextToken();
80     if (eof())
81       return getFakeEOF();
82     return Token;
83   }
84 
85   virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); }
86 
87   virtual FormatToken *setPosition(unsigned Position) {
88     Token = PreviousTokenSource->setPosition(Position);
89     return Token;
90   }
91 
92 private:
93   bool eof() { return Token && Token->HasUnescapedNewline; }
94 
95   FormatToken *getFakeEOF() {
96     static bool EOFInitialized = false;
97     static FormatToken FormatTok;
98     if (!EOFInitialized) {
99       FormatTok.Tok.startToken();
100       FormatTok.Tok.setKind(tok::eof);
101       EOFInitialized = true;
102     }
103     return &FormatTok;
104   }
105 
106   UnwrappedLine &Line;
107   FormatTokenSource *&TokenSource;
108   FormatToken *&ResetToken;
109   unsigned PreviousLineLevel;
110   FormatTokenSource *PreviousTokenSource;
111   bool &StructuralError;
112   bool PreviousStructuralError;
113 
114   FormatToken *Token;
115 };
116 
117 class ScopedLineState {
118 public:
119   ScopedLineState(UnwrappedLineParser &Parser,
120                   bool SwitchToPreprocessorLines = false)
121       : Parser(Parser), SwitchToPreprocessorLines(SwitchToPreprocessorLines) {
122     if (SwitchToPreprocessorLines)
123       Parser.CurrentLines = &Parser.PreprocessorDirectives;
124     PreBlockLine = Parser.Line.take();
125     Parser.Line.reset(new UnwrappedLine());
126     Parser.Line->Level = PreBlockLine->Level;
127     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
128   }
129 
130   ~ScopedLineState() {
131     if (!Parser.Line->Tokens.empty()) {
132       Parser.addUnwrappedLine();
133     }
134     assert(Parser.Line->Tokens.empty());
135     Parser.Line.reset(PreBlockLine);
136     Parser.MustBreakBeforeNextToken = true;
137     if (SwitchToPreprocessorLines)
138       Parser.CurrentLines = &Parser.Lines;
139   }
140 
141 private:
142   UnwrappedLineParser &Parser;
143   const bool SwitchToPreprocessorLines;
144 
145   UnwrappedLine *PreBlockLine;
146 };
147 
148 class IndexedTokenSource : public FormatTokenSource {
149 public:
150   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
151       : Tokens(Tokens), Position(-1) {}
152 
153   virtual FormatToken *getNextToken() {
154     ++Position;
155     return Tokens[Position];
156   }
157 
158   virtual unsigned getPosition() {
159     assert(Position >= 0);
160     return Position;
161   }
162 
163   virtual FormatToken *setPosition(unsigned P) {
164     Position = P;
165     return Tokens[Position];
166   }
167 
168 private:
169   ArrayRef<FormatToken *> Tokens;
170   int Position;
171 };
172 
/// \brief Sets up a parser over \p Tokens that reports its lines to
/// \p Callback.
///
/// Note that the \p Tokens parameter shadows the \c Tokens member: the member
/// (the active token source) starts out as NULL and is only set in parse(),
/// whereas the parameter is stored in \c AllTokens.
UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
                                         ArrayRef<FormatToken *> Tokens,
                                         UnwrappedLineConsumer &Callback)
    : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
      CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL),
      Callback(Callback), AllTokens(Tokens) {
  // One brace-kind slot per token position, all initially unknown; the slots
  // are filled in on demand by calculateBraceTypes().
  LBraces.resize(Tokens.size(), BS_Unknown);
}
181 
182 bool UnwrappedLineParser::parse() {
183   DEBUG(llvm::dbgs() << "----\n");
184   IndexedTokenSource TokenSource(AllTokens);
185   Tokens = &TokenSource;
186   readToken();
187   parseFile();
188   for (std::vector<UnwrappedLine>::iterator I = Lines.begin(), E = Lines.end();
189        I != E; ++I) {
190     Callback.consumeUnwrappedLine(*I);
191   }
192 
193   // Create line with eof token.
194   pushToken(FormatTok);
195   Callback.consumeUnwrappedLine(*Line);
196   return StructuralError;
197 }
198 
199 void UnwrappedLineParser::parseFile() {
200   ScopedDeclarationState DeclarationState(
201       *Line, DeclarationScopeStack,
202       /*MustBeDeclaration=*/ !Line->InPPDirective);
203   parseLevel(/*HasOpeningBrace=*/false);
204   // Make sure to format the remaining tokens.
205   flushComments(true);
206   addUnwrappedLine();
207 }
208 
209 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
210   do {
211     switch (FormatTok->Tok.getKind()) {
212     case tok::comment:
213       nextToken();
214       addUnwrappedLine();
215       break;
216     case tok::l_brace:
217       // FIXME: Add parameter whether this can happen - if this happens, we must
218       // be in a non-declaration context.
219       parseBlock(/*MustBeDeclaration=*/false);
220       addUnwrappedLine();
221       break;
222     case tok::r_brace:
223       if (HasOpeningBrace)
224         return;
225       StructuralError = true;
226       nextToken();
227       addUnwrappedLine();
228       break;
229     default:
230       parseStructuralElement();
231       break;
232     }
233   } while (!eof());
234 }
235 
/// \brief Scans ahead from the current '{' and records in \c LBraces, indexed
/// by token position, whether each brace seen opens a block or a braced init
/// list.
///
/// The token source is rewound to its starting position before returning.
void UnwrappedLineParser::calculateBraceTypes() {
  // We'll parse forward through the tokens until we hit
  // a closing brace or eof - note that getNextToken() will
  // parse macros, so this will magically work inside macro
  // definitions, too.
  unsigned StoredPosition = Tokens->getPosition();
  unsigned Position = StoredPosition;
  FormatToken *Tok = FormatTok;
  // Keep a stack of positions of lbrace tokens. We will
  // update information about whether an lbrace starts a
  // braced init list or a different block during the loop.
  SmallVector<unsigned, 8> LBraceStack;
  assert(Tok->Tok.is(tok::l_brace));
  do {
    // Fetch one token of lookahead; it decides how a '}' is classified.
    FormatToken *NextTok = Tokens->getNextToken();
    switch (Tok->Tok.getKind()) {
    case tok::l_brace:
      LBraceStack.push_back(Position);
      break;
    case tok::r_brace:
      if (!LBraceStack.empty()) {
        if (LBraces[LBraceStack.back()] == BS_Unknown) {
          // If there is a comma, semicolon, right paren, opening brace or
          // colon after the closing brace, we assume this is a braced
          // initializer list.

          // FIXME: Note that this currently works only because we do not
          // use the brace information while inside a braced init list.
          // Thus, if the parent is a braced init list, we consider all
          // brace blocks inside it braced init list. That works good enough
          // for now, but we will need to fix it to correctly handle lambdas.
          if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren,
                               tok::l_brace, tok::colon))
            LBraces[LBraceStack.back()] = BS_BracedInit;
          else
            LBraces[LBraceStack.back()] = BS_Block;
        }
        LBraceStack.pop_back();
      }
      break;
    // A semicolon or one of these statement keywords directly inside the
    // innermost open brace marks that brace as a block.
    case tok::semi:
    case tok::kw_if:
    case tok::kw_while:
    case tok::kw_for:
    case tok::kw_switch:
    case tok::kw_try:
      if (!LBraceStack.empty())
        LBraces[LBraceStack.back()] = BS_Block;
      break;
    default:
      break;
    }
    Tok = NextTok;
    ++Position;
  } while (Tok->Tok.isNot(tok::eof));
  // Assume other blocks for all unclosed opening braces.
  for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
    if (LBraces[LBraceStack[i]] == BS_Unknown)
      LBraces[LBraceStack[i]] = BS_Block;
  }
  // Rewind so that regular parsing continues from the brace we started at.
  FormatTok = Tokens->setPosition(StoredPosition);
}
297 
298 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration,
299                                      unsigned AddLevels) {
300   assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
301   nextToken();
302 
303   addUnwrappedLine();
304 
305   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
306                                           MustBeDeclaration);
307   Line->Level += AddLevels;
308   parseLevel(/*HasOpeningBrace=*/true);
309 
310   if (!FormatTok->Tok.is(tok::r_brace)) {
311     Line->Level -= AddLevels;
312     StructuralError = true;
313     return;
314   }
315 
316   nextToken(); // Munch the closing brace.
317   Line->Level -= AddLevels;
318 }
319 
320 void UnwrappedLineParser::parsePPDirective() {
321   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
322   ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
323   nextToken();
324 
325   if (FormatTok->Tok.getIdentifierInfo() == NULL) {
326     parsePPUnknown();
327     return;
328   }
329 
330   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
331   case tok::pp_define:
332     parsePPDefine();
333     return;
334   case tok::pp_if:
335     parsePPIf();
336     break;
337   case tok::pp_ifdef:
338   case tok::pp_ifndef:
339     parsePPIfdef();
340     break;
341   case tok::pp_else:
342     parsePPElse();
343     break;
344   case tok::pp_elif:
345     parsePPElIf();
346     break;
347   case tok::pp_endif:
348     parsePPEndIf();
349     break;
350   default:
351     parsePPUnknown();
352     break;
353   }
354 }
355 
356 void UnwrappedLineParser::pushPPConditional() {
357   if (!PPStack.empty() && PPStack.back() == PP_Unreachable)
358     PPStack.push_back(PP_Unreachable);
359   else
360     PPStack.push_back(PP_Conditional);
361 }
362 
363 void UnwrappedLineParser::parsePPIf() {
364   nextToken();
365   if ((FormatTok->Tok.isLiteral() &&
366        StringRef(FormatTok->Tok.getLiteralData(), FormatTok->Tok.getLength()) ==
367            "0") ||
368       FormatTok->Tok.is(tok::kw_false)) {
369     PPStack.push_back(PP_Unreachable);
370   } else {
371     pushPPConditional();
372   }
373   parsePPUnknown();
374 }
375 
/// \brief Parses an \c \#ifdef or \c \#ifndef directive: opens a new
/// conditional branch and consumes the rest of the directive.
void UnwrappedLineParser::parsePPIfdef() {
  pushPPConditional();
  parsePPUnknown();
}
380 
/// \brief Parses an \c \#else directive: replaces the innermost branch on
/// \c PPStack (if any) with a fresh one and consumes the rest of the
/// directive.
void UnwrappedLineParser::parsePPElse() {
  // Guard against an #else without a matching #if.
  if (!PPStack.empty())
    PPStack.pop_back();
  pushPPConditional();
  parsePPUnknown();
}
387 
/// \brief Parses an \c \#elif directive; it is treated exactly like \c \#else.
void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
389 
/// \brief Parses an \c \#endif directive: closes the innermost branch on
/// \c PPStack (if any) and consumes the rest of the directive.
void UnwrappedLineParser::parsePPEndIf() {
  // Guard against an #endif without a matching #if.
  if (!PPStack.empty())
    PPStack.pop_back();
  parsePPUnknown();
}
395 
396 void UnwrappedLineParser::parsePPDefine() {
397   nextToken();
398 
399   if (FormatTok->Tok.getKind() != tok::identifier) {
400     parsePPUnknown();
401     return;
402   }
403   nextToken();
404   if (FormatTok->Tok.getKind() == tok::l_paren &&
405       FormatTok->WhitespaceRange.getBegin() ==
406           FormatTok->WhitespaceRange.getEnd()) {
407     parseParens();
408   }
409   addUnwrappedLine();
410   Line->Level = 1;
411 
412   // Errors during a preprocessor directive can only affect the layout of the
413   // preprocessor directive, and thus we ignore them. An alternative approach
414   // would be to use the same approach we use on the file level (no
415   // re-indentation if there was a structural error) within the macro
416   // definition.
417   parseFile();
418 }
419 
/// \brief Consumes tokens until eof is reached and emits everything collected
/// so far as one unwrapped line.
///
/// At least one token is always consumed, as the loop condition is checked
/// only after the first nextToken() call.
void UnwrappedLineParser::parsePPUnknown() {
  do {
    nextToken();
  } while (!eof());
  addUnwrappedLine();
}
426 
427 // Here we blacklist certain tokens that are not usually the first token in an
428 // unwrapped line. This is used in attempt to distinguish macro calls without
429 // trailing semicolons from other constructs split to several lines.
430 bool tokenCanStartNewLine(clang::Token Tok) {
431   // Semicolon can be a null-statement, l_square can be a start of a macro or
432   // a C++11 attribute, but this doesn't seem to be common.
433   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
434          Tok.isNot(tok::l_square) &&
435          // Tokens that can only be used as binary operators and a part of
436          // overloaded operator names.
437          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
438          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
439          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
440          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
441          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
442          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
443          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
444          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
445          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
446          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
447          Tok.isNot(tok::lesslessequal) &&
448          // Colon is used in labels, base class lists, initializer lists,
449          // range-based for loops, ternary operator, but should never be the
450          // first token in an unwrapped line.
451          Tok.isNot(tok::colon);
452 }
453 
/// \brief Parses one structural element starting at the current token.
///
/// The first switch dispatches on the leading token to the dedicated parsers
/// (namespaces, access specifiers, control-flow statements, labels, Objective-C
/// constructs, ...). Everything that is not fully handled there falls through
/// to the generic loop, which consumes tokens until the element ends: after a
/// ';', in front of a '}', after a block, after a label, or after what looks
/// like a macro invocation without a trailing semicolon.
///
/// Must not be called with the current token being '{'.
void UnwrappedLineParser::parseStructuralElement() {
  assert(!FormatTok->Tok.is(tok::l_brace));
  switch (FormatTok->Tok.getKind()) {
  case tok::at:
    nextToken();
    // '@' directly followed by '{' is parsed as a braced list.
    if (FormatTok->Tok.is(tok::l_brace)) {
      parseBracedList();
      break;
    }
    switch (FormatTok->Tok.getObjCKeywordID()) {
    case tok::objc_public:
    case tok::objc_protected:
    case tok::objc_package:
    case tok::objc_private:
      return parseAccessSpecifier();
    case tok::objc_interface:
    case tok::objc_implementation:
      return parseObjCInterfaceOrImplementation();
    case tok::objc_protocol:
      return parseObjCProtocol();
    case tok::objc_end:
      return; // Handled by the caller.
    case tok::objc_optional:
    case tok::objc_required:
      nextToken();
      addUnwrappedLine();
      return;
    default:
      break;
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_inline:
    nextToken();
    // "inline namespace" is handled like a plain namespace; any other use of
    // 'inline' continues in the generic loop below.
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    parseAccessSpecifier();
    return;
  case tok::kw_if:
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    parseDoWhile();
    return;
  case tok::kw_switch:
    parseSwitch();
    return;
  case tok::kw_default:
    nextToken();
    parseLabel();
    return;
  case tok::kw_case:
    parseCaseLabel();
    return;
  case tok::kw_return:
    parseReturn();
    return;
  case tok::kw_extern:
    nextToken();
    // extern "<string>" { ... } is parsed as a declaration block without
    // additional indentation.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBlock(/*MustBeDeclaration=*/true, 0);
        addUnwrappedLine();
        return;
      }
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Generic loop: consume tokens until the structural element is complete.
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace))
        parseBracedList();
      break;
    case tok::kw_enum:
      parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      parseRecord();
      // A record declaration or definition is always the start of a structural
      // element.
      break;
    case tok::semi:
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace belongs to the enclosing level; end the line here
      // and leave the brace for the caller.
      addUnwrappedLine();
      return;
    case tok::l_paren:
      parseParens();
      break;
    case tok::l_brace:
      if (!tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
            Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup)
          addUnwrappedLine();
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::identifier: {
      StringRef Text = FormatTok->TokenText;
      nextToken();
      // The checks below only apply if the identifier is the sole token on
      // the line so far.
      if (Line->Tokens.size() == 1) {
        if (FormatTok->Tok.is(tok::colon)) {
          parseLabel();
          return;
        }
        // Recognize function-like macro usages without trailing semicolon.
        if (FormatTok->Tok.is(tok::l_paren)) {
          parseParens();
          if (FormatTok->HasUnescapedNewline &&
              tokenCanStartNewLine(FormatTok->Tok)) {
            addUnwrappedLine();
            return;
          }
        } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 &&
                   Text == Text.upper()) {
          // Recognize free-standing macros like Q_OBJECT.
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      nextToken();
      // "= {" starts a braced list.
      if (FormatTok->Tok.is(tok::l_brace)) {
        parseBracedList();
      }
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
618 
619 bool UnwrappedLineParser::tryToParseBracedList() {
620   if (LBraces[Tokens->getPosition()] == BS_Unknown)
621     calculateBraceTypes();
622   assert(LBraces[Tokens->getPosition()] != BS_Unknown);
623   if (LBraces[Tokens->getPosition()] == BS_Block)
624     return false;
625   parseBracedList();
626   return true;
627 }
628 
629 void UnwrappedLineParser::parseBracedList() {
630   nextToken();
631 
632   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
633   // replace this by using parseAssigmentExpression() inside.
634   do {
635     // FIXME: When we start to support lambdas, we'll want to parse them away
636     // here, otherwise our bail-out scenarios below break. The better solution
637     // might be to just implement a more or less complete expression parser.
638     switch (FormatTok->Tok.getKind()) {
639     case tok::l_brace:
640       parseBracedList();
641       break;
642     case tok::r_brace:
643       nextToken();
644       return;
645     case tok::semi:
646       // Probably a missing closing brace. Bail out.
647       return;
648     case tok::comma:
649       nextToken();
650       break;
651     default:
652       nextToken();
653       break;
654     }
655   } while (!eof());
656 }
657 
658 void UnwrappedLineParser::parseReturn() {
659   nextToken();
660 
661   do {
662     switch (FormatTok->Tok.getKind()) {
663     case tok::l_brace:
664       parseBracedList();
665       if (FormatTok->Tok.isNot(tok::semi)) {
666         // Assume missing ';'.
667         addUnwrappedLine();
668         return;
669       }
670       break;
671     case tok::l_paren:
672       parseParens();
673       break;
674     case tok::r_brace:
675       // Assume missing ';'.
676       addUnwrappedLine();
677       return;
678     case tok::semi:
679       nextToken();
680       addUnwrappedLine();
681       return;
682     default:
683       nextToken();
684       break;
685     }
686   } while (!eof());
687 }
688 
/// \brief Consumes a parenthesized token sequence, including nested
/// parentheses, up to and including the matching ')'.
///
/// Braces inside the parentheses are parsed either as braced lists or, if they
/// open a block, as a nested level whose lines are built on a temporary line
/// so that the surrounding line is preserved.
void UnwrappedLineParser::parseParens() {
  assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
  nextToken();
  do {
    switch (FormatTok->Tok.getKind()) {
    case tok::l_paren:
      parseParens();
      break;
    case tok::r_paren:
      nextToken();
      return;
    case tok::r_brace:
      // A "}" inside parenthesis is an error if there wasn't a matching "{".
      return;
    case tok::l_brace: {
      if (!tryToParseBracedList()) {
        // Consume the '{' of the block.
        nextToken();
        {
          // Park the current line while the block's contents are parsed one
          // level deeper; it is restored when LineState goes out of scope.
          ScopedLineState LineState(*this);
          ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
                                                  /*MustBeDeclaration=*/false);
          Line->Level += 1;
          parseLevel(/*HasOpeningBrace=*/true);
          Line->Level -= 1;
        }
        // Consume the token parseLevel() stopped at (the '}' in well-formed
        // input).
        nextToken();
      }
      break;
    }
    case tok::at:
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace))
        parseBracedList();
      break;
    default:
      nextToken();
      break;
    }
  } while (!eof());
}
729 
730 void UnwrappedLineParser::parseIfThenElse() {
731   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
732   nextToken();
733   if (FormatTok->Tok.is(tok::l_paren))
734     parseParens();
735   bool NeedsUnwrappedLine = false;
736   if (FormatTok->Tok.is(tok::l_brace)) {
737     parseBlock(/*MustBeDeclaration=*/false);
738     NeedsUnwrappedLine = true;
739   } else {
740     addUnwrappedLine();
741     ++Line->Level;
742     parseStructuralElement();
743     --Line->Level;
744   }
745   if (FormatTok->Tok.is(tok::kw_else)) {
746     nextToken();
747     if (FormatTok->Tok.is(tok::l_brace)) {
748       parseBlock(/*MustBeDeclaration=*/false);
749       addUnwrappedLine();
750     } else if (FormatTok->Tok.is(tok::kw_if)) {
751       parseIfThenElse();
752     } else {
753       addUnwrappedLine();
754       ++Line->Level;
755       parseStructuralElement();
756       --Line->Level;
757     }
758   } else if (NeedsUnwrappedLine) {
759     addUnwrappedLine();
760   }
761 }
762 
763 void UnwrappedLineParser::parseNamespace() {
764   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
765   nextToken();
766   if (FormatTok->Tok.is(tok::identifier))
767     nextToken();
768   if (FormatTok->Tok.is(tok::l_brace)) {
769     if (Style.BreakBeforeBraces == FormatStyle::BS_Linux)
770       addUnwrappedLine();
771 
772     parseBlock(/*MustBeDeclaration=*/true, 0);
773     // Munch the semicolon after a namespace. This is more common than one would
774     // think. Puttin the semicolon into its own line is very ugly.
775     if (FormatTok->Tok.is(tok::semi))
776       nextToken();
777     addUnwrappedLine();
778   }
779   // FIXME: Add error handling.
780 }
781 
782 void UnwrappedLineParser::parseForOrWhileLoop() {
783   assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) &&
784          "'for' or 'while' expected");
785   nextToken();
786   if (FormatTok->Tok.is(tok::l_paren))
787     parseParens();
788   if (FormatTok->Tok.is(tok::l_brace)) {
789     parseBlock(/*MustBeDeclaration=*/false);
790     addUnwrappedLine();
791   } else {
792     addUnwrappedLine();
793     ++Line->Level;
794     parseStructuralElement();
795     --Line->Level;
796   }
797 }
798 
799 void UnwrappedLineParser::parseDoWhile() {
800   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
801   nextToken();
802   if (FormatTok->Tok.is(tok::l_brace)) {
803     parseBlock(/*MustBeDeclaration=*/false);
804   } else {
805     addUnwrappedLine();
806     ++Line->Level;
807     parseStructuralElement();
808     --Line->Level;
809   }
810 
811   // FIXME: Add error handling.
812   if (!FormatTok->Tok.is(tok::kw_while)) {
813     addUnwrappedLine();
814     return;
815   }
816 
817   nextToken();
818   parseStructuralElement();
819 }
820 
/// \brief Parses the ':' of a label and ends the label's line, which is
/// emitted one level less indented than its surroundings where possible.
///
/// Does nothing if the current token is not ':'. A block directly following
/// the label (with no comments in between) is parsed as part of the label.
void UnwrappedLineParser::parseLabel() {
  if (FormatTok->Tok.isNot(tok::colon))
    return;
  nextToken();
  unsigned OldLineLevel = Line->Level;
  // Outdent the label by one level. Inside a preprocessor directive the level
  // is never reduced below 1.
  if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
    --Line->Level;
  if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
    parseBlock(/*MustBeDeclaration=*/false);
    if (FormatTok->Tok.is(tok::kw_break))
      parseStructuralElement(); // "break;" after "}" goes on the same line.
  }
  addUnwrappedLine();
  // Restore the level for the statements following the label.
  Line->Level = OldLineLevel;
}
836 
/// \brief Parses a case label: skips ahead to the next ':' (or eof) and then
/// treats the rest as a label.
void UnwrappedLineParser::parseCaseLabel() {
  assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
  // FIXME: fix handling of complex expressions here.
  do {
    nextToken();
  } while (!eof() && !FormatTok->Tok.is(tok::colon));
  parseLabel();
}
845 
846 void UnwrappedLineParser::parseSwitch() {
847   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
848   nextToken();
849   if (FormatTok->Tok.is(tok::l_paren))
850     parseParens();
851   if (FormatTok->Tok.is(tok::l_brace)) {
852     parseBlock(/*MustBeDeclaration=*/false, Style.IndentCaseLabels ? 2 : 1);
853     addUnwrappedLine();
854   } else {
855     addUnwrappedLine();
856     Line->Level += (Style.IndentCaseLabels ? 2 : 1);
857     parseStructuralElement();
858     Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
859   }
860 }
861 
/// \brief Parses an access specifier and ends its line.
void UnwrappedLineParser::parseAccessSpecifier() {
  nextToken();
  // Consume the ':' that normally follows the specifier. If something else
  // follows, we don't know what it is, and we'd better keep the next token.
  if (FormatTok->Tok.is(tok::colon))
    nextToken();
  addUnwrappedLine();
}
869 
/// \brief Parses an enum declaration or definition starting at 'enum'.
///
/// Inside the braces each enumerator ends up on a line of its own, one level
/// deeper. Tokens after the closing brace are left to the caller.
void UnwrappedLineParser::parseEnum() {
  nextToken();
  if (FormatTok->Tok.is(tok::identifier) ||
      FormatTok->Tok.is(tok::kw___attribute) ||
      FormatTok->Tok.is(tok::kw___declspec)) {
    nextToken();
    // We can have macros or attributes in between 'enum' and the enum name.
    if (FormatTok->Tok.is(tok::l_paren)) {
      parseParens();
    }
    if (FormatTok->Tok.is(tok::identifier))
      nextToken();
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    nextToken();
    addUnwrappedLine();
    ++Line->Level;
    // NOTE(review): if eof is reached before the closing brace, the loop below
    // ends without undoing the level increment above - confirm this is
    // intended.
    do {
      switch (FormatTok->Tok.getKind()) {
      case tok::l_paren:
        parseParens();
        break;
      case tok::r_brace:
        // End the last enumerator's line before consuming the brace, so the
        // brace starts a new line.
        addUnwrappedLine();
        nextToken();
        --Line->Level;
        return;
      case tok::comma:
        nextToken();
        addUnwrappedLine();
        break;
      default:
        nextToken();
        break;
      }
    } while (!eof());
  }
  // We fall through to parsing a structural element afterwards, so that in
  // enum A {} n, m;
  // "} n, m;" will end up in one unwrapped line.
}
911 
/// \brief Parses a class, struct or union declaration or definition starting
/// at its keyword.
///
/// A body, if present, is parsed as a declaration block. Tokens after the
/// closing brace are left to the caller.
void UnwrappedLineParser::parseRecord() {
  nextToken();
  if (FormatTok->Tok.is(tok::identifier) ||
      FormatTok->Tok.is(tok::kw___attribute) ||
      FormatTok->Tok.is(tok::kw___declspec)) {
    nextToken();
    // We can have macros or attributes in between 'class' and the class name.
    if (FormatTok->Tok.is(tok::l_paren)) {
      parseParens();
    }
    // The actual identifier can be a nested name specifier, and in macros
    // it is often token-pasted.
    while (FormatTok->Tok.is(tok::identifier) ||
           FormatTok->Tok.is(tok::coloncolon) ||
           FormatTok->Tok.is(tok::hashhash))
      nextToken();

    // Note that parsing away template declarations here leads to incorrectly
    // accepting function declarations as record declarations.
    // In general, we cannot solve this problem. Consider:
    // class A<int> B() {}
    // which can be a function definition or a class definition when B() is a
    // macro. If we find enough real-world cases where this is a problem, we
    // can parse for the 'template' keyword in the beginning of the statement,
    // and thus rule out the record production in case there is no template
    // (this would still leave us with an ambiguity between template function
    // and class declarations).
    if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
      // Skip everything up to the opening brace; a semicolon first means
      // there is no body to parse here.
      while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
        if (FormatTok->Tok.is(tok::semi))
          return;
        nextToken();
      }
    }
  }
  if (FormatTok->Tok.is(tok::l_brace)) {
    if (Style.BreakBeforeBraces == FormatStyle::BS_Linux)
      addUnwrappedLine();

    parseBlock(/*MustBeDeclaration=*/true);
  }
  // We fall through to parsing a structural element afterwards, so
  // class A {} n, m;
  // will end up in one unwrapped line.
}
957 
958 void UnwrappedLineParser::parseObjCProtocolList() {
959   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
960   do
961     nextToken();
962   while (!eof() && FormatTok->Tok.isNot(tok::greater));
963   nextToken(); // Skip '>'.
964 }
965 
966 void UnwrappedLineParser::parseObjCUntilAtEnd() {
967   do {
968     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
969       nextToken();
970       addUnwrappedLine();
971       break;
972     }
973     parseStructuralElement();
974   } while (!eof());
975 }
976 
/// \brief Parses an Objective-C \@interface or \@implementation, from the
/// keyword through the matching \@end.
void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
  nextToken(); // the @interface / @implementation keyword
  nextToken(); // interface name

  // @interface can be followed by either a base class, or a category.
  if (FormatTok->Tok.is(tok::colon)) {
    nextToken();
    nextToken(); // base class name
  } else if (FormatTok->Tok.is(tok::l_paren))
    // Skip category, if present.
    parseParens();

  if (FormatTok->Tok.is(tok::less))
    parseObjCProtocolList();

  // If instance variables are present, keep the '{' on the first line too.
  if (FormatTok->Tok.is(tok::l_brace))
    parseBlock(/*MustBeDeclaration=*/true);

  // With instance variables, this puts '}' on its own line.  Without instance
  // variables, this ends the @interface line.
  addUnwrappedLine();

  parseObjCUntilAtEnd();
}
1002 
1003 void UnwrappedLineParser::parseObjCProtocol() {
1004   nextToken();
1005   nextToken(); // protocol name
1006 
1007   if (FormatTok->Tok.is(tok::less))
1008     parseObjCProtocolList();
1009 
1010   // Check for protocol declaration.
1011   if (FormatTok->Tok.is(tok::semi)) {
1012     nextToken();
1013     return addUnwrappedLine();
1014   }
1015 
1016   addUnwrappedLine();
1017   parseObjCUntilAtEnd();
1018 }
1019 
1020 void UnwrappedLineParser::addUnwrappedLine() {
1021   if (Line->Tokens.empty())
1022     return;
1023   DEBUG({
1024     llvm::dbgs() << "Line(" << Line->Level << ")"
1025                  << (Line->InPPDirective ? " MACRO" : "") << ": ";
1026     for (std::list<FormatToken *>::iterator I = Line->Tokens.begin(),
1027                                             E = Line->Tokens.end();
1028          I != E; ++I) {
1029       llvm::dbgs() << (*I)->Tok.getName() << " ";
1030     }
1031     llvm::dbgs() << "\n";
1032   });
1033   CurrentLines->push_back(*Line);
1034   Line->Tokens.clear();
1035   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1036     for (std::vector<UnwrappedLine>::iterator
1037              I = PreprocessorDirectives.begin(),
1038              E = PreprocessorDirectives.end();
1039          I != E; ++I) {
1040       CurrentLines->push_back(*I);
1041     }
1042     PreprocessorDirectives.clear();
1043   }
1044 }
1045 
1046 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1047 
1048 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1049   bool JustComments = Line->Tokens.empty();
1050   for (SmallVectorImpl<FormatToken *>::const_iterator
1051            I = CommentsBeforeNextToken.begin(),
1052            E = CommentsBeforeNextToken.end();
1053        I != E; ++I) {
1054     if ((*I)->NewlinesBefore && JustComments) {
1055       addUnwrappedLine();
1056     }
1057     pushToken(*I);
1058   }
1059   if (NewlineBeforeNext && JustComments) {
1060     addUnwrappedLine();
1061   }
1062   CommentsBeforeNextToken.clear();
1063 }
1064 
1065 void UnwrappedLineParser::nextToken() {
1066   if (eof())
1067     return;
1068   flushComments(FormatTok->NewlinesBefore > 0);
1069   pushToken(FormatTok);
1070   readToken();
1071 }
1072 
1073 void UnwrappedLineParser::readToken() {
1074   bool CommentsInCurrentLine = true;
1075   do {
1076     FormatTok = Tokens->getNextToken();
1077     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1078            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1079       // If there is an unfinished unwrapped line, we flush the preprocessor
1080       // directives only after that unwrapped line was finished later.
1081       bool SwitchToPreprocessorLines =
1082           !Line->Tokens.empty() && CurrentLines == &Lines;
1083       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1084       // Comments stored before the preprocessor directive need to be output
1085       // before the preprocessor directive, at the same level as the
1086       // preprocessor directive, as we consider them to apply to the directive.
1087       flushComments(FormatTok->NewlinesBefore > 0);
1088       parsePPDirective();
1089     }
1090 
1091     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1092         !Line->InPPDirective) {
1093       continue;
1094     }
1095 
1096     if (!FormatTok->Tok.is(tok::comment))
1097       return;
1098     if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) {
1099       CommentsInCurrentLine = false;
1100     }
1101     if (CommentsInCurrentLine) {
1102       pushToken(FormatTok);
1103     } else {
1104       CommentsBeforeNextToken.push_back(FormatTok);
1105     }
1106   } while (!eof());
1107 }
1108 
1109 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1110   Line->Tokens.push_back(Tok);
1111   if (MustBreakBeforeNextToken) {
1112     Line->Tokens.back()->MustBreakBefore = true;
1113     MustBreakBeforeNextToken = false;
1114   }
1115 }
1116 
1117 } // end namespace format
1118 } // end namespace clang
1119