1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #define DEBUG_TYPE "format-parser"
17 
18 #include "UnwrappedLineParser.h"
19 #include "llvm/Support/Debug.h"
20 
21 namespace clang {
22 namespace format {
23 
24 class FormatTokenSource {
25 public:
26   virtual ~FormatTokenSource() {}
27   virtual FormatToken *getNextToken() = 0;
28 
29   virtual unsigned getPosition() = 0;
30   virtual FormatToken *setPosition(unsigned Position) = 0;
31 };
32 
33 namespace {
34 
35 class ScopedDeclarationState {
36 public:
37   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
38                          bool MustBeDeclaration)
39       : Line(Line), Stack(Stack) {
40     Line.MustBeDeclaration = MustBeDeclaration;
41     Stack.push_back(MustBeDeclaration);
42   }
43   ~ScopedDeclarationState() {
44     Stack.pop_back();
45     if (!Stack.empty())
46       Line.MustBeDeclaration = Stack.back();
47     else
48       Line.MustBeDeclaration = true;
49   }
50 
51 private:
52   UnwrappedLine &Line;
53   std::vector<bool> &Stack;
54 };
55 
56 class ScopedMacroState : public FormatTokenSource {
57 public:
58   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
59                    FormatToken *&ResetToken, bool &StructuralError)
60       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
61         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
62         StructuralError(StructuralError),
63         PreviousStructuralError(StructuralError), Token(NULL) {
64     TokenSource = this;
65     Line.Level = 0;
66     Line.InPPDirective = true;
67   }
68 
69   ~ScopedMacroState() {
70     TokenSource = PreviousTokenSource;
71     ResetToken = Token;
72     Line.InPPDirective = false;
73     Line.Level = PreviousLineLevel;
74     StructuralError = PreviousStructuralError;
75   }
76 
77   virtual FormatToken *getNextToken() {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   virtual unsigned getPosition() { return PreviousTokenSource->getPosition(); }
88 
89   virtual FormatToken *setPosition(unsigned Position) {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113   bool &StructuralError;
114   bool PreviousStructuralError;
115 
116   FormatToken *Token;
117 };
118 
119 } // end anonymous namespace
120 
121 class ScopedLineState {
122 public:
123   ScopedLineState(UnwrappedLineParser &Parser,
124                   bool SwitchToPreprocessorLines = false)
125       : Parser(Parser) {
126     OriginalLines = Parser.CurrentLines;
127     if (SwitchToPreprocessorLines)
128       Parser.CurrentLines = &Parser.PreprocessorDirectives;
129     else if (!Parser.Line->Tokens.empty())
130       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
131     PreBlockLine = Parser.Line.take();
132     Parser.Line.reset(new UnwrappedLine());
133     Parser.Line->Level = PreBlockLine->Level;
134     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
135   }
136 
137   ~ScopedLineState() {
138     if (!Parser.Line->Tokens.empty()) {
139       Parser.addUnwrappedLine();
140     }
141     assert(Parser.Line->Tokens.empty());
142     Parser.Line.reset(PreBlockLine);
143     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
144       Parser.MustBreakBeforeNextToken = true;
145     Parser.CurrentLines = OriginalLines;
146   }
147 
148 private:
149   UnwrappedLineParser &Parser;
150 
151   UnwrappedLine *PreBlockLine;
152   SmallVectorImpl<UnwrappedLine> *OriginalLines;
153 };
154 
155 namespace {
156 
157 class IndexedTokenSource : public FormatTokenSource {
158 public:
159   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
160       : Tokens(Tokens), Position(-1) {}
161 
162   virtual FormatToken *getNextToken() {
163     ++Position;
164     return Tokens[Position];
165   }
166 
167   virtual unsigned getPosition() {
168     assert(Position >= 0);
169     return Position;
170   }
171 
172   virtual FormatToken *setPosition(unsigned P) {
173     Position = P;
174     return Tokens[Position];
175   }
176 
177   void reset() { Position = -1; }
178 
179 private:
180   ArrayRef<FormatToken *> Tokens;
181   int Position;
182 };
183 
184 } // end anonymous namespace
185 
186 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
187                                          ArrayRef<FormatToken *> Tokens,
188                                          UnwrappedLineConsumer &Callback)
189     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
190       CurrentLines(&Lines), StructuralError(false), Style(Style), Tokens(NULL),
191       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
192 
193 void UnwrappedLineParser::reset() {
194   PPBranchLevel = -1;
195   Line.reset(new UnwrappedLine);
196   CommentsBeforeNextToken.clear();
197   FormatTok = NULL;
198   MustBreakBeforeNextToken = false;
199   PreprocessorDirectives.clear();
200   CurrentLines = &Lines;
201   DeclarationScopeStack.clear();
202   StructuralError = false;
203   PPStack.clear();
204 }
205 
206 bool UnwrappedLineParser::parse() {
207   IndexedTokenSource TokenSource(AllTokens);
208   do {
209     DEBUG(llvm::dbgs() << "----\n");
210     reset();
211     Tokens = &TokenSource;
212     TokenSource.reset();
213 
214     readToken();
215     parseFile();
216     // Create line with eof token.
217     pushToken(FormatTok);
218     addUnwrappedLine();
219 
220     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
221                                                   E = Lines.end();
222          I != E; ++I) {
223       Callback.consumeUnwrappedLine(*I);
224     }
225     Callback.finishRun();
226     Lines.clear();
227     while (!PPLevelBranchIndex.empty() &&
228            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
229       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
230       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
231     }
232     if (!PPLevelBranchIndex.empty()) {
233       ++PPLevelBranchIndex.back();
234       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
235       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
236     }
237   } while (!PPLevelBranchIndex.empty());
238 
239   return StructuralError;
240 }
241 
242 void UnwrappedLineParser::parseFile() {
243   ScopedDeclarationState DeclarationState(
244       *Line, DeclarationScopeStack,
245       /*MustBeDeclaration=*/ !Line->InPPDirective);
246   parseLevel(/*HasOpeningBrace=*/false);
247   // Make sure to format the remaining tokens.
248   flushComments(true);
249   addUnwrappedLine();
250 }
251 
252 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
253   bool SwitchLabelEncountered = false;
254   do {
255     switch (FormatTok->Tok.getKind()) {
256     case tok::comment:
257       nextToken();
258       addUnwrappedLine();
259       break;
260     case tok::l_brace:
261       // FIXME: Add parameter whether this can happen - if this happens, we must
262       // be in a non-declaration context.
263       parseBlock(/*MustBeDeclaration=*/false);
264       addUnwrappedLine();
265       break;
266     case tok::r_brace:
267       if (HasOpeningBrace)
268         return;
269       StructuralError = true;
270       nextToken();
271       addUnwrappedLine();
272       break;
273     case tok::kw_default:
274     case tok::kw_case:
275       if (!SwitchLabelEncountered &&
276           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
277         ++Line->Level;
278       SwitchLabelEncountered = true;
279       parseStructuralElement();
280       break;
281     default:
282       parseStructuralElement();
283       break;
284     }
285   } while (!eof());
286 }
287 
288 void UnwrappedLineParser::calculateBraceTypes() {
289   // We'll parse forward through the tokens until we hit
290   // a closing brace or eof - note that getNextToken() will
291   // parse macros, so this will magically work inside macro
292   // definitions, too.
293   unsigned StoredPosition = Tokens->getPosition();
294   unsigned Position = StoredPosition;
295   FormatToken *Tok = FormatTok;
296   // Keep a stack of positions of lbrace tokens. We will
297   // update information about whether an lbrace starts a
298   // braced init list or a different block during the loop.
299   SmallVector<FormatToken *, 8> LBraceStack;
300   assert(Tok->Tok.is(tok::l_brace));
301   do {
302     // Get next none-comment token.
303     FormatToken *NextTok;
304     unsigned ReadTokens = 0;
305     do {
306       NextTok = Tokens->getNextToken();
307       ++ReadTokens;
308     } while (NextTok->is(tok::comment));
309 
310     switch (Tok->Tok.getKind()) {
311     case tok::l_brace:
312       LBraceStack.push_back(Tok);
313       break;
314     case tok::r_brace:
315       if (!LBraceStack.empty()) {
316         if (LBraceStack.back()->BlockKind == BK_Unknown) {
317           // If there is a comma, semicolon or right paren after the closing
318           // brace, we assume this is a braced initializer list.  Note that
319           // regardless how we mark inner braces here, we will overwrite the
320           // BlockKind later if we parse a braced list (where all blocks inside
321           // are by default braced lists), or when we explicitly detect blocks
322           // (for example while parsing lambdas).
323           //
324           // We exclude + and - as they can be ObjC visibility modifiers.
325           if (NextTok->isOneOf(tok::comma, tok::semi, tok::r_paren,
326                                tok::r_square, tok::l_brace, tok::colon) ||
327               (NextTok->isBinaryOperator() &&
328                !NextTok->isOneOf(tok::plus, tok::minus))) {
329             Tok->BlockKind = BK_BracedInit;
330             LBraceStack.back()->BlockKind = BK_BracedInit;
331           } else {
332             Tok->BlockKind = BK_Block;
333             LBraceStack.back()->BlockKind = BK_Block;
334           }
335         }
336         LBraceStack.pop_back();
337       }
338       break;
339     case tok::semi:
340     case tok::kw_if:
341     case tok::kw_while:
342     case tok::kw_for:
343     case tok::kw_switch:
344     case tok::kw_try:
345       if (!LBraceStack.empty())
346         LBraceStack.back()->BlockKind = BK_Block;
347       break;
348     default:
349       break;
350     }
351     Tok = NextTok;
352     Position += ReadTokens;
353   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
354   // Assume other blocks for all unclosed opening braces.
355   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
356     if (LBraceStack[i]->BlockKind == BK_Unknown)
357       LBraceStack[i]->BlockKind = BK_Block;
358   }
359 
360   FormatTok = Tokens->setPosition(StoredPosition);
361 }
362 
363 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
364                                      bool MunchSemi) {
365   assert(FormatTok->Tok.is(tok::l_brace) && "'{' expected");
366   unsigned InitialLevel = Line->Level;
367   nextToken();
368 
369   addUnwrappedLine();
370 
371   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
372                                           MustBeDeclaration);
373   if (AddLevel)
374     ++Line->Level;
375   parseLevel(/*HasOpeningBrace=*/true);
376 
377   if (!FormatTok->Tok.is(tok::r_brace)) {
378     Line->Level = InitialLevel;
379     StructuralError = true;
380     return;
381   }
382 
383   nextToken(); // Munch the closing brace.
384   if (MunchSemi && FormatTok->Tok.is(tok::semi))
385     nextToken();
386   Line->Level = InitialLevel;
387 }
388 
389 void UnwrappedLineParser::parseChildBlock() {
390   FormatTok->BlockKind = BK_Block;
391   nextToken();
392   {
393     ScopedLineState LineState(*this);
394     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
395                                             /*MustBeDeclaration=*/false);
396     Line->Level += 1;
397     parseLevel(/*HasOpeningBrace=*/true);
398     Line->Level -= 1;
399   }
400   nextToken();
401 }
402 
403 void UnwrappedLineParser::parsePPDirective() {
404   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
405   ScopedMacroState MacroState(*Line, Tokens, FormatTok, StructuralError);
406   nextToken();
407 
408   if (FormatTok->Tok.getIdentifierInfo() == NULL) {
409     parsePPUnknown();
410     return;
411   }
412 
413   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
414   case tok::pp_define:
415     parsePPDefine();
416     return;
417   case tok::pp_if:
418     parsePPIf(/*IfDef=*/false);
419     break;
420   case tok::pp_ifdef:
421   case tok::pp_ifndef:
422     parsePPIf(/*IfDef=*/true);
423     break;
424   case tok::pp_else:
425     parsePPElse();
426     break;
427   case tok::pp_elif:
428     parsePPElIf();
429     break;
430   case tok::pp_endif:
431     parsePPEndIf();
432     break;
433   default:
434     parsePPUnknown();
435     break;
436   }
437 }
438 
439 void UnwrappedLineParser::pushPPConditional() {
440   if (!PPStack.empty() && PPStack.back() == PP_Unreachable)
441     PPStack.push_back(PP_Unreachable);
442   else
443     PPStack.push_back(PP_Conditional);
444 }
445 
446 void UnwrappedLineParser::parsePPIf(bool IfDef) {
447   ++PPBranchLevel;
448   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
449   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
450     PPLevelBranchIndex.push_back(0);
451     PPLevelBranchCount.push_back(0);
452   }
453   PPChainBranchIndex.push(0);
454   nextToken();
455   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
456                          StringRef(FormatTok->Tok.getLiteralData(),
457                                    FormatTok->Tok.getLength()) == "0") ||
458                         FormatTok->Tok.is(tok::kw_false);
459   if ((!IfDef && IsLiteralFalse) || PPLevelBranchIndex[PPBranchLevel] > 0) {
460     PPStack.push_back(PP_Unreachable);
461   } else {
462     pushPPConditional();
463   }
464   parsePPUnknown();
465 }
466 
467 void UnwrappedLineParser::parsePPElse() {
468   if (!PPStack.empty())
469     PPStack.pop_back();
470   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
471   if (!PPChainBranchIndex.empty())
472     ++PPChainBranchIndex.top();
473   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
474       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top()) {
475     PPStack.push_back(PP_Unreachable);
476   } else {
477     pushPPConditional();
478   }
479   parsePPUnknown();
480 }
481 
482 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
483 
484 void UnwrappedLineParser::parsePPEndIf() {
485   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
486   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
487     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
488       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
489     }
490   }
491   --PPBranchLevel;
492   if (!PPChainBranchIndex.empty())
493     PPChainBranchIndex.pop();
494   if (!PPStack.empty())
495     PPStack.pop_back();
496   parsePPUnknown();
497 }
498 
499 void UnwrappedLineParser::parsePPDefine() {
500   nextToken();
501 
502   if (FormatTok->Tok.getKind() != tok::identifier) {
503     parsePPUnknown();
504     return;
505   }
506   nextToken();
507   if (FormatTok->Tok.getKind() == tok::l_paren &&
508       FormatTok->WhitespaceRange.getBegin() ==
509           FormatTok->WhitespaceRange.getEnd()) {
510     parseParens();
511   }
512   addUnwrappedLine();
513   Line->Level = 1;
514 
515   // Errors during a preprocessor directive can only affect the layout of the
516   // preprocessor directive, and thus we ignore them. An alternative approach
517   // would be to use the same approach we use on the file level (no
518   // re-indentation if there was a structural error) within the macro
519   // definition.
520   parseFile();
521 }
522 
523 void UnwrappedLineParser::parsePPUnknown() {
524   do {
525     nextToken();
526   } while (!eof());
527   addUnwrappedLine();
528 }
529 
530 // Here we blacklist certain tokens that are not usually the first token in an
531 // unwrapped line. This is used in attempt to distinguish macro calls without
532 // trailing semicolons from other constructs split to several lines.
533 bool tokenCanStartNewLine(clang::Token Tok) {
534   // Semicolon can be a null-statement, l_square can be a start of a macro or
535   // a C++11 attribute, but this doesn't seem to be common.
536   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
537          Tok.isNot(tok::l_square) &&
538          // Tokens that can only be used as binary operators and a part of
539          // overloaded operator names.
540          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
541          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
542          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
543          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
544          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
545          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
546          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
547          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
548          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
549          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
550          Tok.isNot(tok::lesslessequal) &&
551          // Colon is used in labels, base class lists, initializer lists,
552          // range-based for loops, ternary operator, but should never be the
553          // first token in an unwrapped line.
554          Tok.isNot(tok::colon);
555 }
556 
557 void UnwrappedLineParser::parseStructuralElement() {
558   assert(!FormatTok->Tok.is(tok::l_brace));
559   switch (FormatTok->Tok.getKind()) {
560   case tok::at:
561     nextToken();
562     if (FormatTok->Tok.is(tok::l_brace)) {
563       parseBracedList();
564       break;
565     }
566     switch (FormatTok->Tok.getObjCKeywordID()) {
567     case tok::objc_public:
568     case tok::objc_protected:
569     case tok::objc_package:
570     case tok::objc_private:
571       return parseAccessSpecifier();
572     case tok::objc_interface:
573     case tok::objc_implementation:
574       return parseObjCInterfaceOrImplementation();
575     case tok::objc_protocol:
576       return parseObjCProtocol();
577     case tok::objc_end:
578       return; // Handled by the caller.
579     case tok::objc_optional:
580     case tok::objc_required:
581       nextToken();
582       addUnwrappedLine();
583       return;
584     default:
585       break;
586     }
587     break;
588   case tok::kw_namespace:
589     parseNamespace();
590     return;
591   case tok::kw_inline:
592     nextToken();
593     if (FormatTok->Tok.is(tok::kw_namespace)) {
594       parseNamespace();
595       return;
596     }
597     break;
598   case tok::kw_public:
599   case tok::kw_protected:
600   case tok::kw_private:
601     parseAccessSpecifier();
602     return;
603   case tok::kw_if:
604     parseIfThenElse();
605     return;
606   case tok::kw_for:
607   case tok::kw_while:
608     parseForOrWhileLoop();
609     return;
610   case tok::kw_do:
611     parseDoWhile();
612     return;
613   case tok::kw_switch:
614     parseSwitch();
615     return;
616   case tok::kw_default:
617     nextToken();
618     parseLabel();
619     return;
620   case tok::kw_case:
621     parseCaseLabel();
622     return;
623   case tok::kw_return:
624     parseReturn();
625     return;
626   case tok::kw_extern:
627     nextToken();
628     if (FormatTok->Tok.is(tok::string_literal)) {
629       nextToken();
630       if (FormatTok->Tok.is(tok::l_brace)) {
631         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
632         addUnwrappedLine();
633         return;
634       }
635     }
636     // In all other cases, parse the declaration.
637     break;
638   default:
639     break;
640   }
641   do {
642     switch (FormatTok->Tok.getKind()) {
643     case tok::at:
644       nextToken();
645       if (FormatTok->Tok.is(tok::l_brace))
646         parseBracedList();
647       break;
648     case tok::kw_enum:
649       parseEnum();
650       break;
651     case tok::kw_struct:
652     case tok::kw_union:
653     case tok::kw_class:
654       parseRecord();
655       // A record declaration or definition is always the start of a structural
656       // element.
657       break;
658     case tok::semi:
659       nextToken();
660       addUnwrappedLine();
661       return;
662     case tok::r_brace:
663       addUnwrappedLine();
664       return;
665     case tok::l_paren:
666       parseParens();
667       break;
668     case tok::caret:
669       nextToken();
670       if (FormatTok->is(tok::l_brace)) {
671         parseChildBlock();
672       }
673       break;
674     case tok::l_brace:
675       if (!tryToParseBracedList()) {
676         // A block outside of parentheses must be the last part of a
677         // structural element.
678         // FIXME: Figure out cases where this is not true, and add projections
679         // for them (the one we know is missing are lambdas).
680         if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
681             Style.BreakBeforeBraces == FormatStyle::BS_Stroustrup ||
682             Style.BreakBeforeBraces == FormatStyle::BS_Allman)
683           addUnwrappedLine();
684         FormatTok->Type = TT_FunctionLBrace;
685         parseBlock(/*MustBeDeclaration=*/false);
686         addUnwrappedLine();
687         return;
688       }
689       // Otherwise this was a braced init list, and the structural
690       // element continues.
691       break;
692     case tok::identifier: {
693       StringRef Text = FormatTok->TokenText;
694       nextToken();
695       if (Line->Tokens.size() == 1) {
696         if (FormatTok->Tok.is(tok::colon)) {
697           parseLabel();
698           return;
699         }
700         // Recognize function-like macro usages without trailing semicolon.
701         if (FormatTok->Tok.is(tok::l_paren)) {
702           parseParens();
703           if (FormatTok->HasUnescapedNewline &&
704               tokenCanStartNewLine(FormatTok->Tok)) {
705             addUnwrappedLine();
706             return;
707           }
708         } else if (FormatTok->HasUnescapedNewline && Text.size() >= 5 &&
709                    Text == Text.upper()) {
710           // Recognize free-standing macros like Q_OBJECT.
711           addUnwrappedLine();
712           return;
713         }
714       }
715       break;
716     }
717     case tok::equal:
718       nextToken();
719       if (FormatTok->Tok.is(tok::l_brace)) {
720         parseBracedList();
721       }
722       break;
723     case tok::l_square:
724       tryToParseLambda();
725       break;
726     default:
727       nextToken();
728       break;
729     }
730   } while (!eof());
731 }
732 
733 void UnwrappedLineParser::tryToParseLambda() {
734   // FIXME: This is a dirty way to access the previous token. Find a better
735   // solution.
736   if (!Line->Tokens.empty() &&
737       Line->Tokens.back().Tok->isOneOf(tok::identifier, tok::kw_operator)) {
738     nextToken();
739     return;
740   }
741   assert(FormatTok->is(tok::l_square));
742   FormatToken &LSquare = *FormatTok;
743   if (!tryToParseLambdaIntroducer())
744     return;
745 
746   while (FormatTok->isNot(tok::l_brace)) {
747     switch (FormatTok->Tok.getKind()) {
748     case tok::l_brace:
749       break;
750     case tok::l_paren:
751       parseParens();
752       break;
753     case tok::identifier:
754     case tok::kw_mutable:
755       nextToken();
756       break;
757     default:
758       return;
759     }
760   }
761   LSquare.Type = TT_LambdaLSquare;
762   parseChildBlock();
763 }
764 
765 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
766   nextToken();
767   if (FormatTok->is(tok::equal)) {
768     nextToken();
769     if (FormatTok->is(tok::r_square)) {
770       nextToken();
771       return true;
772     }
773     if (FormatTok->isNot(tok::comma))
774       return false;
775     nextToken();
776   } else if (FormatTok->is(tok::amp)) {
777     nextToken();
778     if (FormatTok->is(tok::r_square)) {
779       nextToken();
780       return true;
781     }
782     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
783       return false;
784     }
785     if (FormatTok->is(tok::comma))
786       nextToken();
787   } else if (FormatTok->is(tok::r_square)) {
788     nextToken();
789     return true;
790   }
791   do {
792     if (FormatTok->is(tok::amp))
793       nextToken();
794     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
795       return false;
796     nextToken();
797     if (FormatTok->is(tok::comma)) {
798       nextToken();
799     } else if (FormatTok->is(tok::r_square)) {
800       nextToken();
801       return true;
802     } else {
803       return false;
804     }
805   } while (!eof());
806   return false;
807 }
808 
809 bool UnwrappedLineParser::tryToParseBracedList() {
810   if (FormatTok->BlockKind == BK_Unknown)
811     calculateBraceTypes();
812   assert(FormatTok->BlockKind != BK_Unknown);
813   if (FormatTok->BlockKind == BK_Block)
814     return false;
815   parseBracedList();
816   return true;
817 }
818 
819 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
820   bool HasError = false;
821   nextToken();
822 
823   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
824   // replace this by using parseAssigmentExpression() inside.
825   do {
826     // FIXME: When we start to support lambdas, we'll want to parse them away
827     // here, otherwise our bail-out scenarios below break. The better solution
828     // might be to just implement a more or less complete expression parser.
829     switch (FormatTok->Tok.getKind()) {
830     case tok::caret:
831       nextToken();
832       if (FormatTok->is(tok::l_brace)) {
833         parseChildBlock();
834       }
835       break;
836     case tok::l_square:
837       tryToParseLambda();
838       break;
839     case tok::l_brace:
840       // Assume there are no blocks inside a braced init list apart
841       // from the ones we explicitly parse out (like lambdas).
842       FormatTok->BlockKind = BK_BracedInit;
843       parseBracedList();
844       break;
845     case tok::r_brace:
846       nextToken();
847       return !HasError;
848     case tok::semi:
849       HasError = true;
850       if (!ContinueOnSemicolons)
851         return !HasError;
852       nextToken();
853       break;
854     case tok::comma:
855       nextToken();
856       break;
857     default:
858       nextToken();
859       break;
860     }
861   } while (!eof());
862   return false;
863 }
864 
865 void UnwrappedLineParser::parseReturn() {
866   nextToken();
867 
868   do {
869     switch (FormatTok->Tok.getKind()) {
870     case tok::l_brace:
871       parseBracedList();
872       if (FormatTok->Tok.isNot(tok::semi)) {
873         // Assume missing ';'.
874         addUnwrappedLine();
875         return;
876       }
877       break;
878     case tok::l_paren:
879       parseParens();
880       break;
881     case tok::r_brace:
882       // Assume missing ';'.
883       addUnwrappedLine();
884       return;
885     case tok::semi:
886       nextToken();
887       addUnwrappedLine();
888       return;
889     case tok::l_square:
890       tryToParseLambda();
891       break;
892     default:
893       nextToken();
894       break;
895     }
896   } while (!eof());
897 }
898 
899 void UnwrappedLineParser::parseParens() {
900   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
901   nextToken();
902   do {
903     switch (FormatTok->Tok.getKind()) {
904     case tok::l_paren:
905       parseParens();
906       break;
907     case tok::r_paren:
908       nextToken();
909       return;
910     case tok::r_brace:
911       // A "}" inside parenthesis is an error if there wasn't a matching "{".
912       return;
913     case tok::l_square:
914       tryToParseLambda();
915       break;
916     case tok::l_brace: {
917       if (!tryToParseBracedList()) {
918         parseChildBlock();
919       }
920       break;
921     }
922     case tok::at:
923       nextToken();
924       if (FormatTok->Tok.is(tok::l_brace))
925         parseBracedList();
926       break;
927     default:
928       nextToken();
929       break;
930     }
931   } while (!eof());
932 }
933 
934 void UnwrappedLineParser::parseIfThenElse() {
935   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
936   nextToken();
937   if (FormatTok->Tok.is(tok::l_paren))
938     parseParens();
939   bool NeedsUnwrappedLine = false;
940   if (FormatTok->Tok.is(tok::l_brace)) {
941     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
942       addUnwrappedLine();
943     parseBlock(/*MustBeDeclaration=*/false);
944     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
945       addUnwrappedLine();
946     else
947       NeedsUnwrappedLine = true;
948   } else {
949     addUnwrappedLine();
950     ++Line->Level;
951     parseStructuralElement();
952     --Line->Level;
953   }
954   if (FormatTok->Tok.is(tok::kw_else)) {
955     nextToken();
956     if (FormatTok->Tok.is(tok::l_brace)) {
957       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
958         addUnwrappedLine();
959       parseBlock(/*MustBeDeclaration=*/false);
960       addUnwrappedLine();
961     } else if (FormatTok->Tok.is(tok::kw_if)) {
962       parseIfThenElse();
963     } else {
964       addUnwrappedLine();
965       ++Line->Level;
966       parseStructuralElement();
967       --Line->Level;
968     }
969   } else if (NeedsUnwrappedLine) {
970     addUnwrappedLine();
971   }
972 }
973 
974 void UnwrappedLineParser::parseNamespace() {
975   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
976   nextToken();
977   if (FormatTok->Tok.is(tok::identifier))
978     nextToken();
979   if (FormatTok->Tok.is(tok::l_brace)) {
980     if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
981         Style.BreakBeforeBraces == FormatStyle::BS_Allman)
982       addUnwrappedLine();
983 
984     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
985                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
986                      DeclarationScopeStack.size() > 1);
987     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
988     // Munch the semicolon after a namespace. This is more common than one would
989     // think. Puttin the semicolon into its own line is very ugly.
990     if (FormatTok->Tok.is(tok::semi))
991       nextToken();
992     addUnwrappedLine();
993   }
994   // FIXME: Add error handling.
995 }
996 
997 void UnwrappedLineParser::parseForOrWhileLoop() {
998   assert((FormatTok->Tok.is(tok::kw_for) || FormatTok->Tok.is(tok::kw_while)) &&
999          "'for' or 'while' expected");
1000   nextToken();
1001   if (FormatTok->Tok.is(tok::l_paren))
1002     parseParens();
1003   if (FormatTok->Tok.is(tok::l_brace)) {
1004     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1005       addUnwrappedLine();
1006     parseBlock(/*MustBeDeclaration=*/false);
1007     addUnwrappedLine();
1008   } else {
1009     addUnwrappedLine();
1010     ++Line->Level;
1011     parseStructuralElement();
1012     --Line->Level;
1013   }
1014 }
1015 
1016 void UnwrappedLineParser::parseDoWhile() {
1017   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1018   nextToken();
1019   if (FormatTok->Tok.is(tok::l_brace)) {
1020     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1021       addUnwrappedLine();
1022     parseBlock(/*MustBeDeclaration=*/false);
1023   } else {
1024     addUnwrappedLine();
1025     ++Line->Level;
1026     parseStructuralElement();
1027     --Line->Level;
1028   }
1029 
1030   // FIXME: Add error handling.
1031   if (!FormatTok->Tok.is(tok::kw_while)) {
1032     addUnwrappedLine();
1033     return;
1034   }
1035 
1036   nextToken();
1037   parseStructuralElement();
1038 }
1039 
1040 void UnwrappedLineParser::parseLabel() {
1041   nextToken();
1042   unsigned OldLineLevel = Line->Level;
1043   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1044     --Line->Level;
1045   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1046     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1047       addUnwrappedLine();
1048     parseBlock(/*MustBeDeclaration=*/false);
1049     if (FormatTok->Tok.is(tok::kw_break)) {
1050       // "break;" after "}" on its own line only for BS_Allman
1051       if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1052         addUnwrappedLine();
1053       parseStructuralElement();
1054     }
1055   }
1056   addUnwrappedLine();
1057   Line->Level = OldLineLevel;
1058 }
1059 
1060 void UnwrappedLineParser::parseCaseLabel() {
1061   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1062   // FIXME: fix handling of complex expressions here.
1063   do {
1064     nextToken();
1065   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1066   parseLabel();
1067 }
1068 
1069 void UnwrappedLineParser::parseSwitch() {
1070   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1071   nextToken();
1072   if (FormatTok->Tok.is(tok::l_paren))
1073     parseParens();
1074   if (FormatTok->Tok.is(tok::l_brace)) {
1075     if (Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1076       addUnwrappedLine();
1077     parseBlock(/*MustBeDeclaration=*/false);
1078     addUnwrappedLine();
1079   } else {
1080     addUnwrappedLine();
1081     ++Line->Level;
1082     parseStructuralElement();
1083     --Line->Level;
1084   }
1085 }
1086 
1087 void UnwrappedLineParser::parseAccessSpecifier() {
1088   nextToken();
1089   // Otherwise, we don't know what it is, and we'd better keep the next token.
1090   if (FormatTok->Tok.is(tok::colon))
1091     nextToken();
1092   addUnwrappedLine();
1093 }
1094 
1095 void UnwrappedLineParser::parseEnum() {
1096   nextToken();
1097   // Eat up enum class ...
1098   if (FormatTok->Tok.is(tok::kw_class) ||
1099       FormatTok->Tok.is(tok::kw_struct))
1100       nextToken();
1101   while (FormatTok->Tok.getIdentifierInfo() ||
1102          FormatTok->isOneOf(tok::colon, tok::coloncolon)) {
1103     nextToken();
1104     // We can have macros or attributes in between 'enum' and the enum name.
1105     if (FormatTok->Tok.is(tok::l_paren)) {
1106       parseParens();
1107     }
1108     if (FormatTok->Tok.is(tok::identifier))
1109       nextToken();
1110   }
1111   if (FormatTok->Tok.is(tok::l_brace)) {
1112     FormatTok->BlockKind = BK_Block;
1113     bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1114     if (HasError) {
1115       if (FormatTok->is(tok::semi))
1116         nextToken();
1117       addUnwrappedLine();
1118     }
1119   }
1120   // We fall through to parsing a structural element afterwards, so that in
1121   // enum A {} n, m;
1122   // "} n, m;" will end up in one unwrapped line.
1123 }
1124 
1125 void UnwrappedLineParser::parseRecord() {
1126   nextToken();
1127   if (FormatTok->Tok.is(tok::identifier) ||
1128       FormatTok->Tok.is(tok::kw___attribute) ||
1129       FormatTok->Tok.is(tok::kw___declspec) ||
1130       FormatTok->Tok.is(tok::kw_alignas)) {
1131     nextToken();
1132     // We can have macros or attributes in between 'class' and the class name.
1133     if (FormatTok->Tok.is(tok::l_paren)) {
1134       parseParens();
1135     }
1136     // The actual identifier can be a nested name specifier, and in macros
1137     // it is often token-pasted.
1138     while (FormatTok->Tok.is(tok::identifier) ||
1139            FormatTok->Tok.is(tok::coloncolon) ||
1140            FormatTok->Tok.is(tok::hashhash))
1141       nextToken();
1142 
1143     // Note that parsing away template declarations here leads to incorrectly
1144     // accepting function declarations as record declarations.
1145     // In general, we cannot solve this problem. Consider:
1146     // class A<int> B() {}
1147     // which can be a function definition or a class definition when B() is a
1148     // macro. If we find enough real-world cases where this is a problem, we
1149     // can parse for the 'template' keyword in the beginning of the statement,
1150     // and thus rule out the record production in case there is no template
1151     // (this would still leave us with an ambiguity between template function
1152     // and class declarations).
1153     if (FormatTok->Tok.is(tok::colon) || FormatTok->Tok.is(tok::less)) {
1154       while (!eof() && FormatTok->Tok.isNot(tok::l_brace)) {
1155         if (FormatTok->Tok.is(tok::semi))
1156           return;
1157         nextToken();
1158       }
1159     }
1160   }
1161   if (FormatTok->Tok.is(tok::l_brace)) {
1162     if (Style.BreakBeforeBraces == FormatStyle::BS_Linux ||
1163         Style.BreakBeforeBraces == FormatStyle::BS_Allman)
1164       addUnwrappedLine();
1165 
1166     parseBlock(/*MustBeDeclaration=*/true, /*Addlevel=*/true,
1167                /*MunchSemi=*/false);
1168   }
1169   // We fall through to parsing a structural element afterwards, so
1170   // class A {} n, m;
1171   // will end up in one unwrapped line.
1172 }
1173 
1174 void UnwrappedLineParser::parseObjCProtocolList() {
1175   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1176   do
1177     nextToken();
1178   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1179   nextToken(); // Skip '>'.
1180 }
1181 
1182 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1183   do {
1184     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1185       nextToken();
1186       addUnwrappedLine();
1187       break;
1188     }
1189     if (FormatTok->is(tok::l_brace)) {
1190       parseBlock(/*MustBeDeclaration=*/false);
1191       // In ObjC interfaces, nothing should be following the "}".
1192       addUnwrappedLine();
1193     } else {
1194       parseStructuralElement();
1195     }
1196   } while (!eof());
1197 }
1198 
1199 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1200   nextToken();
1201   nextToken(); // interface name
1202 
1203   // @interface can be followed by either a base class, or a category.
1204   if (FormatTok->Tok.is(tok::colon)) {
1205     nextToken();
1206     nextToken(); // base class name
1207   } else if (FormatTok->Tok.is(tok::l_paren))
1208     // Skip category, if present.
1209     parseParens();
1210 
1211   if (FormatTok->Tok.is(tok::less))
1212     parseObjCProtocolList();
1213 
1214   // If instance variables are present, keep the '{' on the first line too.
1215   if (FormatTok->Tok.is(tok::l_brace))
1216     parseBlock(/*MustBeDeclaration=*/true);
1217 
1218   // With instance variables, this puts '}' on its own line.  Without instance
1219   // variables, this ends the @interface line.
1220   addUnwrappedLine();
1221 
1222   parseObjCUntilAtEnd();
1223 }
1224 
1225 void UnwrappedLineParser::parseObjCProtocol() {
1226   nextToken();
1227   nextToken(); // protocol name
1228 
1229   if (FormatTok->Tok.is(tok::less))
1230     parseObjCProtocolList();
1231 
1232   // Check for protocol declaration.
1233   if (FormatTok->Tok.is(tok::semi)) {
1234     nextToken();
1235     return addUnwrappedLine();
1236   }
1237 
1238   addUnwrappedLine();
1239   parseObjCUntilAtEnd();
1240 }
1241 
1242 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1243                                                  StringRef Prefix = "") {
1244   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1245                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1246   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1247                                                     E = Line.Tokens.end();
1248        I != E; ++I) {
1249     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1250   }
1251   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1252                                                     E = Line.Tokens.end();
1253        I != E; ++I) {
1254     const UnwrappedLineNode &Node = *I;
1255     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1256              I = Node.Children.begin(),
1257              E = Node.Children.end();
1258          I != E; ++I) {
1259       printDebugInfo(*I, "\nChild: ");
1260     }
1261   }
1262   llvm::dbgs() << "\n";
1263 }
1264 
1265 void UnwrappedLineParser::addUnwrappedLine() {
1266   if (Line->Tokens.empty())
1267     return;
1268   DEBUG({
1269     if (CurrentLines == &Lines)
1270       printDebugInfo(*Line);
1271   });
1272   CurrentLines->push_back(*Line);
1273   Line->Tokens.clear();
1274   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1275     for (SmallVectorImpl<UnwrappedLine>::iterator
1276              I = PreprocessorDirectives.begin(),
1277              E = PreprocessorDirectives.end();
1278          I != E; ++I) {
1279       CurrentLines->push_back(*I);
1280     }
1281     PreprocessorDirectives.clear();
1282   }
1283 }
1284 
1285 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1286 
1287 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1288   bool JustComments = Line->Tokens.empty();
1289   for (SmallVectorImpl<FormatToken *>::const_iterator
1290            I = CommentsBeforeNextToken.begin(),
1291            E = CommentsBeforeNextToken.end();
1292        I != E; ++I) {
1293     if ((*I)->NewlinesBefore && JustComments) {
1294       addUnwrappedLine();
1295     }
1296     pushToken(*I);
1297   }
1298   if (NewlineBeforeNext && JustComments) {
1299     addUnwrappedLine();
1300   }
1301   CommentsBeforeNextToken.clear();
1302 }
1303 
1304 void UnwrappedLineParser::nextToken() {
1305   if (eof())
1306     return;
1307   flushComments(FormatTok->NewlinesBefore > 0);
1308   pushToken(FormatTok);
1309   readToken();
1310 }
1311 
1312 void UnwrappedLineParser::readToken() {
1313   bool CommentsInCurrentLine = true;
1314   do {
1315     FormatTok = Tokens->getNextToken();
1316     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1317            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1318       // If there is an unfinished unwrapped line, we flush the preprocessor
1319       // directives only after that unwrapped line was finished later.
1320       bool SwitchToPreprocessorLines =
1321           !Line->Tokens.empty() && CurrentLines == &Lines;
1322       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1323       // Comments stored before the preprocessor directive need to be output
1324       // before the preprocessor directive, at the same level as the
1325       // preprocessor directive, as we consider them to apply to the directive.
1326       flushComments(FormatTok->NewlinesBefore > 0);
1327       parsePPDirective();
1328     }
1329 
1330     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1331         !Line->InPPDirective) {
1332       continue;
1333     }
1334 
1335     if (!FormatTok->Tok.is(tok::comment))
1336       return;
1337     if (FormatTok->NewlinesBefore > 0 || FormatTok->IsFirst) {
1338       CommentsInCurrentLine = false;
1339     }
1340     if (CommentsInCurrentLine) {
1341       pushToken(FormatTok);
1342     } else {
1343       CommentsBeforeNextToken.push_back(FormatTok);
1344     }
1345   } while (!eof());
1346 }
1347 
1348 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1349   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1350   if (MustBreakBeforeNextToken) {
1351     Line->Tokens.back().Tok->MustBreakBefore = true;
1352     MustBreakBeforeNextToken = false;
1353   }
1354 }
1355 
1356 } // end namespace format
1357 } // end namespace clang
1358