1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
/// This is EXPERIMENTAL code under heavy development. It is not yet in a state
/// where it can be used to format real code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "UnwrappedLineParser.h"
20 #include "clang/Basic/Diagnostic.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 // Uncomment to get debug output from the UnwrappedLineParser.
24 // Use in combination with --gtest_filter=*TestName* to limit the output to a
25 // single test.
26 // #define UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT
27 
28 namespace clang {
29 namespace format {
30 
31 class ScopedMacroState : public FormatTokenSource {
32 public:
33   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
34                    FormatToken &ResetToken)
35       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
36         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
37     TokenSource = this;
38     Line.Level = 0;
39     Line.InPPDirective = true;
40   }
41 
42   ~ScopedMacroState() {
43     TokenSource = PreviousTokenSource;
44     ResetToken = Token;
45     Line.InPPDirective = false;
46     Line.Level = PreviousLineLevel;
47   }
48 
49   virtual FormatToken getNextToken() {
50     // The \c UnwrappedLineParser guards against this by never calling
51     // \c getNextToken() after it has encountered the first eof token.
52     assert(!eof());
53     Token = PreviousTokenSource->getNextToken();
54     if (eof())
55       return createEOF();
56     return Token;
57   }
58 
59 private:
60   bool eof() {
61     return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline;
62   }
63 
64   FormatToken createEOF() {
65     FormatToken FormatTok;
66     FormatTok.Tok.startToken();
67     FormatTok.Tok.setKind(tok::eof);
68     return FormatTok;
69   }
70 
71   UnwrappedLine &Line;
72   FormatTokenSource *&TokenSource;
73   FormatToken &ResetToken;
74   unsigned PreviousLineLevel;
75   FormatTokenSource *PreviousTokenSource;
76 
77   FormatToken Token;
78 };
79 
80 class ScopedLineState {
81 public:
82   ScopedLineState(UnwrappedLineParser &Parser) : Parser(Parser) {
83     PreBlockLine = Parser.Line.take();
84     Parser.Line.reset(new UnwrappedLine(*PreBlockLine));
85     assert(Parser.LastInCurrentLine == NULL ||
86            Parser.LastInCurrentLine->Children.empty());
87     PreBlockLastToken = Parser.LastInCurrentLine;
88     PreBlockRootTokenInitialized = Parser.RootTokenInitialized;
89     Parser.RootTokenInitialized = false;
90     Parser.LastInCurrentLine = NULL;
91   }
92 
93   ~ScopedLineState() {
94     if (Parser.RootTokenInitialized) {
95       Parser.addUnwrappedLine();
96     }
97     assert(!Parser.RootTokenInitialized);
98     Parser.Line.reset(PreBlockLine);
99     Parser.RootTokenInitialized = PreBlockRootTokenInitialized;
100     Parser.LastInCurrentLine = PreBlockLastToken;
101     assert(Parser.LastInCurrentLine == NULL ||
102            Parser.LastInCurrentLine->Children.empty());
103     Parser.MustBreakBeforeNextToken = true;
104   }
105 
106 private:
107   UnwrappedLineParser &Parser;
108 
109   UnwrappedLine *PreBlockLine;
110   FormatToken* PreBlockLastToken;
111   bool PreBlockRootTokenInitialized;
112 };
113 
114 UnwrappedLineParser::UnwrappedLineParser(
115     clang::DiagnosticsEngine &Diag, const FormatStyle &Style,
116     FormatTokenSource &Tokens, UnwrappedLineConsumer &Callback)
117     : Line(new UnwrappedLine), RootTokenInitialized(false),
118       LastInCurrentLine(NULL), MustBreakBeforeNextToken(false), Diag(Diag),
119       Style(Style), Tokens(&Tokens), Callback(Callback) {
120 }
121 
122 bool UnwrappedLineParser::parse() {
123 #ifdef UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT
124   llvm::errs() << "----\n";
125 #endif
126   readToken();
127   return parseFile();
128 }
129 
130 bool UnwrappedLineParser::parseFile() {
131   bool Error = parseLevel(/*HasOpeningBrace=*/false);
132   // Make sure to format the remaining tokens.
133   addUnwrappedLine();
134   return Error;
135 }
136 
137 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
138   bool Error = false;
139   do {
140     switch (FormatTok.Tok.getKind()) {
141     case tok::comment:
142       nextToken();
143       addUnwrappedLine();
144       break;
145     case tok::l_brace:
146       Error |= parseBlock();
147       addUnwrappedLine();
148       break;
149     case tok::r_brace:
150       if (HasOpeningBrace) {
151         return false;
152       } else {
153         Diag.Report(FormatTok.Tok.getLocation(),
154                     Diag.getCustomDiagID(clang::DiagnosticsEngine::Error,
155                                          "unexpected '}'"));
156         Error = true;
157         nextToken();
158         addUnwrappedLine();
159       }
160       break;
161     default:
162       parseStructuralElement();
163       break;
164     }
165   } while (!eof());
166   return Error;
167 }
168 
169 bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
170   assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
171   nextToken();
172 
173   if (!FormatTok.Tok.is(tok::r_brace)) {
174     addUnwrappedLine();
175 
176     Line->Level += AddLevels;
177     parseLevel(/*HasOpeningBrace=*/true);
178     Line->Level -= AddLevels;
179 
180     if (!FormatTok.Tok.is(tok::r_brace))
181       return true;
182 
183   }
184   nextToken();  // Munch the closing brace.
185   return false;
186 }
187 
188 void UnwrappedLineParser::parsePPDirective() {
189   assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
190   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
191   nextToken();
192 
193   if (FormatTok.Tok.getIdentifierInfo() == NULL) {
194     addUnwrappedLine();
195     return;
196   }
197 
198   switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
199   case tok::pp_define:
200     parsePPDefine();
201     break;
202   default:
203     parsePPUnknown();
204     break;
205   }
206 }
207 
208 void UnwrappedLineParser::parsePPDefine() {
209   nextToken();
210 
211   if (FormatTok.Tok.getKind() != tok::identifier) {
212     parsePPUnknown();
213     return;
214   }
215   nextToken();
216   if (FormatTok.Tok.getKind() == tok::l_paren) {
217     parseParens();
218   }
219   addUnwrappedLine();
220   Line->Level = 1;
221 
222   // Errors during a preprocessor directive can only affect the layout of the
223   // preprocessor directive, and thus we ignore them. An alternative approach
224   // would be to use the same approach we use on the file level (no
225   // re-indentation if there was a structural error) within the macro
226   // definition.
227   parseFile();
228 }
229 
230 void UnwrappedLineParser::parsePPUnknown() {
231   do {
232     nextToken();
233   } while (!eof());
234   addUnwrappedLine();
235 }
236 
237 void UnwrappedLineParser::parseComments() {
238   // Consume leading line comments, e.g. for branches without compounds.
239   while (FormatTok.Tok.is(tok::comment)) {
240     nextToken();
241     addUnwrappedLine();
242   }
243 }
244 
245 void UnwrappedLineParser::parseStructuralElement() {
246   assert(!FormatTok.Tok.is(tok::l_brace));
247   parseComments();
248 
249   int TokenNumber = 0;
250   switch (FormatTok.Tok.getKind()) {
251   case tok::at:
252     nextToken();
253     switch (FormatTok.Tok.getObjCKeywordID()) {
254     case tok::objc_public:
255     case tok::objc_protected:
256     case tok::objc_package:
257     case tok::objc_private:
258       return parseAccessSpecifier();
259     case tok::objc_interface:
260     case tok::objc_implementation:
261       return parseObjCInterfaceOrImplementation();
262     case tok::objc_protocol:
263       return parseObjCProtocol();
264     case tok::objc_end:
265       return; // Handled by the caller.
266     case tok::objc_optional:
267     case tok::objc_required:
268       nextToken();
269       addUnwrappedLine();
270       return;
271     default:
272       break;
273     }
274     break;
275   case tok::kw_namespace:
276     parseNamespace();
277     return;
278   case tok::kw_inline:
279     nextToken();
280     TokenNumber++;
281     if (FormatTok.Tok.is(tok::kw_namespace)) {
282       parseNamespace();
283       return;
284     }
285     break;
286   case tok::kw_public:
287   case tok::kw_protected:
288   case tok::kw_private:
289     parseAccessSpecifier();
290     return;
291   case tok::kw_if:
292     parseIfThenElse();
293     return;
294   case tok::kw_for:
295   case tok::kw_while:
296     parseForOrWhileLoop();
297     return;
298   case tok::kw_do:
299     parseDoWhile();
300     return;
301   case tok::kw_switch:
302     parseSwitch();
303     return;
304   case tok::kw_default:
305     nextToken();
306     parseLabel();
307     return;
308   case tok::kw_case:
309     parseCaseLabel();
310     return;
311   default:
312     break;
313   }
314   do {
315     ++TokenNumber;
316     switch (FormatTok.Tok.getKind()) {
317     case tok::kw_enum:
318       parseEnum();
319       return;
320     case tok::kw_struct: // fallthrough
321     case tok::kw_union:  // fallthrough
322     case tok::kw_class:
323       parseStructClassOrBracedList();
324       return;
325     case tok::semi:
326       nextToken();
327       addUnwrappedLine();
328       return;
329     case tok::l_paren:
330       parseParens();
331       break;
332     case tok::l_brace:
333       // A block outside of parentheses must be the last part of a
334       // structural element.
335       // FIXME: Figure out cases where this is not true, and add projections for
336       // them (the one we know is missing are lambdas).
337       parseBlock();
338       addUnwrappedLine();
339       return;
340     case tok::identifier:
341       nextToken();
342       if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) {
343         parseLabel();
344         return;
345       }
346       break;
347     case tok::equal:
348       nextToken();
349       if (FormatTok.Tok.is(tok::l_brace)) {
350         parseBracedList();
351       }
352       break;
353     default:
354       nextToken();
355       break;
356     }
357   } while (!eof());
358 }
359 
360 void UnwrappedLineParser::parseBracedList() {
361   nextToken();
362 
363   do {
364     switch (FormatTok.Tok.getKind()) {
365     case tok::l_brace:
366       parseBracedList();
367       break;
368     case tok::r_brace:
369       nextToken();
370       return;
371     default:
372       nextToken();
373       break;
374     }
375   } while (!eof());
376 }
377 
378 void UnwrappedLineParser::parseParens() {
379   assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
380   nextToken();
381   do {
382     switch (FormatTok.Tok.getKind()) {
383     case tok::l_paren:
384       parseParens();
385       break;
386     case tok::r_paren:
387       nextToken();
388       return;
389     case tok::l_brace:
390       {
391         nextToken();
392         ScopedLineState LineState(*this);
393         Line->Level += 1;
394         parseLevel(/*HasOpeningBrace=*/true);
395         Line->Level -= 1;
396       }
397       break;
398     default:
399       nextToken();
400       break;
401     }
402   } while (!eof());
403 }
404 
405 void UnwrappedLineParser::parseIfThenElse() {
406   assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
407   nextToken();
408   if (FormatTok.Tok.is(tok::l_paren))
409     parseParens();
410   bool NeedsUnwrappedLine = false;
411   if (FormatTok.Tok.is(tok::l_brace)) {
412     parseBlock();
413     NeedsUnwrappedLine = true;
414   } else {
415     addUnwrappedLine();
416     ++Line->Level;
417     parseStructuralElement();
418     --Line->Level;
419   }
420   if (FormatTok.Tok.is(tok::kw_else)) {
421     nextToken();
422     if (FormatTok.Tok.is(tok::l_brace)) {
423       parseBlock();
424       addUnwrappedLine();
425     } else if (FormatTok.Tok.is(tok::kw_if)) {
426       parseIfThenElse();
427     } else {
428       addUnwrappedLine();
429       ++Line->Level;
430       parseStructuralElement();
431       --Line->Level;
432     }
433   } else if (NeedsUnwrappedLine) {
434     addUnwrappedLine();
435   }
436 }
437 
438 void UnwrappedLineParser::parseNamespace() {
439   assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
440   nextToken();
441   if (FormatTok.Tok.is(tok::identifier))
442     nextToken();
443   if (FormatTok.Tok.is(tok::l_brace)) {
444     parseBlock(0);
445     addUnwrappedLine();
446   }
447   // FIXME: Add error handling.
448 }
449 
450 void UnwrappedLineParser::parseForOrWhileLoop() {
451   assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
452          "'for' or 'while' expected");
453   nextToken();
454   if (FormatTok.Tok.is(tok::l_paren))
455     parseParens();
456   if (FormatTok.Tok.is(tok::l_brace)) {
457     parseBlock();
458     addUnwrappedLine();
459   } else {
460     addUnwrappedLine();
461     ++Line->Level;
462     parseStructuralElement();
463     --Line->Level;
464   }
465 }
466 
467 void UnwrappedLineParser::parseDoWhile() {
468   assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
469   nextToken();
470   if (FormatTok.Tok.is(tok::l_brace)) {
471     parseBlock();
472   } else {
473     addUnwrappedLine();
474     ++Line->Level;
475     parseStructuralElement();
476     --Line->Level;
477   }
478 
479   // FIXME: Add error handling.
480   if (!FormatTok.Tok.is(tok::kw_while)) {
481     addUnwrappedLine();
482     return;
483   }
484 
485   nextToken();
486   parseStructuralElement();
487 }
488 
489 void UnwrappedLineParser::parseLabel() {
490   // FIXME: remove all asserts.
491   assert(FormatTok.Tok.is(tok::colon) && "':' expected");
492   nextToken();
493   unsigned OldLineLevel = Line->Level;
494   if (Line->Level > 0)
495     --Line->Level;
496   if (FormatTok.Tok.is(tok::l_brace)) {
497     parseBlock();
498   }
499   addUnwrappedLine();
500   Line->Level = OldLineLevel;
501 }
502 
503 void UnwrappedLineParser::parseCaseLabel() {
504   assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
505   // FIXME: fix handling of complex expressions here.
506   do {
507     nextToken();
508   } while (!eof() && !FormatTok.Tok.is(tok::colon));
509   parseLabel();
510 }
511 
512 void UnwrappedLineParser::parseSwitch() {
513   assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
514   nextToken();
515   if (FormatTok.Tok.is(tok::l_paren))
516     parseParens();
517   if (FormatTok.Tok.is(tok::l_brace)) {
518     parseBlock(Style.IndentCaseLabels ? 2 : 1);
519     addUnwrappedLine();
520   } else {
521     addUnwrappedLine();
522     Line->Level += (Style.IndentCaseLabels ? 2 : 1);
523     parseStructuralElement();
524     Line->Level -= (Style.IndentCaseLabels ? 2 : 1);
525   }
526 }
527 
528 void UnwrappedLineParser::parseAccessSpecifier() {
529   nextToken();
530   // Otherwise, we don't know what it is, and we'd better keep the next token.
531   if (FormatTok.Tok.is(tok::colon))
532     nextToken();
533   addUnwrappedLine();
534 }
535 
536 void UnwrappedLineParser::parseEnum() {
537   bool HasContents = false;
538   do {
539     switch (FormatTok.Tok.getKind()) {
540     case tok::l_brace:
541       nextToken();
542       addUnwrappedLine();
543       ++Line->Level;
544       parseComments();
545       break;
546     case tok::l_paren:
547       parseParens();
548       break;
549     case tok::comma:
550       nextToken();
551       addUnwrappedLine();
552       parseComments();
553       break;
554     case tok::r_brace:
555       if (HasContents)
556         addUnwrappedLine();
557       --Line->Level;
558       nextToken();
559       break;
560     case tok::semi:
561       nextToken();
562       addUnwrappedLine();
563       return;
564     default:
565       HasContents = true;
566       nextToken();
567       break;
568     }
569   } while (!eof());
570 }
571 
572 void UnwrappedLineParser::parseStructClassOrBracedList() {
573   nextToken();
574   do {
575     switch (FormatTok.Tok.getKind()) {
576     case tok::l_brace:
577       // FIXME: Think about how to resolve the error handling here.
578       parseBlock();
579       parseStructuralElement();
580       return;
581     case tok::semi:
582       nextToken();
583       addUnwrappedLine();
584       return;
585     case tok::equal:
586       nextToken();
587       if (FormatTok.Tok.is(tok::l_brace)) {
588         parseBracedList();
589       }
590       break;
591     default:
592       nextToken();
593       break;
594     }
595   } while (!eof());
596 }
597 
598 void UnwrappedLineParser::parseObjCProtocolList() {
599   assert(FormatTok.Tok.is(tok::less) && "'<' expected.");
600   do
601     nextToken();
602   while (!eof() && FormatTok.Tok.isNot(tok::greater));
603   nextToken(); // Skip '>'.
604 }
605 
606 void UnwrappedLineParser::parseObjCUntilAtEnd() {
607   do {
608     if (FormatTok.Tok.isObjCAtKeyword(tok::objc_end)) {
609       nextToken();
610       addUnwrappedLine();
611       break;
612     }
613     parseStructuralElement();
614   } while (!eof());
615 }
616 
617 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
618   nextToken();
619   nextToken();  // interface name
620 
621   // @interface can be followed by either a base class, or a category.
622   if (FormatTok.Tok.is(tok::colon)) {
623     nextToken();
624     nextToken();  // base class name
625   } else if (FormatTok.Tok.is(tok::l_paren))
626     // Skip category, if present.
627     parseParens();
628 
629   if (FormatTok.Tok.is(tok::less))
630     parseObjCProtocolList();
631 
632   // If instance variables are present, keep the '{' on the first line too.
633   if (FormatTok.Tok.is(tok::l_brace))
634     parseBlock();
635 
636   // With instance variables, this puts '}' on its own line.  Without instance
637   // variables, this ends the @interface line.
638   addUnwrappedLine();
639 
640   parseObjCUntilAtEnd();
641 }
642 
643 void UnwrappedLineParser::parseObjCProtocol() {
644   nextToken();
645   nextToken();  // protocol name
646 
647   if (FormatTok.Tok.is(tok::less))
648     parseObjCProtocolList();
649 
650   // Check for protocol declaration.
651   if (FormatTok.Tok.is(tok::semi)) {
652     nextToken();
653     return addUnwrappedLine();
654   }
655 
656   addUnwrappedLine();
657   parseObjCUntilAtEnd();
658 }
659 
660 void UnwrappedLineParser::addUnwrappedLine() {
661   if (!RootTokenInitialized)
662     return;
663   // Consume trailing comments.
664   while (!eof() && FormatTok.NewlinesBefore == 0 &&
665          FormatTok.Tok.is(tok::comment)) {
666     nextToken();
667   }
668 #ifdef UNWRAPPED_LINE_PARSER_DEBUG_OUTPUT
669   FormatToken* NextToken = &Line->RootToken;
670   llvm::errs() << "Line: ";
671   while (NextToken) {
672     llvm::errs() << NextToken->Tok.getName() << " ";
673     NextToken = NextToken->Children.empty() ? NULL : &NextToken->Children[0];
674   }
675   llvm::errs() << "\n";
676 #endif
677   Callback.consumeUnwrappedLine(*Line);
678   RootTokenInitialized = false;
679   LastInCurrentLine = NULL;
680 }
681 
682 bool UnwrappedLineParser::eof() const {
683   return FormatTok.Tok.is(tok::eof);
684 }
685 
686 void UnwrappedLineParser::nextToken() {
687   if (eof())
688     return;
689   if (RootTokenInitialized) {
690     assert(LastInCurrentLine->Children.empty());
691     LastInCurrentLine->Children.push_back(FormatTok);
692     LastInCurrentLine = &LastInCurrentLine->Children.back();
693   } else {
694     Line->RootToken = FormatTok;
695     RootTokenInitialized = true;
696     LastInCurrentLine = &Line->RootToken;
697   }
698   if (MustBreakBeforeNextToken) {
699     LastInCurrentLine->MustBreakBefore = true;
700     MustBreakBeforeNextToken = false;
701   }
702   readToken();
703 }
704 
705 void UnwrappedLineParser::readToken() {
706   FormatTok = Tokens->getNextToken();
707   while (!Line->InPPDirective && FormatTok.Tok.is(tok::hash) &&
708          ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
709           FormatTok.IsFirst)) {
710     ScopedLineState BlockState(*this);
711     parsePPDirective();
712   }
713 }
714 
715 } // end namespace format
716 } // end namespace clang
717