1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
/// This is EXPERIMENTAL code under heavy development. It is not yet in a
/// state where it can be used to format real code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "UnwrappedLineParser.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 namespace clang {
23 namespace format {
24 
25 class ScopedMacroState : public FormatTokenSource {
26 public:
27   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
28                    FormatToken &ResetToken)
29       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
30         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
31     TokenSource = this;
32     Line.Level = 0;
33     Line.InPPDirective = true;
34   }
35 
36   ~ScopedMacroState() {
37     TokenSource = PreviousTokenSource;
38     ResetToken = Token;
39     Line.InPPDirective = false;
40     Line.Level = PreviousLineLevel;
41   }
42 
43   virtual FormatToken getNextToken() {
44     // The \c UnwrappedLineParser guards against this by never calling
45     // \c getNextToken() after it has encountered the first eof token.
46     assert(!eof());
47     Token = PreviousTokenSource->getNextToken();
48     if (eof())
49       return createEOF();
50     return Token;
51   }
52 
53 private:
54   bool eof() {
55     return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline;
56   }
57 
58   FormatToken createEOF() {
59     FormatToken FormatTok;
60     FormatTok.Tok.startToken();
61     FormatTok.Tok.setKind(tok::eof);
62     return FormatTok;
63   }
64 
65   UnwrappedLine &Line;
66   FormatTokenSource *&TokenSource;
67   FormatToken &ResetToken;
68   unsigned PreviousLineLevel;
69   FormatTokenSource *PreviousTokenSource;
70 
71   FormatToken Token;
72 };
73 
74 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
75                                          FormatTokenSource &Tokens,
76                                          UnwrappedLineConsumer &Callback)
77     : RootTokenInitialized(false), Style(Style), Tokens(&Tokens),
78       Callback(Callback) {
79 }
80 
81 bool UnwrappedLineParser::parse() {
82   readToken();
83   return parseFile();
84 }
85 
86 bool UnwrappedLineParser::parseFile() {
87   bool Error = parseLevel(/*HasOpeningBrace=*/false);
88   // Make sure to format the remaining tokens.
89   addUnwrappedLine();
90   return Error;
91 }
92 
93 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
94   bool Error = false;
95   do {
96     switch (FormatTok.Tok.getKind()) {
97     case tok::comment:
98       nextToken();
99       addUnwrappedLine();
100       break;
101     case tok::l_brace:
102       Error |= parseBlock();
103       addUnwrappedLine();
104       break;
105     case tok::r_brace:
106       if (HasOpeningBrace) {
107         return false;
108       } else {
109         // Stray '}' is an error.
110         Error = true;
111         nextToken();
112         addUnwrappedLine();
113       }
114       break;
115     default:
116       parseStructuralElement();
117       break;
118     }
119   } while (!eof());
120   return Error;
121 }
122 
123 bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
124   assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
125   nextToken();
126 
127   addUnwrappedLine();
128 
129   Line.Level += AddLevels;
130   parseLevel(/*HasOpeningBrace=*/true);
131   Line.Level -= AddLevels;
132 
133   if (!FormatTok.Tok.is(tok::r_brace))
134     return true;
135 
136   nextToken();  // Munch the closing brace.
137   return false;
138 }
139 
140 void UnwrappedLineParser::parsePPDirective() {
141   assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
142   ScopedMacroState MacroState(Line, Tokens, FormatTok);
143   nextToken();
144 
145   if (FormatTok.Tok.getIdentifierInfo() == NULL) {
146     addUnwrappedLine();
147     return;
148   }
149 
150   switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
151   case tok::pp_define:
152     parsePPDefine();
153     break;
154   default:
155     parsePPUnknown();
156     break;
157   }
158 }
159 
160 void UnwrappedLineParser::parsePPDefine() {
161   nextToken();
162 
163   if (FormatTok.Tok.getKind() != tok::identifier) {
164     parsePPUnknown();
165     return;
166   }
167   nextToken();
168   if (FormatTok.Tok.getKind() == tok::l_paren) {
169     parseParens();
170   }
171   addUnwrappedLine();
172   Line.Level = 1;
173 
174   // Errors during a preprocessor directive can only affect the layout of the
175   // preprocessor directive, and thus we ignore them. An alternative approach
176   // would be to use the same approach we use on the file level (no
177   // re-indentation if there was a structural error) within the macro
178   // definition.
179   parseFile();
180 }
181 
182 void UnwrappedLineParser::parsePPUnknown() {
183   do {
184     nextToken();
185   } while (!eof());
186   addUnwrappedLine();
187 }
188 
189 void UnwrappedLineParser::parseComments() {
190   // Consume leading line comments, e.g. for branches without compounds.
191   while (FormatTok.Tok.is(tok::comment)) {
192     nextToken();
193     addUnwrappedLine();
194   }
195 }
196 
197 void UnwrappedLineParser::parseStructuralElement() {
198   parseComments();
199 
200   int TokenNumber = 0;
201   switch (FormatTok.Tok.getKind()) {
202   case tok::at:
203     nextToken();
204     switch (FormatTok.Tok.getObjCKeywordID()) {
205     case tok::objc_public:
206     case tok::objc_protected:
207     case tok::objc_package:
208     case tok::objc_private:
209       return parseAccessSpecifier();
210     default:
211       break;
212     }
213     break;
214   case tok::kw_namespace:
215     parseNamespace();
216     return;
217   case tok::kw_inline:
218     nextToken();
219     TokenNumber++;
220     if (FormatTok.Tok.is(tok::kw_namespace)) {
221       parseNamespace();
222       return;
223     }
224     break;
225   case tok::kw_public:
226   case tok::kw_protected:
227   case tok::kw_private:
228     parseAccessSpecifier();
229     return;
230   case tok::kw_if:
231     parseIfThenElse();
232     return;
233   case tok::kw_for:
234   case tok::kw_while:
235     parseForOrWhileLoop();
236     return;
237   case tok::kw_do:
238     parseDoWhile();
239     return;
240   case tok::kw_switch:
241     parseSwitch();
242     return;
243   case tok::kw_default:
244     nextToken();
245     parseLabel();
246     return;
247   case tok::kw_case:
248     parseCaseLabel();
249     return;
250   default:
251     break;
252   }
253   do {
254     ++TokenNumber;
255     switch (FormatTok.Tok.getKind()) {
256     case tok::kw_enum:
257       parseEnum();
258       return;
259     case tok::kw_struct:  // fallthrough
260     case tok::kw_class:
261       parseStructOrClass();
262       return;
263     case tok::semi:
264       nextToken();
265       addUnwrappedLine();
266       return;
267     case tok::l_paren:
268       parseParens();
269       break;
270     case tok::l_brace:
271       parseBlock();
272       addUnwrappedLine();
273       return;
274     case tok::identifier:
275       nextToken();
276       if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) {
277         parseLabel();
278         return;
279       }
280       break;
281     case tok::equal:
282       nextToken();
283       // Skip initializers as they will be formatted by a later step.
284       if (FormatTok.Tok.is(tok::l_brace))
285         nextToken();
286       break;
287     default:
288       nextToken();
289       break;
290     }
291   } while (!eof());
292 }
293 
294 void UnwrappedLineParser::parseParens() {
295   assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
296   nextToken();
297   do {
298     switch (FormatTok.Tok.getKind()) {
299     case tok::l_paren:
300       parseParens();
301       break;
302     case tok::r_paren:
303       nextToken();
304       return;
305     default:
306       nextToken();
307       break;
308     }
309   } while (!eof());
310 }
311 
312 void UnwrappedLineParser::parseIfThenElse() {
313   assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
314   nextToken();
315   parseParens();
316   bool NeedsUnwrappedLine = false;
317   if (FormatTok.Tok.is(tok::l_brace)) {
318     parseBlock();
319     NeedsUnwrappedLine = true;
320   } else {
321     addUnwrappedLine();
322     ++Line.Level;
323     parseStructuralElement();
324     --Line.Level;
325   }
326   if (FormatTok.Tok.is(tok::kw_else)) {
327     nextToken();
328     if (FormatTok.Tok.is(tok::l_brace)) {
329       parseBlock();
330       addUnwrappedLine();
331     } else if (FormatTok.Tok.is(tok::kw_if)) {
332       parseIfThenElse();
333     } else {
334       addUnwrappedLine();
335       ++Line.Level;
336       parseStructuralElement();
337       --Line.Level;
338     }
339   } else if (NeedsUnwrappedLine) {
340     addUnwrappedLine();
341   }
342 }
343 
344 void UnwrappedLineParser::parseNamespace() {
345   assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
346   nextToken();
347   if (FormatTok.Tok.is(tok::identifier))
348     nextToken();
349   if (FormatTok.Tok.is(tok::l_brace)) {
350     parseBlock(0);
351     addUnwrappedLine();
352   }
353   // FIXME: Add error handling.
354 }
355 
356 void UnwrappedLineParser::parseForOrWhileLoop() {
357   assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
358          "'for' or 'while' expected");
359   nextToken();
360   parseParens();
361   if (FormatTok.Tok.is(tok::l_brace)) {
362     parseBlock();
363     addUnwrappedLine();
364   } else {
365     addUnwrappedLine();
366     ++Line.Level;
367     parseStructuralElement();
368     --Line.Level;
369   }
370 }
371 
372 void UnwrappedLineParser::parseDoWhile() {
373   assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
374   nextToken();
375   if (FormatTok.Tok.is(tok::l_brace)) {
376     parseBlock();
377   } else {
378     addUnwrappedLine();
379     ++Line.Level;
380     parseStructuralElement();
381     --Line.Level;
382   }
383 
384   // FIXME: Add error handling.
385   if (!FormatTok.Tok.is(tok::kw_while)) {
386     addUnwrappedLine();
387     return;
388   }
389 
390   nextToken();
391   parseStructuralElement();
392 }
393 
394 void UnwrappedLineParser::parseLabel() {
395   // FIXME: remove all asserts.
396   assert(FormatTok.Tok.is(tok::colon) && "':' expected");
397   nextToken();
398   unsigned OldLineLevel = Line.Level;
399   if (Line.Level > 0)
400     --Line.Level;
401   if (FormatTok.Tok.is(tok::l_brace)) {
402     parseBlock();
403   }
404   addUnwrappedLine();
405   Line.Level = OldLineLevel;
406 }
407 
408 void UnwrappedLineParser::parseCaseLabel() {
409   assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
410   // FIXME: fix handling of complex expressions here.
411   do {
412     nextToken();
413   } while (!eof() && !FormatTok.Tok.is(tok::colon));
414   parseLabel();
415 }
416 
417 void UnwrappedLineParser::parseSwitch() {
418   assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
419   nextToken();
420   parseParens();
421   if (FormatTok.Tok.is(tok::l_brace)) {
422     parseBlock(Style.IndentCaseLabels ? 2 : 1);
423     addUnwrappedLine();
424   } else {
425     addUnwrappedLine();
426     Line.Level += (Style.IndentCaseLabels ? 2 : 1);
427     parseStructuralElement();
428     Line.Level -= (Style.IndentCaseLabels ? 2 : 1);
429   }
430 }
431 
432 void UnwrappedLineParser::parseAccessSpecifier() {
433   nextToken();
434   // Otherwise, we don't know what it is, and we'd better keep the next token.
435   if (FormatTok.Tok.is(tok::colon))
436     nextToken();
437   addUnwrappedLine();
438 }
439 
440 void UnwrappedLineParser::parseEnum() {
441   bool HasContents = false;
442   do {
443     switch (FormatTok.Tok.getKind()) {
444     case tok::l_brace:
445       nextToken();
446       addUnwrappedLine();
447       ++Line.Level;
448       parseComments();
449       break;
450     case tok::l_paren:
451       parseParens();
452       break;
453     case tok::comma:
454       nextToken();
455       addUnwrappedLine();
456       parseComments();
457       break;
458     case tok::r_brace:
459       if (HasContents)
460         addUnwrappedLine();
461       --Line.Level;
462       nextToken();
463       break;
464     case tok::semi:
465       nextToken();
466       addUnwrappedLine();
467       return;
468     default:
469       HasContents = true;
470       nextToken();
471       break;
472     }
473   } while (!eof());
474 }
475 
476 void UnwrappedLineParser::parseStructOrClass() {
477   nextToken();
478   do {
479     switch (FormatTok.Tok.getKind()) {
480     case tok::l_brace:
481       // FIXME: Think about how to resolve the error handling here.
482       parseBlock();
483       parseStructuralElement();
484       return;
485     case tok::semi:
486       nextToken();
487       addUnwrappedLine();
488       return;
489     default:
490       nextToken();
491       break;
492     }
493   } while (!eof());
494 }
495 
496 void UnwrappedLineParser::addUnwrappedLine() {
497   if (!RootTokenInitialized)
498     return;
499   // Consume trailing comments.
500   while (!eof() && FormatTok.NewlinesBefore == 0 &&
501          FormatTok.Tok.is(tok::comment)) {
502     nextToken();
503   }
504   Callback.consumeUnwrappedLine(Line);
505   RootTokenInitialized = false;
506 }
507 
508 bool UnwrappedLineParser::eof() const {
509   return FormatTok.Tok.is(tok::eof);
510 }
511 
512 void UnwrappedLineParser::nextToken() {
513   if (eof())
514     return;
515   if (RootTokenInitialized) {
516     LastInCurrentLine->Children.push_back(FormatTok);
517     LastInCurrentLine = &LastInCurrentLine->Children.back();
518   } else {
519     Line.RootToken = FormatTok;
520     RootTokenInitialized = true;
521     LastInCurrentLine = &Line.RootToken;
522   }
523   readToken();
524 }
525 
526 void UnwrappedLineParser::readToken() {
527   FormatTok = Tokens->getNextToken();
528   while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) &&
529          ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
530           FormatTok.IsFirst)) {
531     // FIXME: This is incorrect - the correct way is to create a
532     // data structure that will construct the parts around the preprocessor
533     // directive as a structured \c UnwrappedLine.
534     addUnwrappedLine();
535     parsePPDirective();
536   }
537 }
538 
539 } // end namespace format
540 } // end namespace clang
541