1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
/// This is EXPERIMENTAL code under heavy development. It is not yet in a
/// state where it can be used to format real code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "UnwrappedLineParser.h"
20 #include "llvm/Support/raw_ostream.h"
21 
22 namespace clang {
23 namespace format {
24 
25 class ScopedMacroState : public FormatTokenSource {
26 public:
27   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
28                    FormatToken &ResetToken)
29       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
30         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource) {
31     TokenSource = this;
32     Line.Level = 0;
33     Line.InPPDirective = true;
34   }
35 
36   ~ScopedMacroState() {
37     TokenSource = PreviousTokenSource;
38     ResetToken = Token;
39     Line.InPPDirective = false;
40     Line.Level = PreviousLineLevel;
41   }
42 
43   virtual FormatToken getNextToken() {
44     // The \c UnwrappedLineParser guards against this by never calling
45     // \c getNextToken() after it has encountered the first eof token.
46     assert(!eof());
47     Token = PreviousTokenSource->getNextToken();
48     if (eof())
49       return createEOF();
50     return Token;
51   }
52 
53 private:
54   bool eof() {
55     return Token.NewlinesBefore > 0 && Token.HasUnescapedNewline;
56   }
57 
58   FormatToken createEOF() {
59     FormatToken FormatTok;
60     FormatTok.Tok.startToken();
61     FormatTok.Tok.setKind(tok::eof);
62     return FormatTok;
63   }
64 
65   UnwrappedLine &Line;
66   FormatTokenSource *&TokenSource;
67   FormatToken &ResetToken;
68   unsigned PreviousLineLevel;
69   FormatTokenSource *PreviousTokenSource;
70 
71   FormatToken Token;
72 };
73 
74 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
75                                          FormatTokenSource &Tokens,
76                                          UnwrappedLineConsumer &Callback)
77     : Style(Style), Tokens(&Tokens), Callback(Callback) {
78 }
79 
80 bool UnwrappedLineParser::parse() {
81   readToken();
82   return parseFile();
83 }
84 
85 bool UnwrappedLineParser::parseFile() {
86   bool Error = parseLevel(/*HasOpeningBrace=*/false);
87   // Make sure to format the remaining tokens.
88   addUnwrappedLine();
89   return Error;
90 }
91 
92 bool UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
93   bool Error = false;
94   do {
95     switch (FormatTok.Tok.getKind()) {
96     case tok::comment:
97       nextToken();
98       addUnwrappedLine();
99       break;
100     case tok::l_brace:
101       Error |= parseBlock();
102       addUnwrappedLine();
103       break;
104     case tok::r_brace:
105       if (HasOpeningBrace) {
106         return false;
107       } else {
108         // Stray '}' is an error.
109         Error = true;
110         nextToken();
111         addUnwrappedLine();
112       }
113       break;
114     default:
115       parseStructuralElement();
116       break;
117     }
118   } while (!eof());
119   return Error;
120 }
121 
122 bool UnwrappedLineParser::parseBlock(unsigned AddLevels) {
123   assert(FormatTok.Tok.is(tok::l_brace) && "'{' expected");
124   nextToken();
125 
126   addUnwrappedLine();
127 
128   Line.Level += AddLevels;
129   parseLevel(/*HasOpeningBrace=*/true);
130   Line.Level -= AddLevels;
131 
132   // FIXME: Add error handling.
133   if (!FormatTok.Tok.is(tok::r_brace))
134     return true;
135 
136   nextToken();
137   if (FormatTok.Tok.is(tok::semi))
138     nextToken();
139   return false;
140 }
141 
142 void UnwrappedLineParser::parsePPDirective() {
143   assert(FormatTok.Tok.is(tok::hash) && "'#' expected");
144   ScopedMacroState MacroState(Line, Tokens, FormatTok);
145   nextToken();
146 
147   if (FormatTok.Tok.getIdentifierInfo() == NULL) {
148     addUnwrappedLine();
149     return;
150   }
151 
152   switch (FormatTok.Tok.getIdentifierInfo()->getPPKeywordID()) {
153   case tok::pp_define:
154     parsePPDefine();
155     break;
156   default:
157     parsePPUnknown();
158     break;
159   }
160 }
161 
162 void UnwrappedLineParser::parsePPDefine() {
163   nextToken();
164 
165   if (FormatTok.Tok.getKind() != tok::identifier) {
166     parsePPUnknown();
167     return;
168   }
169   nextToken();
170   if (FormatTok.Tok.getKind() == tok::l_paren) {
171     parseParens();
172   }
173   addUnwrappedLine();
174   Line.Level = 1;
175 
176   // Errors during a preprocessor directive can only affect the layout of the
177   // preprocessor directive, and thus we ignore them. An alternative approach
178   // would be to use the same approach we use on the file level (no
179   // re-indentation if there was a structural error) within the macro
180   // definition.
181   parseFile();
182 }
183 
184 void UnwrappedLineParser::parsePPUnknown() {
185   do {
186     nextToken();
187   } while (!eof());
188   addUnwrappedLine();
189 }
190 
191 void UnwrappedLineParser::parseComments() {
192   // Consume leading line comments, e.g. for branches without compounds.
193   while (FormatTok.Tok.is(tok::comment)) {
194     nextToken();
195     addUnwrappedLine();
196   }
197 }
198 
199 void UnwrappedLineParser::parseStructuralElement() {
200   parseComments();
201 
202   int TokenNumber = 0;
203   switch (FormatTok.Tok.getKind()) {
204   case tok::kw_namespace:
205     parseNamespace();
206     return;
207   case tok::kw_inline:
208     nextToken();
209     TokenNumber++;
210     if (FormatTok.Tok.is(tok::kw_namespace)) {
211       parseNamespace();
212       return;
213     }
214     break;
215   case tok::kw_public:
216   case tok::kw_protected:
217   case tok::kw_private:
218     parseAccessSpecifier();
219     return;
220   case tok::kw_if:
221     parseIfThenElse();
222     return;
223   case tok::kw_for:
224   case tok::kw_while:
225     parseForOrWhileLoop();
226     return;
227   case tok::kw_do:
228     parseDoWhile();
229     return;
230   case tok::kw_switch:
231     parseSwitch();
232     return;
233   case tok::kw_default:
234     nextToken();
235     parseLabel();
236     return;
237   case tok::kw_case:
238     parseCaseLabel();
239     return;
240   default:
241     break;
242   }
243   do {
244     ++TokenNumber;
245     switch (FormatTok.Tok.getKind()) {
246     case tok::kw_enum:
247       parseEnum();
248       return;
249     case tok::semi:
250       nextToken();
251       addUnwrappedLine();
252       return;
253     case tok::l_paren:
254       parseParens();
255       break;
256     case tok::l_brace:
257       parseBlock();
258       addUnwrappedLine();
259       return;
260     case tok::identifier:
261       nextToken();
262       if (TokenNumber == 1 && FormatTok.Tok.is(tok::colon)) {
263         parseLabel();
264         return;
265       }
266       break;
267     case tok::equal:
268       nextToken();
269       // Skip initializers as they will be formatted by a later step.
270       if (FormatTok.Tok.is(tok::l_brace))
271         nextToken();
272       break;
273     default:
274       nextToken();
275       break;
276     }
277   } while (!eof());
278 }
279 
280 void UnwrappedLineParser::parseParens() {
281   assert(FormatTok.Tok.is(tok::l_paren) && "'(' expected.");
282   nextToken();
283   do {
284     switch (FormatTok.Tok.getKind()) {
285     case tok::l_paren:
286       parseParens();
287       break;
288     case tok::r_paren:
289       nextToken();
290       return;
291     default:
292       nextToken();
293       break;
294     }
295   } while (!eof());
296 }
297 
298 void UnwrappedLineParser::parseIfThenElse() {
299   assert(FormatTok.Tok.is(tok::kw_if) && "'if' expected");
300   nextToken();
301   parseParens();
302   bool NeedsUnwrappedLine = false;
303   if (FormatTok.Tok.is(tok::l_brace)) {
304     parseBlock();
305     NeedsUnwrappedLine = true;
306   } else {
307     addUnwrappedLine();
308     ++Line.Level;
309     parseStructuralElement();
310     --Line.Level;
311   }
312   if (FormatTok.Tok.is(tok::kw_else)) {
313     nextToken();
314     if (FormatTok.Tok.is(tok::l_brace)) {
315       parseBlock();
316       addUnwrappedLine();
317     } else if (FormatTok.Tok.is(tok::kw_if)) {
318       parseIfThenElse();
319     } else {
320       addUnwrappedLine();
321       ++Line.Level;
322       parseStructuralElement();
323       --Line.Level;
324     }
325   } else if (NeedsUnwrappedLine) {
326     addUnwrappedLine();
327   }
328 }
329 
330 void UnwrappedLineParser::parseNamespace() {
331   assert(FormatTok.Tok.is(tok::kw_namespace) && "'namespace' expected");
332   nextToken();
333   if (FormatTok.Tok.is(tok::identifier))
334     nextToken();
335   if (FormatTok.Tok.is(tok::l_brace)) {
336     parseBlock(0);
337     addUnwrappedLine();
338   }
339   // FIXME: Add error handling.
340 }
341 
342 void UnwrappedLineParser::parseForOrWhileLoop() {
343   assert((FormatTok.Tok.is(tok::kw_for) || FormatTok.Tok.is(tok::kw_while)) &&
344          "'for' or 'while' expected");
345   nextToken();
346   parseParens();
347   if (FormatTok.Tok.is(tok::l_brace)) {
348     parseBlock();
349     addUnwrappedLine();
350   } else {
351     addUnwrappedLine();
352     ++Line.Level;
353     parseStructuralElement();
354     --Line.Level;
355   }
356 }
357 
358 void UnwrappedLineParser::parseDoWhile() {
359   assert(FormatTok.Tok.is(tok::kw_do) && "'do' expected");
360   nextToken();
361   if (FormatTok.Tok.is(tok::l_brace)) {
362     parseBlock();
363   } else {
364     addUnwrappedLine();
365     ++Line.Level;
366     parseStructuralElement();
367     --Line.Level;
368   }
369 
370   // FIXME: Add error handling.
371   if (!FormatTok.Tok.is(tok::kw_while)) {
372     addUnwrappedLine();
373     return;
374   }
375 
376   nextToken();
377   parseStructuralElement();
378 }
379 
380 void UnwrappedLineParser::parseLabel() {
381   // FIXME: remove all asserts.
382   assert(FormatTok.Tok.is(tok::colon) && "':' expected");
383   nextToken();
384   unsigned OldLineLevel = Line.Level;
385   if (Line.Level > 0)
386     --Line.Level;
387   if (FormatTok.Tok.is(tok::l_brace)) {
388     parseBlock();
389   }
390   addUnwrappedLine();
391   Line.Level = OldLineLevel;
392 }
393 
394 void UnwrappedLineParser::parseCaseLabel() {
395   assert(FormatTok.Tok.is(tok::kw_case) && "'case' expected");
396   // FIXME: fix handling of complex expressions here.
397   do {
398     nextToken();
399   } while (!eof() && !FormatTok.Tok.is(tok::colon));
400   parseLabel();
401 }
402 
403 void UnwrappedLineParser::parseSwitch() {
404   assert(FormatTok.Tok.is(tok::kw_switch) && "'switch' expected");
405   nextToken();
406   parseParens();
407   if (FormatTok.Tok.is(tok::l_brace)) {
408     parseBlock(Style.IndentCaseLabels ? 2 : 1);
409     addUnwrappedLine();
410   } else {
411     addUnwrappedLine();
412     Line.Level += (Style.IndentCaseLabels ? 2 : 1);
413     parseStructuralElement();
414     Line.Level -= (Style.IndentCaseLabels ? 2 : 1);
415   }
416 }
417 
418 void UnwrappedLineParser::parseAccessSpecifier() {
419   nextToken();
420   // Otherwise, we don't know what it is, and we'd better keep the next token.
421   if (FormatTok.Tok.is(tok::colon))
422     nextToken();
423   addUnwrappedLine();
424 }
425 
426 void UnwrappedLineParser::parseEnum() {
427   bool HasContents = false;
428   do {
429     switch (FormatTok.Tok.getKind()) {
430     case tok::l_brace:
431       nextToken();
432       addUnwrappedLine();
433       ++Line.Level;
434       parseComments();
435       break;
436     case tok::l_paren:
437       parseParens();
438       break;
439     case tok::comma:
440       nextToken();
441       addUnwrappedLine();
442       parseComments();
443       break;
444     case tok::r_brace:
445       if (HasContents)
446         addUnwrappedLine();
447       --Line.Level;
448       nextToken();
449       break;
450     case tok::semi:
451       nextToken();
452       addUnwrappedLine();
453       return;
454     default:
455       HasContents = true;
456       nextToken();
457       break;
458     }
459   } while (!eof());
460 }
461 
462 void UnwrappedLineParser::addUnwrappedLine() {
463   // Consume trailing comments.
464   while (!eof() && FormatTok.NewlinesBefore == 0 &&
465          FormatTok.Tok.is(tok::comment)) {
466     nextToken();
467   }
468   Callback.consumeUnwrappedLine(Line);
469   Line.Tokens.clear();
470 }
471 
472 bool UnwrappedLineParser::eof() const {
473   return FormatTok.Tok.is(tok::eof);
474 }
475 
476 void UnwrappedLineParser::nextToken() {
477   if (eof())
478     return;
479   Line.Tokens.push_back(FormatTok);
480   readToken();
481 }
482 
483 void UnwrappedLineParser::readToken() {
484   FormatTok = Tokens->getNextToken();
485   while (!Line.InPPDirective && FormatTok.Tok.is(tok::hash) &&
486          ((FormatTok.NewlinesBefore > 0 && FormatTok.HasUnescapedNewline) ||
487           FormatTok.IsFirst)) {
488     // FIXME: This is incorrect - the correct way is to create a
489     // data structure that will construct the parts around the preprocessor
490     // directive as a structured \c UnwrappedLine.
491     addUnwrappedLine();
492     parsePPDirective();
493   }
494 }
495 
496 } // end namespace format
497 } // end namespace clang
498