xref: /sqlite-3.40.0/tool/stripccomments.c (revision b5e2e6fc)
1 /**
2    Strips C- and C++-style comments from stdin, sending the results to
3    stdout. It assumes that its input is legal C-like code, and does
4    only little error handling.
5 
6    It treats string literals as anything starting and ending with
7    matching double OR single quotes OR backticks (for use with
8    scripting languages which use those). It assumes that a quote
9    character within a string which uses the same quote type is escaped
10    by a backslash. It should not be used on any code which might
11    contain C/C++ comments inside heredocs, and similar constructs, as
12    it will strip those out.
13 
14    Usage: $0 [--keep-first|-k] < input > output
15 
16    The --keep-first (-k) flag tells it to retain the first comment in the
17    input stream (which is often a license or attribution block). It
18    may be given repeatedly, each one incrementing the number of
19    retained comments by one.
20 
21    License: Public Domain
22    Author: Stephan Beal ([email protected])
23 */
24 #include <stdio.h>
25 #include <assert.h>
26 #include <string.h>
27 
/*
  MARKER(("fmt", args...)) is a debugging aid which prints the current
  source file and line number followed by a printf()-style message.
  Note the doubled parentheses: the complete printf() argument list,
  including its parentheses, is passed as the single macro argument.
  All output is produced with printf().

  Change the "#if 1" to "#if 0" to compile the messages out. Both
  variants expand to exactly one statement, so a MARKER call may be
  used as the body of an unbraced if/else without changing which
  "if" a following "else" belongs to.
*/
#if 1
#define MARKER(pfexp)                                                \
  do{ printf("MARKER: %s:%d:\t",__FILE__,__LINE__);                  \
    printf pfexp;                                                    \
  } while(0)
#else
#define MARKER(pfexp) do{ if(0){ printf pfexp; } } while(0)
#endif
36 
/*
  Application-wide state shared by main() and do_it_all().
*/
struct {
  /* Stream the source text is read from. */
  FILE * input;
  /* Stream the filtered text is written to. */
  FILE * output;
  /* Result code: 0 until an error is recorded. */
  int rc;
  /* Number of leading comments which are still to be retained. */
  int keepFirst;
} App = { NULL, NULL, 0, 0 };
48 
do_it_all(void)49 void do_it_all(void){
50   enum states {
51     S_NONE = 0 /* not in comment */,
52     S_SLASH1 = 1 /* slash - possibly comment prefix */,
53     S_CPP = 2 /* in C++ comment */,
54     S_C = 3 /* in C comment */
55   };
56   int ch, prev = EOF;
57   FILE * out = App.output;
58   int const slash = '/';
59   int const star = '*';
60   int line = 1;
61   int col = 0;
62   enum states state = S_NONE /* current state */;
63   int elide = 0 /* true if currently eliding output */;
64   int state3Col = -99
65     /* huge kludge for odd corner case: */
66     /*/ <--- here. state3Col marks the source column in which a C-style
67       comment starts, so that it can tell if star-slash inside a
68       C-style comment is the end of the comment or is the weird corner
69       case marked at the start of _this_ comment block. */;
70   for( ; EOF != (ch = fgetc(App.input)); prev = ch,
71          ++col){
72     switch(state){
73       case S_NONE:
74         if('\''==ch || '"'==ch || '`'==ch){
75           /* Read string literal...
76              needed to properly catch comments in strings. */
77           int const quote = ch,
78             startLine = line, startCol = col;
79           int ch2, escaped = 0, endOfString = 0;
80           fputc(ch, out);
81           for( ++col; !endOfString && EOF != (ch2 = fgetc(App.input));
82                ++col ){
83             switch(ch2){
84               case '\\': escaped = !escaped;
85                 break;
86               case '`':
87               case '\'':
88               case '"':
89                 if(!escaped && quote == ch2) endOfString = 1;
90                 escaped = 0;
91                 break;
92               default:
93                 escaped = 0;
94                 break;
95             }
96             if('\n'==ch2){
97               ++line;
98               col = 0;
99             }
100             fputc(ch2, out);
101           }
102           if(EOF == ch2){
103             fprintf(stderr, "Unexpected EOF while reading %s literal "
104                     "on line %d column %d.\n",
105                     ('\''==ch) ? "char" : "string",
106                     startLine, startCol);
107             App.rc = 1;
108             return;
109           }
110           break;
111         }
112         else if(slash == ch){
113           /* MARKER(("state 0 ==> 1 @ %d:%d\n", line, col)); */
114           state = S_SLASH1;
115           break;
116         }
117         fputc(ch, out);
118         break;
119       case S_SLASH1: /* 1 slash */
120         /* MARKER(("SLASH1 @ %d:%d App.keepFirst=%d\n",
121            line, col, App.keepFirst)); */
122         switch(ch){
123           case '*':
124             /* Enter C comment */
125             if(App.keepFirst>0){
126               elide = 0;
127               --App.keepFirst;
128             }else{
129               elide = 1;
130             }
131             /*MARKER(("state 1 ==> 3 @ %d:%d\n", line, col));*/
132             state = S_C;
133             state3Col = col-1;
134             if(!elide){
135               fputc(prev, out);
136               fputc(ch, out);
137             }
138             break;
139           case '/':
140             /* Enter C++ comment */
141             if(App.keepFirst>0){
142               elide = 0;
143               --App.keepFirst;
144             }else{
145               elide = 1;
146             }
147             /*MARKER(("state 1 ==> 2 @ %d:%d\n", line, col));*/
148             state = S_CPP;
149             if(!elide){
150               fputc(prev, out);
151               fputc(ch, out);
152             }
153             break;
154           default:
155             /* It wasn't a comment after all. */
156             state = S_NONE;
157             if(!elide){
158               fputc(prev, out);
159               fputc(ch, out);
160             }
161         }
162         break;
163       case S_CPP: /* C++ comment */
164         if('\n' == ch){
165           /* MARKER(("state 2 ==> 0 @ %d:%d\n", line, col)); */
166           state = S_NONE;
167           elide = 0;
168         }
169         if(!elide){
170           fputc(ch, out);
171         }
172         break;
173       case S_C: /* C comment */
174         if(!elide){
175           fputc(ch, out);
176         }
177         if(slash == ch){
178           if(star == prev){
179             /* MARKER(("state 3 ==> 0 @ %d:%d\n", line, col)); */
180             /* Corner case which breaks this: */
181             /*/ <-- slash there */
182             /* That shows up twice in a piece of 3rd-party
183                code i use. */
184             /* And thus state3Col was introduced :/ */
185             if(col!=state3Col+2){
186               state = S_NONE;
187               elide = 0;
188               state3Col = -99;
189             }
190           }
191         }
192         break;
193       default:
194         assert(!"impossible!");
195         break;
196     }
197     if('\n' == ch){
198       ++line;
199       col = 0;
200       state3Col = -99;
201     }
202   }
203 }
204 
/**
   Writes a two-line help text to stderr: a one-sentence summary of
   what the program does followed by a usage line in which zAppName
   is shown as the program name.
*/
static void usage(char const *zAppName){
  fputs("Strips C- and C++-style comments from stdin and sends "
        "the results to stdout.\n", stderr);
  fprintf(stderr,
          "Usage: %s [--keep-first|-k] < input > output\n",
          zAppName);
}
210 
main(int argc,char const * const * argv)211 int main( int argc, char const * const * argv ){
212   int i;
213   for(i = 1; i < argc; ++i){
214     char const * zArg = argv[i];
215     while( '-'==*zArg ) ++zArg;
216     if( 0==strcmp(zArg,"k")
217         || 0==strcmp(zArg,"keep-first") ){
218       ++App.keepFirst;
219     }else{
220       usage(argv[0]);
221       return 1;
222     }
223   }
224   App.input = stdin;
225   App.output = stdout;
226   do_it_all();
227   return App.rc ? 1 : 0;
228 }
229