Add tests for the new code on this branch.
[sqlite.git] / tool / stripccomments.c
blob53933c0138fdfcabd8ea895658a2c268eb55c01f
/**
   Strips C- and C++-style comments from stdin, sending the results to
   stdout. It assumes that its input is legal C-like code and performs
   only minimal error handling.
6 It treats string literals as anything starting and ending with
7 matching double OR single quotes OR backticks (for use with
8 scripting languages which use those). It assumes that a quote
9 character within a string which uses the same quote type is escaped
10 by a backslash. It should not be used on any code which might
11 contain C/C++ comments inside heredocs, and similar constructs, as
12 it will strip those out.
14 Usage: $0 [--keep-first|-k] < input > output
16 The --keep-first (-k) flag tells it to retain the first comment in the
17 input stream (which is often a license or attribution block). It
18 may be given repeatedly, each one incrementing the number of
19 retained comments by one.
   License: Public Domain
   Author: Stephan Beal (stephan@wanderinghorse.net)
*/
24 #include <stdio.h>
25 #include <assert.h>
26 #include <string.h>
/*
** MARKER(("fmt", args...)) is a debugging aid. Note the doubled
** parentheses: the single macro argument is a complete, parenthesized
** printf() argument list.
**
** When enabled (#if 1) it prints "MARKER: <file>:<line>:" followed by
** the formatted message to stdout. When disabled it expands to a
** statement which never executes but still lets the compiler see the
** printf() arguments.
**
** Both variants are wrapped in do{...}while(0) so that a MARKER(...);
** call is exactly one statement. The previous disabled variant was a
** bare "if(0) printf", which would capture an "else" following the
** call site.
*/
#if 1
#define MARKER(pfexp)                                        \
  do{ printf("MARKER: %s:%d:\t",__FILE__,__LINE__);          \
    printf pfexp;                                            \
  } while(0)
#else
#define MARKER(pfexp) do{ if(0) printf pfexp; } while(0)
#endif
/*
** Application-wide state shared by main() and do_it_all().
*/
struct {
  FILE * input     /* stream the source text is read from */;
  FILE * output    /* stream the filtered text is written to */;
  int rc           /* set to non-0 by do_it_all() on error */;
  int keepFirst    /* number of leading comments still to be retained */;
} App = {
  0/*input*/,
  0/*output*/,
  0/*rc*/,
  0/*keepFirst*/
};

/*
** Copies the remainder of a quoted literal from in to out. The opening
** quote character has already been consumed and emitted by the caller.
** A quote character preceded by an odd number of backslashes does not
** terminate the literal.
**
** *pLine and *pCol are updated for every character consumed so that
** the caller's position tracking stays accurate.
**
** Returns 0 once the closing quote has been copied, or non-0 if EOF is
** reached first (everything read up to that point has been copied).
*/
static int copy_quoted(FILE *in, FILE *out, int quote,
                       int *pLine, int *pCol){
  int escaped = 0 /* true if the previous char was an unescaped backslash */;
  int ch;
  while( EOF != (ch = fgetc(in)) ){
    fputc(ch, out);
    if('\n'==ch){
      ++*pLine;
      *pCol = 0;
    }else{
      ++*pCol;
    }
    if('\\'==ch){
      escaped = !escaped;
    }else{
      if(quote==ch && !escaped) return 0;
      escaped = 0;
    }
  }
  return 1;
}

/*
** Reads all of App.input and copies it to App.output, leaving out
** C-style and C++-style comments. The first App.keepFirst comments
** encountered are copied through unchanged; App.keepFirst is
** decremented for each one retained.
**
** Text inside single-quoted, double-quoted or backtick-quoted literals
** is copied verbatim, so comment markers inside literals are not
** treated as comments.
**
** On error a message is written to stderr and App.rc is set to 1. The
** errors detected are EOF inside a quoted literal and EOF inside a
** C-style comment. Line and column numbers in messages are 1-based.
*/
void do_it_all(void){
  enum states {
    S_NONE = 0 /* not in comment */,
    S_SLASH1 = 1 /* slash - possibly comment prefix */,
    S_CPP = 2 /* in C++ comment */,
    S_C = 3 /* in C comment */
  };
  FILE * const in = App.input;
  FILE * const out = App.output;
  int const slash = '/';
  int const star = '*';
  int ch, prev = EOF;
  int line = 1;
  int col = 0 /* column of the most recently read char on this line */;
  enum states state = S_NONE /* current state */;
  int elide = 0 /* true if currently eliding output */;
  int cStart = 0
    /* True only for the first character following the opener of a
       C-style comment. A slash in that position (slash-star-slash)
       shares its star with the opener, so it must not be taken for
       the end of the comment. */;
  int cLine = 0, cCol = 0 /* where the current C-style comment began */;

  for( ; EOF != (ch = fgetc(in)); prev = ch ){
    ++col;
    switch(state){
      case S_SLASH1: /* previous char was a slash which is still unemitted */
        if(star==ch || slash==ch){
          /* Enter a comment, retaining it if any "keep" credit is left. */
          if(App.keepFirst>0){
            elide = 0;
            --App.keepFirst;
          }else{
            elide = 1;
          }
          if(star==ch){
            state = S_C;
            cStart = 1;
            cLine = line;
            cCol = col-1 /* column of the opening slash */;
          }else{
            state = S_CPP;
          }
          if(!elide){
            fputc(slash, out);
            fputc(ch, out);
          }
          break;
        }
        /* It wasn't a comment after all: emit the pending slash, then
           let ch be handled like any other non-comment character. That
           way a quote directly after a slash still starts a literal. */
        fputc(slash, out);
        state = S_NONE;
        /* fall through */
      case S_NONE:
        if('\''==ch || '"'==ch || '`'==ch){
          /* Copy the whole literal so that comment markers inside of it
             are left alone. */
          int const startLine = line, startCol = col;
          fputc(ch, out);
          if(copy_quoted(in, out, ch, &line, &col)){
            fprintf(stderr, "Unexpected EOF while reading %s literal "
                    "on line %d column %d.\n",
                    ('\''==ch) ? "char" : "string",
                    startLine, startCol);
            App.rc = 1;
            return;
          }
        }else if(slash==ch){
          /* Hold the slash back until we know whether it opens a comment. */
          state = S_SLASH1;
        }else{
          fputc(ch, out);
        }
        break;
      case S_CPP: /* C++ comment */
        if('\n'==ch){
          /* The newline ends the comment and is itself always emitted. */
          state = S_NONE;
          elide = 0;
        }
        if(!elide){
          fputc(ch, out);
        }
        break;
      case S_C: /* C comment */
        if(!elide){
          fputc(ch, out);
        }
        if(cStart){
          cStart = 0;
        }else if(slash==ch && star==prev){
          state = S_NONE;
          elide = 0;
        }
        break;
      default:
        assert(!"impossible!");
        break;
    }
    if('\n'==ch){
      ++line;
      col = 0;
    }
  }

  if(S_SLASH1==state){
    /* The input ended with a lone slash which is still pending. */
    fputc(slash, out);
  }else if(S_C==state){
    fprintf(stderr, "Unexpected EOF while reading C-style comment "
            "starting on line %d column %d.\n", cLine, cCol);
    App.rc = 1;
  }
}
/*
** Writes a short description of the program and its command-line
** syntax to stderr. zAppName is the program name to show in the
** usage line.
*/
static void usage(char const *zAppName){
  FILE * const err = stderr;
  fputs("Strips C- and C++-style comments from stdin and sends "
        "the results to stdout.\n", err);
  fprintf(err, "Usage: %s [--keep-first|-k] < input > output\n", zAppName);
}
211 int main( int argc, char const * const * argv ){
212 int i;
213 for(i = 1; i < argc; ++i){
214 char const * zArg = argv[i];
215 while( '-'==*zArg ) ++zArg;
216 if( 0==strcmp(zArg,"k")
217 || 0==strcmp(zArg,"keep-first") ){
218 ++App.keepFirst;
219 }else{
220 usage(argv[0]);
221 return 1;
224 App.input = stdin;
225 App.output = stdout;
226 do_it_all();
227 return App.rc ? 1 : 0;