Sync changes from latest snowball compiler version
[xapian.git] / xapian-core / languages / compiler / driver.c
blob0afd79b62c896e48992dd9ce8aee82ee32be2ccb
#include <ctype.h>  /* for toupper */
#include <stdio.h>  /* for fprintf etc */
#include <stdlib.h> /* for free etc */
#include <string.h> /* for strcmp */
#include "header.h"
/* Defaults used by read_options() for the Java-style class options
 * (-P, -p, -a, -S). */
#define DEFAULT_PACKAGE "org.tartarus.snowball.ext"
#define DEFAULT_BASE_CLASS "org.tartarus.snowball.SnowballProgram"
#define DEFAULT_AMONG_CLASS "org.tartarus.snowball.Among"
#define DEFAULT_STRING_CLASS "java.lang.StringBuilder"

/* Defaults for the Go options (-gopackage, -goruntime). */
#define DEFAULT_GO_PACKAGE "snowball"
#define DEFAULT_GO_SNOWBALL_RUNTIME "github.com/snowballstem/snowball/go"

/* Defaults substituted for the class options when generating C# (-cs). */
#define DEFAULT_CS_NAMESPACE "Snowball"
#define DEFAULT_CS_BASE_CLASS "Stemmer"
#define DEFAULT_CS_AMONG_CLASS "Among"
#define DEFAULT_CS_STRING_CLASS "StringBuilder"
/* Return non-zero iff the two NUL-terminated strings are identical. */
static int eq(const char * s1, const char * s2) {
    return strcmp(s1, s2) == 0;
}
/* Print the command-line usage summary to stderr and terminate the process
 * with exit status 1.  Never returns.
 *
 * Options belonging to a backend that was compiled out (DISABLE_xxx) are
 * omitted from the listing.
 */
static void print_arglist(void) {
    /* Continuation lines are padded with 13 spaces so that each option lines
     * up under the first one, which follows the 13-character "options are: "
     * prefix. */
    fprintf(stderr, "Usage: snowball <file>... [options]\n\n"
                    "options are: [-o[utput] file]\n"
                    "             [-s[yntax]]\n"
#ifndef DISABLE_JAVA
                    "             [-j[ava]]\n"
#endif
#ifndef DISABLE_CSHARP
                    "             [-cs[harp]]\n"
#endif
                    "             [-c++]\n"
#ifndef DISABLE_PYTHON
                    "             [-py[thon]]\n"
#endif
#ifndef DISABLE_JS
                    "             [-js]\n"
#endif
#ifndef DISABLE_RUST
                    "             [-rust]\n"
#endif
#ifndef DISABLE_GO
                    "             [-go]\n"
#endif
                    "             [-w[idechars]]\n"
                    "             [-u[tf8]]\n"
                    "             [-n[ame] class name]\n"
                    "             [-ep[refix] string]\n"
                    "             [-vp[refix] string]\n"
                    "             [-i[nclude] directory]\n"
                    "             [-r[untime] path to runtime headers]\n"
                    "             [-p[arentclassname] fully qualified parent class name]\n"
#if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
                    "             [-P[ackage] package name for stemmers]\n"
                    "             [-S[tringclass] StringBuffer-compatible class]\n"
                    "             [-a[mongclass] fully qualified name of the Among class]\n"
#endif
#ifndef DISABLE_GO
                    "             [-gop[ackage] Go package name for stemmers]\n"
                    "             [-gor[untime] Go snowball runtime package]\n"
#endif
           );
    exit(1);
}
/* Ensure that argv[i] exists, i.e. that an option which takes a value was
 * actually followed by one.
 *
 * i     index of the argument about to be consumed
 * argc  number of entries in the argument vector
 *
 * If the argument is missing, a diagnostic and the usage text are written to
 * stderr and the process exits (via print_arglist()).
 */
static void check_lim(int i, int argc) {
    if (i < argc) return;

    fprintf(stderr, "argument list is one short\n");
    print_arglist();
}
73 static FILE * get_output(symbol * b) {
74 char * s = b_to_s(b);
75 FILE * output = fopen(s, "w");
76 if (output == 0) {
77 fprintf(stderr, "Can't open output %s\n", s);
78 exit(1);
80 free(s);
81 return output;
84 static int read_options(struct options * o, int argc, char * argv[]) {
85 char * s;
86 int i = 1;
87 int new_argc = 1;
89 /* set defaults: */
91 o->output_file = 0;
92 o->syntax_tree = false;
93 o->externals_prefix = NULL;
94 o->variables_prefix = 0;
95 o->runtime_path = 0;
96 o->parent_class_name = DEFAULT_BASE_CLASS;
97 o->string_class = DEFAULT_STRING_CLASS;
98 o->among_class = DEFAULT_AMONG_CLASS;
99 o->package = DEFAULT_PACKAGE;
100 o->go_package = DEFAULT_GO_PACKAGE;
101 o->go_snowball_runtime = DEFAULT_GO_SNOWBALL_RUNTIME;
102 o->name = NULL;
103 o->make_lang = LANG_C;
104 o->includes = 0;
105 o->includes_end = 0;
106 o->encoding = ENC_SINGLEBYTE;
108 /* read options: */
110 while (i < argc) {
111 s = argv[i++];
112 if (s[0] != '-') {
113 /* Non-option argument - shuffle down. */
114 argv[new_argc++] = s;
115 continue;
119 if (eq(s, "-o") || eq(s, "-output")) {
120 check_lim(i, argc);
121 o->output_file = argv[i++];
122 continue;
124 if (eq(s, "-n") || eq(s, "-name")) {
125 check_lim(i, argc);
126 o->name = argv[i++];
127 continue;
129 #ifndef DISABLE_JS
130 if (eq(s, "-js")) {
131 o->make_lang = LANG_JAVASCRIPT;
132 o->encoding = ENC_WIDECHARS;
133 continue;
135 #endif
136 #ifndef DISABLE_RUST
137 if (eq(s, "-rust")) {
138 o->make_lang = LANG_RUST;
139 o->encoding = ENC_UTF8;
140 continue;
142 #endif
143 #ifndef DISABLE_GO
144 if (eq(s, "-go")) {
145 o->make_lang = LANG_GO;
146 o->encoding = ENC_UTF8;
147 continue;
149 #endif
150 #ifndef DISABLE_JAVA
151 if (eq(s, "-j") || eq(s, "-java")) {
152 o->make_lang = LANG_JAVA;
153 o->encoding = ENC_WIDECHARS;
154 continue;
156 #endif
157 #ifndef DISABLE_CSHARP
158 if (eq(s, "-cs") || eq(s, "-csharp")) {
159 o->make_lang = LANG_CSHARP;
160 o->encoding = ENC_WIDECHARS;
161 o->parent_class_name = DEFAULT_CS_BASE_CLASS;
162 o->string_class = DEFAULT_CS_STRING_CLASS;
163 o->among_class = DEFAULT_CS_AMONG_CLASS;
164 o->package = DEFAULT_CS_NAMESPACE;
165 continue;
167 #endif
168 if (eq(s, "-c++")) {
169 o->make_lang = LANG_CPLUSPLUS;
170 continue;
172 #ifndef DISABLE_PYTHON
173 if (eq(s, "-py") || eq(s, "-python")) {
174 o->make_lang = LANG_PYTHON;
175 o->encoding = ENC_WIDECHARS;
176 continue;
178 #endif
179 if (eq(s, "-w") || eq(s, "-widechars")) {
180 o->encoding = ENC_WIDECHARS;
181 continue;
183 if (eq(s, "-s") || eq(s, "-syntax")) {
184 o->syntax_tree = true;
185 continue;
187 if (eq(s, "-ep") || eq(s, "-eprefix")) {
188 check_lim(i, argc);
189 o->externals_prefix = argv[i++];
190 continue;
192 if (eq(s, "-vp") || eq(s, "-vprefix")) {
193 check_lim(i, argc);
194 o->variables_prefix = argv[i++];
195 continue;
197 if (eq(s, "-i") || eq(s, "-include")) {
198 check_lim(i, argc);
201 NEW(include, p);
202 symbol * b = add_s_to_b(0, argv[i++]);
203 b = add_s_to_b(b, "/");
204 p->next = 0; p->b = b;
206 if (o->includes == 0) o->includes = p; else
207 o->includes_end->next = p;
208 o->includes_end = p;
210 continue;
212 if (eq(s, "-r") || eq(s, "-runtime")) {
213 check_lim(i, argc);
214 o->runtime_path = argv[i++];
215 continue;
217 if (eq(s, "-u") || eq(s, "-utf8")) {
218 o->encoding = ENC_UTF8;
219 continue;
221 if (eq(s, "-p") || eq(s, "-parentclassname")) {
222 check_lim(i, argc);
223 o->parent_class_name = argv[i++];
224 continue;
226 #if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
227 if (eq(s, "-P") || eq(s, "-Package")) {
228 check_lim(i, argc);
229 o->package = argv[i++];
230 continue;
232 if (eq(s, "-S") || eq(s, "-stringclass")) {
233 check_lim(i, argc);
234 o->string_class = argv[i++];
235 continue;
237 if (eq(s, "-a") || eq(s, "-amongclass")) {
238 check_lim(i, argc);
239 o->among_class = argv[i++];
240 continue;
242 #endif
243 #ifndef DISABLE_GO
244 if (eq(s, "-gop") || eq(s, "-gopackage")) {
245 check_lim(i, argc);
246 o->go_package = argv[i++];
247 continue;
249 if (eq(s, "-gor") || eq(s, "-goruntime")) {
250 check_lim(i, argc);
251 o->go_snowball_runtime = argv[i++];
252 continue;
254 #endif
255 fprintf(stderr, "'%s' misplaced\n", s);
256 print_arglist();
259 if (new_argc == 1) {
260 fprintf(stderr, "no source files specified\n");
261 print_arglist();
263 argv[new_argc] = NULL;
265 if (o->make_lang != LANG_C && o->make_lang != LANG_CPLUSPLUS) {
266 if (o->runtime_path) {
267 fprintf(stderr, "warning: -r/-runtime only meaningful for C and C++\n");
269 if (o->externals_prefix) {
270 fprintf(stderr, "warning: -ep/-eprefix only meaningful for C and C++\n");
273 if (!o->externals_prefix) o->externals_prefix = "";
275 if (!o->name && o->output_file) {
276 /* Default class name to basename of output_file - this is the standard
277 * convention for at least Java and C#.
279 const char * slash = strrchr(o->output_file, '/');
280 size_t len;
281 const char * leaf = (slash == NULL) ? o->output_file : slash + 1;
283 slash = strrchr(leaf, '\\');
284 if (slash != NULL) leaf = slash + 1;
287 const char * dot = strchr(leaf, '.');
288 len = (dot == NULL) ? strlen(leaf) : (size_t)(dot - leaf);
292 char * new_name = malloc(len + 1);
293 switch (o->make_lang) {
294 case LANG_CSHARP:
295 /* Upper case initial letter. */
296 memcpy(new_name, leaf, len);
297 new_name[0] = toupper(new_name[0]);
298 break;
299 case LANG_JAVASCRIPT:
300 case LANG_PYTHON: {
301 /* Upper case initial letter and change each
302 * underscore+letter or hyphen+letter to an upper case
303 * letter.
305 int i, j = 0;
306 int uc_next = true;
307 for (i = 0; i != len; ++i) {
308 unsigned char ch = leaf[i];
309 if (ch == '_' || ch == '-') {
310 uc_next = true;
311 } else {
312 if (uc_next) {
313 new_name[j] = toupper(ch);
314 uc_next = false;
315 } else {
316 new_name[j] = ch;
318 ++j;
321 len = j;
322 break;
324 default:
325 /* Just copy. */
326 memcpy(new_name, leaf, len);
327 break;
329 new_name[len] = '\0';
330 o->name = new_name;
334 return new_argc;
337 extern int main(int argc, char * argv[]) {
339 int i;
340 NEW(options, o);
341 argc = read_options(o, argc, argv);
343 symbol * filename = add_s_to_b(0, argv[1]);
344 char * file;
345 symbol * u = get_input(filename, &file);
346 lose_b(filename);
347 if (u == 0) {
348 fprintf(stderr, "Can't open input %s\n", argv[1]);
349 exit(1);
352 struct tokeniser * t = create_tokeniser(u, file);
353 struct analyser * a = create_analyser(t);
354 struct input ** next_input_ptr = &(t->next);
355 a->encoding = t->encoding = o->encoding;
356 t->includes = o->includes;
357 /* If multiple source files are specified, set up the others to be
358 * read after the first in order, using the same mechanism as
359 * 'get' uses. */
360 for (i = 2; i != argc; ++i) {
361 NEW(input, q);
362 filename = add_s_to_b(0, argv[i]);
363 u = get_input(filename, &file);
364 lose_b(filename);
365 if (u == 0) {
366 fprintf(stderr, "Can't open input %s\n", argv[i]);
367 exit(1);
369 q->p = u;
370 q->c = 0;
371 q->file = file;
372 q->line_number = 1;
373 *next_input_ptr = q;
374 next_input_ptr = &(q->next);
376 *next_input_ptr = NULL;
377 read_program(a);
378 if (t->error_count > 0) exit(1);
379 if (o->syntax_tree) print_program(a);
380 close_tokeniser(t);
381 if (!o->syntax_tree) {
382 struct generator * g;
384 const char * s = o->output_file;
385 if (!s) {
386 fprintf(stderr, "Please include the -o option\n");
387 print_arglist();
388 exit(1);
390 g = create_generator(a, o);
391 if (o->make_lang == LANG_C || o->make_lang == LANG_CPLUSPLUS) {
392 symbol * b = add_s_to_b(0, s);
393 b = add_s_to_b(b, ".h");
394 o->output_h = get_output(b);
395 b[SIZE(b) - 1] = 'c';
396 if (o->make_lang == LANG_CPLUSPLUS) {
397 b = add_s_to_b(b, "c");
399 o->output_src = get_output(b);
400 lose_b(b);
402 generate_program_c(g);
403 fclose(o->output_src);
404 fclose(o->output_h);
406 #ifndef DISABLE_JAVA
407 if (o->make_lang == LANG_JAVA) {
408 symbol * b = add_s_to_b(0, s);
409 b = add_s_to_b(b, ".java");
410 o->output_src = get_output(b);
411 lose_b(b);
412 generate_program_java(g);
413 fclose(o->output_src);
415 #endif
416 #ifndef DISABLE_PYTHON
417 if (o->make_lang == LANG_PYTHON) {
418 symbol * b = add_s_to_b(0, s);
419 b = add_s_to_b(b, ".py");
420 o->output_src = get_output(b);
421 lose_b(b);
422 generate_program_python(g);
423 fclose(o->output_src);
425 #endif
426 #ifndef DISABLE_JS
427 if (o->make_lang == LANG_JAVASCRIPT) {
428 symbol * b = add_s_to_b(0, s);
429 b = add_s_to_b(b, ".js");
430 o->output_src = get_output(b);
431 lose_b(b);
432 generate_program_js(g);
433 fclose(o->output_src);
435 #endif
436 #ifndef DISABLE_CSHARP
437 if (o->make_lang == LANG_CSHARP) {
438 symbol * b = add_s_to_b(0, s);
439 b = add_s_to_b(b, ".cs");
440 o->output_src = get_output(b);
441 lose_b(b);
442 generate_program_csharp(g);
443 fclose(o->output_src);
445 #endif
446 #ifndef DISABLE_RUST
447 if (o->make_lang == LANG_RUST) {
448 symbol * b = add_s_to_b(0, s);
449 b = add_s_to_b(b, ".rs");
450 o->output_src = get_output(b);
451 lose_b(b);
452 generate_program_rust(g);
453 fclose(o->output_src);
455 #endif
456 #ifndef DISABLE_GO
457 if (o->make_lang == LANG_GO) {
458 symbol * b = add_s_to_b(0, s);
459 b = add_s_to_b(b, ".go");
460 o->output_src = get_output(b);
461 lose_b(b);
462 generate_program_go(g);
463 fclose(o->output_src);
465 #endif
466 close_generator(g);
468 close_analyser(a);
470 lose_b(u);
472 { struct include * p = o->includes;
473 while (p) {
474 struct include * q = p->next;
475 lose_b(p->b); FREE(p); p = q;
478 FREE(o);
479 if (space_count) fprintf(stderr, "%d blocks unfreed\n", space_count);
480 return 0;