Sync changes from latest snowball compiler version
[xapian.git] / xapian-core / languages / compiler / driver.c
blob 123306d1de17480e1d694851622cbde48ff0d97e
1 #include <stdio.h> /* for fprintf etc */
2 #include <stdlib.h> /* for free etc */
3 #include <string.h> /* for strcmp */
4 #include "header.h"
/* Defaults for the Java backend.  read_options() installs these as the
 * initial package, parent class, Among class and string class. */
#define DEFAULT_PACKAGE "org.tartarus.snowball.ext"
#define DEFAULT_BASE_CLASS "org.tartarus.snowball.SnowballProgram"
#define DEFAULT_AMONG_CLASS "org.tartarus.snowball.Among"
#define DEFAULT_STRING_CLASS "java.lang.StringBuilder"

/* Defaults for the Go backend: the package name for generated stemmers and
 * the import path of the Go snowball runtime. */
#define DEFAULT_GO_PACKAGE "snowball"
#define DEFAULT_GO_SNOWBALL_RUNTIME "github.com/snowballstem/snowball/go"

/* Defaults for the C# backend.  read_options() swaps these in for the Java
 * defaults when -cs/-csharp is seen. */
#define DEFAULT_CS_NAMESPACE "Snowball"
#define DEFAULT_CS_BASE_CLASS "Stemmer"
#define DEFAULT_CS_AMONG_CLASS "Among"
#define DEFAULT_CS_STRING_CLASS "StringBuilder"
/* Return non-zero if the NUL-terminated strings s1 and s2 are identical,
 * zero otherwise.  Neither argument may be NULL (both go to strcmp()). */
static int eq(const char * s1, const char * s2) {
    return strcmp(s1, s2) == 0;
}
/* Print the command-line usage summary to stderr and terminate the process
 * with exit status 1.  This function does not return.
 *
 * Options belonging to a backend that was compiled out (DISABLE_JAVA,
 * DISABLE_CSHARP, DISABLE_PYTHON, DISABLE_JS, DISABLE_RUST, DISABLE_GO) are
 * left out of the message.  Continuation lines are indented by the width of
 * "options are: " so every option lines up under the first one.
 */
static void print_arglist(void) {
    fprintf(stderr, "Usage: snowball <file>... [options]\n\n"
                    "options are: [-o[utput] file]\n"
                    "             [-s[yntax]]\n"
#ifndef DISABLE_JAVA
                    "             [-j[ava]]\n"
#endif
#ifndef DISABLE_CSHARP
                    "             [-cs[harp]]\n"
#endif
                    "             [-c++]\n"
#ifndef DISABLE_PYTHON
                    "             [-py[thon]]\n"
#endif
#ifndef DISABLE_JS
                    "             [-js]\n"
#endif
#ifndef DISABLE_RUST
                    "             [-rust]\n"
#endif
#ifndef DISABLE_GO
                    "             [-go]\n"
#endif
                    "             [-w[idechars]]\n"
                    "             [-u[tf8]]\n"
                    "             [-n[ame] class name]\n"
                    "             [-ep[refix] string]\n"
                    "             [-vp[refix] string]\n"
                    "             [-i[nclude] directory]\n"
                    "             [-r[untime] path to runtime headers]\n"
                    "             [-p[arentclassname] fully qualified parent class name]\n"
#if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
                    "             [-P[ackage] package name for stemmers]\n"
                    "             [-S[tringclass] StringBuffer-compatible class]\n"
                    "             [-a[mongclass] fully qualified name of the Among class]\n"
#endif
#ifndef DISABLE_GO
                    "             [-gop[ackage] Go package name for stemmers]\n"
                    "             [-gor[untime] Go snowball runtime package]\n"
#endif
           );
    exit(1);
}
/* Guard used before consuming the argument of an option.
 *
 * i is the index of the argument about to be read and argc the number of
 * entries in argv.  If i is past the end, report that the argument list is
 * one short and show the usage text; print_arglist() exits with status 1,
 * so in that case control does not come back to the caller.
 */
static void check_lim(int i, int argc) {
    if (i >= argc) {
        fprintf(stderr, "argument list is one short\n");
        print_arglist();
    }
}
73 static FILE * get_output(symbol * b) {
74 char * s = b_to_s(b);
75 FILE * output = fopen(s, "w");
76 if (output == 0) {
77 fprintf(stderr, "Can't open output %s\n", s);
78 exit(1);
80 free(s);
81 return output;
84 static int read_options(struct options * o, int argc, char * argv[]) {
85 char * s;
86 int i = 1;
87 int new_argc = 1;
89 /* set defaults: */
91 o->output_file = 0;
92 o->syntax_tree = false;
93 o->externals_prefix = NULL;
94 o->variables_prefix = 0;
95 o->runtime_path = 0;
96 o->parent_class_name = DEFAULT_BASE_CLASS;
97 o->string_class = DEFAULT_STRING_CLASS;
98 o->among_class = DEFAULT_AMONG_CLASS;
99 o->package = DEFAULT_PACKAGE;
100 o->go_package = DEFAULT_GO_PACKAGE;
101 o->go_snowball_runtime = DEFAULT_GO_SNOWBALL_RUNTIME;
102 o->name = "";
103 o->make_lang = LANG_C;
104 o->includes = 0;
105 o->includes_end = 0;
106 o->encoding = ENC_SINGLEBYTE;
108 /* read options: */
110 while (i < argc) {
111 s = argv[i++];
112 if (s[0] != '-') {
113 /* Non-option argument - shuffle down. */
114 argv[new_argc++] = s;
115 continue;
119 if (eq(s, "-o") || eq(s, "-output")) {
120 check_lim(i, argc);
121 o->output_file = argv[i++];
122 continue;
124 if (eq(s, "-n") || eq(s, "-name")) {
125 check_lim(i, argc);
126 o->name = argv[i++];
127 continue;
129 #ifndef DISABLE_JS
130 if (eq(s, "-js")) {
131 o->make_lang = LANG_JAVASCRIPT;
132 o->encoding = ENC_WIDECHARS;
133 continue;
135 #endif
136 #ifndef DISABLE_RUST
137 if (eq(s, "-rust")) {
138 o->make_lang = LANG_RUST;
139 o->encoding = ENC_UTF8;
140 continue;
142 #endif
143 #ifndef DISABLE_GO
144 if (eq(s, "-go")) {
145 o->make_lang = LANG_GO;
146 o->encoding = ENC_UTF8;
147 continue;
149 #endif
150 #ifndef DISABLE_JAVA
151 if (eq(s, "-j") || eq(s, "-java")) {
152 o->make_lang = LANG_JAVA;
153 o->encoding = ENC_WIDECHARS;
154 continue;
156 #endif
157 #ifndef DISABLE_CSHARP
158 if (eq(s, "-cs") || eq(s, "-csharp")) {
159 o->make_lang = LANG_CSHARP;
160 o->encoding = ENC_WIDECHARS;
161 o->parent_class_name = DEFAULT_CS_BASE_CLASS;
162 o->string_class = DEFAULT_CS_STRING_CLASS;
163 o->among_class = DEFAULT_CS_AMONG_CLASS;
164 o->package = DEFAULT_CS_NAMESPACE;
165 continue;
167 #endif
168 if (eq(s, "-c++")) {
169 o->make_lang = LANG_CPLUSPLUS;
170 continue;
172 #ifndef DISABLE_PYTHON
173 if (eq(s, "-py") || eq(s, "-python")) {
174 o->make_lang = LANG_PYTHON;
175 o->encoding = ENC_WIDECHARS;
176 continue;
178 #endif
179 if (eq(s, "-w") || eq(s, "-widechars")) {
180 o->encoding = ENC_WIDECHARS;
181 continue;
183 if (eq(s, "-s") || eq(s, "-syntax")) {
184 o->syntax_tree = true;
185 continue;
187 if (eq(s, "-ep") || eq(s, "-eprefix")) {
188 check_lim(i, argc);
189 o->externals_prefix = argv[i++];
190 continue;
192 if (eq(s, "-vp") || eq(s, "-vprefix")) {
193 check_lim(i, argc);
194 o->variables_prefix = argv[i++];
195 continue;
197 if (eq(s, "-i") || eq(s, "-include")) {
198 check_lim(i, argc);
201 NEW(include, p);
202 symbol * b = add_s_to_b(0, argv[i++]);
203 b = add_s_to_b(b, "/");
204 p->next = 0; p->b = b;
206 if (o->includes == 0) o->includes = p; else
207 o->includes_end->next = p;
208 o->includes_end = p;
210 continue;
212 if (eq(s, "-r") || eq(s, "-runtime")) {
213 check_lim(i, argc);
214 o->runtime_path = argv[i++];
215 continue;
217 if (eq(s, "-u") || eq(s, "-utf8")) {
218 o->encoding = ENC_UTF8;
219 continue;
221 if (eq(s, "-p") || eq(s, "-parentclassname")) {
222 check_lim(i, argc);
223 o->parent_class_name = argv[i++];
224 continue;
226 #if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
227 if (eq(s, "-P") || eq(s, "-Package")) {
228 check_lim(i, argc);
229 o->package = argv[i++];
230 continue;
232 if (eq(s, "-S") || eq(s, "-stringclass")) {
233 check_lim(i, argc);
234 o->string_class = argv[i++];
235 continue;
237 if (eq(s, "-a") || eq(s, "-amongclass")) {
238 check_lim(i, argc);
239 o->among_class = argv[i++];
240 continue;
242 #endif
243 #ifndef DISABLE_GO
244 if (eq(s, "-gop") || eq(s, "-gopackage")) {
245 check_lim(i, argc);
246 o->go_package = argv[i++];
247 continue;
249 if (eq(s, "-gor") || eq(s, "-goruntime")) {
250 check_lim(i, argc);
251 o->go_snowball_runtime = argv[i++];
252 continue;
254 #endif
255 fprintf(stderr, "'%s' misplaced\n", s);
256 print_arglist();
259 if (new_argc == 1) {
260 fprintf(stderr, "no source files specified\n");
261 print_arglist();
263 argv[new_argc] = NULL;
265 if (o->make_lang != LANG_C && o->make_lang != LANG_CPLUSPLUS) {
266 if (o->runtime_path) {
267 fprintf(stderr, "warning: -r/-runtime only meaningful for C and C++\n");
269 if (o->externals_prefix) {
270 fprintf(stderr, "warning: -ep/-eprefix only meaningful for C and C++\n");
273 if (!o->externals_prefix) o->externals_prefix = "";
274 return new_argc;
277 extern int main(int argc, char * argv[]) {
279 int i;
280 NEW(options, o);
281 argc = read_options(o, argc, argv);
283 symbol * filename = add_s_to_b(0, argv[1]);
284 char * file;
285 symbol * u = get_input(filename, &file);
286 lose_b(filename);
287 if (u == 0) {
288 fprintf(stderr, "Can't open input %s\n", argv[1]);
289 exit(1);
292 struct tokeniser * t = create_tokeniser(u, file);
293 struct analyser * a = create_analyser(t);
294 struct input ** next_input_ptr = &(t->next);
295 a->encoding = t->encoding = o->encoding;
296 t->includes = o->includes;
297 /* If multiple source files are specified, set up the others to be
298 * read after the first in order, using the same mechanism as
299 * 'get' uses. */
300 for (i = 2; i != argc; ++i) {
301 NEW(input, q);
302 filename = add_s_to_b(0, argv[i]);
303 u = get_input(filename, &file);
304 lose_b(filename);
305 if (u == 0) {
306 fprintf(stderr, "Can't open input %s\n", argv[i]);
307 exit(1);
309 q->p = u;
310 q->c = 0;
311 q->file = file;
312 q->line_number = 1;
313 *next_input_ptr = q;
314 next_input_ptr = &(q->next);
316 *next_input_ptr = NULL;
317 read_program(a);
318 if (t->error_count > 0) exit(1);
319 if (o->syntax_tree) print_program(a);
320 close_tokeniser(t);
321 if (!o->syntax_tree) {
322 struct generator * g;
324 const char * s = o->output_file;
325 if (!s) {
326 fprintf(stderr, "Please include the -o option\n");
327 print_arglist();
328 exit(1);
330 g = create_generator(a, o);
331 if (o->make_lang == LANG_C || o->make_lang == LANG_CPLUSPLUS) {
332 symbol * b = add_s_to_b(0, s);
333 b = add_s_to_b(b, ".h");
334 o->output_h = get_output(b);
335 b[SIZE(b) - 1] = 'c';
336 if (o->make_lang == LANG_CPLUSPLUS) {
337 b = add_s_to_b(b, "c");
339 o->output_src = get_output(b);
340 lose_b(b);
342 generate_program_c(g);
343 fclose(o->output_src);
344 fclose(o->output_h);
346 #ifndef DISABLE_JAVA
347 if (o->make_lang == LANG_JAVA) {
348 symbol * b = add_s_to_b(0, s);
349 b = add_s_to_b(b, ".java");
350 o->output_src = get_output(b);
351 lose_b(b);
352 generate_program_java(g);
353 fclose(o->output_src);
355 #endif
356 #ifndef DISABLE_PYTHON
357 if (o->make_lang == LANG_PYTHON) {
358 symbol * b = add_s_to_b(0, s);
359 b = add_s_to_b(b, ".py");
360 o->output_src = get_output(b);
361 lose_b(b);
362 generate_program_python(g);
363 fclose(o->output_src);
365 #endif
366 #ifndef DISABLE_JS
367 if (o->make_lang == LANG_JAVASCRIPT) {
368 symbol * b = add_s_to_b(0, s);
369 b = add_s_to_b(b, ".js");
370 o->output_src = get_output(b);
371 lose_b(b);
372 generate_program_js(g);
373 fclose(o->output_src);
375 #endif
376 #ifndef DISABLE_CSHARP
377 if (o->make_lang == LANG_CSHARP) {
378 symbol * b = add_s_to_b(0, s);
379 b = add_s_to_b(b, ".cs");
380 o->output_src = get_output(b);
381 lose_b(b);
382 generate_program_csharp(g);
383 fclose(o->output_src);
385 #endif
386 #ifndef DISABLE_RUST
387 if (o->make_lang == LANG_RUST) {
388 symbol * b = add_s_to_b(0, s);
389 b = add_s_to_b(b, ".rs");
390 o->output_src = get_output(b);
391 lose_b(b);
392 generate_program_rust(g);
393 fclose(o->output_src);
395 #endif
396 #ifndef DISABLE_GO
397 if (o->make_lang == LANG_GO) {
398 symbol * b = add_s_to_b(0, s);
399 b = add_s_to_b(b, ".go");
400 o->output_src = get_output(b);
401 lose_b(b);
402 generate_program_go(g);
403 fclose(o->output_src);
405 #endif
406 close_generator(g);
408 close_analyser(a);
410 lose_b(u);
412 { struct include * p = o->includes;
413 while (p) {
414 struct include * q = p->next;
415 lose_b(p->b); FREE(p); p = q;
418 FREE(o);
419 if (space_count) fprintf(stderr, "%d blocks unfreed\n", space_count);
420 return 0;