1 #include <stdio.h> /* for fprintf etc */
2 #include <stdlib.h> /* for free etc */
3 #include <string.h> /* for strcmp */
/* Java-flavoured defaults.  read_options() installs these as the initial
 * package, parent_class_name, among_class and string_class values. */
6 #define DEFAULT_PACKAGE "org.tartarus.snowball.ext"
7 #define DEFAULT_BASE_CLASS "org.tartarus.snowball.SnowballProgram"
8 #define DEFAULT_AMONG_CLASS "org.tartarus.snowball.Among"
9 #define DEFAULT_STRING_CLASS "java.lang.StringBuilder"
/* Go defaults: initial go_package and go_snowball_runtime values, which the
 * -gop[ackage] and -gor[untime] options override. */
10 #define DEFAULT_GO_PACKAGE "snowball"
11 #define DEFAULT_GO_SNOWBALL_RUNTIME "github.com/snowballstem/snowball/go"
/* C# defaults: read_options() substitutes these for the Java ones when
 * -cs/-csharp is given. */
13 #define DEFAULT_CS_NAMESPACE "Snowball"
14 #define DEFAULT_CS_BASE_CLASS "Stemmer"
15 #define DEFAULT_CS_AMONG_CLASS "Among"
16 #define DEFAULT_CS_STRING_CLASS "StringBuilder"
/* String equality helper: returns 1 when s1 and s2 compare equal under
 * strcmp(), 0 otherwise.  Neither pointer is checked for NULL. */
18 static int eq(const char * s1
, const char * s2
) {
19 return strcmp(s1
, s2
) == 0;
/* Write the command-line usage summary to stderr as a single fprintf() of
 * concatenated string literals.
 *
 * Some option lines are wrapped in preprocessor conditionals on the
 * DISABLE_* macros, so the help text can be tailored to the back ends that
 * were compiled in. */
22 static void print_arglist(void) {
23 fprintf(stderr
, "Usage: snowball <file>... [options]\n\n"
24 "options are: [-o[utput] file]\n"
29 #ifndef DISABLE_CSHARP
33 #ifndef DISABLE_PYTHON
47 " [-n[ame] class name]\n"
48 " [-ep[refix] string]\n"
49 " [-vp[refix] string]\n"
50 " [-i[nclude] directory]\n"
51 " [-r[untime] path to runtime headers]\n"
52 " [-p[arentclassname] fully qualified parent class name]\n"
/* The package, string-class and among-class options are listed only when the
 * Java or the C# back end is enabled. */
53 #if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
54 " [-P[ackage] package name for stemmers]\n"
55 " [-S[tringclass] StringBuffer-compatible class]\n"
56 " [-a[mongclass] fully qualified name of the Among class]\n"
59 " [-gop[ackage] Go package name for stemmers]\n"
60 " [-gor[untime] Go snowball runtime package]\n"
/* Argument-count check used while parsing options: reports
 * "argument list is one short" on stderr.
 *
 * NOTE(review): presumably i is the index of the argv slot an option is about
 * to consume and the message is printed when that index is not below argc -
 * confirm against the condition and the callers. */
66 static void check_lim(int i
, int argc
) {
68 fprintf(stderr
, "argument list is one short\n");
/* Open an output file for writing (fopen mode "w") and hand back the stream.
 * A "Can't open output <name>" diagnostic is written to stderr for the
 * failure case.
 *
 * NOTE(review): s is presumably the file name held in the symbol block b,
 * converted to a C string - confirm where s is defined and released. */
73 static FILE * get_output(symbol
* b
) {
75 FILE * output
= fopen(s
, "w");
77 fprintf(stderr
, "Can't open output %s\n", s
);
/* Parse the command line into *o.
 *
 * The options structure is first filled with defaults: C output, single-byte
 * encoding, the Java class/package names and the Go package/runtime names.
 * argv is then scanned: arguments that are not options are shuffled down
 * towards the front of argv, and the slot after the last of them is set to
 * NULL.  Options that take a value read it from the following argv entry.
 * Selecting a target language also selects the encoding used for it.
 *
 * After the scan:
 *  - a warning is printed if -r/-runtime or -ep/-eprefix was given for a
 *    target other than C or C++;
 *  - an unset externals_prefix becomes the empty string;
 *  - if no class name was given but an output file was, a name is built from
 *    the output file's leaf name (text after the last '/' or '\', up to the
 *    first '.'), with its case adjusted according to the target language.
 *
 * Diagnostics go to stderr.
 *
 * NOTE(review): main() stores the return value back into argc; presumably it
 * is the number of entries left at the front of argv - confirm. */
84 static int read_options(struct options
* o
, int argc
, char * argv
[]) {
/* Defaults, in effect unless an option below overrides them. */
92 o
->syntax_tree
= false;
93 o
->externals_prefix
= NULL
;
94 o
->variables_prefix
= 0;
96 o
->parent_class_name
= DEFAULT_BASE_CLASS
;
97 o
->string_class
= DEFAULT_STRING_CLASS
;
98 o
->among_class
= DEFAULT_AMONG_CLASS
;
99 o
->package
= DEFAULT_PACKAGE
;
100 o
->go_package
= DEFAULT_GO_PACKAGE
;
101 o
->go_snowball_runtime
= DEFAULT_GO_SNOWBALL_RUNTIME
;
103 o
->make_lang
= LANG_C
;
106 o
->encoding
= ENC_SINGLEBYTE
;
113 /* Non-option argument - shuffle down. */
114 argv
[new_argc
++] = s
;
119 if (eq(s
, "-o") || eq(s
, "-output")) {
121 o
->output_file
= argv
[i
++];
124 if (eq(s
, "-n") || eq(s
, "-name")) {
131 o
->make_lang
= LANG_JAVASCRIPT
;
132 o
->encoding
= ENC_WIDECHARS
;
137 if (eq(s
, "-rust")) {
138 o
->make_lang
= LANG_RUST
;
139 o
->encoding
= ENC_UTF8
;
145 o
->make_lang
= LANG_GO
;
146 o
->encoding
= ENC_UTF8
;
151 if (eq(s
, "-j") || eq(s
, "-java")) {
152 o
->make_lang
= LANG_JAVA
;
153 o
->encoding
= ENC_WIDECHARS
;
157 #ifndef DISABLE_CSHARP
/* Choosing C# also swaps the Java class and package defaults for the C#
 * ones. */
158 if (eq(s
, "-cs") || eq(s
, "-csharp")) {
159 o
->make_lang
= LANG_CSHARP
;
160 o
->encoding
= ENC_WIDECHARS
;
161 o
->parent_class_name
= DEFAULT_CS_BASE_CLASS
;
162 o
->string_class
= DEFAULT_CS_STRING_CLASS
;
163 o
->among_class
= DEFAULT_CS_AMONG_CLASS
;
164 o
->package
= DEFAULT_CS_NAMESPACE
;
169 o
->make_lang
= LANG_CPLUSPLUS
;
172 #ifndef DISABLE_PYTHON
173 if (eq(s
, "-py") || eq(s
, "-python")) {
174 o
->make_lang
= LANG_PYTHON
;
175 o
->encoding
= ENC_WIDECHARS
;
179 if (eq(s
, "-w") || eq(s
, "-widechars")) {
180 o
->encoding
= ENC_WIDECHARS
;
183 if (eq(s
, "-s") || eq(s
, "-syntax")) {
184 o
->syntax_tree
= true;
187 if (eq(s
, "-ep") || eq(s
, "-eprefix")) {
189 o
->externals_prefix
= argv
[i
++];
192 if (eq(s
, "-vp") || eq(s
, "-vprefix")) {
194 o
->variables_prefix
= argv
[i
++];
197 if (eq(s
, "-i") || eq(s
, "-include")) {
/* Store the directory with a trailing "/" appended and link the new entry
 * into the o->includes list. */
202 symbol
* b
= add_s_to_b(0, argv
[i
++]);
203 b
= add_s_to_b(b
, "/");
204 p
->next
= 0; p
->b
= b
;
206 if (o
->includes
== 0) o
->includes
= p
; else
207 o
->includes_end
->next
= p
;
212 if (eq(s
, "-r") || eq(s
, "-runtime")) {
214 o
->runtime_path
= argv
[i
++];
217 if (eq(s
, "-u") || eq(s
, "-utf8")) {
218 o
->encoding
= ENC_UTF8
;
221 if (eq(s
, "-p") || eq(s
, "-parentclassname")) {
223 o
->parent_class_name
= argv
[i
++];
226 #if !defined(DISABLE_JAVA) || !defined(DISABLE_CSHARP)
227 if (eq(s
, "-P") || eq(s
, "-Package")) {
229 o
->package
= argv
[i
++];
232 if (eq(s
, "-S") || eq(s
, "-stringclass")) {
234 o
->string_class
= argv
[i
++];
237 if (eq(s
, "-a") || eq(s
, "-amongclass")) {
239 o
->among_class
= argv
[i
++];
244 if (eq(s
, "-gop") || eq(s
, "-gopackage")) {
246 o
->go_package
= argv
[i
++];
249 if (eq(s
, "-gor") || eq(s
, "-goruntime")) {
251 o
->go_snowball_runtime
= argv
[i
++];
255 fprintf(stderr
, "'%s' misplaced\n", s
);
260 fprintf(stderr
, "no source files specified\n");
263 argv
[new_argc
] = NULL
;
/* -r/-runtime and -ep/-eprefix matter only to the C and C++ back ends, so
 * warn when they are combined with any other target. */
265 if (o
->make_lang
!= LANG_C
&& o
->make_lang
!= LANG_CPLUSPLUS
) {
266 if (o
->runtime_path
) {
267 fprintf(stderr
, "warning: -r/-runtime only meaningful for C and C++\n");
269 if (o
->externals_prefix
) {
270 fprintf(stderr
, "warning: -ep/-eprefix only meaningful for C and C++\n");
/* Normalise an unset externals prefix to the empty string. */
273 if (!o
->externals_prefix
) o
->externals_prefix
= "";
275 if (!o
->name
&& o
->output_file
) {
276 /* Default class name to basename of output_file - this is the standard
277 * convention for at least Java and C#.
279 const char * slash
= strrchr(o
->output_file
, '/');
281 const char * leaf
= (slash
== NULL
) ? o
->output_file
: slash
+ 1;
283 slash
= strrchr(leaf
, '\\');
284 if (slash
!= NULL
) leaf
= slash
+ 1;
287 const char * dot
= strchr(leaf
, '.');
288 len
= (dot
== NULL
) ? strlen(leaf
) : (size_t)(dot
- leaf
);
292 char * new_name
= malloc(len
+ 1);
293 switch (o
->make_lang
) {
295 /* Upper case initial letter. */
296 memcpy(new_name
, leaf
, len
);
297 new_name
[0] = toupper(new_name
[0]);
299 case LANG_JAVASCRIPT
:
301 /* Upper case initial letter and change each
302 * underscore+letter or hyphen+letter to an upper case
307 for (i
= 0; i
!= len
; ++i
) {
308 unsigned char ch
= leaf
[i
];
309 if (ch
== '_' || ch
== '-') {
313 new_name
[j
] = toupper(ch
);
326 memcpy(new_name
, leaf
, len
);
329 new_name
[len
] = '\0';
/* Program entry point.
 *
 * Parses the options with read_options(), opens the first source file
 * (argv[1]) and builds a tokeniser and an analyser over it; any further
 * source files are chained on so that they are read after the first, in
 * order.  If the tokeniser recorded any errors the program exits with
 * status 1.
 *
 * With -s/-syntax the program is printed with print_program().  Otherwise a
 * generator is created and the back end matching o->make_lang writes its
 * output to a file named from the -o argument plus a language-specific
 * extension (.java, .py, .js, .cs, .rs, .go).  For C and C++ a header
 * "<output>.h" is opened as well; the source file name is made from the
 * header name by overwriting its final 'h' with 'c' and, for C++, appending
 * one more 'c'.
 *
 * Before finishing, the include list is released and, if space_count is
 * non-zero, the number of unfreed blocks is reported on stderr. */
337 extern int main(int argc
, char * argv
[]) {
341 argc
= read_options(o
, argc
, argv
);
/* First source file; any others are queued by the loop further down. */
343 symbol
* filename
= add_s_to_b(0, argv
[1]);
345 symbol
* u
= get_input(filename
, &file
);
348 fprintf(stderr
, "Can't open input %s\n", argv
[1]);
352 struct tokeniser
* t
= create_tokeniser(u
, file
);
353 struct analyser
* a
= create_analyser(t
);
354 struct input
** next_input_ptr
= &(t
->next
);
/* Tokeniser and analyser both use the encoding chosen on the command line. */
355 a
->encoding
= t
->encoding
= o
->encoding
;
356 t
->includes
= o
->includes
;
357 /* If multiple source files are specified, set up the others to be
358 * read after the first in order, using the same mechanism as
360 for (i
= 2; i
!= argc
; ++i
) {
362 filename
= add_s_to_b(0, argv
[i
]);
363 u
= get_input(filename
, &file
);
366 fprintf(stderr
, "Can't open input %s\n", argv
[i
]);
374 next_input_ptr
= &(q
->next
);
376 *next_input_ptr
= NULL
;
378 if (t
->error_count
> 0) exit(1);
379 if (o
->syntax_tree
) print_program(a
);
381 if (!o
->syntax_tree
) {
382 struct generator
* g
;
384 const char * s
= o
->output_file
;
386 fprintf(stderr
, "Please include the -o option\n");
390 g
= create_generator(a
, o
);
391 if (o
->make_lang
== LANG_C
|| o
->make_lang
== LANG_CPLUSPLUS
) {
392 symbol
* b
= add_s_to_b(0, s
);
393 b
= add_s_to_b(b
, ".h");
394 o
->output_h
= get_output(b
);
395 b
[SIZE(b
) - 1] = 'c';
396 if (o
->make_lang
== LANG_CPLUSPLUS
) {
397 b
= add_s_to_b(b
, "c");
399 o
->output_src
= get_output(b
);
402 generate_program_c(g
);
403 fclose(o
->output_src
);
407 if (o
->make_lang
== LANG_JAVA
) {
408 symbol
* b
= add_s_to_b(0, s
);
409 b
= add_s_to_b(b
, ".java");
410 o
->output_src
= get_output(b
);
412 generate_program_java(g
);
413 fclose(o
->output_src
);
416 #ifndef DISABLE_PYTHON
417 if (o
->make_lang
== LANG_PYTHON
) {
418 symbol
* b
= add_s_to_b(0, s
);
419 b
= add_s_to_b(b
, ".py");
420 o
->output_src
= get_output(b
);
422 generate_program_python(g
);
423 fclose(o
->output_src
);
427 if (o
->make_lang
== LANG_JAVASCRIPT
) {
428 symbol
* b
= add_s_to_b(0, s
);
429 b
= add_s_to_b(b
, ".js");
430 o
->output_src
= get_output(b
);
432 generate_program_js(g
);
433 fclose(o
->output_src
);
436 #ifndef DISABLE_CSHARP
437 if (o
->make_lang
== LANG_CSHARP
) {
438 symbol
* b
= add_s_to_b(0, s
);
439 b
= add_s_to_b(b
, ".cs");
440 o
->output_src
= get_output(b
);
442 generate_program_csharp(g
);
443 fclose(o
->output_src
);
447 if (o
->make_lang
== LANG_RUST
) {
448 symbol
* b
= add_s_to_b(0, s
);
449 b
= add_s_to_b(b
, ".rs");
450 o
->output_src
= get_output(b
);
452 generate_program_rust(g
);
453 fclose(o
->output_src
);
457 if (o
->make_lang
== LANG_GO
) {
458 symbol
* b
= add_s_to_b(0, s
);
459 b
= add_s_to_b(b
, ".go");
460 o
->output_src
= get_output(b
);
462 generate_program_go(g
);
463 fclose(o
->output_src
);
472 { struct include
* p
= o
->includes
;
474 struct include
* q
= p
->next
;
475 lose_b(p
->b
); FREE(p
); p
= q
;
479 if (space_count
) fprintf(stderr
, "%d blocks unfreed\n", space_count
);