Sync changes from latest snowball compiler version
[xapian.git] / xapian-core / languages / compiler / generator.c
blobe30b932f653dbb77eb0d08fd76a6b4c981adf403
2 #include <limits.h> /* for INT_MAX */
3 #include <stdio.h> /* for fprintf etc */
4 #include <stdlib.h> /* for free etc */
5 #include <string.h> /* for strlen */
6 #include "header.h"
8 /* Define this to get warning messages when optimisations can't be used. */
9 /* #define OPTIMISATION_WARNINGS */
11 /* recursive use: */
13 static void generate(struct generator * g, struct node * p);
/* Pseudo label values that can be stored in g->failure_label instead of a
 * real label number. */
enum special_labels {
    x_return = -1   /* write_failure() emits "return 0;" rather than a goto */
};
21 static int new_label(struct generator * g) {
22 return g->next_label++;
25 /* Write routines for simple entities */
27 /* Write a space if the preceding character was not whitespace */
28 static void ws_opt_space(struct generator * g, const char * s) {
29 int ch = str_back(g->outbuf);
30 if (ch != ' ' && ch != '\n' && ch != '\t' && ch != -1)
31 write_char(g, ' ');
32 write_string(g, s);
/* Write integer i right-aligned in a field of width 3 (space padded). */
static void wi3(struct generator * g, int i) {
    int padding = (i < 100) + (i < 10);
    while (padding-- > 0) write_char(g, ' ');
    write_int(g, i);
}
42 /* Write routines for items from the syntax tree */
44 static void write_varname(struct generator * g, struct name * p) {
46 int ch = "SBIrxg"[p->type];
47 switch (p->type) {
48 case t_external:
49 write_string(g, g->options->externals_prefix); break;
50 case t_string:
51 case t_boolean:
52 case t_integer:
53 if (g->options->make_lang == LANG_C) {
54 write_char(g, ch);
55 write_char(g, '[');
56 write_int(g, p->count);
57 write_char(g, ']');
58 return;
60 /* FALLTHRU */
61 default:
62 write_char(g, ch); write_char(g, '_');
64 str_append_b(g->outbuf, p->b);
67 static void write_varref(struct generator * g, struct name * p) { /* reference to variable */
68 if (g->options->make_lang == LANG_C && p->type < t_routine)
69 write_string(g, "z->");
70 write_varname(g, p);
73 static void write_hexdigit(struct generator * g, int i) {
74 str_append_ch(g->outbuf, "0123456789ABCDEF"[i & 0xF]); /* hexchar */
/* Write i as an upper-case hexadecimal integer with no leading zeros (a
 * single "0" for zero).
 *
 * The value is processed as unsigned: the previous recursive form relied on
 * `i >> 4` eventually reaching 0, which never happens for a negative i on
 * implementations where >> is an arithmetic shift, so it recursed without
 * bound.  Output for non-negative values is unchanged. */
static void write_hex(struct generator * g, int i) {
    unsigned int value = (unsigned int)i;
    int shift = 0;

    /* Find the position of the most significant non-zero hex digit. */
    while ((value >> shift) > 0xF) shift += 4;

    for (; shift >= 0; shift -= 4) {
        write_hexdigit(g, (int)((value >> shift) & 0xF));
    }
}
/* Write ch as a C literal: printable ASCII (32..126) becomes a quoted
 * character constant, with quote and backslash escaped; anything else is
 * written as a hex constant (0x...). */
static void wlitch(struct generator * g, int ch) {
    int printable = (ch >= 32 && ch < 127);

    if (!printable) {
        write_string(g, "0x");
        write_hex(g, ch);
        return;
    }

    write_char(g, '\'');
    if (ch == '\\' || ch == '\'') {
        write_char(g, '\\');
    }
    write_char(g, ch);
    write_char(g, '\'');
}
96 static void wlitarray(struct generator * g, symbol * p) { /* write literal array */
99 int i;
100 for (i = 0; i < SIZE(p); i++) {
101 wlitch(g, p[i]);
102 if (i < SIZE(p) - 1) write_string(g, ", ");
107 static void wlitref(struct generator * g, symbol * p) { /* write ref to literal array */
109 if (SIZE(p) == 0) write_char(g, '0'); else {
110 struct str * s = g->outbuf;
111 g->outbuf = g->declarations;
112 write_string(g, "static const symbol s_"); write_int(g, g->literalstring_count); write_string(g, "[] = { ");
113 wlitarray(g, p);
114 write_string(g, " };\n");
115 g->outbuf = s;
116 write_string(g, "s_"); write_int(g, g->literalstring_count);
117 g->literalstring_count++;
121 static void write_margin(struct generator * g) {
122 int i;
123 for (i = 0; i < g->margin; i++) write_string(g, " ");
126 static void write_comment(struct generator * g, struct node * p) {
128 ws_opt_space(g, "/* ");
129 switch (p->type) {
130 case c_mathassign:
131 case c_plusassign:
132 case c_minusassign:
133 case c_multiplyassign:
134 case c_divideassign:
135 case c_eq:
136 case c_ne:
137 case c_gr:
138 case c_ge:
139 case c_ls:
140 case c_le:
141 if (p->name) {
142 write_char(g, '$');
143 str_append_b(g->outbuf, p->name->b);
144 write_char(g, ' ');
146 write_string(g, name_of_token(p->type));
147 write_string(g, " <integer expression>");
148 break;
149 default:
150 write_string(g, name_of_token(p->type));
151 if (p->name) {
152 write_char(g, ' ');
153 str_append_b(g->outbuf, p->name->b);
156 write_string(g, ", line "); write_int(g, p->line_number); write_string(g, " */");
157 write_newline(g);
/* Write the current margin followed by the string s. */
static void wms(struct generator * g, const char * s) {
    write_margin(g);
    write_string(g, s);
}
163 static void write_block_start(struct generator * g) { /* block start */
164 wms(g, "{ ");
165 g->margin++;
168 static void write_block_end(struct generator * g) { /* block end */
170 if (g->line_labelled == g->line_count) { wms(g, ";"); write_newline(g); }
171 g->margin--;
172 wms(g, "}"); write_newline(g);
175 static void w(struct generator * g, const char * s);
177 /* keep c */
178 static void wk(struct generator * g, struct node * p, int keep_limit) {
179 ++g->keep_count;
180 if (p->mode == m_forward) {
181 write_string(g, "int c");
182 write_int(g, g->keep_count);
183 w(g, " = ~zc");
184 if (keep_limit) {
185 write_string(g, ", mlimit");
186 write_int(g, g->keep_count);
188 write_char(g, ';');
189 } else {
190 write_string(g, "int m");
191 write_int(g, g->keep_count);
192 w(g, " = ~zl - ~zc");
193 if (keep_limit) {
194 write_string(g, ", mlimit");
195 write_int(g, g->keep_count);
197 write_string(g, "; (void)m");
198 write_int(g, g->keep_count);
199 write_char(g, ';');
203 static void wrestore(struct generator * g, struct node * p, int keep_token) { /* restore c */
204 if (p->mode == m_forward) {
205 w(g, "~zc = c");
206 } else {
207 w(g, "~zc = ~zl - m");
209 write_int(g, keep_token); write_char(g, ';');
212 static void wrestorelimit(struct generator * g, struct node * p, int keep_token) { /* restore limit */
213 if (p->mode == m_forward) {
214 w(g, "~zl += mlimit");
215 } else {
216 w(g, "~zlb = mlimit");
218 write_int(g, keep_token); write_string(g, ";");
221 static void winc(struct generator * g, struct node * p) { /* increment c */
222 w(g, p->mode == m_forward ? "~zc++;" :
223 "~zc--;");
226 static void wsetl(struct generator * g, int n) {
228 g->margin--;
229 wms(g, "lab"); write_int(g, n); write_char(g, ':'); write_newline(g);
230 g->line_labelled = g->line_count;
231 g->margin++;
/* Write "goto labN;" on its own line at the current margin. */
static void wgotol(struct generator * g, int n) {
    write_margin(g);
    write_string(g, "goto lab");
    write_int(g, n);
    write_char(g, ';');
    write_newline(g);
}
238 static void write_failure(struct generator * g, struct node * p) { /* fail */
239 if (g->failure_keep_count != 0) {
240 write_string(g, "{ ");
241 if (g->failure_keep_count > 0) {
242 wrestore(g, p, g->failure_keep_count);
243 } else {
244 wrestorelimit(g, p, -g->failure_keep_count);
246 write_char(g, ' ');
248 switch (g->failure_label)
250 case x_return:
251 write_string(g, "return 0;");
252 break;
253 default:
254 write_string(g, "goto lab");
255 write_int(g, g->failure_label);
256 write_char(g, ';');
257 g->label_used = 1;
259 if (g->failure_keep_count != 0) write_string(g, " }");
263 /* if at limit fail */
264 static void write_check_limit(struct generator * g, struct node * p) {
266 w(g, p->mode == m_forward ? "if (~zc >= ~zl) " :
267 "if (~zc <= ~zlb) ");
268 write_failure(g, p);
271 static void write_data_address(struct generator * g, struct node * p) {
272 symbol * b = p->literalstring;
273 if (b != 0) {
274 write_int(g, SIZE(b)); w(g, ", ");
275 wlitref(g, b);
276 } else {
277 write_varref(g, p->name);
281 /* Formatted write. */
282 static void writef(struct generator * g, const char * input, struct node * p) {
283 int i = 0;
284 int l = strlen(input);
286 while (i < l) {
287 int ch = input[i++];
288 if (ch != '~') {
289 write_char(g, ch);
290 continue;
292 switch (input[i++]) {
293 default: write_char(g, input[i - 1]); continue;
294 case 'C': write_comment(g, p); continue;
295 case 'k': wk(g, p, false); continue;
296 case 'K': wk(g, p, true); continue;
297 case 'i': winc(g, p); continue;
298 case 'l': write_check_limit(g, p); continue;
299 case 'f': write_failure(g, p); continue;
300 case 'M': write_margin(g); continue;
301 case 'N': write_newline(g); continue;
302 case '{': write_block_start(g); continue;
303 case '}': write_block_end(g); continue;
304 case 'S': write_string(g, g->S[input[i++] - '0']); continue;
305 case 'I': write_int(g, g->I[input[i++] - '0']); continue;
306 case 'J': wi3(g, g->I[input[i++] - '0']); continue;
307 case 'V': write_varref(g, g->V[input[i++] - '0']); continue;
308 case 'W': write_varname(g, g->V[input[i++] - '0']); continue;
309 case 'L': wlitref(g, g->L[input[i++] - '0']); continue;
310 case 'A': wlitarray(g, g->L[input[i++] - '0']); continue;
311 case 'c': wlitch(g, g->I[input[i++] - '0']); continue;
312 case 'a': write_data_address(g, p); continue;
313 case '+': g->margin++; continue;
314 case '-': g->margin--; continue;
315 case '$': /* insert_s, insert_v etc */
316 write_char(g, p->literalstring == 0 ? 'v' : 's');
317 continue;
318 case 'z':
319 if (g->options->make_lang == LANG_C)
320 write_string(g, "z->");
321 continue;
322 case 'Z':
323 if (g->options->make_lang == LANG_C)
324 write_string(g, input[i] == ')' ? "z" : "z, ");
325 continue;
326 case 'p': write_string(g, g->options->externals_prefix); continue;
/* Formatted write with no syntax node: only for format strings whose
 * escapes do not need one. */
static void w(struct generator * g, const char * s) {
    writef(g, s, (struct node *)0);
}
335 static void generate_AE(struct generator * g, struct node * p) {
336 const char * s;
337 switch (p->type) {
338 case c_name:
339 write_varref(g, p->name); break;
340 case c_number:
341 write_int(g, p->number); break;
342 case c_maxint:
343 write_string(g, "MAXINT"); break;
344 case c_minint:
345 write_string(g, "MININT"); break;
346 case c_neg:
347 write_char(g, '-'); generate_AE(g, p->right); break;
348 case c_multiply:
349 s = " * "; goto label0;
350 case c_plus:
351 s = " + "; goto label0;
352 case c_minus:
353 s = " - "; goto label0;
354 case c_divide:
355 s = " / ";
356 label0:
357 write_char(g, '('); generate_AE(g, p->left);
358 write_string(g, s); generate_AE(g, p->right); write_char(g, ')'); break;
359 case c_cursor:
360 w(g, "~zc"); break;
361 case c_limit:
362 w(g, p->mode == m_forward ? "~zl" : "~zlb"); break;
363 case c_len:
364 if (g->options->encoding == ENC_UTF8) {
365 w(g, "len_utf8(~zp)");
366 break;
368 /* FALLTHRU */
369 case c_size:
370 w(g, "SIZE(~zp)");
371 break;
372 case c_lenof:
373 if (g->options->encoding == ENC_UTF8) {
374 g->V[0] = p->name;
375 w(g, "len_utf8(~V0)");
376 break;
378 /* FALLTHRU */
379 case c_sizeof:
380 g->V[0] = p->name;
381 w(g, "SIZE(~V0)");
382 break;
386 /* K_needed() tests to see if we really need to keep c. Not true when the
387 command does not touch the cursor. This and repeat_score() could be
388 elaborated almost indefinitely.
391 static int K_needed_(struct generator * g, struct node * p, int call_depth) {
392 while (p) {
393 switch (p->type) {
394 case c_atlimit:
395 case c_do:
396 case c_dollar:
397 case c_leftslice:
398 case c_rightslice:
399 case c_mathassign:
400 case c_plusassign:
401 case c_minusassign:
402 case c_multiplyassign:
403 case c_divideassign:
404 case c_eq:
405 case c_ne:
406 case c_gr:
407 case c_ge:
408 case c_ls:
409 case c_le:
410 case c_sliceto:
411 case c_booltest:
412 case c_set:
413 case c_unset:
414 case c_true:
415 case c_false:
416 case c_debug:
417 break;
419 case c_call:
420 /* Recursive functions aren't typical in snowball programs, so
421 * make the pessimistic assumption that keep is needed if we
422 * hit a generous limit on recursion. It's not likely to make
423 * a difference to any real world program, but means we won't
424 * recurse until we run out of stack for pathological cases.
426 if (call_depth >= 100) return true;
427 if (K_needed_(g, p->name->definition, call_depth + 1))
428 return true;
429 break;
431 case c_bra:
432 if (K_needed_(g, p->left, call_depth)) return true;
433 break;
435 default: return true;
437 p = p->right;
439 return false;
/* Return true if the command list at p may need the cursor to be saved
 * and restored; entry point for K_needed_() with call depth 0. */
extern int K_needed(struct generator * g, struct node * p) {
    const int initial_depth = 0;
    return K_needed_(g, p, initial_depth);
}
446 static int repeat_score(struct generator * g, struct node * p, int call_depth) {
447 int score = 0;
448 while (p) {
449 switch (p->type) {
450 case c_dollar:
451 case c_leftslice:
452 case c_rightslice:
453 case c_mathassign:
454 case c_plusassign:
455 case c_minusassign:
456 case c_multiplyassign:
457 case c_divideassign:
458 case c_eq:
459 case c_ne:
460 case c_gr:
461 case c_ge:
462 case c_ls:
463 case c_le:
464 case c_sliceto: /* case c_not: must not be included here! */
465 case c_debug:
466 break;
468 case c_call:
469 /* Recursive functions aren't typical in snowball programs, so
470 * make the pessimistic assumption that repeat requires cursor
471 * reinstatement if we hit a generous limit on recursion. It's
472 * not likely to make a difference to any real world program,
473 * but means we won't recurse until we run out of stack for
474 * pathological cases.
476 if (call_depth >= 100) {
477 return 2;
479 score += repeat_score(g, p->name->definition, call_depth + 1);
480 if (score >= 2)
481 return score;
482 break;
484 case c_bra:
485 score += repeat_score(g, p->left, call_depth);
486 if (score >= 2)
487 return score;
488 break;
490 case c_name:
491 case c_literalstring:
492 case c_next:
493 case c_grouping:
494 case c_non:
495 case c_hop:
496 if (++score >= 2)
497 return score;
498 break;
500 default:
501 return 2;
503 p = p->right;
505 return score;
/* tests if an expression requires cursor reinstatement in a repeat:
 * true when repeat_score() for the list reaches 2. */
extern int repeat_restore(struct generator * g, struct node * p) {
    int score = repeat_score(g, p, 0);
    return score >= 2;
}
514 static void generate_bra(struct generator * g, struct node * p) {
515 p = p->left;
516 while (p) {
517 generate(g, p);
518 p = p->right;
522 static void generate_and(struct generator * g, struct node * p) {
523 int keep_c = 0;
524 if (K_needed(g, p->left)) {
525 writef(g, "~{~k~C", p);
526 keep_c = g->keep_count;
527 } else {
528 writef(g, "~M~C", p);
530 p = p->left;
531 while (p) {
532 generate(g, p);
533 if (keep_c && p->right != 0) {
534 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
536 p = p->right;
538 if (keep_c) w(g, "~}");
541 static void generate_or(struct generator * g, struct node * p) {
542 int keep_c = 0;
544 int used = g->label_used;
545 int a0 = g->failure_label;
546 int a1 = g->failure_keep_count;
548 int out_lab = new_label(g);
550 if (K_needed(g, p->left)) {
551 writef(g, "~{~k~C", p);
552 keep_c = g->keep_count;
553 } else {
554 writef(g, "~M~C", p);
556 p = p->left;
557 g->failure_keep_count = 0;
558 while (p->right) {
559 g->failure_label = new_label(g);
560 g->label_used = 0;
561 generate(g, p);
562 wgotol(g, out_lab);
563 if (g->label_used)
564 wsetl(g, g->failure_label);
565 if (keep_c) {
566 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
568 p = p->right;
570 g->label_used = used;
571 g->failure_label = a0;
572 g->failure_keep_count = a1;
574 generate(g, p);
575 if (keep_c) w(g, "~}");
576 wsetl(g, out_lab);
579 static void generate_backwards(struct generator * g, struct node * p) {
581 writef(g, "~M~zlb = ~zc; ~zc = ~zl;~C~N", p);
582 generate(g, p->left);
583 w(g, "~M~zc = ~zlb;~N");
587 static void generate_not(struct generator * g, struct node * p) {
588 int keep_c = 0;
590 int used = g->label_used;
591 int a0 = g->failure_label;
592 int a1 = g->failure_keep_count;
594 if (K_needed(g, p->left)) {
595 writef(g, "~{~k~C", p);
596 keep_c = g->keep_count;
597 } else {
598 writef(g, "~M~C", p);
601 g->failure_label = new_label(g);
602 g->label_used = 0;
603 g->failure_keep_count = 0;
604 generate(g, p->left);
607 int l = g->failure_label;
608 int u = g->label_used;
610 g->label_used = used;
611 g->failure_label = a0;
612 g->failure_keep_count = a1;
614 writef(g, "~M~f~N", p);
615 if (u)
616 wsetl(g, l);
618 if (keep_c) {
619 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N~}");
624 static void generate_try(struct generator * g, struct node * p) {
625 int keep_c = 0;
626 if (K_needed(g, p->left)) {
627 writef(g, "~{~k~C", p);
628 keep_c = g->keep_count;
629 } else {
630 writef(g, "~M~C", p);
632 g->failure_keep_count = keep_c;
634 g->failure_label = new_label(g);
635 g->label_used = 0;
636 generate(g, p->left);
638 if (g->label_used)
639 wsetl(g, g->failure_label);
641 if (keep_c) w(g, "~}");
644 static void generate_set(struct generator * g, struct node * p) {
645 g->V[0] = p->name; writef(g, "~M~V0 = 1;~C", p);
648 static void generate_unset(struct generator * g, struct node * p) {
649 g->V[0] = p->name; writef(g, "~M~V0 = 0;~C", p);
652 static void generate_fail(struct generator * g, struct node * p) {
653 generate(g, p->left);
654 writef(g, "~M~f~C", p);
657 /* generate_test() also implements 'reverse' */
659 static void generate_test(struct generator * g, struct node * p) {
660 int keep_c = 0;
661 if (K_needed(g, p->left)) {
662 keep_c = ++g->keep_count;
663 w(g, p->mode == m_forward ? "~{int c_test" :
664 "~{int m_test");
665 write_int(g, keep_c);
666 w(g, p->mode == m_forward ? " = ~zc;" :
667 " = ~zl - ~zc;");
668 writef(g, "~C", p);
669 } else writef(g, "~M~C", p);
671 generate(g, p->left);
673 if (keep_c) {
674 w(g, p->mode == m_forward ? "~M~zc = c_test" :
675 "~M~zc = ~zl - m_test");
676 write_int(g, keep_c);
677 writef(g, ";~N~}", p);
681 static void generate_do(struct generator * g, struct node * p) {
682 int keep_c = 0;
683 if (K_needed(g, p->left)) {
684 writef(g, "~{~k~C", p);
685 keep_c = g->keep_count;
686 } else {
687 writef(g, "~M~C", p);
690 g->failure_label = new_label(g);
691 g->label_used = 0;
692 g->failure_keep_count = 0;
693 generate(g, p->left);
695 if (g->label_used)
696 wsetl(g, g->failure_label);
697 if (keep_c) {
698 w(g, "~M"); wrestore(g, p, keep_c);
699 w(g, "~N~}");
703 static void generate_next(struct generator * g, struct node * p) {
704 if (g->options->encoding == ENC_UTF8) {
705 if (p->mode == m_forward)
706 w(g, "~{int ret = skip_utf8(~zp, ~zc, 0, ~zl, 1");
707 else
708 w(g, "~{int ret = skip_utf8(~zp, ~zc, ~zlb, 0, -1");
709 writef(g, ");~N"
710 "~Mif (ret < 0) ~f~N"
711 "~M~zc = ret;~C"
712 "~}", p);
713 } else
714 writef(g, "~M~l~N"
715 "~M~i~C", p);
718 static void generate_GO_grouping(struct generator * g, struct node * p, int is_goto, int complement) {
720 struct grouping * q = p->name->grouping;
721 g->S[0] = p->mode == m_forward ? "" : "_b";
722 g->S[1] = complement ? "in" : "out";
723 g->S[2] = g->options->encoding == ENC_UTF8 ? "_U" : "";
724 g->V[0] = p->name;
725 g->I[0] = q->smallest_ch;
726 g->I[1] = q->largest_ch;
727 if (is_goto) {
728 writef(g, "~Mif (~S1_grouping~S0~S2(~Z~V0, ~I0, ~I1, 1) < 0) ~f /* goto */~C", p);
729 } else {
730 writef(g, "~{ /* gopast */~C"
731 "~Mint ret = ~S1_grouping~S0~S2(~Z~V0, ~I0, ~I1, 1);~N"
732 "~Mif (ret < 0) ~f~N", p);
733 if (p->mode == m_forward)
734 w(g, "~M~zc += ret;~N");
735 else
736 w(g, "~M~zc -= ret;~N");
737 w(g, "~}");
741 static void generate_GO(struct generator * g, struct node * p, int style) {
742 int keep_c = 0;
744 int used = g->label_used;
745 int a0 = g->failure_label;
746 int a1 = g->failure_keep_count;
748 if (p->left->type == c_grouping || p->left->type == c_non) {
749 /* Special case for "goto" or "gopast" when used on a grouping or an
750 * inverted grouping - the movement of c by the matching action is
751 * exactly what we want! */
752 #ifdef OPTIMISATION_WARNINGS
753 printf("Optimising %s %s\n", style ? "goto" : "gopast", p->left->type == c_non ? "non" : "grouping");
754 #endif
755 generate_GO_grouping(g, p->left, style, p->left->type == c_non);
756 return;
759 w(g, "~Mwhile(1) {"); writef(g, "~C~+", p);
761 if (style == 1 || repeat_restore(g, p->left)) {
762 writef(g, "~M~k~N", p);
763 keep_c = g->keep_count;
766 g->failure_label = new_label(g);
767 g->label_used = 0;
768 generate(g, p->left);
770 if (style == 1) {
771 /* include for goto; omit for gopast */
772 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
774 w(g, "~Mbreak;~N");
775 if (g->label_used)
776 wsetl(g, g->failure_label);
777 if (keep_c) {
778 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
781 g->label_used = used;
782 g->failure_label = a0;
783 g->failure_keep_count = a1;
785 /* writef(g, "~M~l~N"
786 "~M~i~N", p); */
787 generate_next(g, p);
788 w(g, "~}");
791 static void generate_loop(struct generator * g, struct node * p) {
792 w(g, "~{int i; for (i = "); generate_AE(g, p->AE); writef(g, "; i > 0; i--)~C"
793 "~{", p);
795 generate(g, p->left);
797 w(g, "~}"
798 "~}");
801 static void generate_repeat(struct generator * g, struct node * p, int atleast_case) {
802 int keep_c = 0;
803 writef(g, "~Mwhile(1) {~C~+", p);
805 if (repeat_restore(g, p->left)) {
806 writef(g, "~M~k~N", p);
807 keep_c = g->keep_count;
810 g->failure_label = new_label(g);
811 g->label_used = 0;
812 g->failure_keep_count = 0;
813 generate(g, p->left);
815 if (atleast_case) w(g, "~Mi--;~N");
817 w(g, "~Mcontinue;~N");
818 if (g->label_used)
819 wsetl(g, g->failure_label);
821 if (keep_c) {
822 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
825 w(g, "~Mbreak;~N"
826 "~}");
829 static void generate_atleast(struct generator * g, struct node * p) {
830 w(g, "~{int i = "); generate_AE(g, p->AE); w(g, ";~N");
832 int used = g->label_used;
833 int a0 = g->failure_label;
834 int a1 = g->failure_keep_count;
836 generate_repeat(g, p, true);
838 g->label_used = used;
839 g->failure_label = a0;
840 g->failure_keep_count = a1;
842 writef(g, "~Mif (i > 0) ~f~N"
843 "~}", p);
846 static void generate_setmark(struct generator * g, struct node * p) {
847 g->V[0] = p->name;
848 writef(g, "~M~V0 = ~zc;~C", p);
851 static void generate_tomark(struct generator * g, struct node * p) {
852 g->S[0] = p->mode == m_forward ? ">" : "<";
854 w(g, "~Mif (~zc ~S0 "); generate_AE(g, p->AE); writef(g, ") ~f~N", p);
855 w(g, "~M~zc = "); generate_AE(g, p->AE); writef(g, ";~C", p);
858 static void generate_atmark(struct generator * g, struct node * p) {
860 w(g, "~Mif (~zc != "); generate_AE(g, p->AE); writef(g, ") ~f~C", p);
863 static void generate_hop(struct generator * g, struct node * p) {
864 g->S[0] = p->mode == m_forward ? "+" : "-";
865 g->S[1] = p->mode == m_forward ? "0" :
866 (g->options->make_lang == LANG_C ? "z->lb" : "lb");
867 if (g->options->encoding == ENC_UTF8) {
868 w(g, "~{int ret = skip_utf8(~zp, ~zc, ~S1, ~zl, ~S0 ");
869 generate_AE(g, p->AE); writef(g, ");~C", p);
870 writef(g, "~Mif (ret < 0) ~f~N", p);
871 } else {
872 w(g, "~{int ret = ~zc ~S0 ");
873 generate_AE(g, p->AE); writef(g, ";~C", p);
874 writef(g, "~Mif (~S1 > ret || ret > ~zl) ~f~N", p);
876 writef(g, "~M~zc = ret;~N"
877 "~}", p);
/* Generate 'delete': call slice_del() and propagate -1 from the generated
 * routine if it reports -1. */
static void generate_delete(struct generator * g, struct node * p) {
    static const char delete_code[] = "~Mif (slice_del(~Z) == -1) return -1;~C";
    writef(g, delete_code, p);
}
890 static void generate_tolimit(struct generator * g, struct node * p) {
891 g->S[0] = p->mode == m_forward ? "" : "b";
892 writef(g, "~M~zc = ~zl~S0;~C", p);
895 static void generate_atlimit(struct generator * g, struct node * p) {
896 g->S[0] = p->mode == m_forward ? "" : "b";
897 g->S[1] = p->mode == m_forward ? "<" : ">";
898 writef(g, "~Mif (~zc ~S1 ~zl~S0) ~f~C", p);
901 static void generate_leftslice(struct generator * g, struct node * p) {
902 g->S[0] = p->mode == m_forward ? "bra" : "ket";
903 writef(g, "~M~z~S0 = ~zc;~C", p);
906 static void generate_rightslice(struct generator * g, struct node * p) {
907 g->S[0] = p->mode == m_forward ? "ket" : "bra";
908 writef(g, "~M~z~S0 = ~zc;~C", p);
911 static void generate_assignto(struct generator * g, struct node * p) {
912 g->V[0] = p->name;
913 writef(g, "~M~V0 = assign_to(~Z~V0);~C", p);
914 if (g->options->make_lang == LANG_C)
915 writef(g, "~Mif (~V0 == 0) return -1;~C", p);
918 static void generate_sliceto(struct generator * g, struct node * p) {
919 g->V[0] = p->name;
920 writef(g, "~{symbol * ret = slice_to(~Z~V0);~C"
921 "~Mif (ret == 0) return -1;~N"
922 "~M~V0 = ret;~N"
923 "~}", p);
926 static void generate_insert(struct generator * g, struct node * p, int style) {
928 int keep_c = style == c_attach;
929 if (p->mode == m_backward) keep_c = !keep_c;
930 if (g->options->make_lang == LANG_C)
931 writef(g, "~{int ret;~N", p);
932 if (keep_c) w(g, "~{int saved_c = ~zc;~N");
933 if (g->options->make_lang == LANG_C)
934 writef(g, "~Mret = insert_~$(~Z~zc, ~zc, ~a);~C", p);
935 else
936 writef(g, "~Minsert_~$(~Z~zc, ~zc, ~a);~C", p);
937 if (keep_c) w(g, "~M~zc = saved_c;~N~}");
938 if (g->options->make_lang == LANG_C)
939 writef(g, "~Mif (ret < 0) return ret;~N"
940 "~}", p);
943 static void generate_assignfrom(struct generator * g, struct node * p) {
945 int keep_c = p->mode == m_forward; /* like 'attach' */
946 if (g->options->make_lang == LANG_C)
947 writef(g, "~{int ret;~N", p);
948 if (keep_c) writef(g, "~{int saved_c = ~zc;~N", p);
949 w(g, g->options->make_lang == LANG_C ? "~Mret =" : "~M");
950 writef(g, keep_c ? "insert_~$(~Z~zc, ~zl, ~a);~C" : "insert_~$(~Z~zlb, ~zc, ~a);~C", p);
951 if (keep_c) w(g, "~M~zc = saved_c;~N~}");
952 if (g->options->make_lang == LANG_C)
953 writef(g, "~Mif (ret < 0) return ret;~N"
954 "~}", p);
957 /* bugs marked <======= fixed 22/7/02. Similar fixes required for Java */
/* Generate '<-': call slice_from_s/slice_from_v with the node's data and
 * return a negative result from the generated routine. */
static void generate_slicefrom(struct generator * g, struct node * p) {
    /* ~$ picks the _s or _v variant to match the data written by ~a. */
    writef(g, "~{int ret = slice_from_~$(~Z~a);~C", p);
    writef(g, "~Mif (ret < 0) return ret;~N"
              "~}", p);
}
967 static void generate_setlimit(struct generator * g, struct node * p) {
968 int keep_c;
969 if (p->left && p->left->type == c_tomark && !p->left->right) {
970 /* Special case for:
972 * setlimit tomark AE for C
974 * All uses of setlimit in the current stemmers we ship follow this
975 * pattern, and by special-casing we can avoid having to save and
976 * restore c.
978 struct node * q = p->left;
980 ++g->keep_count;
981 writef(g, "~N~{int mlimit", p);
982 write_int(g, g->keep_count);
983 writef(g, ";~C", p);
984 keep_c = g->keep_count;
986 g->S[0] = q->mode == m_forward ? ">" : "<";
988 w(g, "~Mif (~zc ~S0 "); generate_AE(g, q->AE); writef(g, ") ~f~N", q);
989 w(g, "~Mmlimit");
990 write_int(g, keep_c);
991 if (p->mode == m_forward) {
992 w(g, " = ~zl - ~zc; ~zl = ");
993 } else {
994 w(g, " = ~zlb; ~zlb = ");
996 generate_AE(g, q->AE);
997 w(g, ";~N");
998 } else {
999 writef(g, "~{~K~C", p);
1000 keep_c = g->keep_count;
1001 generate(g, p->left);
1003 w(g, "~Mmlimit");
1004 write_int(g, keep_c);
1005 if (p->mode == m_forward)
1006 w(g, " = ~zl - ~zc; ~zl = ~zc;~N");
1007 else
1008 w(g, " = ~zlb; ~zlb = ~zc;~N");
1009 w(g, "~M"); wrestore(g, p, keep_c); w(g, "~N");
1012 g->failure_keep_count = -keep_c;
1013 generate(g, p->aux);
1014 w(g, "~M");
1015 wrestorelimit(g, p, -g->failure_keep_count);
1016 w(g, "~N"
1017 "~}");
1020 static const char * vars[] = { "p", "c", "l", "lb", "bra", "ket", NULL };
1022 /* dollar sets snowball up to operate on a string variable as if it were the
1023 * current string */
1024 static void generate_dollar(struct generator * g, struct node * p) {
1026 int used = g->label_used;
1027 int a0 = g->failure_label;
1028 int a1 = g->failure_keep_count;
1029 int keep_token;
1030 g->failure_label = new_label(g);
1031 g->label_used = 0;
1032 g->failure_keep_count = 0;
1034 keep_token = ++g->keep_count;
1035 g->I[0] = keep_token;
1036 if (g->options->make_lang != LANG_C) {
1037 const char ** var;
1038 writef(g, "~{~C", p);
1039 for (var = vars; *var; ++var) {
1040 g->S[0] = *var;
1041 w(g, "~Mint ~S0~I0 = ~S0;~N");
1043 } else {
1044 writef(g, "~{struct SN_env env~I0 = * z;~C", p);
1046 g->V[0] = p->name;
1047 writef(g, "~Mint failure = 1; /* assume failure */~N"
1048 "~M~zp = ~V0;~N"
1049 "~M~zlb = ~zc = 0;~N"
1050 "~M~zl = SIZE(~zp);~N", p);
1051 generate(g, p->left);
1052 w(g, "~Mfailure = 0; /* mark success */~N");
1053 if (g->label_used)
1054 wsetl(g, g->failure_label);
1055 g->V[0] = p->name; /* necessary */
1057 g->label_used = used;
1058 g->failure_label = a0;
1059 g->failure_keep_count = a1;
1061 g->I[0] = keep_token;
1062 if (g->options->make_lang != LANG_C) {
1063 const char ** var;
1064 w(g, "~M~V0 = ~zp;~N");
1065 for (var = vars; *var; ++var) {
1066 g->S[0] = *var;
1067 w(g, "~M~S0 = ~S0~I0;~N");
1069 writef(g, "~Mif (failure) ~f~N~}", p);
1070 } else {
1071 writef(g, "~M~V0 = z->p;~N"
1072 "~M* z = env~I0;~N"
1073 "~Mif (failure) ~f~N~}", p);
1077 static void generate_integer_assign(struct generator * g, struct node * p, char * s) {
1079 g->V[0] = p->name;
1080 g->S[0] = s;
1081 w(g, "~M~V0 ~S0 "); generate_AE(g, p->AE); writef(g, ";~C", p);
1084 static void generate_integer_test(struct generator * g, struct node * p, char * s) {
1086 g->V[0] = p->name;
1087 g->S[0] = s;
1088 w(g, "~Mif (!(~V0 ~S0 "); generate_AE(g, p->AE); writef(g, ")) ~f~C", p);
1091 static void generate_call(struct generator * g, struct node * p) {
1093 g->V[0] = p->name;
1094 writef(g, "~{int ret = ~V0(~Z);~C", p);
1095 if (g->failure_keep_count == 0 && g->failure_label == x_return) {
1096 /* Combine the two tests in this special case for better optimisation
1097 * and clearer generated code. */
1098 writef(g, "~Mif (ret <= 0) return ret;~N~}", p);
1099 } else {
1100 writef(g, "~Mif (ret == 0) ~f~N"
1101 "~Mif (ret < 0) return ret;~N~}", p);
1105 static void generate_grouping(struct generator * g, struct node * p, int complement) {
1107 struct grouping * q = p->name->grouping;
1108 g->S[0] = p->mode == m_forward ? "" : "_b";
1109 g->S[1] = complement ? "out" : "in";
1110 g->S[2] = g->options->encoding == ENC_UTF8 ? "_U" : "";
1111 g->V[0] = p->name;
1112 g->I[0] = q->smallest_ch;
1113 g->I[1] = q->largest_ch;
1114 writef(g, "~Mif (~S1_grouping~S0~S2(~Z~V0, ~I0, ~I1, 0)) ~f~C", p);
1117 static void generate_namedstring(struct generator * g, struct node * p) {
1119 g->S[0] = p->mode == m_forward ? "" : "_b";
1120 g->V[0] = p->name;
1121 writef(g, "~Mif (!(eq_v~S0(~Z~V0))) ~f~C", p);
1124 static void generate_literalstring(struct generator * g, struct node * p) {
1125 symbol * b = p->literalstring;
1126 if (SIZE(b) == 1) {
1127 /* It's quite common to compare with a single character literal string,
1128 * so just inline the simpler code for this case rather than making a
1129 * function call. In UTF-8 mode, only do this for the ASCII subset,
1130 * since multi-byte characters are more complex to test against.
1132 if (g->options->encoding == ENC_UTF8 && *b >= 128) {
1133 printf("single byte %d\n", *b);
1134 exit(1);
1136 g->I[0] = *b;
1137 if (p->mode == m_forward) {
1138 writef(g, "~Mif (~zc == ~zl || ~zp[~zc] != ~c0) ~f~C"
1139 "~M~zc++;~N", p);
1140 } else {
1141 writef(g, "~Mif (~zc <= ~zlb || ~zp[~zc - 1] != ~c0) ~f~C"
1142 "~M~zc--;~N", p);
1144 } else {
1145 g->S[0] = p->mode == m_forward ? "" : "_b";
1146 g->I[0] = SIZE(b);
1147 g->L[0] = b;
1149 writef(g, "~Mif (!(eq_s~S0(~Z~I0, ~L0))) ~f~C", p);
1153 static void generate_define(struct generator * g, struct node * p) {
1154 struct name * q = p->name;
1155 g->next_label = 0;
1157 if (g->options->make_lang == LANG_C)
1158 g->S[0] = q->type == t_routine ? "static" : "extern";
1159 else
1160 g->S[0] = g->options->name;
1161 g->V[0] = q;
1163 if (g->options->make_lang == LANG_C)
1164 w(g, "~N~S0 int ~V0(struct SN_env * z) {");
1165 else
1166 w(g, "~Nint Xapian::~S0::~V0() {");
1167 write_string(g, p->mode == m_forward ? " /* forwardmode */" : " /* backwardmode */");
1168 w(g, "~N~+");
1169 if (p->amongvar_needed) w(g, "~Mint among_var;~N");
1170 g->failure_keep_count = 0;
1171 g->failure_label = x_return;
1172 g->label_used = 0;
1173 g->keep_count = 0;
1174 generate(g, p->left);
1175 w(g, "~Mreturn 1;~N~}");
1178 static void generate_substring(struct generator * g, struct node * p) {
1180 struct among * x = p->among;
1181 int block = -1;
1182 unsigned int bitmap = 0;
1183 struct amongvec * among_cases = x->b;
1184 int c;
1185 int empty_case = -1;
1186 int n_cases = 0;
1187 symbol cases[2];
1188 int shortest_size = INT_MAX;
1189 int shown_comment = 0;
1191 g->S[0] = p->mode == m_forward ? "" : "_b";
1192 g->I[0] = x->number;
1193 g->I[1] = x->literalstring_count;
1195 /* In forward mode with non-ASCII UTF-8 characters, the first character
1196 * of the string will often be the same, so instead look at the last
1197 * common character position.
1199 * In backward mode, we can't match if there are fewer characters before
1200 * the current position than the minimum length.
1202 for (c = 0; c < x->literalstring_count; ++c) {
1203 int size = among_cases[c].size;
1204 if (size != 0 && size < shortest_size) {
1205 shortest_size = size;
1209 for (c = 0; c < x->literalstring_count; ++c) {
1210 symbol ch;
1211 if (among_cases[c].size == 0) {
1212 empty_case = c;
1213 continue;
1215 if (p->mode == m_forward) {
1216 ch = among_cases[c].b[shortest_size - 1];
1217 } else {
1218 ch = among_cases[c].b[among_cases[c].size - 1];
1220 if (n_cases == 0) {
1221 block = ch >> 5;
1222 } else if (ch >> 5 != block) {
1223 block = -1;
1224 if (n_cases > 2) break;
1226 if (block == -1) {
1227 if (n_cases > 0 && ch == cases[0]) continue;
1228 if (n_cases < 2) {
1229 cases[n_cases++] = ch;
1230 } else if (ch != cases[1]) {
1231 ++n_cases;
1232 break;
1234 } else {
1235 if ((bitmap & (1u << (ch & 0x1f))) == 0) {
1236 bitmap |= 1u << (ch & 0x1f);
1237 if (n_cases < 2)
1238 cases[n_cases] = ch;
1239 ++n_cases;
1244 if (block != -1 || n_cases <= 2) {
1245 char buf[64];
1246 g->I[2] = block;
1247 g->I[3] = bitmap;
1248 g->I[4] = shortest_size - 1;
1249 if (p->mode == m_forward) {
1250 const char * z = g->options->make_lang == LANG_C ? "z->" : "";
1251 sprintf(buf, "%sp[%sc + %d]", z, z, shortest_size - 1);
1252 g->S[1] = buf;
1253 if (shortest_size == 1) {
1254 writef(g, "~Mif (~zc >= ~zl", p);
1255 } else {
1256 writef(g, "~Mif (~zc + ~I4 >= ~zl", p);
1258 } else {
1259 if (g->options->make_lang == LANG_C)
1260 g->S[1] = "z->p[z->c - 1]";
1261 else
1262 g->S[1] = "p[c - 1]";
1263 if (shortest_size == 1) {
1264 writef(g, "~Mif (~zc <= ~zlb", p);
1265 } else {
1266 writef(g, "~Mif (~zc - ~I4 <= ~zlb", p);
1269 if (n_cases == 0) {
1270 /* We get this for the degenerate case: among { '' }
1271 * This doesn't seem to be a useful construct, but it is
1272 * syntactically valid.
1274 } else if (n_cases == 1) {
1275 g->I[4] = cases[0];
1276 writef(g, " || ~S1 != ~I4", p);
1277 } else if (n_cases == 2) {
1278 g->I[4] = cases[0];
1279 g->I[5] = cases[1];
1280 writef(g, " || (~S1 != ~I4 && ~S1 != ~I5)", p);
1281 } else {
1282 writef(g, " || ~S1 >> 5 != ~I2 || !((~I3 >> (~S1 & 0x1f)) & 1)", p);
1284 write_string(g, ") ");
1285 if (empty_case != -1) {
1286 /* If the among includes the empty string, it can never fail
1287 * so not matching the bitmap means we match the empty string.
1289 g->I[4] = among_cases[empty_case].result;
1290 writef(g, "among_var = ~I4; else~C", p);
1291 } else {
1292 writef(g, "~f~C", p);
1294 shown_comment = 1;
1295 } else {
1296 #ifdef OPTIMISATION_WARNINGS
1297 printf("Couldn't shortcut among %d\n", x->number);
1298 #endif
1301 if (x->command_count == 0 && x->starter == 0) {
1302 w(g, "~Mif (!(find_among~S0(s_pool, ~Za_~I0, ~I1, ");
1303 if (x->function_count) {
1304 w(g, "af_~I0, af");
1305 } else {
1306 write_string(g, "0, 0");
1308 writef(g, "))) ~f", p);
1309 writef(g, shown_comment ? "~N" : "~C", p);
1310 } else {
1311 w(g, "~Mamong_var = find_among~S0(s_pool, ~Za_~I0, ~I1, ");
1312 if (x->function_count) {
1313 w(g, "af_~I0, af");
1314 } else {
1315 write_string(g, "0, 0");
1317 writef(g, ");", p);
1318 writef(g, shown_comment ? "~N" : "~C", p);
1319 writef(g, "~Mif (!(among_var)) ~f~N", p);
1323 static void generate_among(struct generator * g, struct node * p) {
1325 struct among * x = p->among;
1326 int case_number = 1;
1328 if (x->substring == 0) generate_substring(g, p);
1329 if (x->command_count == 0 && x->starter == 0) return;
1331 if (x->starter != 0) generate(g, x->starter);
1333 writef(g, "~Mswitch (among_var) {~C~+"
1334 "~Mcase 0: ~f~N", p);
1336 p = p->left;
1337 if (p != 0 && p->type != c_literalstring) p = p->right;
1339 while (p) {
1340 if (p->type == c_bra && p->left != 0) {
1341 g->I[0] = case_number++;
1342 w(g, "~Mcase ~I0:~N~+"); generate(g, p); w(g, "~Mbreak;~N~-");
1344 p = p->right;
1346 w(g, "~}");
1349 static void generate_booltest(struct generator * g, struct node * p) {
1351 g->V[0] = p->name;
1352 writef(g, "~Mif (!(~V0)) ~f~C", p);
/* Emit an unconditional failure action for node p. */
static void generate_false(struct generator * g, struct node * p) {

    static const char fail_template[] = "~M~f~C";

    writef(g, fail_template, p);
}
1360 static void generate_debug(struct generator * g, struct node * p) {
1362 g->I[0] = g->debug_count++;
1363 g->I[1] = p->line_number;
1364 writef(g, "~Mdebug(~Z~I0, ~I1);~C", p);
1368 static void generate(struct generator * g, struct node * p) {
1370 int used = g->label_used;
1371 int a0 = g->failure_label;
1372 int a1 = g->failure_keep_count;
1374 switch (p->type)
1376 case c_define: generate_define(g, p); break;
1377 case c_bra: generate_bra(g, p); break;
1378 case c_and: generate_and(g, p); break;
1379 case c_or: generate_or(g, p); break;
1380 case c_backwards: generate_backwards(g, p); break;
1381 case c_not: generate_not(g, p); break;
1382 case c_set: generate_set(g, p); break;
1383 case c_unset: generate_unset(g, p); break;
1384 case c_try: generate_try(g, p); break;
1385 case c_fail: generate_fail(g, p); break;
1386 case c_reverse:
1387 case c_test: generate_test(g, p); break;
1388 case c_do: generate_do(g, p); break;
1389 case c_goto: generate_GO(g, p, 1); break;
1390 case c_gopast: generate_GO(g, p, 0); break;
1391 case c_repeat: generate_repeat(g, p, false); break;
1392 case c_loop: generate_loop(g, p); break;
1393 case c_atleast: generate_atleast(g, p); break;
1394 case c_setmark: generate_setmark(g, p); break;
1395 case c_tomark: generate_tomark(g, p); break;
1396 case c_atmark: generate_atmark(g, p); break;
1397 case c_hop: generate_hop(g, p); break;
1398 case c_delete: generate_delete(g, p); break;
1399 case c_next: generate_next(g, p); break;
1400 case c_tolimit: generate_tolimit(g, p); break;
1401 case c_atlimit: generate_atlimit(g, p); break;
1402 case c_leftslice: generate_leftslice(g, p); break;
1403 case c_rightslice: generate_rightslice(g, p); break;
1404 case c_assignto: generate_assignto(g, p); break;
1405 case c_sliceto: generate_sliceto(g, p); break;
1406 case c_assign: generate_assignfrom(g, p); break;
1407 case c_insert:
1408 case c_attach: generate_insert(g, p, p->type); break;
1409 case c_slicefrom: generate_slicefrom(g, p); break;
1410 case c_setlimit: generate_setlimit(g, p); break;
1411 case c_dollar: generate_dollar(g, p); break;
1412 case c_mathassign: generate_integer_assign(g, p, "="); break;
1413 case c_plusassign: generate_integer_assign(g, p, "+="); break;
1414 case c_minusassign: generate_integer_assign(g, p, "-="); break;
1415 case c_multiplyassign:generate_integer_assign(g, p, "*="); break;
1416 case c_divideassign: generate_integer_assign(g, p, "/="); break;
1417 case c_eq: generate_integer_test(g, p, "=="); break;
1418 case c_ne: generate_integer_test(g, p, "!="); break;
1419 case c_gr: generate_integer_test(g, p, ">"); break;
1420 case c_ge: generate_integer_test(g, p, ">="); break;
1421 case c_ls: generate_integer_test(g, p, "<"); break;
1422 case c_le: generate_integer_test(g, p, "<="); break;
1423 case c_call: generate_call(g, p); break;
1424 case c_grouping: generate_grouping(g, p, false); break;
1425 case c_non: generate_grouping(g, p, true); break;
1426 case c_name: generate_namedstring(g, p); break;
1427 case c_literalstring: generate_literalstring(g, p); break;
1428 case c_among: generate_among(g, p); break;
1429 case c_substring: generate_substring(g, p); break;
1430 case c_booltest: generate_booltest(g, p); break;
1431 case c_false: generate_false(g, p); break;
1432 case c_true: break;
1433 case c_debug: generate_debug(g, p); break;
1434 default: fprintf(stderr, "%d encountered\n", p->type);
1435 exit(1);
1438 if (g->failure_label != a0)
1439 g->label_used = used;
1440 g->failure_label = a0;
1441 g->failure_keep_count = a1;
1444 static void generate_start_comment(struct generator * g) {
1446 if (g->options->make_lang == LANG_C)
1447 w(g, "/* This file was generated automatically by the Snowball to ISO C compiler */~N");
1448 else
1449 w(g, "/* This file was generated automatically by the Snowball to ISO C++ compiler */~N");
1450 w(g, "/* http://snowballstem.org/ */~N");
1453 static void generate_head(struct generator * g) {
1455 if (g->options->make_lang != LANG_C) {
1456 const char * s = g->options->output_file;
1457 const char * leaf;
1458 w(g, "~N"
1459 "#include <config.h>~N"
1460 "#include <limits.h>~N");
1461 if (!s) abort(); /* checked in driver.c */
1462 leaf = strrchr(s, '/');
1463 if (leaf) ++leaf; else leaf = s;
1464 write_string(g, "#include \"");
1465 write_string(g, leaf);
1466 w(g, ".h\"~N~N");
1467 return;
1470 if (g->options->runtime_path == 0) {
1471 w(g, "~N#include \"header.h\"~N~N");
1472 } else {
1473 w(g, "~N#include \"");
1474 write_string(g, g->options->runtime_path);
1475 if (g->options->runtime_path[strlen(g->options->runtime_path) - 1] != '/')
1476 write_char(g, '/');
1477 w(g, "header.h\"~N~N");
1481 static void generate_routine_headers(struct generator * g) {
1482 struct name * q;
1483 for (q = g->analyser->names; q; q = q->next) {
1484 g->V[0] = q;
1485 switch (q->type) {
1486 case t_routine:
1487 w(g, "static int ~W0(struct SN_env * z);~N");
1488 break;
1489 case t_external:
1490 w(g,
1491 "#ifdef __cplusplus~N"
1492 "extern \"C\" {~N"
1493 "#endif~N"
1494 "extern int ~W0(struct SN_env * z);~N"
1495 "#ifdef __cplusplus~N"
1496 "}~N"
1497 "#endif~N"
1499 break;
1504 static unsigned pool_size = 0;
1506 static void generate_among_pool(struct generator * g, struct among * x) {
1508 while (x) {
1509 struct amongvec * v = x->b;
1510 int i;
1511 char * done = check_malloc(x->literalstring_count);
1512 memset(done, 0, x->literalstring_count);
1514 g->I[0] = x->number;
1516 for (i = 0; i < x->literalstring_count; i++, v++)
1518 int j;
1519 if (v->size == 0 || done[i]) continue;
1520 g->I[1] = i;
1521 /* Eliminate entries which are just substrings of other entries */
1522 for (j = 0; j < x->literalstring_count; j++) {
1523 if (j == i) continue;
1524 if (v->size <= v[j - i].size) {
1525 size_t offset = v[j - i].size - v->size;
1526 size_t len = v->size * sizeof(symbol);
1527 do {
1528 if (memcmp(v->b, v[j - i].b + offset, len) == 0) {
1529 g->I[2] = j;
1530 if (offset) {
1531 g->I[3] = offset;
1532 w(g, "#define s_~I0_~I1 (s_~I0_~I2 + ~I3)~N");
1533 } else {
1534 w(g, "#define s_~I0_~I1 s_~I0_~I2~N");
1536 goto done;
1538 } while (offset--);
1541 if (v->size) {
1542 if (pool_size == 0) {
1543 w(g, "static const symbol s_pool[] = {~N");
1545 g->I[2] = pool_size;
1546 w(g, "#define s_~I0_~I1 ~I2~N");
1547 g->L[0] = v->b;
1548 w(g, "~A0,~N");
1549 pool_size += v->size;
1551 done: ;
1554 check_free(done);
1555 x = x->next;
1557 if (pool_size != 0) {
1558 w(g, "};~N~N");
1562 static void generate_among_table(struct generator * g, struct among * x) {
1564 struct amongvec * v = x->b;
1566 g->I[0] = x->number;
1567 g->I[1] = x->literalstring_count;
1568 w(g, "~N~Mstatic const struct among a_~I0[~I1] =~N{~N");
1571 int i;
1572 for (i = 0; i < x->literalstring_count; i++) {
1573 g->I[1] = i;
1574 g->I[2] = v->size;
1575 g->I[3] = v->i;
1576 g->I[4] = v->result;
1577 g->S[0] = i < x->literalstring_count - 1 ? "," : "";
1579 w(g, "/*~J1 */ { ~I2, ");
1580 if (v->size == 0) w(g, "0,");
1581 else w(g, "s_~I0_~I1,");
1582 w(g, " ~I3, ~I4");
1583 w(g, "}~S0~N");
1584 v++;
1587 w(g, "};~N~N");
1589 if (x->function_count) {
1590 g->I[1] = x->literalstring_count;
1591 w(g, "~Mstatic const unsigned char af_~I0[~I1] =~N{~N");
1593 v = x->b;
1595 int i;
1596 for (i = 0; i < x->literalstring_count; i++) {
1597 g->I[1] = i;
1599 w(g, "/*~J1 */ ");
1600 if (v[i].function == 0) {
1601 w(g, "0");
1602 } else {
1603 write_int(g, v[i].function->among_func_count);
1604 g->V[0] = v[i].function;
1605 w(g, " /* t~W0 */");
1607 if (i < x->literalstring_count - 1) w(g, ",~N");
1610 w(g, "~N};~N~N");
1614 static void generate_amongs(struct generator * g) {
1615 struct among * x;
1616 struct name * q;
1617 int among_func_count = 0;
1619 g->S[0] = g->options->name;
1620 for (q = g->analyser->names; q; q = q->next) {
1621 if (q->type == t_routine && q->used_in_among) {
1622 q->among_func_count = ++among_func_count;
1623 g->V[0] = q;
1624 w(g, "static int t~V0(Xapian::StemImplementation * this_ptr) {~N"
1625 " return (static_cast<Xapian::~S0 *>(this_ptr))->~V0();~N"
1626 "}~N"
1627 "~N");
1631 if (among_func_count) {
1632 g->I[0] = among_func_count;
1633 w(g, "~Mstatic const among_function af[~I0] =~N{~N");
1635 q = g->analyser->names;
1636 g->S[0] = g->options->name;
1637 for (q = g->analyser->names; q; q = q->next) {
1638 if (q->type == t_routine && q->used_in_among) {
1639 g->V[0] = q;
1640 g->I[0] = q->among_func_count;
1641 w(g, "/*~J0 */ t~V0");
1642 if (q->among_func_count < among_func_count) w(g, ",~N"); else w(g, "~N");
1646 w(g, "};~N~N");
1649 generate_among_pool(g, g->analyser->amongs);
1651 for (x = g->analyser->amongs; x; x = x->next) {
1652 generate_among_table(g, x);
1656 static void set_bit(symbol * b, int i) { b[i/8] |= 1 << i%8; }
1658 static void generate_grouping_table(struct generator * g, struct grouping * q) {
1660 int range = q->largest_ch - q->smallest_ch + 1;
1661 int size = (range + 7)/ 8; /* assume 8 bits per symbol */
1662 symbol * b = q->b;
1663 symbol * map = create_b(size);
1664 int i;
1665 for (i = 0; i < size; i++) map[i] = 0;
1667 for (i = 0; i < SIZE(b); i++) set_bit(map, b[i] - q->smallest_ch);
1669 g->V[0] = q->name;
1671 w(g, "static const unsigned char ~V0[] = { ");
1672 for (i = 0; i < size; i++) {
1673 write_int(g, map[i]);
1674 if (i < size - 1) w(g, ", ");
1676 w(g, " };~N~N");
1677 lose_b(map);
1680 static void generate_groupings(struct generator * g) {
1681 struct grouping * q;
1682 for (q = g->analyser->groupings; q; q = q->next) {
1683 generate_grouping_table(g, q);
1687 static void generate_create(struct generator * g) {
1689 int * p = g->analyser->name_count;
1691 if (g->options->make_lang != LANG_C) {
1692 struct name * q = g->analyser->names;
1693 int first = true;
1694 const char * dtor;
1695 g->S[0] = g->options->name;
1696 dtor = strrchr(g->options->name, ':');
1697 if (dtor) ++dtor; else dtor = g->options->name;
1698 g->S[1] = dtor;
1699 w(g, "~N"
1700 "Xapian::~S0::~S1()");
1701 while (q) {
1702 if (q->type < t_routine) {
1703 w(g, first ? "~N : " : ", ");
1704 first = false;
1705 g->V[0] = q;
1706 w(g, "~W0(0)");
1708 q = q->next;
1710 w(g, "~N{~N");
1711 q = g->analyser->names;
1712 while (q) {
1713 if (q->type == t_string) {
1714 g->V[0] = q;
1715 w(g, " ~W0 = create_s();~N");
1717 q = q->next;
1719 w(g, "}~N");
1721 return;
1724 g->I[0] = p[t_string];
1725 g->I[1] = p[t_integer];
1726 g->I[2] = p[t_boolean];
1727 w(g, "~N"
1728 "extern struct SN_env * ~pcreate_env(void) { return SN_create_env(~I0, ~I1, ~I2); }"
1729 "~N");
1732 static void generate_close(struct generator * g) {
1734 int * p = g->analyser->name_count;
1735 if (g->options->make_lang != LANG_C) {
1736 struct name * q = g->analyser->names;
1737 const char * dtor;
1738 const char * lang;
1739 g->S[0] = g->options->name;
1740 dtor = strrchr(g->options->name, ':');
1741 if (dtor) ++dtor; else dtor = g->options->name;
1742 g->S[1] = dtor;
1743 lang = strrchr(g->options->output_file, '/');
1744 if (lang) ++lang; else lang = g->options->output_file;
1745 g->S[2] = lang;
1746 w(g, "~N"
1747 "Xapian::~S0::~~~S1()~N"
1748 "{~N");
1749 while (q) {
1750 if (q->type == t_string) {
1751 g->V[0] = q;
1752 w(g, " lose_s(~W0);~N");
1754 q = q->next;
1756 w(g, "}~N");
1758 w(g, "~N"
1759 "std::string~N"
1760 "Xapian::~S0::get_description() const~N"
1761 "{~N"
1762 " return \"~S2\";~N"
1763 "}~N");
1764 return;
1767 g->I[0] = p[t_string];
1768 w(g, "~Nextern void ~pclose_env(struct SN_env * z) { SN_close_env(z, ~I0); }~N~N");
/* Emit the prototypes of the <prefix>create_env() and <prefix>close_env()
 * functions, with a blank line before and after.
 */
static void generate_create_and_close_templates(struct generator * g) {
    static const char prototypes[] =
        "~N"
        "extern struct SN_env * ~pcreate_env(void);~N"
        "extern void ~pclose_env(struct SN_env * z);~N"
        "~N";

    w(g, prototypes);
}
1778 static void generate_header_file(struct generator * g) {
1780 struct name * q;
1781 const char * vp = g->options->variables_prefix;
1782 g->S[0] = vp;
1784 if (g->options->make_lang != LANG_C) {
1785 const char * p;
1786 w(g, "~N"
1787 "#include \"steminternal.h\"~N"
1788 "~N"
1789 "namespace Xapian {~N"
1790 "~N");
1792 g->S[1] = g->options->name;
1793 w(g, "class ~S1 ");
1794 if (g->options->parent_class_name) {
1795 g->S[1] = g->options->parent_class_name;
1796 w(g, ": public ~S1 ");
1798 w(g, "{~N");
1799 for (q = g->analyser->names; q; q = q->next) {
1800 switch (q->type) {
1801 case t_string: g->S[1] = "symbol *"; goto label1;
1802 case t_integer: g->S[1] = "int"; goto label1;
1803 case t_boolean: g->S[1] = "unsigned char";
1804 label1:
1805 g->V[0] = q;
1806 w(g, " ~S1 ~W0;~N");
1807 break;
1811 for (q = g->analyser->names; q; q = q->next) {
1812 if (q->type == t_routine && !q->used_in_among) {
1813 g->V[0] = q;
1814 w(g, " int ~W0();~N");
1818 w(g, "~N public:~N");
1820 /* FIXME: We currently need to make any routines used in an among
1821 * public. */
1822 for (q = g->analyser->names; q; q = q->next) {
1823 if (q->type == t_routine && q->used_in_among) {
1824 g->V[0] = q;
1825 w(g, " int ~W0();~N");
1829 w(g, "~N");
1830 p = strrchr(g->options->name, ':');
1831 if (p) ++p; else p = g->options->name;
1832 g->S[1] = p;
1833 w(g, " ~S1();~N"
1834 " ~~~S1();~N");
1835 for (q = g->analyser->names; q; q = q->next) {
1836 if (q->type == t_external) {
1837 g->V[0] = q;
1838 w(g, " int ~W0();~N");
1842 w(g, " std::string get_description() const;~N"
1843 "};~N"
1844 "~N"
1845 "}~N");
1847 return;
1850 w(g, "~N"
1851 "#ifdef __cplusplus~N"
1852 "extern \"C\" {~N"
1853 "#endif~N"); /* for C++ */
1855 generate_create_and_close_templates(g);
1856 for (q = g->analyser->names; q; q = q->next) {
1857 g->V[0] = q;
1858 switch (q->type) {
1859 case t_external:
1860 w(g, "extern int ~W0(struct SN_env * z);~N");
1861 break;
1862 case t_string: g->S[1] = "S"; goto label0;
1863 case t_integer: g->S[1] = "I"; goto label0;
1864 case t_boolean: g->S[1] = "B";
1865 label0:
1866 if (vp) {
1867 g->I[0] = q->count;
1868 w(g, "#define ~S0");
1869 str_append_b(g->outbuf, q->b);
1870 w(g, " (~S1[~I0])~N");
1872 break;
1876 w(g, "~N"
1877 "#ifdef __cplusplus~N"
1878 "}~N"
1879 "#endif~N"); /* for C++ */
1881 w(g, "~N");
1884 extern void generate_program_c(struct generator * g) {
1886 g->outbuf = str_new();
1887 generate_start_comment(g);
1888 generate_head(g);
1889 if (g->options->make_lang == LANG_C) {
1890 generate_routine_headers(g);
1891 w(g, "#ifdef __cplusplus~N"
1892 "extern \"C\" {~N"
1893 "#endif~N"
1894 "~N");
1895 generate_create_and_close_templates(g);
1896 w(g, "~N"
1897 "#ifdef __cplusplus~N"
1898 "}~N"
1899 "#endif~N");
1901 generate_amongs(g);
1902 generate_groupings(g);
1903 g->declarations = g->outbuf;
1904 g->outbuf = str_new();
1905 g->literalstring_count = 0;
1907 struct node * p = g->analyser->program;
1908 while (p) { generate(g, p); p = p->right; }
1910 generate_create(g);
1911 generate_close(g);
1912 output_str(g->options->output_src, g->declarations);
1913 str_delete(g->declarations);
1914 output_str(g->options->output_src, g->outbuf);
1915 str_clear(g->outbuf);
1917 generate_start_comment(g);
1918 generate_header_file(g);
1919 output_str(g->options->output_h, g->outbuf);
1920 str_delete(g->outbuf);
1923 /* Generator functions common to multiple languages. */
1925 extern struct generator * create_generator(struct analyser * a, struct options * o) {
1926 NEW(generator, g);
1927 g->analyser = a;
1928 g->options = o;
1929 g->margin = 0;
1930 g->debug_count = 0;
1931 g->copy_from_count = 0;
1932 g->line_count = 0;
1933 g->line_labelled = 0;
1934 g->failure_label = -1;
1935 g->unreachable = false;
1936 #ifndef DISABLE_PYTHON
1937 g->max_label = 0;
1938 #endif
1939 return g;
/* Release a generator obtained from create_generator().  Only the generator
 * structure itself is freed here.
 */
extern void close_generator(struct generator * g) {
    FREE(g);
}
1946 /* Write routines for simple entities */
1948 extern void write_char(struct generator * g, int ch) {
1949 str_append_ch(g->outbuf, ch); /* character */
1952 extern void write_newline(struct generator * g) {
1953 str_append_ch(g->outbuf, '\n'); /* newline */
1954 g->line_count++;
1957 extern void write_string(struct generator * g, const char * s) {
1958 str_append_string(g->outbuf, s);
1961 extern void write_int(struct generator * g, int i) {
1962 str_append_int(g->outbuf, i);
1965 extern void write_b(struct generator * g, symbol * b) {
1967 str_append_b(g->outbuf, b);
1970 extern void write_str(struct generator * g, struct str * str) {
1972 str_append(g->outbuf, str);