xapian-core/languages/compiler/analyser.c

   1
   2 #include <stdio.h>   /* printf etc */
   3 #include <stdlib.h>  /* exit */
   4 #include <string.h>  /* memmove */
   5 #include "header.h"
   6
   7 typedef enum {
   8     e_token_omitted = 0,
   9     e_unexpected_token = 1,
  10     e_string_omitted = 2,
  11     e_unexpected_token_in_among = 3,
  12     /* For codes above here, report "after " t->previous_token after the error. */
  13     e_unresolved_substring = 14,
  14     e_not_allowed_inside_reverse = 15,
  15     e_empty_grouping = 16,
  16     e_already_backwards = 17,
  17     e_empty_among = 18,
  18     e_adjacent_bracketed_in_among = 19,
  19     e_substring_preceded_by_substring = 20,
  20     /* For codes below here, tokeniser->b is printed before the error. */
  21     e_redeclared = 30,
  22     e_undeclared = 31,
  23     e_declared_as_different_mode = 32,
  24     e_not_of_type_x = 33,
  25     e_not_of_type_string_or_integer = 34,
  26     e_misplaced = 35,
  27     e_redefined = 36,
  28     e_misused = 37
  29 } error_code;
  30
  31 /* recursive usage: */
  32
  33 static void read_program_(struct analyser * a, int terminator);
  34 static struct node * read_C(struct analyser * a);
  35 static struct node * C_style(struct analyser * a, const char * s, int token);
  36
  37
  38 static void print_node_(struct node * p, int n, const char * s) {
  39
  40     int i;
  41     for (i = 0; i < n; i++) fputs(i == n - 1 ? s : "  ", stdout);
  42     printf("%s ", name_of_token(p->type));
  43     if (p->name) report_b(stdout, p->name->b);
  44     if (p->literalstring) {
  45         printf("'");
  46         report_b(stdout, p->literalstring);
  47         printf("'");
  48     }
  49     printf("\n");
  50     if (p->AE) print_node_(p->AE, n+1, "# ");
  51     if (p->left) print_node_(p->left, n+1, "  ");
  52     if (p->right) print_node_(p->right, n, "  ");
  53     if (p->aux) print_node_(p->aux, n+1, "@ ");
  54 }
  55
  56 extern void print_program(struct analyser * a) {
  57     print_node_(a->program, 0, "  ");
  58 }
  59
  60 static struct node * new_node(struct analyser * a, int type) {
  61     NEW(node, p);
  62     p->next = a->nodes; a->nodes = p;
  63     p->left = 0;
  64     p->right = 0;
  65     p->aux = 0;
  66     p->AE = 0;
  67     p->name = 0;
  68     p->literalstring = 0;
  69     p->mode = a->mode;
  70     p->line_number = a->tokeniser->line_number;
  71     p->type = type;
  72     return p;
  73 }
  74
  75 static const char * name_of_mode(int n) {
  76     switch (n) {
  77         case m_backward: return "string backward";
  78         case m_forward:  return "string forward";
  79     /*  case m_integer:  return "integer";  */
  80     }
  81     fprintf(stderr, "Invalid mode %d in name_of_mode()\n", n);
  82     exit(1);
  83 }
  84
  85 static const char * name_of_type(int n) {
  86     switch (n) {
  87         case 's': return "string";
  88         case 'i': return "integer";
  89         case 'r': return "routine";
  90         case 'R': return "routine or grouping";
  91         case 'g': return "grouping";
  92     }
  93     fprintf(stderr, "Invalid type %d in name_of_type()\n", n);
  94     exit(1);
  95 }
  96
  97 static const char * name_of_name_type(int code) {
  98     switch (code) {
  99         case t_string: return "string";
 100         case t_boolean: return "boolean";
 101         case t_integer: return "integer";
 102         case t_routine: return "routine";
 103         case t_external: return "external";
 104         case t_grouping: return "grouping";
 105     }
 106     fprintf(stderr, "Invalid type code %d in name_of_name_type()\n", code);
 107     exit(1);
 108 }
 109
 110 static void count_error(struct analyser * a) {
 111     struct tokeniser * t = a->tokeniser;
 112     if (t->error_count >= 20) { fprintf(stderr, "... etc\n"); exit(1); }
 113     t->error_count++;
 114 }
 115
 116 static void error2(struct analyser * a, error_code n, int x) {
 117     struct tokeniser * t = a->tokeniser;
 118     count_error(a);
 119     fprintf(stderr, "%s:%d: ", t->file, t->line_number);
 120     if ((int)n >= (int)e_redeclared) report_b(stderr, t->b);
 121     switch (n) {
 122         case e_token_omitted:
 123             fprintf(stderr, "%s omitted", name_of_token(t->omission)); break;
 124         case e_unexpected_token_in_among:
 125             fprintf(stderr, "in among(...), ");
 126             /* fall through */
 127         case e_unexpected_token:
 128             fprintf(stderr, "unexpected %s", name_of_token(t->token));
 129             if (t->token == c_number) fprintf(stderr, " %d", t->number);
 130             if (t->token == c_name) {
 131                 fprintf(stderr, " ");
 132                 report_b(stderr, t->b);
 133             } break;
 134         case e_string_omitted:
 135             fprintf(stderr, "string omitted"); break;
 136
 137         case e_unresolved_substring:
 138             fprintf(stderr, "unresolved substring on line %d", x); break;
 139         case e_not_allowed_inside_reverse:
 140             fprintf(stderr, "%s not allowed inside reverse(...)", name_of_token(t->token)); break;
 141         case e_empty_grouping:
 142             fprintf(stderr, "empty grouping"); break;
 143         case e_already_backwards:
 144             fprintf(stderr, "backwards used when already in this mode"); break;
 145         case e_empty_among:
 146             fprintf(stderr, "empty among(...)"); break;
 147         case e_adjacent_bracketed_in_among:
 148             fprintf(stderr, "two adjacent bracketed expressions in among(...)"); break;
 149         case e_substring_preceded_by_substring:
 150             fprintf(stderr, "substring preceded by another substring on line %d", x); break;
 151
 152         case e_redeclared:
 153             fprintf(stderr, " re-declared"); break;
 154         case e_undeclared:
 155             fprintf(stderr, " undeclared"); break;
 156         case e_declared_as_different_mode:
 157             fprintf(stderr, " declared as %s mode; used as %s mode",
 158                             name_of_mode(a->mode), name_of_mode(x)); break;
 159         case e_not_of_type_x:
 160             fprintf(stderr, " not of type %s", name_of_type(x)); break;
 161         case e_not_of_type_string_or_integer:
 162             fprintf(stderr, " not of type string or integer"); break;
 163         case e_misplaced:
 164             fprintf(stderr, " misplaced"); break;
 165         case e_redefined:
 166             fprintf(stderr, " redefined"); break;
 167         case e_misused:
 168             fprintf(stderr, " mis-used as %s mode",
 169                             name_of_mode(x)); break;
 170     }
 171     if ((int)n < (int)e_unresolved_substring && t->previous_token > 0)
 172         fprintf(stderr, " after %s", name_of_token(t->previous_token));
 173     fprintf(stderr, "\n");
 174 }
 175
 176 static void error(struct analyser * a, error_code n) { error2(a, n, 0); }
 177
 178 static void error3(struct analyser * a, struct node * p, symbol * b) {
 179     count_error(a);
 180     fprintf(stderr, "%s:%d: among(...) has repeated string '", a->tokeniser->file, p->line_number);
 181     report_b(stderr, b);
 182     fprintf(stderr, "'\n");
 183 }
 184
 185 static void error3a(struct analyser * a, struct node * p) {
 186     count_error(a);
 187     fprintf(stderr, "%s:%d: previously seen here\n", a->tokeniser->file, p->line_number);
 188 }
 189
 190 static void error4(struct analyser * a, struct name * q) {
 191     count_error(a);
 192     fprintf(stderr, "%s:%d: ", a->tokeniser->file, q->used->line_number);
 193     report_b(stderr, q->b);
 194     fprintf(stderr, " undefined\n");
 195 }
 196
 197 static void omission_error(struct analyser * a, int n) {
 198     a->tokeniser->omission = n;
 199     error(a, e_token_omitted);
 200 }
 201
 202 static int check_token(struct analyser * a, int code) {
 203     struct tokeniser * t = a->tokeniser;
 204     if (t->token != code) { omission_error(a, code); return false; }
 205     return true;
 206 }
 207
 208 static int get_token(struct analyser * a, int code) {
 209     struct tokeniser * t = a->tokeniser;
 210     read_token(t);
 211     {
 212         int x = check_token(a, code);
 213         if (!x) t->token_held = true;
 214         return x;
 215     }
 216 }
 217
 218 static struct name * look_for_name(struct analyser * a) {
 219     symbol * q = a->tokeniser->b;
 220     struct name * p;
 221     for (p = a->names; p; p = p->next) {
 222         symbol * b = p->b;
 223         int n = SIZE(b);
 224         if (n == SIZE(q) && memcmp(q, b, n * sizeof(symbol)) == 0) {
 225             p->referenced = true;
 226             return p;
 227         }
 228     }
 229     return 0;
 230 }
 231
 232 static struct name * find_name(struct analyser * a) {
 233     struct name * p = look_for_name(a);
 234     if (p == 0) error(a, e_undeclared);
 235     return p;
 236 }
 237
 238 static void check_routine_mode(struct analyser * a, struct name * p, int mode) {
 239     if (p->mode < 0) p->mode = mode; else
 240     if (p->mode != mode) error2(a, e_misused, mode);
 241 }
 242
 243 static void check_name_type(struct analyser * a, struct name * p, int type) {
 244     switch (type) {
 245         case 's':
 246             if (p->type == t_string) return;
 247             break;
 248         case 'i':
 249             if (p->type == t_integer) return;
 250             break;
 251         case 'b':
 252             if (p->type == t_boolean) return;
 253             break;
 254         case 'R':
 255             if (p->type == t_grouping) return;
 256             /* FALLTHRU */
 257         case 'r':
 258             if (p->type == t_routine || p->type == t_external) return;
 259             break;
 260         case 'g':
 261             if (p->type == t_grouping) return;
 262             break;
 263     }
 264     error2(a, e_not_of_type_x, type);
 265 }
 266
 267 static void read_names(struct analyser * a, int type) {
 268     struct tokeniser * t = a->tokeniser;
 269     if (!get_token(a, c_bra)) return;
 270     while (true) {
 271         int token = read_token(t);
 272         switch (token) {
 273             case c_len: {
 274                 /* Context-sensitive token - once declared as a name, it loses
 275                  * its special meaning, for compatibility with older versions
 276                  * of snowball.
 277                  */
 278                 static const symbol c_len_lit[] = {
 279                     'l', 'e', 'n'
 280                 };
 281                 MOVE_TO_B(t->b, c_len_lit);
 282                 goto handle_as_name;
 283             }
 284             case c_lenof: {
 285                 /* Context-sensitive token - once declared as a name, it loses
 286                  * its special meaning, for compatibility with older versions
 287                  * of snowball.
 288                  */
 289                 static const symbol c_lenof_lit[] = {
 290                     'l', 'e', 'n', 'o', 'f'
 291                 };
 292                 MOVE_TO_B(t->b, c_lenof_lit);
 293                 goto handle_as_name;
 294             }
 295             case c_name:
 296 handle_as_name:
 297                 if (look_for_name(a) != 0) error(a, e_redeclared); else {
 298                     NEW(name, p);
 299                     p->b = copy_b(t->b);
 300                     p->type = type;
 301                     p->mode = -1; /* routines, externals */
 302                     p->count = a->name_count[type];
 303                     p->referenced = false;
 304                     p->used_in_among = false;
 305                     p->used = 0;
 306                     p->value_used = false;
 307                     p->initialised = false;
 308                     p->local_to = 0;
 309                     p->grouping = 0;
 310                     p->definition = 0;
 311                     p->declaration_line_number = t->line_number;
 312                     a->name_count[type]++;
 313                     p->next = a->names;
 314                     a->names = p;
 315                     if (token != c_name) {
 316                         disable_token(t, token);
 317                     }
 318                 }
 319                 break;
 320             default:
 321                 if (!check_token(a, c_ket)) t->token_held = true;
 322                 return;
 323         }
 324     }
 325 }
 326
 327 static symbol * new_literalstring(struct analyser * a) {
 328     NEW(literalstring, p);
 329     p->b = copy_b(a->tokeniser->b);
 330     p->next = a->literalstrings;
 331     a->literalstrings = p;
 332     return p->b;
 333 }
 334
 335 static int read_AE_test(struct analyser * a) {
 336
 337     struct tokeniser * t = a->tokeniser;
 338     switch (read_token(t)) {
 339         case c_assign: return c_mathassign;
 340         case c_plusassign:
 341         case c_minusassign:
 342         case c_multiplyassign:
 343         case c_divideassign:
 344         case c_eq:
 345         case c_ne:
 346         case c_gr:
 347         case c_ge:
 348         case c_ls:
 349         case c_le: return t->token;
 350         default: error(a, e_unexpected_token); t->token_held = true; return c_eq;
 351     }
 352 }
 353
 354 static int binding(int t) {
 355     switch (t) {
 356         case c_plus: case c_minus: return 1;
 357         case c_multiply: case c_divide: return 2;
 358         default: return -2;
 359     }
 360 }
 361
 362 static void mark_used_in(struct analyser * a, struct name * q, struct node * p) {
 363     if (!q->used) {
 364         q->used = p;
 365         q->local_to = a->program_end->name;
 366     } else if (q->local_to) {
 367         if (q->local_to != a->program_end->name) {
 368             /* Used in more than one routine/external. */
 369             q->local_to = NULL;
 370         }
 371     }
 372 }
 373
 374 static void name_to_node(struct analyser * a, struct node * p, int type) {
 375     struct name * q = find_name(a);
 376     if (q) {
 377         check_name_type(a, q, type);
 378         mark_used_in(a, q, p);
 379     }
 380     p->name = q;
 381 }
 382
 383 static struct node * read_AE(struct analyser * a, int B) {
 384     struct tokeniser * t = a->tokeniser;
 385     struct node * p;
 386     struct node * q;
 387     switch (read_token(t)) {
 388         case c_minus: /* monadic */
 389             q = read_AE(a, 100);
 390             if (q->type == c_neg) {
 391                 /* Optimise away double negation, which avoids generators
 392                  * having to worry about generating "--" (decrement operator
 393                  * in many languages).
 394                  */
 395                 p = q->right;
 396                 /* Don't free q, it's in the linked list a->nodes. */
 397                 break;
 398             }
 399             p = new_node(a, c_neg);
 400             p->right = q;
 401             break;
 402         case c_bra:
 403             p = read_AE(a, 0);
 404             get_token(a, c_ket);
 405             break;
 406         case c_name:
 407             p = new_node(a, c_name);
 408             name_to_node(a, p, 'i');
 409             if (p->name) p->name->value_used = true;
 410             break;
 411         case c_maxint:
 412         case c_minint:
 413             a->int_limits_used = true;
 414             /* fall through */
 415         case c_cursor:
 416         case c_limit:
 417         case c_len:
 418         case c_size:
 419             p = new_node(a, t->token);
 420             break;
 421         case c_number:
 422             p = new_node(a, c_number);
 423             p->number = t->number;
 424             break;
 425         case c_lenof:
 426         case c_sizeof:
 427             p = C_style(a, "s", t->token);
 428             break;
 429         default:
 430             error(a, e_unexpected_token);
 431             t->token_held = true;
 432             return 0;
 433     }
 434     while (true) {
 435         int token = read_token(t);
 436         int b = binding(token);
 437         if (binding(token) <= B) {
 438             t->token_held = true;
 439             return p;
 440         }
 441         q = new_node(a, token);
 442         q->left = p;
 443         q->right = read_AE(a, b);
 444         p = q;
 445     }
 446 }
 447
 448 static struct node * read_C_connection(struct analyser * a, struct node * q, int op) {
 449     struct tokeniser * t = a->tokeniser;
 450     struct node * p = new_node(a, op);
 451     struct node * p_end = q;
 452     p->left = q;
 453     do {
 454         q = read_C(a);
 455         p_end->right = q; p_end = q;
 456     } while (read_token(t) == op);
 457     t->token_held = true;
 458     return p;
 459 }
 460
 461 static struct node * read_C_list(struct analyser * a) {
 462     struct tokeniser * t = a->tokeniser;
 463     struct node * p = new_node(a, c_bra);
 464     struct node * p_end = 0;
 465     while (true) {
 466         int token = read_token(t);
 467         if (token == c_ket) return p;
 468         if (token < 0) { omission_error(a, c_ket); return p; }
 469         t->token_held = true;
 470         {
 471             struct node * q = read_C(a);
 472             while (true) {
 473                 token = read_token(t);
 474                 if (token != c_and && token != c_or) {
 475                     t->token_held = true;
 476                     break;
 477                 }
 478                 q = read_C_connection(a, q, token);
 479             }
 480             if (p_end == 0) p->left = q; else p_end->right = q;
 481             p_end = q;
 482         }
 483     }
 484 }
 485
 486 static struct node * C_style(struct analyser * a, const char * s, int token) {
 487     int i;
 488     struct node * p = new_node(a, token);
 489     for (i = 0; s[i] != 0; i++) switch (s[i]) {
 490         case 'C':
 491             p->left = read_C(a); continue;
 492         case 'D':
 493             p->aux = read_C(a); continue;
 494         case 'A':
 495             p->AE = read_AE(a, 0); continue;
 496         case 'f':
 497             get_token(a, c_for); continue;
 498         case 'S':
 499             {
 500                 int str_token = read_token(a->tokeniser);
 501                 if (str_token == c_name) name_to_node(a, p, 's'); else
 502                 if (str_token == c_literalstring) p->literalstring = new_literalstring(a);
 503                 else error(a, e_string_omitted);
 504             }
 505             continue;
 506         case 'b':
 507         case 's':
 508         case 'i':
 509             if (get_token(a, c_name)) name_to_node(a, p, s[i]);
 510             continue;
 511     }
 512     return p;
 513 }
 514
 515 static struct node * read_literalstring(struct analyser * a) {
 516     struct node * p = new_node(a, c_literalstring);
 517     p->literalstring = new_literalstring(a);
 518     return p;
 519 }
 520
 521 static void reverse_b(symbol * b) {
 522     int i = 0; int j = SIZE(b) - 1;
 523     while (i < j) {
 524         int ch1 = b[i]; int ch2 = b[j];
 525         b[i++] = ch2; b[j--] = ch1;
 526     }
 527 }
 528
 529 static int compare_amongvec(const void *pv, const void *qv) {
 530     const struct amongvec * p = (const struct amongvec*)pv;
 531     const struct amongvec * q = (const struct amongvec*)qv;
 532     symbol * b_p = p->b; int p_size = p->size;
 533     symbol * b_q = q->b; int q_size = q->size;
 534     int smaller_size = p_size < q_size ? p_size : q_size;
 535     int i;
 536     for (i = 0; i < smaller_size; i++)
 537         if (b_p[i] != b_q[i]) return b_p[i] - b_q[i];
 538     if (p_size - q_size)
 539         return p_size - q_size;
 540     return p->p->line_number - q->p->line_number;
 541 }
 542
 543 static void make_among(struct analyser * a, struct node * p, struct node * substring) {
 544
 545     NEW(among, x);
 546     NEWVEC(amongvec, v, p->number);
 547     struct node * q = p->left;
 548     struct amongvec * w0 = v;
 549     struct amongvec * w1 = v;
 550     int result = 1;
 551
 552     int direction = substring != 0 ? substring->mode : p->mode;
 553     int backward = direction == m_backward;
 554
 555     if (a->amongs == 0) a->amongs = x; else a->amongs_end->next = x;
 556     a->amongs_end = x;
 557     x->next = 0;
 558     x->b = v;
 559     x->number = a->among_count++;
 560     x->function_count = 0;
 561     x->starter = 0;
 562
 563     if (q->type == c_bra) { x->starter = q; q = q->right; }
 564
 565     while (q) {
 566         if (q->type == c_literalstring) {
 567             symbol * b = q->literalstring;
 568             w1->b = b;           /* pointer to case string */
 569             w1->p = q;           /* pointer to corresponding node */
 570             w1->size = SIZE(b);  /* number of characters in string */
 571             w1->i = -1;          /* index of longest substring */
 572             w1->result = -1;     /* number of corresponding case expression */
 573             if (q->left) {
 574                 struct name * function = q->left->name;
 575                 w1->function = function;
 576                 function->used_in_among = true;
 577                 check_routine_mode(a, function, direction);
 578                 x->function_count++;
 579             } else {
 580                 w1->function = 0;
 581             }
 582             w1++;
 583         }
 584         else
 585         if (q->left == 0)  /* empty command: () */
 586             w0 = w1;
 587         else {
 588             while (w0 != w1) {
 589                 w0->p = q;
 590                 w0->result = result;
 591                 w0++;
 592             }
 593             result++;
 594         }
 595         q = q->right;
 596     }
 597     if (w1-v != p->number) { fprintf(stderr, "oh! %d %d\n", (int)(w1-v), p->number); exit(1); }
 598     if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);
 599     qsort(v, w1 - v, sizeof(struct amongvec), compare_amongvec);
 600
 601     /* the following loop is O(n squared) */
 602     for (w0 = w1 - 1; w0 >= v; w0--) {
 603         symbol * b = w0->b;
 604         int size = w0->size;
 605         struct amongvec * w;
 606
 607         for (w = w0 - 1; w >= v; w--) {
 608             if (w->size < size && memcmp(w->b, b, w->size * sizeof(symbol)) == 0) {
 609                 w0->i = w - v;  /* fill in index of longest substring */
 610                 break;
 611             }
 612         }
 613     }
 614     if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);
 615
 616     for (w0 = v; w0 < w1 - 1; w0++)
 617         if (w0->size == (w0 + 1)->size &&
 618             memcmp(w0->b, (w0 + 1)->b, w0->size * sizeof(symbol)) == 0) {
 619             error3(a, (w0 + 1)->p, (w0 + 1)->b);
 620             error3a(a, w0->p);
 621         }
 622
 623     x->literalstring_count = p->number;
 624     x->command_count = result - 1;
 625     p->among = x;
 626
 627     x->substring = substring;
 628     if (substring != 0) substring->among = x;
 629     if (x->command_count != 0 || x->starter != 0) a->amongvar_needed = true;
 630 }
 631
 632 static struct node * read_among(struct analyser * a) {
 633     struct tokeniser * t = a->tokeniser;
 634     struct node * p = new_node(a, c_among);
 635     struct node * p_end = 0;
 636     int previous_token = -1;
 637     struct node * substring = a->substring;
 638
 639     a->substring = 0;
 640     p->number = 0; /* counts the number of literals */
 641     if (!get_token(a, c_bra)) return p;
 642     while (true) {
 643         struct node * q;
 644         int token = read_token(t);
 645         switch (token) {
 646             case c_literalstring:
 647                 q = read_literalstring(a);
 648                 if (read_token(t) == c_name) {
 649                     struct node * r = new_node(a, c_name);
 650                     name_to_node(a, r, 'r');
 651                     q->left = r;
 652                 }
 653                 else t->token_held = true;
 654                 p->number++; break;
 655             case c_bra:
 656                 if (previous_token == c_bra) error(a, e_adjacent_bracketed_in_among);
 657                 q = read_C_list(a); break;
 658             default:
 659                 error(a, e_unexpected_token_in_among);
 660                 previous_token = token;
 661                 continue;
 662             case c_ket:
 663                 if (p->number == 0) error(a, e_empty_among);
 664                 if (t->error_count == 0) make_among(a, p, substring);
 665                 return p;
 666         }
 667         previous_token = token;
 668         if (p_end == 0) p->left = q; else p_end->right = q;
 669         p_end = q;
 670     }
 671 }
 672
 673 static struct node * read_substring(struct analyser * a) {
 674
 675     struct node * p = new_node(a, c_substring);
 676     if (a->substring != 0) error2(a, e_substring_preceded_by_substring, a->substring->line_number);
 677     a->substring = p;
 678     return p;
 679 }
 680
 681 static void check_modifyable(struct analyser * a) {
 682     if (!a->modifyable) error(a, e_not_allowed_inside_reverse);
 683 }
 684
 685 static struct node * read_C(struct analyser * a) {
 686     struct tokeniser * t = a->tokeniser;
 687     int token = read_token(t);
 688     switch (token) {
 689         case c_bra:
 690             return read_C_list(a);
 691         case c_backwards:
 692             {
 693                 int mode = a->mode;
 694                 if (a->mode == m_backward) error(a, e_already_backwards); else a->mode = m_backward;
 695                 {   struct node * p = C_style(a, "C", token);
 696                     a->mode = mode;
 697                     return p;
 698                 }
 699             }
 700         case c_reverse:
 701             {
 702                 int mode = a->mode;
 703                 int modifyable = a->modifyable;
 704                 a->modifyable = false;
 705                 a->mode = mode == m_forward ? m_backward : m_forward;
 706                 {
 707                     struct node * p = C_style(a, "C", token);
 708                     a->mode = mode;
 709                     a->modifyable = modifyable;
 710                     return p;
 711                 }
 712             }
 713         case c_not:
 714         case c_try:
 715         case c_fail:
 716         case c_test:
 717         case c_do:
 718         case c_goto:
 719         case c_gopast:
 720         case c_repeat:
 721             return C_style(a, "C", token);
 722         case c_loop:
 723         case c_atleast:
 724             return C_style(a, "AC", token);
 725         case c_setmark: {
 726             struct node * n = C_style(a, "i", token);
 727             if (n->name) n->name->initialised = true;
 728             return n;
 729         }
 730         case c_tomark:
 731         case c_atmark:
 732         case c_hop:
 733             return C_style(a, "A", token);
 734         case c_delete:
 735             check_modifyable(a);
 736             /* fall through */
 737         case c_next:
 738         case c_tolimit:
 739         case c_atlimit:
 740         case c_leftslice:
 741         case c_rightslice:
 742         case c_true:
 743         case c_false:
 744         case c_debug:
 745             return C_style(a, "", token);
 746         case c_assignto:
 747         case c_sliceto: {
 748             struct node *n;
 749             check_modifyable(a);
 750             n = C_style(a, "s", token);
 751             if (n->name) n->name->initialised = true;
 752             return n;
 753         }
 754         case c_assign:
 755         case c_insert:
 756         case c_attach:
 757         case c_slicefrom: {
 758             struct node *n;
 759             check_modifyable(a);
 760             n = C_style(a, "S", token);
 761             if (n->name) n->name->value_used = true;
 762             return n;
 763         }
 764         case c_setlimit:
 765             return C_style(a, "CfD", token);
 766         case c_set:
 767         case c_unset: {
 768             struct node * n = C_style(a, "b", token);
 769             if (n->name) n->name->initialised = true;
 770             return n;
 771         }
 772         case c_dollar:
 773             get_token(a, c_name);
 774             {
 775                 struct node * p;
 776                 struct name * q = find_name(a);
 777                 int mode = a->mode;
 778                 int modifyable = a->modifyable;
 779                 switch (q ? q->type : t_string)
 780                     /* above line was: switch (q->type) - bug #1 fix 7/2/2003 */
 781                 {
 782                     default:
 783                         error(a, e_not_of_type_string_or_integer);
 784                         /* Handle $foo for unknown 'foo' as string since
 785                          * that's more common and so less likely to cause
 786                          * an error avalanche. */
 787                         /* fall through */
 788                     case t_string:
 789                         /* Assume for now that $ on string both initialises and
 790                          * uses the string variable.  FIXME: Can we do better?
 791                          */
 792                         q->initialised = true;
 793                         q->value_used = true;
 794                         a->mode = m_forward;
 795                         a->modifyable = true;
 796                         p = new_node(a, c_dollar);
 797                         p->left = read_C(a); break;
 798                     case t_integer:
 799                     /*  a->mode = m_integer;  */
 800                         p = new_node(a, read_AE_test(a));
 801                         p->AE = read_AE(a, 0);
 802                         if (q) {
 803                             /* +=, etc don't "initialise" as they only amend an
 804                              * existing value.  Similarly, they don't count as
 805                              * using the value.
 806                              */
 807                             switch (p->type) {
 808                                 case c_mathassign:
 809                                     q->initialised = true;
 810                                     break;
 811                                 case c_eq:
 812                                 case c_ne:
 813                                 case c_gr:
 814                                 case c_ge:
 815                                 case c_ls:
 816                                 case c_le:
 817                                     q->value_used = true;
 818                                     break;
 819                             }
 820                         }
 821                         break;
 822                 }
 823                 if (q) mark_used_in(a, q, p);
 824                 p->name = q;
 825                 a->mode = mode;
 826                 a->modifyable = modifyable;
 827                 return p;
 828             }
 829         case c_name:
 830             {
 831                 struct name * q = find_name(a);
 832                 struct node * p = new_node(a, c_name);
 833                 if (q) {
 834                     mark_used_in(a, q, p);
 835                     switch (q->type) {
 836                         case t_boolean:
 837                             p->type = c_booltest;
 838                             q->value_used = true;
 839                             break;
 840                         case t_integer:
 841                             error(a, e_misplaced); /* integer name misplaced */
 842                             break;
 843                         case t_string:
 844                             q->value_used = true;
 845                             break;
 846                         case t_routine:
 847                         case t_external:
 848                             p->type = c_call;
 849                             check_routine_mode(a, q, a->mode);
 850                             break;
 851                         case t_grouping:
 852                             p->type = c_grouping; break;
 853                     }
 854                 }
 855                 p->name = q;
 856                 return p;
 857             }
 858         case c_non:
 859             {
 860                 struct node * p = new_node(a, token);
 861                 read_token(t);
 862                 if (t->token == c_minus) read_token(t);
 863                 if (!check_token(a, c_name)) { omission_error(a, c_name); return p; }
 864                 name_to_node(a, p, 'g');
 865                 return p;
 866             }
 867         case c_literalstring:
 868             return read_literalstring(a);
 869         case c_among: return read_among(a);
 870         case c_substring: return read_substring(a);
 871         default: error(a, e_unexpected_token); return 0;
 872     }
 873 }
 874
 875 static int next_symbol(symbol * p, symbol * W, int utf8) {
 876     if (utf8) {
 877         int ch;
 878         int j = get_utf8(p, & ch);
 879         W[0] = ch; return j;
 880     } else {
 881         W[0] = p[0]; return 1;
 882     }
 883 }
 884
 885 static symbol * alter_grouping(symbol * p, symbol * q, int style, int utf8) {
 886     int j = 0;
 887     symbol W[1];
 888     int width;
 889     if (style == c_plus) {
 890         while (j < SIZE(q)) {
 891             width = next_symbol(q + j, W, utf8);
 892             p = add_to_b(p, 1, W);
 893             j += width;
 894         }
 895     } else {
 896         while (j < SIZE(q)) {
 897             int i;
 898             width = next_symbol(q + j, W, utf8);
 899             for (i = 0; i < SIZE(p); i++) {
 900                 if (p[i] == W[0]) {
 901                     memmove(p + i, p + i + 1, (SIZE(p) - i - 1) * sizeof(symbol));
 902                     SIZE(p)--;
 903                 }
 904             }
 905             j += width;
 906         }
 907     }
 908     return p;
 909 }
 910
 911 static void read_define_grouping(struct analyser * a, struct name * q) {
 912     struct tokeniser * t = a->tokeniser;
 913     int style = c_plus;
 914     {
 915         NEW(grouping, p);
 916         if (a->groupings == 0) a->groupings = p; else a->groupings_end->next = p;
 917         a->groupings_end = p;
 918         if (q) q->grouping = p;
 919         p->next = 0;
 920         p->name = q;
 921         p->number = q ? q->count : 0;
 922         p->line_number = a->tokeniser->line_number;
 923         p->b = create_b(0);
 924         while (true) {
 925             switch (read_token(t)) {
 926                 case c_name:
 927                     {
 928                         struct name * r = find_name(a);
 929                         if (r) {
 930                             check_name_type(a, r, 'g');
 931                             p->b = alter_grouping(p->b, r->grouping->b, style, false);
 932                         }
 933                     }
 934                     break;
 935                 case c_literalstring:
 936                     p->b = alter_grouping(p->b, t->b, style, (a->encoding == ENC_UTF8));
 937                     break;
 938                 default: error(a, e_unexpected_token); return;
 939             }
 940             switch (read_token(t)) {
 941                 case c_plus:
 942                 case c_minus: style = t->token; break;
 943                 default: goto label0;
 944             }
 945         }
 946     label0:
 947         {
 948             int i;
 949             int max = 0;
 950             int min = 1<<16;
 951             for (i = 0; i < SIZE(p->b); i++) {
 952                 if (p->b[i] > max) max = p->b[i];
 953                 if (p->b[i] < min) min = p->b[i];
 954             }
 955             p->largest_ch = max;
 956             p->smallest_ch = min;
 957             if (min == 1<<16) error(a, e_empty_grouping);
 958         }
 959         t->token_held = true; return;
 960     }
 961 }
 962
 963 static void read_define_routine(struct analyser * a, struct name * q) {
 964     struct node * p = new_node(a, c_define);
 965     a->amongvar_needed = false;
 966     if (q) {
 967         check_name_type(a, q, 'R');
 968         if (q->definition != 0) error(a, e_redefined);
 969         if (q->mode < 0) q->mode = a->mode; else
 970         if (q->mode != a->mode) error2(a, e_declared_as_different_mode, q->mode);
 971     }
 972     p->name = q;
 973     if (a->program == 0) a->program = p; else a->program_end->right = p;
 974     a->program_end = p;
 975     get_token(a, c_as);
 976     p->left = read_C(a);
 977     if (q) q->definition = p->left;
 978
 979     if (a->substring != 0) {
 980         error2(a, e_unresolved_substring, a->substring->line_number);
 981         a->substring = 0;
 982     }
 983     p->amongvar_needed = a->amongvar_needed;
 984 }
 985
 986 static void read_define(struct analyser * a) {
 987     if (get_token(a, c_name)) {
 988         struct name * q = find_name(a);
 989         int type;
 990         if (q) {
 991             type = q->type;
 992         } else {
 993             /* No declaration, so sniff next token - if it is 'as' then parse
 994              * as a routine, otherwise as a grouping.
 995              */
 996             if (read_token(a->tokeniser) == c_as) {
 997                 type = t_routine;
 998             } else {
 999                 type = t_grouping;
1000             }
1001             a->tokeniser->token_held = true;
1002         }
1003
1004         if (type == t_grouping) {
1005             read_define_grouping(a, q);
1006         } else {
1007             read_define_routine(a, q);
1008         }
1009     }
1010 }
1011
1012 static void read_backwardmode(struct analyser * a) {
1013     int mode = a->mode;
1014     a->mode = m_backward;
1015     if (get_token(a, c_bra)) {
1016         read_program_(a, c_ket);
1017         check_token(a, c_ket);
1018     }
1019     a->mode = mode;
1020 }
1021
1022 static void read_program_(struct analyser * a, int terminator) {
1023     struct tokeniser * t = a->tokeniser;
1024     while (true) {
1025         switch (read_token(t)) {
1026             case c_strings:     read_names(a, t_string); break;
1027             case c_booleans:    read_names(a, t_boolean); break;
1028             case c_integers:    read_names(a, t_integer); break;
1029             case c_routines:    read_names(a, t_routine); break;
1030             case c_externals:   read_names(a, t_external); break;
1031             case c_groupings:   read_names(a, t_grouping); break;
1032             case c_define:      read_define(a); break;
1033             case c_backwardmode:read_backwardmode(a); break;
1034             case c_ket:
1035                 if (terminator == c_ket) return;
1036                 /* fall through */
1037             default:
1038                 error(a, e_unexpected_token); break;
1039             case -1:
1040                 if (terminator >= 0) omission_error(a, c_ket);
1041                 return;
1042         }
1043     }
1044 }
1045
1046 extern void read_program(struct analyser * a) {
1047     read_program_(a, -1);
1048     {
1049         struct name * q = a->names;
1050         while (q) {
1051             switch (q->type) {
1052                 case t_external: case t_routine:
1053                     if (q->used && q->definition == 0) error4(a, q);
1054                     break;
1055                 case t_grouping:
1056                     if (q->used && q->grouping == 0) error4(a, q);
1057                     break;
1058             }
1059             q = q->next;
1060         }
1061     }
1062
1063     if (a->tokeniser->error_count == 0) {
1064         struct name * q = a->names;
1065         while (q) {
1066             if (!q->referenced) {
1067                 fprintf(stderr, "%s:%d: warning: %s '",
1068                         a->tokeniser->file,
1069                         q->declaration_line_number,
1070                         name_of_name_type(q->type));
1071                 report_b(stderr, q->b);
1072                 if (q->type == t_routine ||
1073                     q->type == t_external ||
1074                     q->type == t_grouping) {
1075                     fprintf(stderr, "' declared but not defined\n");
1076                 } else {
1077                     fprintf(stderr, "' defined but not used\n");
1078                 }
1079             } else if (q->type == t_routine || q->type == t_grouping) {
1080                 if (!q->used) {
1081                     int line_num;
1082                     if (q->type == t_routine) {
1083                         line_num = q->definition->line_number;
1084                     } else {
1085                         line_num = q->grouping->line_number;
1086                     }
1087                     fprintf(stderr, "%s:%d: warning: %s '",
1088                             a->tokeniser->file,
1089                             line_num,
1090                             name_of_name_type(q->type));
1091                     report_b(stderr, q->b);
1092                     fprintf(stderr, "' defined but not used\n");
1093                 }
1094             } else if (q->type == t_external) {
1095                 /* Unused is OK. */
1096             } else if (!q->initialised) {
1097                 count_error(a);
1098                 fprintf(stderr, "%s:%d: warning: %s '",
1099                         a->tokeniser->file,
1100                         q->declaration_line_number,
1101                         name_of_name_type(q->type));
1102                 report_b(stderr, q->b);
1103                 fprintf(stderr, "' is never initialised\n");
1104             } else if (!q->value_used) {
1105                 count_error(a);
1106                 fprintf(stderr, "%s:%d: warning: %s '",
1107                         a->tokeniser->file,
1108                         q->declaration_line_number,
1109                         name_of_name_type(q->type));
1110                 report_b(stderr, q->b);
1111                 fprintf(stderr, "' is set but never used\n");
1112             }
1113             q = q->next;
1114         }
1115     }
1116 }
1117
1118 extern struct analyser * create_analyser(struct tokeniser * t) {
1119     NEW(analyser, a);
1120     a->tokeniser = t;
1121     a->nodes = 0;
1122     a->names = 0;
1123     a->literalstrings = 0;
1124     a->program = 0;
1125     a->amongs = 0;
1126     a->among_count = 0;
1127     a->groupings = 0;
1128     a->mode = m_forward;
1129     a->modifyable = true;
1130     { int i; for (i = 0; i < t_size; i++) a->name_count[i] = 0; }
1131     a->substring = 0;
1132     a->int_limits_used = false;
1133     return a;
1134 }
1135
1136 extern void close_analyser(struct analyser * a) {
1137     {
1138         struct node * q = a->nodes;
1139         while (q) {
1140             struct node * q_next = q->next;
1141             FREE(q);
1142             q = q_next;
1143         }
1144     }
1145     {
1146         struct name * q = a->names;
1147         while (q) {
1148             struct name * q_next = q->next;
1149             lose_b(q->b); FREE(q);
1150             q = q_next;
1151         }
1152     }
1153     {
1154         struct literalstring * q = a->literalstrings;
1155         while (q) {
1156             struct literalstring * q_next = q->next;
1157             lose_b(q->b); FREE(q);
1158             q = q_next;
1159         }
1160     }
1161     {
1162         struct among * q = a->amongs;
1163         while (q) {
1164             struct among * q_next = q->next;
1165             FREE(q->b); FREE(q);
1166             q = q_next;
1167         }
1168     }
1169     {
1170         struct grouping * q = a->groupings;
1171         while (q) {
1172             struct grouping * q_next = q->next;
1173             lose_b(q->b); FREE(q);
1174             q = q_next;
1175         }
1176     }
1177     FREE(a);
1178 }
1179