xapian-core/languages/compiler/analyser.c

   1
   2 #include <stdio.h>   /* printf etc */
   3 #include <stdlib.h>  /* exit */
   4 #include <string.h>  /* memmove */
   5 #include "header.h"
   6
   7 typedef enum {
   8     e_token_omitted = 0,
   9     e_unexpected_token = 1,
  10     e_string_omitted = 2,
  11     e_unexpected_token_in_among = 3,
  12     /* For codes above here, report "after " t->previous_token after the error. */
  13     e_unresolved_substring = 14,
  14     e_not_allowed_inside_reverse = 15,
  15     e_empty_grouping = 16,
  16     e_already_backwards = 17,
  17     e_empty_among = 18,
  18     e_adjacent_bracketed_in_among = 19,
  19     e_substring_preceded_by_substring = 20,
  20     /* For codes below here, tokeniser->b is printed before the error. */
  21     e_redeclared = 30,
  22     e_undeclared = 31,
  23     e_declared_as_different_mode = 32,
  24     e_not_of_type_x = 33,
  25     e_not_of_type_string_or_integer = 34,
  26     e_misplaced = 35,
  27     e_redefined = 36,
  28     e_misused = 37
  29 } error_code;
  30
  31 /* recursive usage: */
  32
  33 static void read_program_(struct analyser * a, int terminator);
  34 static struct node * read_C(struct analyser * a);
  35 static struct node * C_style(struct analyser * a, const char * s, int token);
  36
  37
  38 static void print_node_(struct node * p, int n, const char * s) {
  39
  40     int i;
  41     for (i = 0; i < n; i++) fputs(i == n - 1 ? s : "  ", stdout);
  42     printf("%s ", name_of_token(p->type));
  43     if (p->name) report_b(stdout, p->name->b);
  44     if (p->literalstring) {
  45         printf("'");
  46         report_b(stdout, p->literalstring);
  47         printf("'");
  48     }
  49     printf("\n");
  50     if (p->AE) print_node_(p->AE, n+1, "# ");
  51     if (p->left) print_node_(p->left, n+1, "  ");
  52     if (p->right) print_node_(p->right, n, "  ");
  53     if (p->aux) print_node_(p->aux, n+1, "@ ");
  54 }
  55
  56 extern void print_program(struct analyser * a) {
  57     print_node_(a->program, 0, "  ");
  58 }
  59
  60 static struct node * new_node(struct analyser * a, int type) {
  61     NEW(node, p);
  62     p->next = a->nodes; a->nodes = p;
  63     p->left = 0;
  64     p->right = 0;
  65     p->aux = 0;
  66     p->AE = 0;
  67     p->name = 0;
  68     p->literalstring = 0;
  69     p->mode = a->mode;
  70     p->line_number = a->tokeniser->line_number;
  71     p->type = type;
  72     return p;
  73 }
  74
  75 static const char * name_of_mode(int n) {
  76     switch (n) {
  77         case m_backward: return "string backward";
  78         case m_forward:  return "string forward";
  79     /*  case m_integer:  return "integer";  */
  80     }
  81     fprintf(stderr, "Invalid mode %d in name_of_mode()\n", n);
  82     exit(1);
  83 }
  84
  85 static const char * name_of_type(int n) {
  86     switch (n) {
  87         case 's': return "string";
  88         case 'i': return "integer";
  89         case 'r': return "routine";
  90         case 'R': return "routine or grouping";
  91         case 'g': return "grouping";
  92     }
  93     fprintf(stderr, "Invalid type %d in name_of_type()\n", n);
  94     exit(1);
  95 }
  96
  97 static const char * name_of_name_type(int code) {
  98     switch (code) {
  99         case t_string: return "string";
 100         case t_boolean: return "boolean";
 101         case t_integer: return "integer";
 102         case t_routine: return "routine";
 103         case t_external: return "external";
 104         case t_grouping: return "grouping";
 105     }
 106     fprintf(stderr, "Invalid type code %d in name_of_name_type()\n", code);
 107     exit(1);
 108 }
 109
 110 static void count_error(struct analyser * a) {
 111     struct tokeniser * t = a->tokeniser;
 112     if (t->error_count >= 20) { fprintf(stderr, "... etc\n"); exit(1); }
 113     t->error_count++;
 114 }
 115
 116 static void error2(struct analyser * a, error_code n, int x) {
 117     struct tokeniser * t = a->tokeniser;
 118     count_error(a);
 119     fprintf(stderr, "%s:%d: ", t->file, t->line_number);
 120     if ((int)n >= (int)e_redeclared) report_b(stderr, t->b);
 121     switch (n) {
 122         case e_token_omitted:
 123             fprintf(stderr, "%s omitted", name_of_token(t->omission)); break;
 124         case e_unexpected_token_in_among:
 125             fprintf(stderr, "in among(...), ");
 126             /* fall through */
 127         case e_unexpected_token:
 128             fprintf(stderr, "unexpected %s", name_of_token(t->token));
 129             if (t->token == c_number) fprintf(stderr, " %d", t->number);
 130             if (t->token == c_name) {
 131                 fprintf(stderr, " ");
 132                 report_b(stderr, t->b);
 133             } break;
 134         case e_string_omitted:
 135             fprintf(stderr, "string omitted"); break;
 136
 137         case e_unresolved_substring:
 138             fprintf(stderr, "unresolved substring on line %d", x); break;
 139         case e_not_allowed_inside_reverse:
 140             fprintf(stderr, "%s not allowed inside reverse(...)", name_of_token(t->token)); break;
 141         case e_empty_grouping:
 142             fprintf(stderr, "empty grouping"); break;
 143         case e_already_backwards:
 144             fprintf(stderr, "backwards used when already in this mode"); break;
 145         case e_empty_among:
 146             fprintf(stderr, "empty among(...)"); break;
 147         case e_adjacent_bracketed_in_among:
 148             fprintf(stderr, "two adjacent bracketed expressions in among(...)"); break;
 149         case e_substring_preceded_by_substring:
 150             fprintf(stderr, "substring preceded by another substring on line %d", x); break;
 151
 152         case e_redeclared:
 153             fprintf(stderr, " re-declared"); break;
 154         case e_undeclared:
 155             fprintf(stderr, " undeclared"); break;
 156         case e_declared_as_different_mode:
 157             fprintf(stderr, " declared as %s mode; used as %s mode",
 158                             name_of_mode(a->mode), name_of_mode(x)); break;
 159         case e_not_of_type_x:
 160             fprintf(stderr, " not of type %s", name_of_type(x)); break;
 161         case e_not_of_type_string_or_integer:
 162             fprintf(stderr, " not of type string or integer"); break;
 163         case e_misplaced:
 164             fprintf(stderr, " misplaced"); break;
 165         case e_redefined:
 166             fprintf(stderr, " redefined"); break;
 167         case e_misused:
 168             fprintf(stderr, " mis-used as %s mode",
 169                             name_of_mode(x)); break;
 170     }
 171     if ((int)n < (int)e_unresolved_substring && t->previous_token > 0)
 172         fprintf(stderr, " after %s", name_of_token(t->previous_token));
 173     fprintf(stderr, "\n");
 174 }
 175
 176 static void error(struct analyser * a, error_code n) { error2(a, n, 0); }
 177
 178 static void error3(struct analyser * a, struct node * p, symbol * b) {
 179     count_error(a);
 180     fprintf(stderr, "%s:%d: among(...) has repeated string '", a->tokeniser->file, p->line_number);
 181     report_b(stderr, b);
 182     fprintf(stderr, "'\n");
 183 }
 184
 185 static void error3a(struct analyser * a, struct node * p) {
 186     count_error(a);
 187     fprintf(stderr, "%s:%d: previously seen here\n", a->tokeniser->file, p->line_number);
 188 }
 189
 190 static void error4(struct analyser * a, struct name * q) {
 191     count_error(a);
 192     fprintf(stderr, "%s:%d: ", a->tokeniser->file, q->used->line_number);
 193     report_b(stderr, q->b);
 194     fprintf(stderr, " undefined\n");
 195 }
 196
 197 static void omission_error(struct analyser * a, int n) {
 198     a->tokeniser->omission = n;
 199     error(a, e_token_omitted);
 200 }
 201
 202 static int check_token(struct analyser * a, int code) {
 203     struct tokeniser * t = a->tokeniser;
 204     if (t->token != code) { omission_error(a, code); return false; }
 205     return true;
 206 }
 207
 208 static int get_token(struct analyser * a, int code) {
 209     struct tokeniser * t = a->tokeniser;
 210     read_token(t);
 211     {
 212         int x = check_token(a, code);
 213         if (!x) t->token_held = true;
 214         return x;
 215     }
 216 }
 217
 218 static struct name * look_for_name(struct analyser * a) {
 219     symbol * q = a->tokeniser->b;
 220     struct name * p;
 221     for (p = a->names; p; p = p->next) {
 222         symbol * b = p->b;
 223         int n = SIZE(b);
 224         if (n == SIZE(q) && memcmp(q, b, n * sizeof(symbol)) == 0) {
 225             p->referenced = true;
 226             return p;
 227         }
 228     }
 229     return 0;
 230 }
 231
 232 static struct name * find_name(struct analyser * a) {
 233     struct name * p = look_for_name(a);
 234     if (p == 0) error(a, e_undeclared);
 235     return p;
 236 }
 237
 238 static void check_routine_mode(struct analyser * a, struct name * p, int mode) {
 239     if (p->mode < 0) p->mode = mode; else
 240     if (p->mode != mode) error2(a, e_misused, mode);
 241 }
 242
 243 static void check_name_type(struct analyser * a, struct name * p, int type) {
 244     switch (type) {
 245         case 's':
 246             if (p->type == t_string) return;
 247             break;
 248         case 'i':
 249             if (p->type == t_integer) return;
 250             break;
 251         case 'b':
 252             if (p->type == t_boolean) return;
 253             break;
 254         case 'R':
 255             if (p->type == t_grouping) return;
 256             /* FALLTHRU */
 257         case 'r':
 258             if (p->type == t_routine || p->type == t_external) return;
 259             break;
 260         case 'g':
 261             if (p->type == t_grouping) return;
 262             break;
 263     }
 264     error2(a, e_not_of_type_x, type);
 265 }
 266
 267 static void read_names(struct analyser * a, int type) {
 268     struct tokeniser * t = a->tokeniser;
 269     if (!get_token(a, c_bra)) return;
 270     while (true) {
 271         int token = read_token(t);
 272         switch (token) {
 273             case c_len: {
 274                 /* Context-sensitive token - once declared as a name, it loses
 275                  * its special meaning, for compatibility with older versions
 276                  * of snowball.
 277                  */
 278                 static const symbol c_len_lit[] = {
 279                     'l', 'e', 'n'
 280                 };
 281                 MOVE_TO_B(t->b, c_len_lit);
 282                 goto handle_as_name;
 283             }
 284             case c_lenof: {
 285                 /* Context-sensitive token - once declared as a name, it loses
 286                  * its special meaning, for compatibility with older versions
 287                  * of snowball.
 288                  */
 289                 static const symbol c_lenof_lit[] = {
 290                     'l', 'e', 'n', 'o', 'f'
 291                 };
 292                 MOVE_TO_B(t->b, c_lenof_lit);
 293                 goto handle_as_name;
 294             }
 295             case c_name:
 296 handle_as_name:
 297                 if (look_for_name(a) != 0) error(a, e_redeclared); else {
 298                     NEW(name, p);
 299                     p->b = copy_b(t->b);
 300                     p->type = type;
 301                     p->mode = -1; /* routines, externals */
 302                     p->count = a->name_count[type];
 303                     p->referenced = false;
 304                     p->used_in_among = false;
 305                     p->used = 0;
 306                     p->local_to = 0;
 307                     p->grouping = 0;
 308                     p->definition = 0;
 309                     p->declaration_line_number = t->line_number;
 310                     a->name_count[type]++;
 311                     p->next = a->names;
 312                     a->names = p;
 313                     if (token != c_name) {
 314                         disable_token(t, token);
 315                     }
 316                 }
 317                 break;
 318             default:
 319                 if (!check_token(a, c_ket)) t->token_held = true;
 320                 return;
 321         }
 322     }
 323 }
 324
 325 static symbol * new_literalstring(struct analyser * a) {
 326     NEW(literalstring, p);
 327     p->b = copy_b(a->tokeniser->b);
 328     p->next = a->literalstrings;
 329     a->literalstrings = p;
 330     return p->b;
 331 }
 332
 333 static int read_AE_test(struct analyser * a) {
 334
 335     struct tokeniser * t = a->tokeniser;
 336     switch (read_token(t)) {
 337         case c_assign: return c_mathassign;
 338         case c_plusassign:
 339         case c_minusassign:
 340         case c_multiplyassign:
 341         case c_divideassign:
 342         case c_eq:
 343         case c_ne:
 344         case c_gr:
 345         case c_ge:
 346         case c_ls:
 347         case c_le: return t->token;
 348         default: error(a, e_unexpected_token); t->token_held = true; return c_eq;
 349     }
 350 }
 351
 352 static int binding(int t) {
 353     switch (t) {
 354         case c_plus: case c_minus: return 1;
 355         case c_multiply: case c_divide: return 2;
 356         default: return -2;
 357     }
 358 }
 359
 360 static void mark_used_in(struct analyser * a, struct name * q, struct node * p) {
 361     if (!q->used) {
 362         q->used = p;
 363         q->local_to = a->program_end->name;
 364     } else if (q->local_to) {
 365         if (q->local_to != a->program_end->name) {
 366             /* Used in more than one routine/external. */
 367             q->local_to = NULL;
 368         }
 369     }
 370 }
 371
 372 static void name_to_node(struct analyser * a, struct node * p, int type) {
 373     struct name * q = find_name(a);
 374     if (q) {
 375         check_name_type(a, q, type);
 376         mark_used_in(a, q, p);
 377     }
 378     p->name = q;
 379 }
 380
 381 static struct node * read_AE(struct analyser * a, int B) {
 382     struct tokeniser * t = a->tokeniser;
 383     struct node * p;
 384     struct node * q;
 385     switch (read_token(t)) {
 386         case c_minus: /* monadic */
 387             q = read_AE(a, 100);
 388             if (q->type == c_neg) {
 389                 /* Optimise away double negation, which avoids generators
 390                  * having to worry about generating "--" (decrement operator
 391                  * in many languages).
 392                  */
 393                 p = q->right;
 394                 /* Don't free q, it's in the linked list a->nodes. */
 395                 break;
 396             }
 397             p = new_node(a, c_neg);
 398             p->right = q;
 399             break;
 400         case c_bra:
 401             p = read_AE(a, 0);
 402             get_token(a, c_ket);
 403             break;
 404         case c_name:
 405             p = new_node(a, c_name);
 406             name_to_node(a, p, 'i');
 407             break;
 408         case c_maxint:
 409         case c_minint:
 410             a->int_limits_used = true;
 411             /* fall through */
 412         case c_cursor:
 413         case c_limit:
 414         case c_len:
 415         case c_size:
 416             p = new_node(a, t->token);
 417             break;
 418         case c_number:
 419             p = new_node(a, c_number);
 420             p->number = t->number;
 421             break;
 422         case c_lenof:
 423         case c_sizeof:
 424             p = C_style(a, "s", t->token);
 425             break;
 426         default:
 427             error(a, e_unexpected_token);
 428             t->token_held = true;
 429             return 0;
 430     }
 431     while (true) {
 432         int token = read_token(t);
 433         int b = binding(token);
 434         if (binding(token) <= B) {
 435             t->token_held = true;
 436             return p;
 437         }
 438         q = new_node(a, token);
 439         q->left = p;
 440         q->right = read_AE(a, b);
 441         p = q;
 442     }
 443 }
 444
 445 static struct node * read_C_connection(struct analyser * a, struct node * q, int op) {
 446     struct tokeniser * t = a->tokeniser;
 447     struct node * p = new_node(a, op);
 448     struct node * p_end = q;
 449     p->left = q;
 450     do {
 451         q = read_C(a);
 452         p_end->right = q; p_end = q;
 453     } while (read_token(t) == op);
 454     t->token_held = true;
 455     return p;
 456 }
 457
 458 static struct node * read_C_list(struct analyser * a) {
 459     struct tokeniser * t = a->tokeniser;
 460     struct node * p = new_node(a, c_bra);
 461     struct node * p_end = 0;
 462     while (true) {
 463         int token = read_token(t);
 464         if (token == c_ket) return p;
 465         if (token < 0) { omission_error(a, c_ket); return p; }
 466         t->token_held = true;
 467         {
 468             struct node * q = read_C(a);
 469             while (true) {
 470                 token = read_token(t);
 471                 if (token != c_and && token != c_or) {
 472                     t->token_held = true;
 473                     break;
 474                 }
 475                 q = read_C_connection(a, q, token);
 476             }
 477             if (p_end == 0) p->left = q; else p_end->right = q;
 478             p_end = q;
 479         }
 480     }
 481 }
 482
 483 static struct node * C_style(struct analyser * a, const char * s, int token) {
 484     int i;
 485     struct node * p = new_node(a, token);
 486     for (i = 0; s[i] != 0; i++) switch (s[i]) {
 487         case 'C':
 488             p->left = read_C(a); continue;
 489         case 'D':
 490             p->aux = read_C(a); continue;
 491         case 'A':
 492             p->AE = read_AE(a, 0); continue;
 493         case 'f':
 494             get_token(a, c_for); continue;
 495         case 'S':
 496             {
 497                 int str_token = read_token(a->tokeniser);
 498                 if (str_token == c_name) name_to_node(a, p, 's'); else
 499                 if (str_token == c_literalstring) p->literalstring = new_literalstring(a);
 500                 else error(a, e_string_omitted);
 501             }
 502             continue;
 503         case 'b':
 504         case 's':
 505         case 'i':
 506             if (get_token(a, c_name)) name_to_node(a, p, s[i]);
 507             continue;
 508     }
 509     return p;
 510 }
 511
 512 static struct node * read_literalstring(struct analyser * a) {
 513     struct node * p = new_node(a, c_literalstring);
 514     p->literalstring = new_literalstring(a);
 515     return p;
 516 }
 517
 518 static void reverse_b(symbol * b) {
 519     int i = 0; int j = SIZE(b) - 1;
 520     while (i < j) {
 521         int ch1 = b[i]; int ch2 = b[j];
 522         b[i++] = ch2; b[j--] = ch1;
 523     }
 524 }
 525
 526 static int compare_amongvec(const void *pv, const void *qv) {
 527     const struct amongvec * p = (const struct amongvec*)pv;
 528     const struct amongvec * q = (const struct amongvec*)qv;
 529     symbol * b_p = p->b; int p_size = p->size;
 530     symbol * b_q = q->b; int q_size = q->size;
 531     int smaller_size = p_size < q_size ? p_size : q_size;
 532     int i;
 533     for (i = 0; i < smaller_size; i++)
 534         if (b_p[i] != b_q[i]) return b_p[i] - b_q[i];
 535     if (p_size - q_size)
 536         return p_size - q_size;
 537     return p->p->line_number - q->p->line_number;
 538 }
 539
 540 static void make_among(struct analyser * a, struct node * p, struct node * substring) {
 541
 542     NEW(among, x);
 543     NEWVEC(amongvec, v, p->number);
 544     struct node * q = p->left;
 545     struct amongvec * w0 = v;
 546     struct amongvec * w1 = v;
 547     int result = 1;
 548
 549     int direction = substring != 0 ? substring->mode : p->mode;
 550     int backward = direction == m_backward;
 551
 552     if (a->amongs == 0) a->amongs = x; else a->amongs_end->next = x;
 553     a->amongs_end = x;
 554     x->next = 0;
 555     x->b = v;
 556     x->number = a->among_count++;
 557     x->function_count = 0;
 558     x->starter = 0;
 559
 560     if (q->type == c_bra) { x->starter = q; q = q->right; }
 561
 562     while (q) {
 563         if (q->type == c_literalstring) {
 564             symbol * b = q->literalstring;
 565             w1->b = b;           /* pointer to case string */
 566             w1->p = q;           /* pointer to corresponding node */
 567             w1->size = SIZE(b);  /* number of characters in string */
 568             w1->i = -1;          /* index of longest substring */
 569             w1->result = -1;     /* number of corresponding case expression */
 570             if (q->left) {
 571                 struct name * function = q->left->name;
 572                 w1->function = function;
 573                 function->used_in_among = true;
 574                 check_routine_mode(a, function, direction);
 575                 x->function_count++;
 576             } else {
 577                 w1->function = 0;
 578             }
 579             w1++;
 580         }
 581         else
 582         if (q->left == 0)  /* empty command: () */
 583             w0 = w1;
 584         else {
 585             while (w0 != w1) {
 586                 w0->p = q;
 587                 w0->result = result;
 588                 w0++;
 589             }
 590             result++;
 591         }
 592         q = q->right;
 593     }
 594     if (w1-v != p->number) { fprintf(stderr, "oh! %d %d\n", (int)(w1-v), p->number); exit(1); }
 595     if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);
 596     qsort(v, w1 - v, sizeof(struct amongvec), compare_amongvec);
 597
 598     /* the following loop is O(n squared) */
 599     for (w0 = w1 - 1; w0 >= v; w0--) {
 600         symbol * b = w0->b;
 601         int size = w0->size;
 602         struct amongvec * w;
 603
 604         for (w = w0 - 1; w >= v; w--) {
 605             if (w->size < size && memcmp(w->b, b, w->size * sizeof(symbol)) == 0) {
 606                 w0->i = w - v;  /* fill in index of longest substring */
 607                 break;
 608             }
 609         }
 610     }
 611     if (backward) for (w0 = v; w0 < w1; w0++) reverse_b(w0->b);
 612
 613     for (w0 = v; w0 < w1 - 1; w0++)
 614         if (w0->size == (w0 + 1)->size &&
 615             memcmp(w0->b, (w0 + 1)->b, w0->size * sizeof(symbol)) == 0) {
 616             error3(a, (w0 + 1)->p, (w0 + 1)->b);
 617             error3a(a, w0->p);
 618         }
 619
 620     x->literalstring_count = p->number;
 621     x->command_count = result - 1;
 622     p->among = x;
 623
 624     x->substring = substring;
 625     if (substring != 0) substring->among = x;
 626     if (x->command_count != 0 || x->starter != 0) a->amongvar_needed = true;
 627 }
 628
 629 static struct node * read_among(struct analyser * a) {
 630     struct tokeniser * t = a->tokeniser;
 631     struct node * p = new_node(a, c_among);
 632     struct node * p_end = 0;
 633     int previous_token = -1;
 634     struct node * substring = a->substring;
 635
 636     a->substring = 0;
 637     p->number = 0; /* counts the number of literals */
 638     if (!get_token(a, c_bra)) return p;
 639     while (true) {
 640         struct node * q;
 641         int token = read_token(t);
 642         switch (token) {
 643             case c_literalstring:
 644                 q = read_literalstring(a);
 645                 if (read_token(t) == c_name) {
 646                     struct node * r = new_node(a, c_name);
 647                     name_to_node(a, r, 'r');
 648                     q->left = r;
 649                 }
 650                 else t->token_held = true;
 651                 p->number++; break;
 652             case c_bra:
 653                 if (previous_token == c_bra) error(a, e_adjacent_bracketed_in_among);
 654                 q = read_C_list(a); break;
 655             default:
 656                 error(a, e_unexpected_token_in_among);
 657                 previous_token = token;
 658                 continue;
 659             case c_ket:
 660                 if (p->number == 0) error(a, e_empty_among);
 661                 if (t->error_count == 0) make_among(a, p, substring);
 662                 return p;
 663         }
 664         previous_token = token;
 665         if (p_end == 0) p->left = q; else p_end->right = q;
 666         p_end = q;
 667     }
 668 }
 669
 670 static struct node * read_substring(struct analyser * a) {
 671
 672     struct node * p = new_node(a, c_substring);
 673     if (a->substring != 0) error2(a, e_substring_preceded_by_substring, a->substring->line_number);
 674     a->substring = p;
 675     return p;
 676 }
 677
 678 static void check_modifyable(struct analyser * a) {
 679     if (!a->modifyable) error(a, e_not_allowed_inside_reverse);
 680 }
 681
 682 static struct node * read_C(struct analyser * a) {
 683     struct tokeniser * t = a->tokeniser;
 684     int token = read_token(t);
 685     switch (token) {
 686         case c_bra:
 687             return read_C_list(a);
 688         case c_backwards:
 689             {
 690                 int mode = a->mode;
 691                 if (a->mode == m_backward) error(a, e_already_backwards); else a->mode = m_backward;
 692                 {   struct node * p = C_style(a, "C", token);
 693                     a->mode = mode;
 694                     return p;
 695                 }
 696             }
 697         case c_reverse:
 698             {
 699                 int mode = a->mode;
 700                 int modifyable = a->modifyable;
 701                 a->modifyable = false;
 702                 a->mode = mode == m_forward ? m_backward : m_forward;
 703                 {
 704                     struct node * p = C_style(a, "C", token);
 705                     a->mode = mode;
 706                     a->modifyable = modifyable;
 707                     return p;
 708                 }
 709             }
 710         case c_not:
 711         case c_try:
 712         case c_fail:
 713         case c_test:
 714         case c_do:
 715         case c_goto:
 716         case c_gopast:
 717         case c_repeat:
 718             return C_style(a, "C", token);
 719         case c_loop:
 720         case c_atleast:
 721             return C_style(a, "AC", token);
 722         case c_setmark:
 723             return C_style(a, "i", token);
 724         case c_tomark:
 725         case c_atmark:
 726         case c_hop:
 727             return C_style(a, "A", token);
 728         case c_delete:
 729             check_modifyable(a);
 730             /* fall through */
 731         case c_next:
 732         case c_tolimit:
 733         case c_atlimit:
 734         case c_leftslice:
 735         case c_rightslice:
 736         case c_true:
 737         case c_false:
 738         case c_debug:
 739             return C_style(a, "", token);
 740         case c_assignto:
 741         case c_sliceto:
 742             check_modifyable(a);
 743             return C_style(a, "s", token);
 744         case c_assign:
 745         case c_insert:
 746         case c_attach:
 747         case c_slicefrom:
 748             check_modifyable(a);
 749             return C_style(a, "S", token);
 750         case c_setlimit:
 751             return C_style(a, "CfD", token);
 752         case c_set:
 753         case c_unset:
 754             return C_style(a, "b", token);
 755         case c_dollar:
 756             get_token(a, c_name);
 757             {
 758                 struct node * p;
 759                 struct name * q = find_name(a);
 760                 int mode = a->mode;
 761                 int modifyable = a->modifyable;
 762                 switch (q ? q->type : t_string)
 763                     /* above line was: switch (q->type) - bug #1 fix 7/2/2003 */
 764                 {
 765                     default:
 766                         error(a, e_not_of_type_string_or_integer);
 767                         /* Handle $foo for unknown 'foo' as string since
 768                          * that's more common and so less likely to cause
 769                          * an error avalanche. */
 770                         /* fall through */
 771                     case t_string:
 772                         a->mode = m_forward;
 773                         a->modifyable = true;
 774                         p = new_node(a, c_dollar);
 775                         p->left = read_C(a); break;
 776                     case t_integer:
 777                     /*  a->mode = m_integer;  */
 778                         p = new_node(a, read_AE_test(a));
 779                         p->AE = read_AE(a, 0); break;
 780                 }
 781                 if (q) mark_used_in(a, q, p);
 782                 p->name = q;
 783                 a->mode = mode;
 784                 a->modifyable = modifyable;
 785                 return p;
 786             }
 787         case c_name:
 788             {
 789                 struct name * q = find_name(a);
 790                 struct node * p = new_node(a, c_name);
 791                 if (q) {
 792                     mark_used_in(a, q, p);
 793                     switch (q->type) {
 794                         case t_boolean:
 795                             p->type = c_booltest; break;
 796                         case t_integer:
 797                             error(a, e_misplaced); /* integer name misplaced */
 798                         case t_string:
 799                             break;
 800                         case t_routine:
 801                         case t_external:
 802                             p->type = c_call;
 803                             check_routine_mode(a, q, a->mode);
 804                             break;
 805                         case t_grouping:
 806                             p->type = c_grouping; break;
 807                     }
 808                 }
 809                 p->name = q;
 810                 return p;
 811             }
 812         case c_non:
 813             {
 814                 struct node * p = new_node(a, token);
 815                 read_token(t);
 816                 if (t->token == c_minus) read_token(t);
 817                 if (!check_token(a, c_name)) { omission_error(a, c_name); return p; }
 818                 name_to_node(a, p, 'g');
 819                 return p;
 820             }
 821         case c_literalstring:
 822             return read_literalstring(a);
 823         case c_among: return read_among(a);
 824         case c_substring: return read_substring(a);
 825         default: error(a, e_unexpected_token); return 0;
 826     }
 827 }
 828
 829 static int next_symbol(symbol * p, symbol * W, int utf8) {
 830     if (utf8) {
 831         int ch;
 832         int j = get_utf8(p, & ch);
 833         W[0] = ch; return j;
 834     } else {
 835         W[0] = p[0]; return 1;
 836     }
 837 }
 838
 839 static symbol * alter_grouping(symbol * p, symbol * q, int style, int utf8) {
 840     int j = 0;
 841     symbol W[1];
 842     int width;
 843     if (style == c_plus) {
 844         while (j < SIZE(q)) {
 845             width = next_symbol(q + j, W, utf8);
 846             p = add_to_b(p, 1, W);
 847             j += width;
 848         }
 849     } else {
 850         while (j < SIZE(q)) {
 851             int i;
 852             width = next_symbol(q + j, W, utf8);
 853             for (i = 0; i < SIZE(p); i++) {
 854                 if (p[i] == W[0]) {
 855                     memmove(p + i, p + i + 1, (SIZE(p) - i - 1) * sizeof(symbol));
 856                     SIZE(p)--;
 857                 }
 858             }
 859             j += width;
 860         }
 861     }
 862     return p;
 863 }
 864
 865 static void read_define_grouping(struct analyser * a, struct name * q) {
 866     struct tokeniser * t = a->tokeniser;
 867     int style = c_plus;
 868     {
 869         NEW(grouping, p);
 870         if (a->groupings == 0) a->groupings = p; else a->groupings_end->next = p;
 871         a->groupings_end = p;
 872         if (q) q->grouping = p;
 873         p->next = 0;
 874         p->name = q;
 875         p->number = q ? q->count : 0;
 876         p->line_number = a->tokeniser->line_number;
 877         p->b = create_b(0);
 878         while (true) {
 879             switch (read_token(t)) {
 880                 case c_name:
 881                     {
 882                         struct name * r = find_name(a);
 883                         if (r) {
 884                             check_name_type(a, r, 'g');
 885                             p->b = alter_grouping(p->b, r->grouping->b, style, false);
 886                         }
 887                     }
 888                     break;
 889                 case c_literalstring:
 890                     p->b = alter_grouping(p->b, t->b, style, (a->encoding == ENC_UTF8));
 891                     break;
 892                 default: error(a, e_unexpected_token); return;
 893             }
 894             switch (read_token(t)) {
 895                 case c_plus:
 896                 case c_minus: style = t->token; break;
 897                 default: goto label0;
 898             }
 899         }
 900     label0:
 901         {
 902             int i;
 903             int max = 0;
 904             int min = 1<<16;
 905             for (i = 0; i < SIZE(p->b); i++) {
 906                 if (p->b[i] > max) max = p->b[i];
 907                 if (p->b[i] < min) min = p->b[i];
 908             }
 909             p->largest_ch = max;
 910             p->smallest_ch = min;
 911             if (min == 1<<16) error(a, e_empty_grouping);
 912         }
 913         t->token_held = true; return;
 914     }
 915 }
 916
 917 static void read_define_routine(struct analyser * a, struct name * q) {
 918     struct node * p = new_node(a, c_define);
 919     a->amongvar_needed = false;
 920     if (q) {
 921         check_name_type(a, q, 'R');
 922         if (q->definition != 0) error(a, e_redefined);
 923         if (q->mode < 0) q->mode = a->mode; else
 924         if (q->mode != a->mode) error2(a, e_declared_as_different_mode, q->mode);
 925     }
 926     p->name = q;
 927     if (a->program == 0) a->program = p; else a->program_end->right = p;
 928     a->program_end = p;
 929     get_token(a, c_as);
 930     p->left = read_C(a);
 931     if (q) q->definition = p->left;
 932
 933     if (a->substring != 0) {
 934         error2(a, e_unresolved_substring, a->substring->line_number);
 935         a->substring = 0;
 936     }
 937     p->amongvar_needed = a->amongvar_needed;
 938 }
 939
 940 static void read_define(struct analyser * a) {
 941     if (get_token(a, c_name)) {
 942         struct name * q = find_name(a);
 943         int type;
 944         if (q) {
 945             type = q->type;
 946         } else {
 947             /* No declaration, so sniff next token - if it is 'as' then parse
 948              * as a routine, otherwise as a grouping.
 949              */
 950             if (read_token(a->tokeniser) == c_as) {
 951                 type = t_routine;
 952             } else {
 953                 type = t_grouping;
 954             }
 955             a->tokeniser->token_held = true;
 956         }
 957
 958         if (type == t_grouping) {
 959             read_define_grouping(a, q);
 960         } else {
 961             read_define_routine(a, q);
 962         }
 963     }
 964 }
 965
 966 static void read_backwardmode(struct analyser * a) {
 967     int mode = a->mode;
 968     a->mode = m_backward;
 969     if (get_token(a, c_bra)) {
 970         read_program_(a, c_ket);
 971         check_token(a, c_ket);
 972     }
 973     a->mode = mode;
 974 }
 975
 976 static void read_program_(struct analyser * a, int terminator) {
 977     struct tokeniser * t = a->tokeniser;
 978     while (true) {
 979         switch (read_token(t)) {
 980             case c_strings:     read_names(a, t_string); break;
 981             case c_booleans:    read_names(a, t_boolean); break;
 982             case c_integers:    read_names(a, t_integer); break;
 983             case c_routines:    read_names(a, t_routine); break;
 984             case c_externals:   read_names(a, t_external); break;
 985             case c_groupings:   read_names(a, t_grouping); break;
 986             case c_define:      read_define(a); break;
 987             case c_backwardmode:read_backwardmode(a); break;
 988             case c_ket:
 989                 if (terminator == c_ket) return;
 990                 /* fall through */
 991             default:
 992                 error(a, e_unexpected_token); break;
 993             case -1:
 994                 if (terminator >= 0) omission_error(a, c_ket);
 995                 return;
 996         }
 997     }
 998 }
 999
1000 extern void read_program(struct analyser * a) {
1001     read_program_(a, -1);
1002     {
1003         struct name * q = a->names;
1004         while (q) {
1005             switch (q->type) {
1006                 case t_external: case t_routine:
1007                     if (q->used && q->definition == 0) error4(a, q);
1008                     break;
1009                 case t_grouping:
1010                     if (q->used && q->grouping == 0) error4(a, q);
1011                     break;
1012             }
1013             q = q->next;
1014         }
1015     }
1016
1017     if (a->tokeniser->error_count == 0) {
1018         struct name * q = a->names;
1019         while (q) {
1020             if (!q->referenced) {
1021                 fprintf(stderr, "%s:%d: warning: %s '",
1022                         a->tokeniser->file,
1023                         q->declaration_line_number,
1024                         name_of_name_type(q->type));
1025                 report_b(stderr, q->b);
1026                 if (q->type == t_routine ||
1027                     q->type == t_external ||
1028                     q->type == t_grouping) {
1029                     fprintf(stderr, "' declared but not defined\n");
1030                 } else {
1031                     fprintf(stderr, "' defined but not used\n");
1032                 }
1033             } else if (!q->used &&
1034                        (q->type == t_routine || q->type == t_grouping)) {
1035                 int line_num;
1036                 if (q->type == t_routine) {
1037                     line_num = q->definition->line_number;
1038                 } else {
1039                     line_num = q->grouping->line_number;
1040                 }
1041                 fprintf(stderr, "%s:%d: warning: %s '",
1042                         a->tokeniser->file,
1043                         line_num,
1044                         name_of_name_type(q->type));
1045                 report_b(stderr, q->b);
1046                 fprintf(stderr, "' defined but not used\n");
1047             }
1048             q = q->next;
1049         }
1050     }
1051 }
1052
1053 extern struct analyser * create_analyser(struct tokeniser * t) {
1054     NEW(analyser, a);
1055     a->tokeniser = t;
1056     a->nodes = 0;
1057     a->names = 0;
1058     a->literalstrings = 0;
1059     a->program = 0;
1060     a->amongs = 0;
1061     a->among_count = 0;
1062     a->groupings = 0;
1063     a->mode = m_forward;
1064     a->modifyable = true;
1065     { int i; for (i = 0; i < t_size; i++) a->name_count[i] = 0; }
1066     a->substring = 0;
1067     a->int_limits_used = false;
1068     return a;
1069 }
1070
1071 extern void close_analyser(struct analyser * a) {
1072     {
1073         struct node * q = a->nodes;
1074         while (q) {
1075             struct node * q_next = q->next;
1076             FREE(q);
1077             q = q_next;
1078         }
1079     }
1080     {
1081         struct name * q = a->names;
1082         while (q) {
1083             struct name * q_next = q->next;
1084             lose_b(q->b); FREE(q);
1085             q = q_next;
1086         }
1087     }
1088     {
1089         struct literalstring * q = a->literalstrings;
1090         while (q) {
1091             struct literalstring * q_next = q->next;
1092             lose_b(q->b); FREE(q);
1093             q = q_next;
1094         }
1095     }
1096     {
1097         struct among * q = a->amongs;
1098         while (q) {
1099             struct among * q_next = q->next;
1100             FREE(q->b); FREE(q);
1101             q = q_next;
1102         }
1103     }
1104     {
1105         struct grouping * q = a->groupings;
1106         while (q) {
1107             struct grouping * q_next = q->next;
1108             lose_b(q->b); FREE(q);
1109             q = q_next;
1110         }
1111     }
1112     FREE(a);
1113 }
1114