/*
 * Provenance (web viewer residue, not part of the original source):
 * Merge from vendor branch PKGSRC:
 * [netbsd-mini2440.git] / usr.bin / yacc / reader.c
 * blob 4da56b072782d9523c069dc391ac5c21830b3561
 */
/*	$NetBSD: reader.c,v 1.17 2006/11/24 19:47:00 christos Exp $	*/

/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Robert Paul Corbett.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
35 #include <sys/cdefs.h>
36 #if defined(__RCSID) && !defined(lint)
37 #if 0
38 static char sccsid[] = "@(#)reader.c 5.7 (Berkeley) 1/20/91";
39 #else
40 __RCSID("$NetBSD: reader.c,v 1.17 2006/11/24 19:47:00 christos Exp $");
41 #endif
42 #endif /* not lint */
44 #include "defs.h"
46 /* The line size must be a positive integer. One hundred was chosen */
47 /* because few lines in Yacc input grammars exceed 100 characters. */
48 /* Note that if a line exceeds LINESIZE characters, the line buffer */
49 /* will be expanded to accommodate it. */
51 #define LINESIZE 100
53 int ntags;
54 char unionized;
55 char *cptr, *line;
57 static char *cache;
58 static int cinc, cache_size;
60 static int tagmax;
61 static char **tag_table;
63 static char saw_eof;
64 static int linesize;
66 static bucket *goal;
67 static int gensym;
68 static char last_was_action;
70 static int maxitems;
71 static bucket **pitem;
73 static int maxrules;
74 static bucket **plhs;
76 static int name_pool_size;
77 static char *name_pool;
79 static void cachec(int);
80 static void get_line(void);
81 static char * dup_line(void);
82 static void skip_comment(void);
83 static int nextc(void);
84 static int keyword(void);
85 static void copy_ident(void);
86 static void copy_text(void);
87 static void copy_union(void);
88 static int hexval(int);
89 static bucket * get_literal(void);
90 static int is_reserved(char *);
91 static bucket * get_name(void);
92 static int get_number(void);
93 static char * get_tag(void);
94 static void declare_tokens(int);
95 static void declare_types(void);
96 static void declare_start(void);
97 static void handle_expect(void);
98 static void read_declarations(void);
99 static void initialize_grammar(void);
100 static void expand_items(void);
101 static void expand_rules(void);
102 static void advance_to_start(void);
103 static void start_rule(bucket *, int);
104 static void end_rule(void);
105 static void insert_empty_rule(void);
106 static void add_symbol(void);
107 static void copy_action(void);
108 static int mark_symbol(void);
109 static void read_grammar(void);
110 static void free_tags(void);
111 static void pack_names(void);
112 static void check_symbols(void);
113 static void pack_symbols(void);
114 static void pack_grammar(void);
115 static void print_grammar(void);
118 static const char line_format[] = "#line %d \"%s\"\n";
120 static void
121 cachec(int c)
123 assert(cinc >= 0);
124 if (cinc >= cache_size)
126 cache_size += 256;
127 cache = REALLOC(cache, cache_size);
128 if (cache == 0) no_space();
130 cache[cinc] = c;
131 ++cinc;
134 static void
135 get_line(void)
137 FILE *f = input_file;
138 int c;
139 int i;
141 if (saw_eof || (c = getc(f)) == EOF)
143 if (line) { FREE(line); line = 0; }
144 cptr = 0;
145 saw_eof = 1;
146 return;
149 if (line == 0 || linesize != (LINESIZE + 1))
151 if (line) FREE(line);
152 linesize = LINESIZE + 1;
153 line = MALLOC(linesize);
154 if (line == 0) no_space();
157 i = 0;
158 ++lineno;
159 for (;;)
161 line[i] = c;
162 if (c == '\n') { cptr = line; return; }
163 if (++i >= linesize)
165 linesize += LINESIZE;
166 line = REALLOC(line, linesize);
167 if (line == 0) no_space();
169 c = getc(f);
170 if (c == EOF)
172 line[i] = '\n';
173 saw_eof = 1;
174 cptr = line;
175 return;
181 static char *
182 dup_line(void)
184 char *p, *s, *t;
186 if (line == 0) return (0);
187 s = line;
188 while (*s != '\n') ++s;
189 p = MALLOC(s - line + 1);
190 if (p == 0) no_space();
192 s = line;
193 t = p;
194 while ((*t++ = *s++) != '\n') continue;
195 return (p);
199 static void
200 skip_comment(void)
202 char *s;
204 int st_lineno = lineno;
205 char *st_line = dup_line();
206 char *st_cptr = st_line + (cptr - line);
208 s = cptr + 2;
209 for (;;)
211 if (*s == '*' && s[1] == '/')
213 cptr = s + 2;
214 FREE(st_line);
215 return;
217 if (*s == '\n')
219 get_line();
220 if (line == 0)
221 unterminated_comment(st_lineno, st_line, st_cptr);
222 s = cptr;
224 else
225 ++s;
230 static int
231 nextc(void)
233 char *s;
235 if (line == 0)
237 get_line();
238 if (line == 0)
239 return (EOF);
242 s = cptr;
243 for (;;)
245 switch (*s)
247 case '\n':
248 get_line();
249 if (line == 0) return (EOF);
250 s = cptr;
251 break;
253 case ' ':
254 case '\t':
255 case '\f':
256 case '\r':
257 case '\v':
258 case ',':
259 case ';':
260 ++s;
261 break;
263 case '\\':
264 cptr = s;
265 return ('%');
267 case '/':
268 if (s[1] == '*')
270 cptr = s;
271 skip_comment();
272 s = cptr;
273 break;
275 else if (s[1] == '/')
277 get_line();
278 if (line == 0) return (EOF);
279 s = cptr;
280 break;
282 /* fall through */
284 default:
285 cptr = s;
286 return (*s);
292 static int
293 keyword(void)
295 unsigned char c;
296 char *t_cptr = cptr;
298 c = *++cptr;
299 if (isalpha(c))
301 cinc = 0;
302 for (;;)
304 if (isalpha(c))
306 if (isupper(c)) c = tolower(c);
307 cachec(c);
309 else if (isdigit(c) || c == '_' || c == '.' || c == '$')
310 cachec(c);
311 else
312 break;
313 c = *++cptr;
315 cachec(NUL);
317 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
318 return (TOKEN);
319 if (strcmp(cache, "type") == 0)
320 return (TYPE);
321 if (strcmp(cache, "left") == 0)
322 return (LEFT);
323 if (strcmp(cache, "right") == 0)
324 return (RIGHT);
325 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
326 return (NONASSOC);
327 if (strcmp(cache, "start") == 0)
328 return (START);
329 if (strcmp(cache, "union") == 0)
330 return (UNION);
331 if (strcmp(cache, "ident") == 0)
332 return (IDENT);
333 if (strcmp(cache, "expect") == 0)
334 return (EXPECT);
336 else
338 ++cptr;
339 if (c == '{')
340 return (TEXT);
341 if (c == '%' || c == '\\')
342 return (MARK);
343 if (c == '<')
344 return (LEFT);
345 if (c == '>')
346 return (RIGHT);
347 if (c == '0')
348 return (TOKEN);
349 if (c == '2')
350 return (NONASSOC);
352 syntax_error(lineno, line, t_cptr);
353 /*NOTREACHED*/
354 exit(1);
358 static void
359 copy_ident(void)
361 int c;
362 FILE *f = output_file;
364 c = nextc();
365 if (c == EOF) unexpected_EOF();
366 if (c != '"') syntax_error(lineno, line, cptr);
367 ++outline;
368 fprintf(f, "#ident \"");
369 for (;;)
371 c = *++cptr;
372 if (c == '\n')
374 fprintf(f, "\"\n");
375 return;
377 putc(c, f);
378 if (c == '"')
380 putc('\n', f);
381 ++cptr;
382 return;
388 static void
389 copy_text(void)
391 int c;
392 int quote;
393 FILE *f = text_file;
394 int need_newline = 0;
395 int t_lineno = lineno;
396 char *t_line = dup_line();
397 char *t_cptr = t_line + (cptr - line - 2);
399 if (*cptr == '\n')
401 get_line();
402 if (line == 0)
403 unterminated_text(t_lineno, t_line, t_cptr);
405 if (!lflag) fprintf(f, line_format, lineno, input_file_name);
407 loop:
408 c = *cptr++;
409 switch (c)
411 case '\n':
412 next_line:
413 putc('\n', f);
414 need_newline = 0;
415 get_line();
416 if (line) goto loop;
417 unterminated_text(t_lineno, t_line, t_cptr);
419 case '\'':
420 case '"':
422 int s_lineno = lineno;
423 char *s_line = dup_line();
424 char *s_cptr = s_line + (cptr - line - 1);
426 quote = c;
427 putc(c, f);
428 for (;;)
430 c = *cptr++;
431 putc(c, f);
432 if (c == quote)
434 need_newline = 1;
435 FREE(s_line);
436 goto loop;
438 if (c == '\n')
439 unterminated_string(s_lineno, s_line, s_cptr);
440 if (c == '\\')
442 c = *cptr++;
443 putc(c, f);
444 if (c == '\n')
446 get_line();
447 if (line == 0)
448 unterminated_string(s_lineno, s_line, s_cptr);
454 case '/':
455 putc(c, f);
456 need_newline = 1;
457 c = *cptr;
458 if (c == '/')
460 putc('*', f);
461 while ((c = *++cptr) != '\n')
463 if (c == '*' && cptr[1] == '/')
464 fprintf(f, "* ");
465 else
466 putc(c, f);
468 fprintf(f, "*/");
469 goto next_line;
471 if (c == '*')
473 int c_lineno = lineno;
474 char *c_line = dup_line();
475 char *c_cptr = c_line + (cptr - line - 1);
477 putc('*', f);
478 ++cptr;
479 for (;;)
481 c = *cptr++;
482 putc(c, f);
483 if (c == '*' && *cptr == '/')
485 putc('/', f);
486 ++cptr;
487 FREE(c_line);
488 goto loop;
490 if (c == '\n')
492 get_line();
493 if (line == 0)
494 unterminated_comment(c_lineno, c_line, c_cptr);
498 need_newline = 1;
499 goto loop;
501 case '%':
502 case '\\':
503 if (*cptr == '}')
505 if (need_newline) putc('\n', f);
506 ++cptr;
507 FREE(t_line);
508 return;
510 /* fall through */
512 default:
513 putc(c, f);
514 need_newline = 1;
515 goto loop;
520 static void
521 copy_union(void)
523 int c;
524 int quote;
525 int depth;
526 int u_lineno = lineno;
527 char *u_line = dup_line();
528 char *u_cptr = u_line + (cptr - line - 6);
530 if (unionized) over_unionized(cptr - 6);
531 unionized = 1;
533 if (!lflag)
534 fprintf(text_file, line_format, lineno, input_file_name);
536 fprintf(text_file, "typedef union");
537 if (dflag) fprintf(union_file, "typedef union");
539 depth = 0;
540 loop:
541 c = *cptr++;
542 putc(c, text_file);
543 if (dflag) putc(c, union_file);
544 switch (c)
546 case '\n':
547 next_line:
548 get_line();
549 if (line == 0) unterminated_union(u_lineno, u_line, u_cptr);
550 goto loop;
552 case '{':
553 ++depth;
554 goto loop;
556 case '}':
557 if (--depth == 0)
559 fprintf(text_file, " YYSTYPE;\n");
560 FREE(u_line);
561 return;
563 goto loop;
565 case '\'':
566 case '"':
568 int s_lineno = lineno;
569 char *s_line = dup_line();
570 char *s_cptr = s_line + (cptr - line - 1);
572 quote = c;
573 for (;;)
575 c = *cptr++;
576 putc(c, text_file);
577 if (dflag) putc(c, union_file);
578 if (c == quote)
580 FREE(s_line);
581 goto loop;
583 if (c == '\n')
584 unterminated_string(s_lineno, s_line, s_cptr);
585 if (c == '\\')
587 c = *cptr++;
588 putc(c, text_file);
589 if (dflag) putc(c, union_file);
590 if (c == '\n')
592 get_line();
593 if (line == 0)
594 unterminated_string(s_lineno, s_line, s_cptr);
600 case '/':
601 c = *cptr;
602 if (c == '/')
604 putc('*', text_file);
605 if (dflag) putc('*', union_file);
606 while ((c = *++cptr) != '\n')
608 if (c == '*' && cptr[1] == '/')
610 fprintf(text_file, "* ");
611 if (dflag) fprintf(union_file, "* ");
613 else
615 putc(c, text_file);
616 if (dflag) putc(c, union_file);
619 fprintf(text_file, "*/\n");
620 if (dflag) fprintf(union_file, "*/\n");
621 goto next_line;
623 if (c == '*')
625 int c_lineno = lineno;
626 char *c_line = dup_line();
627 char *c_cptr = c_line + (cptr - line - 1);
629 putc('*', text_file);
630 if (dflag) putc('*', union_file);
631 ++cptr;
632 for (;;)
634 c = *cptr++;
635 putc(c, text_file);
636 if (dflag) putc(c, union_file);
637 if (c == '*' && *cptr == '/')
639 putc('/', text_file);
640 if (dflag) putc('/', union_file);
641 ++cptr;
642 FREE(c_line);
643 goto loop;
645 if (c == '\n')
647 get_line();
648 if (line == 0)
649 unterminated_comment(c_lineno, c_line, c_cptr);
653 goto loop;
655 default:
656 goto loop;
/*
 * hexval - numeric value of a hexadecimal digit.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f', and -1 for any other
 * character.
 */
static int
hexval(int c)
{
    if (c >= '0' && c <= '9')
        return (c - '0');
    if (c >= 'A' && c <= 'F')
        return (c - 'A' + 10);
    if (c >= 'a' && c <= 'f')
        return (c - 'a' + 10);
    return (-1);
}
674 static bucket *
675 get_literal(void)
677 int c, quote;
678 int i;
679 int n;
680 char *s;
681 bucket *bp;
682 int s_lineno = lineno;
683 char *s_line = dup_line();
684 char *s_cptr = s_line + (cptr - line);
686 quote = *cptr++;
687 cinc = 0;
688 for (;;)
690 c = *cptr++;
691 if (c == quote) break;
692 if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
693 if (c == '\\')
695 char *c_cptr = cptr - 1;
697 c = *cptr++;
698 switch (c)
700 case '\n':
701 get_line();
702 if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
703 continue;
705 case '0': case '1': case '2': case '3':
706 case '4': case '5': case '6': case '7':
707 n = c - '0';
708 c = *cptr;
709 if (IS_OCTAL(c))
711 n = (n << 3) + (c - '0');
712 c = *++cptr;
713 if (IS_OCTAL(c))
715 n = (n << 3) + (c - '0');
716 ++cptr;
719 if (n > MAXCHAR) illegal_character(c_cptr);
720 c = n;
721 break;
723 case 'x':
724 c = *cptr++;
725 n = hexval(c);
726 if (n < 0 || n >= 16)
727 illegal_character(c_cptr);
728 for (;;)
730 c = *cptr;
731 i = hexval(c);
732 if (i < 0 || i >= 16) break;
733 ++cptr;
734 n = (n << 4) + i;
735 if (n > MAXCHAR) illegal_character(c_cptr);
737 c = n;
738 break;
740 case 'a': c = 7; break;
741 case 'b': c = '\b'; break;
742 case 'f': c = '\f'; break;
743 case 'n': c = '\n'; break;
744 case 'r': c = '\r'; break;
745 case 't': c = '\t'; break;
746 case 'v': c = '\v'; break;
749 cachec(c);
751 FREE(s_line);
753 n = cinc;
754 s = MALLOC(n);
755 if (s == 0) no_space();
757 for (i = 0; i < n; ++i)
758 s[i] = cache[i];
760 cinc = 0;
761 if (n == 1)
762 cachec('\'');
763 else
764 cachec('"');
766 for (i = 0; i < n; ++i)
768 c = ((unsigned char *)s)[i];
769 if (c == '\\' || c == cache[0])
771 cachec('\\');
772 cachec(c);
774 else if (isprint(c))
775 cachec(c);
776 else
778 cachec('\\');
779 switch (c)
781 case 7: cachec('a'); break;
782 case '\b': cachec('b'); break;
783 case '\f': cachec('f'); break;
784 case '\n': cachec('n'); break;
785 case '\r': cachec('r'); break;
786 case '\t': cachec('t'); break;
787 case '\v': cachec('v'); break;
788 default:
789 cachec(((c >> 6) & 7) + '0');
790 cachec(((c >> 3) & 7) + '0');
791 cachec((c & 7) + '0');
792 break;
797 if (n == 1)
798 cachec('\'');
799 else
800 cachec('"');
802 cachec(NUL);
803 bp = lookup(cache);
804 bp->class = TERM;
805 if (n == 1 && bp->value == UNDEFINED)
806 bp->value = *(unsigned char *)s;
807 FREE(s);
809 return (bp);
/*
 * is_reserved - test whether a symbol name is reserved.
 *
 * Returns 1 for ".", "$accept", "$end" and any name of the form
 * "$$" followed only by one or more decimal digits; returns 0 otherwise.
 */
static int
is_reserved(char *name)
{
    char *s;

    if (strcmp(name, ".") == 0 ||
        strcmp(name, "$accept") == 0 ||
        strcmp(name, "$end") == 0)
        return (1);

    if (name[0] == '$' && name[1] == '$' &&
        isdigit((unsigned char)name[2])) {
        s = name + 3;
        while (isdigit((unsigned char)*s))
            ++s;
        if (*s == '\0')
            return (1);
    }

    return (0);
}
834 static bucket *
835 get_name(void)
837 int c;
839 cinc = 0;
840 for (c = *cptr; IS_IDENT(c); c = *++cptr)
841 cachec(c);
842 cachec(NUL);
844 if (is_reserved(cache)) used_reserved(cache);
846 return (lookup(cache));
850 static int
851 get_number(void)
853 int c;
854 int n;
856 n = 0;
857 for (c = *cptr; isdigit(c); c = *++cptr)
858 n = 10*n + (c - '0');
860 return (n);
864 static char *
865 get_tag(void)
867 int c;
868 int i;
869 char *s;
870 int t_lineno = lineno;
871 char *t_line = dup_line();
872 char *t_cptr = t_line + (cptr - line);
874 ++cptr;
875 c = nextc();
876 if (c == EOF) unexpected_EOF();
877 if (!isalpha(c) && c != '_' && c != '$')
878 illegal_tag(t_lineno, t_line, t_cptr);
880 cinc = 0;
881 do { cachec(c); c = *++cptr; } while (IS_IDENT(c));
882 cachec(NUL);
884 c = nextc();
885 if (c == EOF) unexpected_EOF();
886 if (c != '>')
887 illegal_tag(t_lineno, t_line, t_cptr);
888 FREE(t_line);
889 ++cptr;
891 for (i = 0; i < ntags; ++i)
893 if (strcmp(cache, tag_table[i]) == 0)
894 return (tag_table[i]);
897 if (ntags >= tagmax)
899 tagmax += 16;
900 tag_table = (char **)
901 (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
902 : MALLOC(tagmax*sizeof(char *)));
903 if (tag_table == 0) no_space();
906 s = MALLOC(cinc);
907 if (s == 0) no_space();
908 strcpy(s, cache);
909 tag_table[ntags] = s;
910 ++ntags;
911 return (s);
915 static void
916 declare_tokens(int assoc)
918 int c;
919 bucket *bp;
920 int value;
921 char *tag = 0;
922 static int prec;
924 if (assoc != TOKEN) ++prec;
926 c = nextc();
927 if (c == EOF) unexpected_EOF();
928 if (c == '<')
930 tag = get_tag();
931 c = nextc();
932 if (c == EOF) unexpected_EOF();
935 for (;;)
937 if (isalpha(c) || c == '_' || c == '.' || c == '$')
938 bp = get_name();
939 else if (c == '\'' || c == '"')
940 bp = get_literal();
941 else
942 return;
944 if (bp == goal) tokenized_start(bp->name);
945 bp->class = TERM;
947 if (tag)
949 if (bp->tag && tag != bp->tag)
950 retyped_warning(bp->name);
951 bp->tag = tag;
954 if (assoc != TOKEN)
956 if (bp->prec && prec != bp->prec)
957 reprec_warning(bp->name);
958 bp->assoc = assoc;
959 bp->prec = prec;
962 c = nextc();
963 if (c == EOF) unexpected_EOF();
964 value = UNDEFINED;
965 if (isdigit(c))
967 value = get_number();
968 if (bp->value != UNDEFINED && value != bp->value)
969 revalued_warning(bp->name);
970 bp->value = value;
971 c = nextc();
972 if (c == EOF) unexpected_EOF();
978 static void
979 declare_types(void)
981 int c;
982 bucket *bp;
983 char *tag;
985 c = nextc();
986 if (c == EOF) unexpected_EOF();
987 if (c != '<') syntax_error(lineno, line, cptr);
988 tag = get_tag();
990 for (;;)
992 c = nextc();
993 if (isalpha(c) || c == '_' || c == '.' || c == '$')
994 bp = get_name();
995 else if (c == '\'' || c == '"')
996 bp = get_literal();
997 else
998 return;
1000 if (bp->tag && tag != bp->tag)
1001 retyped_warning(bp->name);
1002 bp->tag = tag;
1007 static void
1008 declare_start(void)
1010 int c;
1011 bucket *bp;
1013 c = nextc();
1014 if (c == EOF) unexpected_EOF();
1015 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1016 syntax_error(lineno, line, cptr);
1017 bp = get_name();
1018 if (bp->class == TERM)
1019 terminal_start(bp->name);
1020 if (goal && goal != bp)
1021 restarted_warning();
1022 goal = bp;
1026 static void
1027 handle_expect(void)
1029 int c;
1030 int num;
1032 c = nextc();
1033 if (c == EOF) unexpected_EOF();
1034 if (!isdigit(c))
1035 syntax_error(lineno, line, cptr);
1036 num = get_number();
1037 if (num == 1)
1038 fprintf (stderr, "%s: Expect 1 shift/reduce conflict.\n", myname);
1039 else
1040 fprintf (stderr, "%s: Expect %d shift/reduce conflicts.\n", myname, num);
1044 static void
1045 read_declarations(void)
1047 int c, k;
1049 cache_size = 256;
1050 cache = MALLOC(cache_size);
1051 if (cache == 0) no_space();
1053 for (;;)
1055 c = nextc();
1056 if (c == EOF) unexpected_EOF();
1057 if (c != '%') syntax_error(lineno, line, cptr);
1058 switch (k = keyword())
1060 case MARK:
1061 return;
1063 case IDENT:
1064 copy_ident();
1065 break;
1067 case TEXT:
1068 copy_text();
1069 break;
1071 case UNION:
1072 copy_union();
1073 break;
1075 case TOKEN:
1076 case LEFT:
1077 case RIGHT:
1078 case NONASSOC:
1079 declare_tokens(k);
1080 break;
1082 case TYPE:
1083 declare_types();
1084 break;
1086 case START:
1087 declare_start();
1088 break;
1090 case EXPECT:
1091 handle_expect();
1092 break;
1098 static void
1099 initialize_grammar(void)
1101 nitems = 4;
1102 maxitems = 300;
1103 pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
1104 if (pitem == 0) no_space();
1105 pitem[0] = 0;
1106 pitem[1] = 0;
1107 pitem[2] = 0;
1108 pitem[3] = 0;
1110 nrules = 3;
1111 maxrules = 100;
1112 plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
1113 if (plhs == 0) no_space();
1114 plhs[0] = 0;
1115 plhs[1] = 0;
1116 plhs[2] = 0;
1117 rprec = (short *) MALLOC(maxrules*sizeof(short));
1118 if (rprec == 0) no_space();
1119 rprec[0] = 0;
1120 rprec[1] = 0;
1121 rprec[2] = 0;
1122 rassoc = (char *) MALLOC(maxrules*sizeof(char));
1123 if (rassoc == 0) no_space();
1124 rassoc[0] = TOKEN;
1125 rassoc[1] = TOKEN;
1126 rassoc[2] = TOKEN;
1130 static void
1131 expand_items(void)
1133 maxitems += 300;
1134 pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
1135 if (pitem == 0) no_space();
1139 static void
1140 expand_rules(void)
1142 maxrules += 100;
1143 plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
1144 if (plhs == 0) no_space();
1145 rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
1146 if (rprec == 0) no_space();
1147 rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
1148 if (rassoc == 0) no_space();
1152 static void
1153 advance_to_start(void)
1155 int c;
1156 bucket *bp;
1157 char *s_cptr;
1158 int s_lineno;
1160 for (;;)
1162 c = nextc();
1163 if (c != '%') break;
1164 s_cptr = cptr;
1165 switch (keyword())
1167 case MARK:
1168 no_grammar();
1170 case TEXT:
1171 copy_text();
1172 break;
1174 case START:
1175 declare_start();
1176 break;
1178 default:
1179 syntax_error(lineno, line, s_cptr);
1183 c = nextc();
1184 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1185 syntax_error(lineno, line, cptr);
1186 bp = get_name();
1187 if (goal == 0)
1189 if (bp->class == TERM)
1190 terminal_start(bp->name);
1191 goal = bp;
1194 s_lineno = lineno;
1195 c = nextc();
1196 if (c == EOF) unexpected_EOF();
1197 if (c != ':') syntax_error(lineno, line, cptr);
1198 start_rule(bp, s_lineno);
1199 ++cptr;
1203 static void
1204 start_rule(bucket *bp, int s_lineno)
1206 if (bp->class == TERM)
1207 terminal_lhs(s_lineno);
1208 bp->class = NONTERM;
1209 if (nrules >= maxrules)
1210 expand_rules();
1211 plhs[nrules] = bp;
1212 rprec[nrules] = UNDEFINED;
1213 rassoc[nrules] = TOKEN;
1217 static void
1218 end_rule(void)
1220 int i;
1222 if (!last_was_action && plhs[nrules]->tag)
1224 for (i = nitems - 1; pitem[i]; --i) continue;
1225 if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
1226 default_action_warning();
1229 last_was_action = 0;
1230 if (nitems >= maxitems) expand_items();
1231 pitem[nitems] = 0;
1232 ++nitems;
1233 ++nrules;
1237 static void
1238 insert_empty_rule(void)
1240 bucket *bp, **bpp;
1242 assert(cache);
1243 snprintf(cache, cache_size, "$$%d", ++gensym);
1244 bp = make_bucket(cache);
1245 last_symbol->next = bp;
1246 last_symbol = bp;
1247 bp->tag = plhs[nrules]->tag;
1248 bp->class = NONTERM;
1250 if ((nitems += 2) > maxitems)
1251 expand_items();
1252 bpp = pitem + nitems - 1;
1253 *bpp-- = bp;
1254 while ((bpp[0] = bpp[-1])) --bpp;
1256 if (++nrules >= maxrules)
1257 expand_rules();
1258 plhs[nrules] = plhs[nrules-1];
1259 plhs[nrules-1] = bp;
1260 rprec[nrules] = rprec[nrules-1];
1261 rprec[nrules-1] = 0;
1262 rassoc[nrules] = rassoc[nrules-1];
1263 rassoc[nrules-1] = TOKEN;
1267 static void
1268 add_symbol(void)
1270 int c;
1271 bucket *bp;
1272 int s_lineno = lineno;
1274 c = *cptr;
1275 if (c == '\'' || c == '"')
1276 bp = get_literal();
1277 else
1278 bp = get_name();
1280 c = nextc();
1281 if (c == ':')
1283 end_rule();
1284 start_rule(bp, s_lineno);
1285 ++cptr;
1286 return;
1289 if (last_was_action)
1290 insert_empty_rule();
1291 last_was_action = 0;
1293 if (++nitems > maxitems)
1294 expand_items();
1295 pitem[nitems-1] = bp;
1299 static void
1300 copy_action(void)
1302 int c;
1303 int i, n;
1304 int depth;
1305 int quote;
1306 char *tag;
1307 FILE *f = action_file;
1308 int a_lineno = lineno;
1309 char *a_line = dup_line();
1310 char *a_cptr = a_line + (cptr - line);
1312 if (last_was_action)
1313 insert_empty_rule();
1314 last_was_action = 1;
1316 fprintf(f, "case %d:\n", nrules - 2);
1317 if (!lflag)
1318 fprintf(f, line_format, lineno, input_file_name);
1319 if (*cptr == '=') ++cptr;
1321 n = 0;
1322 for (i = nitems - 1; pitem[i]; --i) ++n;
1324 depth = 0;
1325 loop:
1326 c = *cptr;
1327 if (c == '$')
1329 if (cptr[1] == '<')
1331 int d_lineno = lineno;
1332 char *d_line = dup_line();
1333 char *d_cptr = d_line + (cptr - line);
1335 ++cptr;
1336 tag = get_tag();
1337 c = *cptr;
1338 if (c == '$')
1340 fprintf(f, "yyval.%s", tag);
1341 ++cptr;
1342 FREE(d_line);
1343 goto loop;
1345 else if (isdigit(c))
1347 i = get_number();
1348 if (i > n) dollar_warning(d_lineno, i);
1349 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1350 FREE(d_line);
1351 goto loop;
1353 else if (c == '-' && isdigit((unsigned char)cptr[1]))
1355 ++cptr;
1356 i = -get_number() - n;
1357 fprintf(f, "yyvsp[%d].%s", i, tag);
1358 FREE(d_line);
1359 goto loop;
1361 else
1362 dollar_error(d_lineno, d_line, d_cptr);
1364 else if (cptr[1] == '$')
1366 if (ntags)
1368 tag = plhs[nrules]->tag;
1369 if (tag == 0) untyped_lhs();
1370 fprintf(f, "yyval.%s", tag);
1372 else
1373 fprintf(f, "yyval");
1374 cptr += 2;
1375 goto loop;
1377 else if (isdigit((unsigned char)cptr[1]))
1379 ++cptr;
1380 i = get_number();
1381 if (ntags)
1383 if (i <= 0 || i > n)
1384 unknown_rhs(i);
1385 tag = pitem[nitems + i - n - 1]->tag;
1386 if (tag == 0) untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1387 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1389 else
1391 if (i > n)
1392 dollar_warning(lineno, i);
1393 fprintf(f, "yyvsp[%d]", i - n);
1395 goto loop;
1397 else if (cptr[1] == '-')
1399 cptr += 2;
1400 i = get_number();
1401 if (ntags)
1402 unknown_rhs(-i);
1403 fprintf(f, "yyvsp[%d]", -i - n);
1404 goto loop;
1407 if (isalpha(c) || c == '_' || c == '$')
1411 putc(c, f);
1412 c = *++cptr;
1413 } while (isalnum(c) || c == '_' || c == '$');
1414 goto loop;
1416 putc(c, f);
1417 ++cptr;
1418 switch (c)
1420 case '\n':
1421 next_line:
1422 get_line();
1423 if (line) goto loop;
1424 unterminated_action(a_lineno, a_line, a_cptr);
1426 case ';':
1427 if (depth > 0) goto loop;
1428 fprintf(f, "\nbreak;\n");
1429 FREE(a_line);
1430 return;
1432 case '{':
1433 ++depth;
1434 goto loop;
1436 case '}':
1437 if (--depth > 0) goto loop;
1438 fprintf(f, "\nbreak;\n");
1439 FREE(a_line);
1440 return;
1442 case '\'':
1443 case '"':
1445 int s_lineno = lineno;
1446 char *s_line = dup_line();
1447 char *s_cptr = s_line + (cptr - line - 1);
1449 quote = c;
1450 for (;;)
1452 c = *cptr++;
1453 putc(c, f);
1454 if (c == quote)
1456 FREE(s_line);
1457 goto loop;
1459 if (c == '\n')
1460 unterminated_string(s_lineno, s_line, s_cptr);
1461 if (c == '\\')
1463 c = *cptr++;
1464 putc(c, f);
1465 if (c == '\n')
1467 get_line();
1468 if (line == 0)
1469 unterminated_string(s_lineno, s_line, s_cptr);
1475 case '/':
1476 c = *cptr;
1477 if (c == '/')
1479 putc('*', f);
1480 while ((c = *++cptr) != '\n')
1482 if (c == '*' && cptr[1] == '/')
1483 fprintf(f, "* ");
1484 else
1485 putc(c, f);
1487 fprintf(f, "*/\n");
1488 goto next_line;
1490 if (c == '*')
1492 int c_lineno = lineno;
1493 char *c_line = dup_line();
1494 char *c_cptr = c_line + (cptr - line - 1);
1496 putc('*', f);
1497 ++cptr;
1498 for (;;)
1500 c = *cptr++;
1501 putc(c, f);
1502 if (c == '*' && *cptr == '/')
1504 putc('/', f);
1505 ++cptr;
1506 FREE(c_line);
1507 goto loop;
1509 if (c == '\n')
1511 get_line();
1512 if (line == 0)
1513 unterminated_comment(c_lineno, c_line, c_cptr);
1517 goto loop;
1519 default:
1520 goto loop;
1525 static int
1526 mark_symbol(void)
1528 int c;
1529 bucket *bp;
1531 bp = NULL;
1533 c = cptr[1];
1534 if (c == '%' || c == '\\')
1536 cptr += 2;
1537 return (1);
1540 if (c == '=')
1541 cptr += 2;
1542 else if ((c == 'p' || c == 'P') &&
1543 ((c = cptr[2]) == 'r' || c == 'R') &&
1544 ((c = cptr[3]) == 'e' || c == 'E') &&
1545 ((c = cptr[4]) == 'c' || c == 'C') &&
1546 ((c = cptr[5], !IS_IDENT(c))))
1547 cptr += 5;
1548 else
1549 syntax_error(lineno, line, cptr);
1551 c = nextc();
1552 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1553 bp = get_name();
1554 else if (c == '\'' || c == '"')
1555 bp = get_literal();
1556 else
1558 syntax_error(lineno, line, cptr);
1559 /*NOTREACHED*/
1562 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1563 prec_redeclared();
1565 rprec[nrules] = bp->prec;
1566 rassoc[nrules] = bp->assoc;
1567 return (0);
1571 static void
1572 read_grammar(void)
1574 int c;
1576 initialize_grammar();
1577 advance_to_start();
1579 for (;;)
1581 c = nextc();
1582 if (c == EOF) break;
1583 if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
1584 c == '"')
1585 add_symbol();
1586 else if (c == '{' || c == '=')
1587 copy_action();
1588 else if (c == '|')
1590 end_rule();
1591 start_rule(plhs[nrules-1], 0);
1592 ++cptr;
1594 else if (c == '%')
1596 if (mark_symbol()) break;
1598 else
1599 syntax_error(lineno, line, cptr);
1601 end_rule();
1605 static void
1606 free_tags(void)
1608 int i;
1610 if (tag_table == 0) return;
1612 for (i = 0; i < ntags; ++i)
1614 assert(tag_table[i]);
1615 FREE(tag_table[i]);
1617 FREE(tag_table);
1621 static void
1622 pack_names(void)
1624 bucket *bp;
1625 char *p, *s, *t;
1627 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1628 for (bp = first_symbol; bp; bp = bp->next)
1629 name_pool_size += strlen(bp->name) + 1;
1630 name_pool = MALLOC(name_pool_size);
1631 if (name_pool == 0) no_space();
1633 strlcpy(name_pool, "$accept", name_pool_size);
1634 strlcpy(name_pool+8, "$end", name_pool_size - 8);
1635 t = name_pool + 13;
1636 for (bp = first_symbol; bp; bp = bp->next)
1638 p = t;
1639 s = bp->name;
1640 while ((*t++ = *s++) != '\0') continue;
1641 FREE(bp->name);
1642 bp->name = p;
1647 static void
1648 check_symbols(void)
1650 bucket *bp;
1652 if (goal->class == UNKNOWN)
1653 undefined_goal(goal->name);
1655 for (bp = first_symbol; bp; bp = bp->next)
1657 if (bp->class == UNKNOWN)
1659 undefined_symbol_warning(bp->name);
1660 bp->class = TERM;
1666 static void
1667 pack_symbols(void)
1669 bucket *bp;
1670 bucket **v;
1671 int i, j, k, n;
1673 nsyms = 2;
1674 ntokens = 1;
1675 for (bp = first_symbol; bp; bp = bp->next)
1677 ++nsyms;
1678 if (bp->class == TERM) ++ntokens;
1680 start_symbol = ntokens;
1681 nvars = nsyms - ntokens;
1683 symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
1684 if (symbol_name == 0) no_space();
1685 symbol_value = (short *) MALLOC(nsyms*sizeof(short));
1686 if (symbol_value == 0) no_space();
1687 symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
1688 if (symbol_prec == 0) no_space();
1689 symbol_assoc = MALLOC(nsyms);
1690 if (symbol_assoc == 0) no_space();
1692 v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
1693 if (v == 0) no_space();
1695 v[0] = 0;
1696 v[start_symbol] = 0;
1698 i = 1;
1699 j = start_symbol + 1;
1700 for (bp = first_symbol; bp; bp = bp->next)
1702 if (bp->class == TERM)
1703 v[i++] = bp;
1704 else
1705 v[j++] = bp;
1707 assert(i == ntokens && j == nsyms);
1709 for (i = 1; i < ntokens; ++i)
1710 v[i]->index = i;
1712 goal->index = start_symbol + 1;
1713 k = start_symbol + 2;
1714 while (++i < nsyms)
1715 if (v[i] != goal)
1717 v[i]->index = k;
1718 ++k;
1721 goal->value = 0;
1722 k = 1;
1723 for (i = start_symbol + 1; i < nsyms; ++i)
1725 if (v[i] != goal)
1727 v[i]->value = k;
1728 ++k;
1732 k = 0;
1733 for (i = 1; i < ntokens; ++i)
1735 n = v[i]->value;
1736 if (n > 256)
1738 for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
1739 symbol_value[j] = symbol_value[j-1];
1740 symbol_value[j] = n;
1744 if (v[1]->value == UNDEFINED)
1745 v[1]->value = 256;
1747 j = 0;
1748 n = 257;
1749 for (i = 2; i < ntokens; ++i)
1751 if (v[i]->value == UNDEFINED)
1753 while (j < k && n == symbol_value[j])
1755 while (++j < k && n == symbol_value[j]) continue;
1756 ++n;
1758 v[i]->value = n;
1759 ++n;
1763 symbol_name[0] = name_pool + 8;
1764 symbol_value[0] = 0;
1765 symbol_prec[0] = 0;
1766 symbol_assoc[0] = TOKEN;
1767 for (i = 1; i < ntokens; ++i)
1769 symbol_name[i] = v[i]->name;
1770 symbol_value[i] = v[i]->value;
1771 symbol_prec[i] = v[i]->prec;
1772 symbol_assoc[i] = v[i]->assoc;
1774 symbol_name[start_symbol] = name_pool;
1775 symbol_value[start_symbol] = -1;
1776 symbol_prec[start_symbol] = 0;
1777 symbol_assoc[start_symbol] = TOKEN;
1778 for (++i; i < nsyms; ++i)
1780 k = v[i]->index;
1781 symbol_name[k] = v[i]->name;
1782 symbol_value[k] = v[i]->value;
1783 symbol_prec[k] = v[i]->prec;
1784 symbol_assoc[k] = v[i]->assoc;
1787 FREE(v);
1791 static void
1792 pack_grammar(void)
1794 int i, j;
1795 int assoc, prec;
1797 ritem = (short *) MALLOC(nitems*sizeof(short));
1798 if (ritem == 0) no_space();
1799 rlhs = (short *) MALLOC(nrules*sizeof(short));
1800 if (rlhs == 0) no_space();
1801 rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
1802 if (rrhs == 0) no_space();
1803 rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
1804 if (rprec == 0) no_space();
1805 rassoc = REALLOC(rassoc, nrules);
1806 if (rassoc == 0) no_space();
1808 ritem[0] = -1;
1809 ritem[1] = goal->index;
1810 ritem[2] = 0;
1811 ritem[3] = -2;
1812 rlhs[0] = 0;
1813 rlhs[1] = 0;
1814 rlhs[2] = start_symbol;
1815 rrhs[0] = 0;
1816 rrhs[1] = 0;
1817 rrhs[2] = 1;
1819 j = 4;
1820 for (i = 3; i < nrules; ++i)
1822 rlhs[i] = plhs[i]->index;
1823 rrhs[i] = j;
1824 assoc = TOKEN;
1825 prec = 0;
1826 while (pitem[j])
1828 ritem[j] = pitem[j]->index;
1829 if (pitem[j]->class == TERM)
1831 prec = pitem[j]->prec;
1832 assoc = pitem[j]->assoc;
1834 ++j;
1836 ritem[j] = -i;
1837 ++j;
1838 if (rprec[i] == UNDEFINED)
1840 rprec[i] = prec;
1841 rassoc[i] = assoc;
1844 rrhs[i] = j;
1846 FREE(plhs);
1847 FREE(pitem);
1851 static void
1852 print_grammar(void)
1854 int i, j, k;
1855 int spacing;
1856 FILE *f = verbose_file;
1858 spacing = 0;
1860 if (!vflag) return;
1862 k = 1;
1863 for (i = 2; i < nrules; ++i)
1865 if (rlhs[i] != rlhs[i-1])
1867 if (i != 2) fprintf(f, "\n");
1868 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
1869 spacing = strlen(symbol_name[rlhs[i]]) + 1;
1871 else
1873 fprintf(f, "%4d ", i - 2);
1874 j = spacing;
1875 while (--j >= 0) putc(' ', f);
1876 putc('|', f);
1879 while (ritem[k] >= 0)
1881 fprintf(f, " %s", symbol_name[ritem[k]]);
1882 ++k;
1884 ++k;
1885 putc('\n', f);
1890 void
1891 reader(void)
1893 write_section(banner);
1894 create_symbol_table();
1895 read_declarations();
1896 read_grammar();
1897 free_symbol_table();
1898 free_tags();
1899 pack_names();
1900 check_symbols();
1901 pack_symbols();
1902 pack_grammar();
1903 free_symbols();
1904 print_grammar();