don't bother resolving onbld python module deps
[unleashed.git] / bin / yacc / reader.c
blob4964be11f837946d5df4f93247119c0a6cba5e01
1 /* $OpenBSD: reader.c,v 1.34 2017/05/25 20:11:03 tedu Exp $ */
2 /* $NetBSD: reader.c,v 1.5 1996/03/19 03:21:43 jtc Exp $ */
4 /*
5 * Copyright (c) 1989 The Regents of the University of California.
6 * All rights reserved.
8 * This code is derived from software contributed to Berkeley by
9 * Robert Paul Corbett.
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
36 #include <limits.h>
37 #include "defs.h"
39 /* The line size must be a positive integer. One hundred was chosen */
40 /* because few lines in Yacc input grammars exceed 100 characters. */
41 /* Note that if a line exceeds LINESIZE characters, the line buffer */
42 /* will be expanded to accommodate it. */
44 #define LINESIZE 100
46 char *cache;
47 int cinc, cache_size;
49 int ntags, tagmax;
50 char **tag_table;
52 char saw_eof, unionized;
53 char *cptr, *line;
54 int linesize;
56 bucket *goal;
57 int prec;
58 int gensym;
59 char last_was_action;
61 int maxitems;
62 bucket **pitem;
64 int maxrules;
65 bucket **plhs;
67 int name_pool_size;
68 char *name_pool;
70 void cachec(int);
71 void get_line(void);
72 char *dup_line(void);
73 void skip_comment(void);
74 int nextc(void);
75 int keyword(void);
76 void copy_ident(void);
77 void copy_text(void);
78 void copy_union(void);
79 bucket *get_literal(void);
80 int is_reserved(char *);
81 bucket *get_name(void);
82 int get_number(void);
83 char *get_tag(void);
84 void declare_tokens(int);
85 void declare_types(void);
86 void declare_start(void);
87 void handle_expect(void);
88 void read_declarations(void);
89 void initialize_grammar(void);
90 void expand_items(void);
91 void expand_rules(void);
92 void advance_to_start(void);
93 void start_rule(bucket *, int);
94 void end_rule(void);
95 void insert_empty_rule(void);
96 void add_symbol(void);
97 void copy_action(void);
98 int mark_symbol(void);
99 void read_grammar(void);
100 void free_tags(void);
101 void pack_names(void);
102 void check_symbols(void);
103 void pack_symbols(void);
104 void pack_grammar(void);
105 void print_grammar(void);
107 char line_format[] = "#line %d \"%s\"\n";
109 void
110 cachec(int c)
112 assert(cinc >= 0);
113 if (cinc >= cache_size) {
114 cache_size += 256;
115 cache = realloc(cache, cache_size);
116 if (cache == NULL)
117 no_space();
119 cache[cinc] = c;
120 ++cinc;
124 void
125 get_line(void)
127 FILE *f = input_file;
128 int c, i;
130 if (saw_eof || (c = getc(f)) == EOF) {
131 if (line) {
132 free(line);
133 line = 0;
135 cptr = 0;
136 saw_eof = 1;
137 return;
139 if (line == NULL || linesize != (LINESIZE + 1)) {
140 free(line);
141 linesize = LINESIZE + 1;
142 line = malloc(linesize);
143 if (line == NULL)
144 no_space();
146 i = 0;
147 ++lineno;
148 for (;;) {
149 line[i] = c;
150 if (c == '\n') {
151 cptr = line;
152 return;
154 if (++i >= linesize) {
155 linesize += LINESIZE;
156 line = realloc(line, linesize);
157 if (line == NULL)
158 no_space();
160 c = getc(f);
161 if (c == EOF) {
162 line[i] = '\n';
163 saw_eof = 1;
164 cptr = line;
165 return;
171 char *
172 dup_line(void)
174 char *p, *s, *t;
176 if (line == NULL)
177 return (0);
178 s = line;
179 while (*s != '\n')
180 ++s;
181 p = malloc(s - line + 1);
182 if (p == NULL)
183 no_space();
185 s = line;
186 t = p;
187 while ((*t++ = *s++) != '\n')
188 continue;
189 return (p);
193 void
194 skip_comment(void)
196 char *s;
197 int st_lineno = lineno;
198 char *st_line = dup_line();
199 char *st_cptr = st_line + (cptr - line);
201 s = cptr + 2;
202 for (;;) {
203 if (*s == '*' && s[1] == '/') {
204 cptr = s + 2;
205 free(st_line);
206 return;
208 if (*s == '\n') {
209 get_line();
210 if (line == NULL)
211 unterminated_comment(st_lineno, st_line, st_cptr);
212 s = cptr;
213 } else
214 ++s;
220 nextc(void)
222 char *s;
224 if (line == NULL) {
225 get_line();
226 if (line == NULL)
227 return (EOF);
229 s = cptr;
230 for (;;) {
231 switch (*s) {
232 case '\n':
233 get_line();
234 if (line == NULL)
235 return (EOF);
236 s = cptr;
237 break;
239 case ' ':
240 case '\t':
241 case '\f':
242 case '\r':
243 case '\v':
244 case ',':
245 case ';':
246 ++s;
247 break;
249 case '\\':
250 cptr = s;
251 return ('%');
253 case '/':
254 if (s[1] == '*') {
255 cptr = s;
256 skip_comment();
257 s = cptr;
258 break;
259 } else if (s[1] == '/') {
260 get_line();
261 if (line == NULL)
262 return (EOF);
263 s = cptr;
264 break;
266 /* fall through */
268 default:
269 cptr = s;
270 return ((unsigned char) *s);
277 keyword(void)
279 int c;
280 char *t_cptr = cptr;
282 c = (unsigned char) *++cptr;
283 if (isalpha(c)) {
284 cinc = 0;
285 for (;;) {
286 if (isalpha(c)) {
287 if (isupper(c))
288 c = tolower(c);
289 cachec(c);
290 } else if (isdigit(c) || c == '_' || c == '.' || c == '$')
291 cachec(c);
292 else
293 break;
294 c = (unsigned char) *++cptr;
296 cachec(NUL);
298 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
299 return (TOKEN);
300 if (strcmp(cache, "type") == 0)
301 return (TYPE);
302 if (strcmp(cache, "left") == 0)
303 return (LEFT);
304 if (strcmp(cache, "right") == 0)
305 return (RIGHT);
306 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
307 return (NONASSOC);
308 if (strcmp(cache, "start") == 0)
309 return (START);
310 if (strcmp(cache, "union") == 0)
311 return (UNION);
312 if (strcmp(cache, "ident") == 0)
313 return (IDENT);
314 if (strcmp(cache, "expect") == 0)
315 return (EXPECT);
316 } else {
317 ++cptr;
318 if (c == '{')
319 return (TEXT);
320 if (c == '%' || c == '\\')
321 return (MARK);
322 if (c == '<')
323 return (LEFT);
324 if (c == '>')
325 return (RIGHT);
326 if (c == '0')
327 return (TOKEN);
328 if (c == '2')
329 return (NONASSOC);
331 syntax_error(lineno, line, t_cptr);
332 /* NOTREACHED */
333 return (0);
337 void
338 copy_ident(void)
340 int c;
341 FILE *f = output_file;
343 c = nextc();
344 if (c == EOF)
345 unexpected_EOF();
346 if (c != '"')
347 syntax_error(lineno, line, cptr);
348 ++outline;
349 fprintf(f, "#ident \"");
350 for (;;) {
351 c = (unsigned char) *++cptr;
352 if (c == '\n') {
353 fprintf(f, "\"\n");
354 return;
356 putc(c, f);
357 if (c == '"') {
358 putc('\n', f);
359 ++cptr;
360 return;
/*
 * Copy a literal code block from the grammar to text_file.  Copying
 * starts at cptr and stops at a '%' or backslash that is immediately
 * followed by '}'.  String and character literals and block comments
 * are copied through unchanged; a comment introduced by two slashes is
 * rewritten as a block comment.  Running out of input is reported with
 * the position saved on entry (t_lineno, t_line, t_cptr).
 */
366 void
367 copy_text(void)
369 int c;
370 int quote;
371 FILE *f = text_file;
/* need_newline: nonzero once something was written on the current output line */
372 int need_newline = 0;
373 int t_lineno = lineno;
374 char *t_line = dup_line();
375 char *t_cptr = t_line + (cptr - line - 2);
/* Nothing else on the opening line: start copying from the next one. */
377 if (*cptr == '\n') {
378 get_line();
379 if (line == NULL)
380 unterminated_text(t_lineno, t_line, t_cptr);
/* Emit a #line directive unless lflag is set. */
382 if (!lflag)
383 fprintf(f, line_format, lineno, input_file_name);
/* Main loop: fetch one character and dispatch on it. */
385 loop:
386 c = (unsigned char) *cptr++;
387 switch (c) {
388 case '\n':
389 next_line:
390 putc('\n', f);
391 need_newline = 0;
392 get_line();
393 if (line)
394 goto loop;
395 unterminated_text(t_lineno, t_line, t_cptr);
/* Quoted literal: copy up to the matching quote, honouring backslash escapes. */
397 case '\'':
398 case '"': {
399 int s_lineno = lineno;
400 char *s_line = dup_line();
401 char *s_cptr = s_line + (cptr - line - 1);
403 quote = c;
404 putc(c, f);
405 for (;;) {
406 c = (unsigned char) *cptr++;
407 putc(c, f);
408 if (c == quote) {
409 need_newline = 1;
410 free(s_line);
411 goto loop;
413 if (c == '\n')
414 unterminated_string(s_lineno, s_line, s_cptr);
/* An escaped newline continues the literal on the next input line. */
415 if (c == '\\') {
416 c = (unsigned char) *cptr++;
417 putc(c, f);
418 if (c == '\n') {
419 get_line();
420 if (line == NULL)
421 unterminated_string(s_lineno, s_line, s_cptr);
427 case '/':
428 putc(c, f);
429 need_newline = 1;
430 c = (unsigned char) *cptr;
/*
 * Two slashes: write the rest of the line as a block comment.  A
 * star-slash pair inside it has its star written as "* " so the
 * generated comment is not closed early.
 */
431 if (c == '/') {
432 putc('*', f);
433 while ((c = (unsigned char) *++cptr) != '\n') {
434 if (c == '*' && cptr[1] == '/')
435 fprintf(f, "* ");
436 else
437 putc(c, f);
439 fprintf(f, "*/");
440 goto next_line;
/* Block comment: copy through to its terminator, across lines if needed. */
442 if (c == '*') {
443 int c_lineno = lineno;
444 char *c_line = dup_line();
445 char *c_cptr = c_line + (cptr - line - 1);
447 putc('*', f);
448 ++cptr;
449 for (;;) {
450 c = (unsigned char) *cptr++;
451 putc(c, f);
452 if (c == '*' && *cptr == '/') {
453 putc('/', f);
454 ++cptr;
455 free(c_line);
456 goto loop;
458 if (c == '\n') {
459 get_line();
460 if (line == NULL)
461 unterminated_comment(c_lineno, c_line, c_cptr);
465 need_newline = 1;
466 goto loop;
/* '%' or backslash followed by '}' ends the block; otherwise copy it as-is. */
468 case '%':
469 case '\\':
470 if (*cptr == '}') {
471 if (need_newline)
472 putc('\n', f);
473 ++cptr;
474 free(t_line);
475 return;
477 /* fall through */
479 default:
480 putc(c, f);
481 need_newline = 1;
482 goto loop;
/*
 * Copy the body of a %union declaration.  The text is written to
 * text_file as a guarded "typedef union ... YYSTYPE;" and, when dflag
 * is set, the same characters are also written to union_file.  Copying
 * ends at the '}' that brings the brace depth back to zero.  A second
 * %union is reported through over_unionized().
 */
487 void
488 copy_union(void)
490 int c, quote, depth;
491 int u_lineno = lineno;
492 char *u_line = dup_line();
/* The 6 steps back over the word "union" plus the '%' that keyword() consumed. */
493 char *u_cptr = u_line + (cptr - line - 6);
495 if (unionized)
496 over_unionized(cptr - 6);
497 unionized = 1;
499 if (!lflag)
500 fprintf(text_file, line_format, lineno, input_file_name);
502 fprintf(text_file, "#ifndef YYSTYPE_DEFINED\n");
503 fprintf(text_file, "#define YYSTYPE_DEFINED\n");
504 fprintf(text_file, "typedef union");
505 if (dflag) {
506 fprintf(union_file, "#ifndef YYSTYPE_DEFINED\n");
507 fprintf(union_file, "#define YYSTYPE_DEFINED\n");
508 fprintf(union_file, "typedef union");
511 depth = 0;
/* Main loop: every character is echoed before being examined. */
512 loop:
513 c = (unsigned char) *cptr++;
514 putc(c, text_file);
515 if (dflag)
516 putc(c, union_file);
517 switch (c) {
518 case '\n':
519 next_line:
520 get_line();
521 if (line == NULL)
522 unterminated_union(u_lineno, u_line, u_cptr);
523 goto loop;
525 case '{':
526 ++depth;
527 goto loop;
/*
 * Closing brace of the union: finish the typedef in text_file.
 * NOTE(review): nothing is appended to union_file here; presumably it
 * is completed elsewhere - confirm.
 */
529 case '}':
530 if (--depth == 0) {
531 fprintf(text_file, " YYSTYPE;\n");
532 fprintf(text_file, "#endif /* YYSTYPE_DEFINED */\n");
533 free(u_line);
534 return;
536 goto loop;
/* Quoted literal: copy up to the matching quote, honouring backslash escapes. */
538 case '\'':
539 case '"': {
540 int s_lineno = lineno;
541 char *s_line = dup_line();
542 char *s_cptr = s_line + (cptr - line - 1);
544 quote = c;
545 for (;;) {
546 c = (unsigned char) *cptr++;
547 putc(c, text_file);
548 if (dflag)
549 putc(c, union_file);
550 if (c == quote) {
551 free(s_line);
552 goto loop;
554 if (c == '\n')
555 unterminated_string(s_lineno, s_line, s_cptr);
556 if (c == '\\') {
557 c = (unsigned char) *cptr++;
558 putc(c, text_file);
559 if (dflag)
560 putc(c, union_file);
561 if (c == '\n') {
562 get_line();
563 if (line == NULL)
564 unterminated_string(s_lineno,
565 s_line, s_cptr);
571 case '/':
572 c = (unsigned char) *cptr;
/* Two slashes: write the rest of the line as a block comment. */
573 if (c == '/') {
574 putc('*', text_file);
575 if (dflag)
576 putc('*', union_file);
577 while ((c = (unsigned char) *++cptr) != '\n') {
578 if (c == '*' && cptr[1] == '/') {
579 fprintf(text_file, "* ");
580 if (dflag)
581 fprintf(union_file, "* ");
582 } else {
583 putc(c, text_file);
584 if (dflag)
585 putc(c, union_file);
588 fprintf(text_file, "*/\n");
589 if (dflag)
590 fprintf(union_file, "*/\n");
591 goto next_line;
/* Block comment: copy through to its terminator, across lines if needed. */
593 if (c == '*') {
594 int c_lineno = lineno;
595 char *c_line = dup_line();
596 char *c_cptr = c_line + (cptr - line - 1);
598 putc('*', text_file);
599 if (dflag)
600 putc('*', union_file);
601 ++cptr;
602 for (;;) {
603 c = (unsigned char) *cptr++;
604 putc(c, text_file);
605 if (dflag)
606 putc(c, union_file);
607 if (c == '*' && *cptr == '/') {
608 putc('/', text_file);
609 if (dflag)
610 putc('/', union_file);
611 ++cptr;
612 free(c_line);
613 goto loop;
615 if (c == '\n') {
616 get_line();
617 if (line == NULL)
618 unterminated_comment(c_lineno,
619 c_line, c_cptr);
623 goto loop;
625 default:
626 goto loop;
/*
 * Read the quoted literal that starts at cptr and return its symbol
 * table entry, marked as a terminal.  The decoded bytes are collected
 * in `cache', copied aside, and then re-encoded into `cache' as a
 * printable quoted name, which is the key passed to lookup().  A
 * one-byte literal whose value is still UNDEFINED gets that byte as
 * its value.
 */
631 bucket *
632 get_literal(void)
634 int c, quote, i, n;
635 char *s;
636 bucket *bp;
637 int s_lineno = lineno;
638 char *s_line = dup_line();
639 char *s_cptr = s_line + (cptr - line);
641 quote = (unsigned char) *cptr++;
642 cinc = 0;
/* Pass 1: decode the literal body into cache. */
643 for (;;) {
644 c = (unsigned char) *cptr++;
645 if (c == quote)
646 break;
647 if (c == '\n')
648 unterminated_string(s_lineno, s_line, s_cptr);
649 if (c == '\\') {
650 char *c_cptr = cptr - 1;
651 unsigned long ulval;
653 c = (unsigned char) *cptr++;
654 switch (c) {
/* Backslash-newline: continue on the next line, storing nothing. */
655 case '\n':
656 get_line();
657 if (line == NULL)
658 unterminated_string(s_lineno, s_line,
659 s_cptr);
660 continue;
/* Octal escape: the digit just read is part of the number, hence cptr - 1. */
662 case '0':
663 case '1':
664 case '2':
665 case '3':
666 case '4':
667 case '5':
668 case '6':
669 case '7':
670 ulval = strtoul(cptr - 1, &s, 8);
671 if (s == cptr - 1 || ulval > MAXCHAR)
672 illegal_character(c_cptr);
673 c = (int) ulval;
674 cptr = s;
675 break;
/* Hexadecimal escape. */
677 case 'x':
678 ulval = strtoul(cptr, &s, 16);
679 if (s == cptr || ulval > MAXCHAR)
680 illegal_character(c_cptr);
681 c = (int) ulval;
682 cptr = s;
683 break;
/* Named escapes; any other escaped character stands for itself. */
685 case 'a':
686 c = 7;
687 break;
688 case 'b':
689 c = '\b';
690 break;
691 case 'f':
692 c = '\f';
693 break;
694 case 'n':
695 c = '\n';
696 break;
697 case 'r':
698 c = '\r';
699 break;
700 case 't':
701 c = '\t';
702 break;
703 case 'v':
704 c = '\v';
705 break;
708 cachec(c);
710 free(s_line);
/*
 * Save the decoded bytes so cache can be reused for the name.
 * NOTE(review): n is 0 for an empty literal; malloc(0) may return NULL
 * on some platforms, which would trigger no_space() - confirm.
 */
712 n = cinc;
713 s = malloc(n);
714 if (s == NULL)
715 no_space();
717 memcpy(s, cache, n);
/* Pass 2: build the printable name; one byte uses single quotes, anything else double quotes. */
719 cinc = 0;
720 if (n == 1)
721 cachec('\'');
722 else
723 cachec('"');
725 for (i = 0; i < n; ++i) {
726 c = ((unsigned char *) s)[i];
/* cache[0] is the quote character chosen above. */
727 if (c == '\\' || c == cache[0]) {
728 cachec('\\');
729 cachec(c);
730 } else if (isprint(c))
731 cachec(c);
732 else {
733 cachec('\\');
734 switch (c) {
735 case 7:
736 cachec('a');
737 break;
738 case '\b':
739 cachec('b');
740 break;
741 case '\f':
742 cachec('f');
743 break;
744 case '\n':
745 cachec('n');
746 break;
747 case '\r':
748 cachec('r');
749 break;
750 case '\t':
751 cachec('t');
752 break;
753 case '\v':
754 cachec('v');
755 break;
/* Anything else is written as three octal digits. */
756 default:
757 cachec(((c >> 6) & 7) + '0');
758 cachec(((c >> 3) & 7) + '0');
759 cachec((c & 7) + '0');
760 break;
765 if (n == 1)
766 cachec('\'');
767 else
768 cachec('"');
770 cachec(NUL);
771 bp = lookup(cache);
772 bp->class = TERM;
773 if (n == 1 && bp->value == UNDEFINED)
774 bp->value = *(unsigned char *) s;
775 free(s);
777 return (bp);
/*
 * Return 1 when `name' is one of the names yacc keeps for itself:
 * ".", "$accept", "$end", or "$$" followed by one or more digits and
 * nothing else.  Return 0 otherwise.
 */
int
is_reserved(char *name)
{
	static const char *const fixed[] = { ".", "$accept", "$end" };
	const char *p;
	size_t i;

	for (i = 0; i < sizeof(fixed) / sizeof(fixed[0]); ++i) {
		if (strcmp(name, fixed[i]) == 0)
			return (1);
	}

	if (name[0] != '$' || name[1] != '$')
		return (0);

	p = name + 2;
	if (!isdigit((unsigned char) *p))
		return (0);
	do {
		++p;
	} while (isdigit((unsigned char) *p));

	return (*p == '\0');
}
802 bucket *
803 get_name(void)
805 int c;
807 cinc = 0;
808 for (c = (unsigned char) *cptr; IS_IDENT(c); c = (unsigned char) *++cptr)
809 cachec(c);
810 cachec(NUL);
812 if (is_reserved(cache))
813 used_reserved(cache);
815 return (lookup(cache));
820 get_number(void)
822 unsigned long ul;
823 char *p;
825 ul = strtoul(cptr, &p, 10);
826 if (ul > INT_MAX)
827 syntax_error(lineno, line, cptr);
828 cptr = p;
829 return (ul);
833 char *
834 get_tag(void)
836 int c, i;
837 char *s;
838 int t_lineno = lineno;
839 char *t_line = dup_line();
840 char *t_cptr = t_line + (cptr - line);
842 ++cptr;
843 c = nextc();
844 if (c == EOF)
845 unexpected_EOF();
846 if (!isalpha(c) && c != '_' && c != '$')
847 illegal_tag(t_lineno, t_line, t_cptr);
849 cinc = 0;
850 do {
851 cachec(c);
852 c = (unsigned char) *++cptr;
853 } while (IS_IDENT(c));
854 cachec(NUL);
856 c = nextc();
857 if (c == EOF)
858 unexpected_EOF();
859 if (c != '>')
860 illegal_tag(t_lineno, t_line, t_cptr);
861 free(t_line);
862 ++cptr;
864 for (i = 0; i < ntags; ++i) {
865 if (strcmp(cache, tag_table[i]) == 0)
866 return (tag_table[i]);
869 if (ntags >= tagmax) {
870 tagmax += 16;
871 tag_table = reallocarray(tag_table, tagmax, sizeof(char *));
872 if (tag_table == NULL)
873 no_space();
875 s = malloc(cinc);
876 if (s == NULL)
877 no_space();
878 strlcpy(s, cache, cinc);
879 tag_table[ntags] = s;
880 ++ntags;
881 return (s);
885 void
886 declare_tokens(int assoc)
888 int c;
889 bucket *bp;
890 int value;
891 char *tag = 0;
893 if (assoc != TOKEN)
894 ++prec;
896 c = nextc();
897 if (c == EOF)
898 unexpected_EOF();
899 if (c == '<') {
900 tag = get_tag();
901 c = nextc();
902 if (c == EOF)
903 unexpected_EOF();
905 for (;;) {
906 if (isalpha(c) || c == '_' || c == '.' || c == '$')
907 bp = get_name();
908 else if (c == '\'' || c == '"')
909 bp = get_literal();
910 else
911 return;
913 if (bp == goal)
914 tokenized_start(bp->name);
915 bp->class = TERM;
917 if (tag) {
918 if (bp->tag && tag != bp->tag)
919 retyped_warning(bp->name);
920 bp->tag = tag;
922 if (assoc != TOKEN) {
923 if (bp->prec && prec != bp->prec)
924 reprec_warning(bp->name);
925 bp->assoc = assoc;
926 bp->prec = prec;
928 c = nextc();
929 if (c == EOF)
930 unexpected_EOF();
931 if (isdigit(c)) {
932 value = get_number();
933 if (bp->value != UNDEFINED && value != bp->value)
934 revalued_warning(bp->name);
935 bp->value = value;
936 c = nextc();
937 if (c == EOF)
938 unexpected_EOF();
945 * %expect requires special handling as it really isn't part of the yacc
946 * grammar only a flag for yacc proper.
948 static void
949 declare_expect(int assoc)
951 int c;
953 if (assoc != EXPECT)
954 ++prec;
957 * Stay away from nextc - doesn't detect EOL and will read to EOF.
959 c = (unsigned char) *++cptr;
960 if (c == EOF)
961 unexpected_EOF();
963 for (;;) {
964 if (isdigit(c)) {
965 SRexpect = get_number();
966 break;
969 * Looking for number before EOL.
970 * Spaces, tabs, and numbers are ok.
971 * Words, punc., etc. are syntax errors.
973 else if (c == '\n' || isalpha(c) || !isspace(c)) {
974 syntax_error(lineno, line, cptr);
975 } else {
976 c = (unsigned char) *++cptr;
977 if (c == EOF)
978 unexpected_EOF();
984 void
985 declare_types(void)
987 int c;
988 bucket *bp;
989 char *tag;
991 c = nextc();
992 if (c == EOF)
993 unexpected_EOF();
994 if (c != '<')
995 syntax_error(lineno, line, cptr);
996 tag = get_tag();
998 for (;;) {
999 c = nextc();
1000 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1001 bp = get_name();
1002 else if (c == '\'' || c == '"')
1003 bp = get_literal();
1004 else
1005 return;
1007 if (bp->tag && tag != bp->tag)
1008 retyped_warning(bp->name);
1009 bp->tag = tag;
1014 void
1015 declare_start(void)
1017 int c;
1018 bucket *bp;
1020 c = nextc();
1021 if (c == EOF)
1022 unexpected_EOF();
1023 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1024 syntax_error(lineno, line, cptr);
1025 bp = get_name();
1026 if (bp->class == TERM)
1027 terminal_start(bp->name);
1028 if (goal && goal != bp)
1029 restarted_warning();
1030 goal = bp;
1034 void
1035 read_declarations(void)
1037 int c, k;
1039 cache_size = 256;
1040 cache = malloc(cache_size);
1041 if (cache == NULL)
1042 no_space();
1044 for (;;) {
1045 c = nextc();
1046 if (c == EOF)
1047 unexpected_EOF();
1048 if (c != '%')
1049 syntax_error(lineno, line, cptr);
1050 switch (k = keyword()) {
1051 case MARK:
1052 return;
1054 case IDENT:
1055 copy_ident();
1056 break;
1058 case TEXT:
1059 copy_text();
1060 break;
1062 case UNION:
1063 copy_union();
1064 break;
1066 case TOKEN:
1067 case LEFT:
1068 case RIGHT:
1069 case NONASSOC:
1070 declare_tokens(k);
1071 break;
1073 case EXPECT:
1074 declare_expect(k);
1075 break;
1077 case TYPE:
1078 declare_types();
1079 break;
1081 case START:
1082 declare_start();
1083 break;
1089 void
1090 initialize_grammar(void)
1092 nitems = 4;
1093 maxitems = 300;
1094 pitem = calloc(maxitems, sizeof(bucket *));
1095 if (pitem == NULL)
1096 no_space();
1098 nrules = 3;
1099 maxrules = 100;
1100 plhs = reallocarray(NULL, maxrules, sizeof(bucket *));
1101 if (plhs == NULL)
1102 no_space();
1103 plhs[0] = 0;
1104 plhs[1] = 0;
1105 plhs[2] = 0;
1106 rprec = reallocarray(NULL, maxrules, sizeof(short));
1107 if (rprec == NULL)
1108 no_space();
1109 rprec[0] = 0;
1110 rprec[1] = 0;
1111 rprec[2] = 0;
1112 rassoc = reallocarray(NULL, maxrules, sizeof(char));
1113 if (rassoc == NULL)
1114 no_space();
1115 rassoc[0] = TOKEN;
1116 rassoc[1] = TOKEN;
1117 rassoc[2] = TOKEN;
1121 void
1122 expand_items(void)
1124 int olditems = maxitems;
1126 maxitems += 300;
1127 pitem = reallocarray(pitem, maxitems, sizeof(bucket *));
1128 if (pitem == NULL)
1129 no_space();
1130 memset(pitem + olditems, 0, (maxitems - olditems) * sizeof(bucket *));
1134 void
1135 expand_rules(void)
1137 maxrules += 100;
1138 plhs = reallocarray(plhs, maxrules, sizeof(bucket *));
1139 if (plhs == NULL)
1140 no_space();
1141 rprec = reallocarray(rprec, maxrules, sizeof(short));
1142 if (rprec == NULL)
1143 no_space();
1144 rassoc = reallocarray(rassoc, maxrules, sizeof(char));
1145 if (rassoc == NULL)
1146 no_space();
1150 void
1151 advance_to_start(void)
1153 int c;
1154 bucket *bp;
1155 char *s_cptr;
1156 int s_lineno;
1158 for (;;) {
1159 c = nextc();
1160 if (c != '%')
1161 break;
1162 s_cptr = cptr;
1163 switch (keyword()) {
1164 case MARK:
1165 no_grammar();
1167 case TEXT:
1168 copy_text();
1169 break;
1171 case START:
1172 declare_start();
1173 break;
1175 default:
1176 syntax_error(lineno, line, s_cptr);
1180 c = nextc();
1181 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1182 syntax_error(lineno, line, cptr);
1183 bp = get_name();
1184 if (goal == NULL) {
1185 if (bp->class == TERM)
1186 terminal_start(bp->name);
1187 goal = bp;
1189 s_lineno = lineno;
1190 c = nextc();
1191 if (c == EOF)
1192 unexpected_EOF();
1193 if (c != ':')
1194 syntax_error(lineno, line, cptr);
1195 start_rule(bp, s_lineno);
1196 ++cptr;
1200 void
1201 start_rule(bucket * bp, int s_lineno)
1203 if (bp->class == TERM)
1204 terminal_lhs(s_lineno);
1205 bp->class = NONTERM;
1206 if (nrules >= maxrules)
1207 expand_rules();
1208 plhs[nrules] = bp;
1209 rprec[nrules] = UNDEFINED;
1210 rassoc[nrules] = TOKEN;
1214 void
1215 end_rule(void)
1217 int i;
1219 if (!last_was_action && plhs[nrules]->tag) {
1220 for (i = nitems - 1; pitem[i]; --i)
1221 continue;
1222 if (i == maxitems - 1 || pitem[i + 1] == 0 ||
1223 pitem[i + 1]->tag != plhs[nrules]->tag)
1224 default_action_warning();
1226 last_was_action = 0;
1227 if (nitems >= maxitems)
1228 expand_items();
1229 pitem[nitems] = 0;
1230 ++nitems;
1231 ++nrules;
1235 void
1236 insert_empty_rule(void)
1238 bucket *bp, **bpp;
1240 assert(cache);
1241 snprintf(cache, cache_size, "$$%d", ++gensym);
1242 bp = make_bucket(cache);
1243 last_symbol->next = bp;
1244 last_symbol = bp;
1245 bp->tag = plhs[nrules]->tag;
1246 bp->class = NONTERM;
1248 if ((nitems += 2) > maxitems)
1249 expand_items();
1250 bpp = pitem + nitems - 1;
1251 *bpp-- = bp;
1252 while ((bpp[0] = bpp[-1]))
1253 --bpp;
1255 if (++nrules >= maxrules)
1256 expand_rules();
1257 plhs[nrules] = plhs[nrules - 1];
1258 plhs[nrules - 1] = bp;
1259 rprec[nrules] = rprec[nrules - 1];
1260 rprec[nrules - 1] = 0;
1261 rassoc[nrules] = rassoc[nrules - 1];
1262 rassoc[nrules - 1] = TOKEN;
1266 void
1267 add_symbol(void)
1269 int c;
1270 bucket *bp;
1271 int s_lineno = lineno;
1273 c = (unsigned char) *cptr;
1274 if (c == '\'' || c == '"')
1275 bp = get_literal();
1276 else
1277 bp = get_name();
1279 c = nextc();
1280 if (c == ':') {
1281 end_rule();
1282 start_rule(bp, s_lineno);
1283 ++cptr;
1284 return;
1286 if (last_was_action)
1287 insert_empty_rule();
1288 last_was_action = 0;
1290 if (++nitems > maxitems)
1291 expand_items();
1292 pitem[nitems - 1] = bp;
/*
 * Copy the action that starts at cptr into action_file as one case of
 * a switch statement, labelled with the rule number less two.  While
 * copying, $$ is replaced by a reference to yyval and $N by a
 * reference to yyvsp, with the tag appended as a member name when one
 * applies.  String literals and comments are copied without
 * substitution.  The action ends at the '}' that closes it, or at a
 * ';' outside any braces.
 */
1296 void
1297 copy_action(void)
1299 int c, i, n, depth, quote;
1300 char *tag;
1301 FILE *f = action_file;
1302 int a_lineno = lineno;
1303 char *a_line = dup_line();
1304 char *a_cptr = a_line + (cptr - line);
/* A second action in a row: the earlier one becomes a generated empty rule. */
1306 if (last_was_action)
1307 insert_empty_rule();
1308 last_was_action = 1;
1310 fprintf(f, "case %d:\n", nrules - 2);
1311 if (!lflag)
1312 fprintf(f, line_format, lineno, input_file_name);
1313 if (*cptr == '=')
1314 ++cptr;
/* n = number of right-hand-side items read so far for this rule. */
1316 n = 0;
1317 for (i = nitems - 1; pitem[i]; --i)
1318 ++n;
1320 depth = 0;
1321 loop:
1322 c = (unsigned char) *cptr;
1323 if (c == '$') {
/* Dollar followed by a tag in angle brackets: the tag is given explicitly. */
1324 if (cptr[1] == '<') {
1325 int d_lineno = lineno;
1326 char *d_line = dup_line();
1327 char *d_cptr = d_line + (cptr - line);
1329 ++cptr;
1330 tag = get_tag();
1331 c = (unsigned char) *cptr;
1332 if (c == '$') {
1333 fprintf(f, "yyval.%s", tag);
1334 ++cptr;
1335 free(d_line);
1336 goto loop;
1337 } else if (isdigit(c)) {
1338 i = get_number();
1339 if (i > n)
1340 dollar_warning(d_lineno, i);
1341 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1342 free(d_line);
1343 goto loop;
1344 } else if (c == '-' && isdigit((unsigned char) cptr[1])) {
1345 ++cptr;
1346 i = -get_number() - n;
1347 fprintf(f, "yyvsp[%d].%s", i, tag);
1348 free(d_line);
1349 goto loop;
1350 } else
1351 dollar_error(d_lineno, d_line, d_cptr);
/* $$: value of the left-hand side; its tag is required once any tag exists. */
1352 } else if (cptr[1] == '$') {
1353 if (ntags) {
1354 tag = plhs[nrules]->tag;
1355 if (tag == NULL)
1356 untyped_lhs();
1357 fprintf(f, "yyval.%s", tag);
1358 } else
1359 fprintf(f, "yyval");
1360 cptr += 2;
1361 goto loop;
/* $N: value of the N-th right-hand-side item, addressed relative to the stack top. */
1362 } else if (isdigit((unsigned char) cptr[1])) {
1363 ++cptr;
1364 i = get_number();
1365 if (ntags) {
1366 if (i <= 0 || i > n)
1367 unknown_rhs(i);
1368 tag = pitem[nitems + i - n - 1]->tag;
1369 if (tag == NULL)
1370 untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1371 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1372 } else {
1373 if (i > n)
1374 dollar_warning(lineno, i);
1375 fprintf(f, "yyvsp[%d]", i - n);
1377 goto loop;
/*
 * $-N: an item below this rule on the stack.
 * NOTE(review): unlike the tagged form above, no check is made that a
 * digit follows the '-' - confirm this is intended.
 */
1378 } else if (cptr[1] == '-') {
1379 cptr += 2;
1380 i = get_number();
1381 if (ntags)
1382 unknown_rhs(-i);
1383 fprintf(f, "yyvsp[%d]", -i - n);
1384 goto loop;
/* Copy a whole identifier at once so a '$' inside it is not substituted. */
1387 if (isalpha(c) || c == '_' || c == '$') {
1388 do {
1389 putc(c, f);
1390 c = (unsigned char) *++cptr;
1391 } while (isalnum(c) || c == '_' || c == '$');
1392 goto loop;
1394 putc(c, f);
1395 ++cptr;
1396 switch (c) {
1397 case '\n':
1398 next_line:
1399 get_line();
1400 if (line)
1401 goto loop;
1402 unterminated_action(a_lineno, a_line, a_cptr);
/* A ';' outside braces ends an action that was written without braces. */
1404 case ';':
1405 if (depth > 0)
1406 goto loop;
1407 fprintf(f, "\nbreak;\n");
1408 free(a_line);
1409 return;
1411 case '{':
1412 ++depth;
1413 goto loop;
1415 case '}':
1416 if (--depth > 0)
1417 goto loop;
1418 fprintf(f, "\nbreak;\n");
1419 free(a_line);
1420 return;
/* Quoted literal: copy up to the matching quote, honouring backslash escapes. */
1422 case '\'':
1423 case '"': {
1424 int s_lineno = lineno;
1425 char *s_line = dup_line();
1426 char *s_cptr = s_line + (cptr - line - 1);
1428 quote = c;
1429 for (;;) {
1430 c = (unsigned char) *cptr++;
1431 putc(c, f);
1432 if (c == quote) {
1433 free(s_line);
1434 goto loop;
1436 if (c == '\n')
1437 unterminated_string(s_lineno, s_line, s_cptr);
1438 if (c == '\\') {
1439 c = (unsigned char) *cptr++;
1440 putc(c, f);
1441 if (c == '\n') {
1442 get_line();
1443 if (line == NULL)
1444 unterminated_string(s_lineno, s_line, s_cptr);
1450 case '/':
1451 c = (unsigned char) *cptr;
/* Two slashes: write the rest of the line as a block comment. */
1452 if (c == '/') {
1453 putc('*', f);
1454 while ((c = (unsigned char) *++cptr) != '\n') {
1455 if (c == '*' && cptr[1] == '/')
1456 fprintf(f, "* ");
1457 else
1458 putc(c, f);
1460 fprintf(f, "*/\n");
1461 goto next_line;
/* Block comment: copy through to its terminator, across lines if needed. */
1463 if (c == '*') {
1464 int c_lineno = lineno;
1465 char *c_line = dup_line();
1466 char *c_cptr = c_line + (cptr - line - 1);
1468 putc('*', f);
1469 ++cptr;
1470 for (;;) {
1471 c = (unsigned char) *cptr++;
1472 putc(c, f);
1473 if (c == '*' && *cptr == '/') {
1474 putc('/', f);
1475 ++cptr;
1476 free(c_line);
1477 goto loop;
1479 if (c == '\n') {
1480 get_line();
1481 if (line == NULL)
1482 unterminated_comment(c_lineno, c_line, c_cptr);
1486 goto loop;
1488 default:
1489 goto loop;
1495 mark_symbol(void)
1497 int c;
1498 bucket *bp = NULL;
1500 c = (unsigned char) cptr[1];
1501 if (c == '%' || c == '\\') {
1502 cptr += 2;
1503 return (1);
1505 if (c == '=')
1506 cptr += 2;
1507 else if ((c == 'p' || c == 'P') &&
1508 ((c = cptr[2]) == 'r' || c == 'R') &&
1509 ((c = cptr[3]) == 'e' || c == 'E') &&
1510 ((c = cptr[4]) == 'c' || c == 'C') &&
1511 ((c = (unsigned char) cptr[5], !IS_IDENT(c))))
1512 cptr += 5;
1513 else
1514 syntax_error(lineno, line, cptr);
1516 c = nextc();
1517 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1518 bp = get_name();
1519 else if (c == '\'' || c == '"')
1520 bp = get_literal();
1521 else {
1522 syntax_error(lineno, line, cptr);
1523 /* NOTREACHED */
1526 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1527 prec_redeclared();
1529 rprec[nrules] = bp->prec;
1530 rassoc[nrules] = bp->assoc;
1531 return (0);
1535 void
1536 read_grammar(void)
1538 int c;
1540 initialize_grammar();
1541 advance_to_start();
1543 for (;;) {
1544 c = nextc();
1545 if (c == EOF)
1546 break;
1547 if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
1548 c == '"')
1549 add_symbol();
1550 else if (c == '{' || c == '=')
1551 copy_action();
1552 else if (c == '|') {
1553 end_rule();
1554 start_rule(plhs[nrules - 1], 0);
1555 ++cptr;
1556 } else if (c == '%') {
1557 if (mark_symbol())
1558 break;
1559 } else
1560 syntax_error(lineno, line, cptr);
1562 end_rule();
1566 void
1567 free_tags(void)
1569 int i;
1571 if (tag_table == NULL)
1572 return;
1574 for (i = 0; i < ntags; ++i) {
1575 assert(tag_table[i]);
1576 free(tag_table[i]);
1578 free(tag_table);
1582 void
1583 pack_names(void)
1585 bucket *bp;
1586 char *p, *s, *t;
1588 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1589 for (bp = first_symbol; bp; bp = bp->next)
1590 name_pool_size += strlen(bp->name) + 1;
1591 name_pool = malloc(name_pool_size);
1592 if (name_pool == NULL)
1593 no_space();
1595 strlcpy(name_pool, "$accept", name_pool_size);
1596 strlcpy(name_pool + 8, "$end", name_pool_size - 8);
1597 t = name_pool + 13;
1598 for (bp = first_symbol; bp; bp = bp->next) {
1599 p = t;
1600 s = bp->name;
1601 while ((*t++ = *s++))
1602 continue;
1603 free(bp->name);
1604 bp->name = p;
1609 void
1610 check_symbols(void)
1612 bucket *bp;
1614 if (goal->class == UNKNOWN)
1615 undefined_goal(goal->name);
1617 for (bp = first_symbol; bp; bp = bp->next) {
1618 if (bp->class == UNKNOWN) {
1619 undefined_symbol_warning(bp->name);
1620 bp->class = TERM;
/*
 * Number the symbols and build the per-symbol arrays symbol_name,
 * symbol_value, symbol_prec and symbol_assoc.  Terminals take indices
 * 1 .. ntokens-1 with index 0 given to the pool entry at name_pool + 8;
 * start_symbol (== ntokens) is given the pool entry at name_pool; the
 * goal comes next and the remaining nonterminals after it.  Terminals
 * without an explicit value are given unused values from 257 upwards.
 */
1626 void
1627 pack_symbols(void)
1629 bucket *bp;
1630 bucket **v;
1631 int i, j, k, n;
/* Count the symbols; the initial 2 and 1 account for the two entries added here. */
1633 nsyms = 2;
1634 ntokens = 1;
1635 for (bp = first_symbol; bp; bp = bp->next) {
1636 ++nsyms;
1637 if (bp->class == TERM)
1638 ++ntokens;
1640 start_symbol = ntokens;
1641 nvars = nsyms - ntokens;
1643 symbol_name = reallocarray(NULL, nsyms, sizeof(char *));
1644 if (symbol_name == NULL)
1645 no_space();
1646 symbol_value = reallocarray(NULL, nsyms, sizeof(short));
1647 if (symbol_value == NULL)
1648 no_space();
1649 symbol_prec = reallocarray(NULL, nsyms, sizeof(short));
1650 if (symbol_prec == NULL)
1651 no_space();
1652 symbol_assoc = malloc(nsyms);
1653 if (symbol_assoc == NULL)
1654 no_space();
/* v: temporary array with terminals in 1..ntokens-1 and the rest after start_symbol. */
1656 v = reallocarray(NULL, nsyms, sizeof(bucket *));
1657 if (v == NULL)
1658 no_space();
1660 v[0] = 0;
1661 v[start_symbol] = 0;
1663 i = 1;
1664 j = start_symbol + 1;
1665 for (bp = first_symbol; bp; bp = bp->next) {
1666 if (bp->class == TERM)
1667 v[i++] = bp;
1668 else
1669 v[j++] = bp;
1671 assert(i == ntokens && j == nsyms);
1673 for (i = 1; i < ntokens; ++i)
1674 v[i]->index = i;
/*
 * i equals ntokens here, so the pre-increment below starts the scan at
 * start_symbol + 1.  The goal takes the first nonterminal index and
 * the others follow in list order.
 */
1676 goal->index = start_symbol + 1;
1677 k = start_symbol + 2;
1678 while (++i < nsyms)
1679 if (v[i] != goal) {
1680 v[i]->index = k;
1681 ++k;
/* Nonterminal values: 0 for the goal, then 1, 2, ... in list order. */
1683 goal->value = 0;
1684 k = 1;
1685 for (i = start_symbol + 1; i < nsyms; ++i) {
1686 if (v[i] != goal) {
1687 v[i]->value = k;
1688 ++k;
/* Collect token values above 256 in ascending order, using symbol_value[0..k-1] as scratch. */
1692 k = 0;
1693 for (i = 1; i < ntokens; ++i) {
1694 n = v[i]->value;
1695 if (n > 256) {
1696 for (j = k++; j > 0 && symbol_value[j - 1] > n; --j)
1697 symbol_value[j] = symbol_value[j - 1];
1698 symbol_value[j] = n;
/* The first terminal defaults to 256.  NOTE(review): presumably the error token - confirm. */
1702 if (v[1]->value == UNDEFINED)
1703 v[1]->value = 256;
/* Give the remaining terminals the next free value, skipping those collected above. */
1705 j = 0;
1706 n = 257;
1707 for (i = 2; i < ntokens; ++i) {
1708 if (v[i]->value == UNDEFINED) {
1709 while (j < k && n == symbol_value[j]) {
1710 while (++j < k && n == symbol_value[j])
1711 continue;
1712 ++n;
1714 v[i]->value = n;
1715 ++n;
/* Fill the output arrays: entry 0, the terminals, start_symbol, then the nonterminals. */
1719 symbol_name[0] = name_pool + 8;
1720 symbol_value[0] = 0;
1721 symbol_prec[0] = 0;
1722 symbol_assoc[0] = TOKEN;
1723 for (i = 1; i < ntokens; ++i) {
1724 symbol_name[i] = v[i]->name;
1725 symbol_value[i] = v[i]->value;
1726 symbol_prec[i] = v[i]->prec;
1727 symbol_assoc[i] = v[i]->assoc;
1729 symbol_name[start_symbol] = name_pool;
1730 symbol_value[start_symbol] = -1;
1731 symbol_prec[start_symbol] = 0;
1732 symbol_assoc[start_symbol] = TOKEN;
1733 for (++i; i < nsyms; ++i) {
1734 k = v[i]->index;
1735 symbol_name[k] = v[i]->name;
1736 symbol_value[k] = v[i]->value;
1737 symbol_prec[k] = v[i]->prec;
1738 symbol_assoc[k] = v[i]->assoc;
1741 free(v);
1745 void
1746 pack_grammar(void)
1748 int i, j;
1749 int assoc, pprec;
1751 ritem = reallocarray(NULL, nitems, sizeof(short));
1752 if (ritem == NULL)
1753 no_space();
1754 rlhs = reallocarray(NULL, nrules, sizeof(short));
1755 if (rlhs == NULL)
1756 no_space();
1757 rrhs = reallocarray(NULL, nrules + 1, sizeof(short));
1758 if (rrhs == NULL)
1759 no_space();
1760 rprec = reallocarray(rprec, nrules, sizeof(short));
1761 if (rprec == NULL)
1762 no_space();
1763 rassoc = realloc(rassoc, nrules);
1764 if (rassoc == NULL)
1765 no_space();
1767 ritem[0] = -1;
1768 ritem[1] = goal->index;
1769 ritem[2] = 0;
1770 ritem[3] = -2;
1771 rlhs[0] = 0;
1772 rlhs[1] = 0;
1773 rlhs[2] = start_symbol;
1774 rrhs[0] = 0;
1775 rrhs[1] = 0;
1776 rrhs[2] = 1;
1778 j = 4;
1779 for (i = 3; i < nrules; ++i) {
1780 rlhs[i] = plhs[i]->index;
1781 rrhs[i] = j;
1782 assoc = TOKEN;
1783 pprec = 0;
1784 while (pitem[j]) {
1785 ritem[j] = pitem[j]->index;
1786 if (pitem[j]->class == TERM) {
1787 pprec = pitem[j]->prec;
1788 assoc = pitem[j]->assoc;
1790 ++j;
1792 ritem[j] = -i;
1793 ++j;
1794 if (rprec[i] == UNDEFINED) {
1795 rprec[i] = pprec;
1796 rassoc[i] = assoc;
1799 rrhs[i] = j;
1801 free(plhs);
1802 free(pitem);
/*
 * Write the numbered rules to verbose_file; does nothing unless vflag
 * is set.  The first rule for a left-hand side is printed with the
 * symbol name and a colon; later rules for the same symbol are
 * indented and introduced by '|'.
 *
 * NOTE(review): runs of blanks inside the format strings below may
 * have been collapsed when this listing was extracted - compare with
 * the repository copy before relying on the exact column layout.
 */
1806 void
1807 print_grammar(void)
1809 int i, j, k;
1810 int spacing = 0;
1811 FILE *f = verbose_file;
1813 if (!vflag)
1814 return;
/* k walks ritem; each right-hand side ends at a negative entry. */
1816 k = 1;
1817 for (i = 2; i < nrules; ++i) {
1818 if (rlhs[i] != rlhs[i - 1]) {
1819 if (i != 2)
1820 fprintf(f, "\n");
1821 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
/* Remember the indent needed to line up the '|' of the following alternatives. */
1822 spacing = strlen(symbol_name[rlhs[i]]) + 1;
1823 } else {
1824 fprintf(f, "%4d ", i - 2);
1825 j = spacing;
1826 while (--j >= 0)
1827 putc(' ', f);
1828 putc('|', f);
1831 while (ritem[k] >= 0) {
1832 fprintf(f, " %s", symbol_name[ritem[k]]);
1833 ++k;
/* Step over the negative entry that ends this right-hand side. */
1835 ++k;
1836 putc('\n', f);
1841 void
1842 reader(void)
1844 write_section(banner);
1845 create_symbol_table();
1846 read_declarations();
1847 read_grammar();
1848 free_symbol_table();
1849 free_tags();
1850 pack_names();
1851 check_symbols();
1852 pack_symbols();
1853 pack_grammar();
1854 free_symbols();
1855 print_grammar();