printf(1): don't use getopt
[dragonfly.git] / usr.bin / yacc / reader.c
blobd077b3cbaa070f51697aa48d42f2d990ad37ac78
1 /*
2 * Copyright (c) 1989 The Regents of the University of California.
3 * All rights reserved.
5 * This code is derived from software contributed to Berkeley by
6 * Robert Paul Corbett.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
36 * $FreeBSD: src/usr.bin/yacc/reader.c,v 1.8.2.1 2001/10/05 03:00:44 obrien Exp $
37 * $DragonFly: src/usr.bin/yacc/reader.c,v 1.5 2005/01/05 15:26:05 joerg Exp $
39 * @(#)reader.c 5.7 (Berkeley) 1/20/91
42 #include <stdlib.h>
43 #include <string.h>
44 #include "defs.h"
46 /* The line size must be a positive integer. One hundred was chosen */
47 /* because few lines in Yacc input grammars exceed 100 characters. */
48 /* Note that if a line exceeds LINESIZE characters, the line buffer */
49 /* will be expanded to accommodate it. */
51 #define LINESIZE 100
/* Scratch buffer filled one character at a time by cachec(): cinc is the */
/* index of the next character to store, cache_size the allocated size. */
53 char *cache;
54 int cinc, cache_size;
/* Table of the distinct <tag> names returned by get_tag(): ntags entries */
/* are in use, tagmax slots are allocated. */
56 int ntags, tagmax;
57 char **tag_table;
/* saw_eof is set by get_line() once the input is exhausted; unionized is */
/* set by copy_union() after the first %union has been copied. */
59 char saw_eof, unionized;
/* The current input line: `line` is the buffer filled by get_line() (it is */
/* terminated by '\n', not by NUL), linesize its allocated size, and cptr */
/* the current scan position inside it.  line and cptr are NULL at EOF. */
60 const char *cptr;
61 char *line;
62 int linesize;
/* goal is the start symbol chosen by declare_start()/advance_to_start(). */
/* prec is bumped by declare_tokens() for every precedence declaration. */
/* gensym numbers the "$$N" symbols created by insert_empty_rule(). */
/* last_was_action is set by copy_action() and cleared when a rule ends or */
/* another symbol is added. */
64 bucket *goal;
65 int prec;
66 int gensym;
67 char last_was_action;
/* pitem holds the right-hand-side symbols of all rules; end_rule() stores */
/* a null pointer after each rule.  maxitems is the allocated slot count. */
69 int maxitems;
70 bucket **pitem;
/* plhs holds the left-hand-side symbol of each rule; maxrules is the */
/* allocated slot count. */
72 int maxrules;
73 bucket **plhs;
/* Single allocation built by pack_names() that holds every symbol name. */
75 int name_pool_size;
76 char *name_pool;
/* Format used to emit #line directives (line number, input file name). */
78 static const char line_format[] = "#line %d \"%s\"\n";
/* Forward declarations of the file-local helpers defined below. */
80 static void add_symbol(void);
81 static void advance_to_start(void);
82 static void cachec(int);
83 static void check_symbols(void);
84 static void copy_action(void);
85 static void copy_ident(void);
86 static void copy_text(void);
87 static void copy_union(void);
88 static void declare_start(void);
89 static void declare_tokens(int);
90 static void declare_types(void);
91 static char *dup_line(void);
92 static void end_rule(void);
93 static void expand_items(void);
94 static void expand_rules(void);
95 static void free_tags(void);
96 static void get_line(void);
97 static bucket *get_literal(void);
98 static bucket *get_name(void);
99 static int get_number(void);
100 static char *get_tag(void);
101 static int hexval(int);
102 static void initialize_grammar(void);
103 static void insert_empty_rule(void);
104 static int is_reserved(char *);
105 static int keyword(void);
106 static int mark_symbol(void);
107 static int nextc(void);
108 static void pack_grammar(void);
109 static void pack_names(void);
110 static void pack_symbols(void);
111 static void print_grammar(void);
112 static void read_declarations(void);
113 static void read_grammar(void);
114 static void skip_comment(void);
115 static void start_rule(bucket *, int);
117 static void
118 cachec(int c)
120 assert(cinc >= 0);
121 if (cinc >= cache_size)
123 cache_size += 256;
124 cache = REALLOC(cache, cache_size);
125 if (cache == 0) no_space();
127 cache[cinc] = c;
128 ++cinc;
132 static void
133 get_line(void)
135 FILE *f = input_file;
136 int c;
137 int i;
139 if (saw_eof || (c = getc(f)) == EOF)
141 if (line) { FREE(line); line = NULL; }
142 cptr = NULL;
143 saw_eof = 1;
144 return;
147 if (line == 0 || linesize != (LINESIZE + 1))
149 if (line) FREE(line);
150 linesize = LINESIZE + 1;
151 line = MALLOC(linesize);
152 if (line == 0) no_space();
155 i = 0;
156 ++lineno;
157 for (;;)
159 line[i] = c;
160 if (c == '\n') { cptr = line; return; }
161 if (++i >= linesize)
163 linesize += LINESIZE;
164 line = REALLOC(line, linesize);
165 if (line == 0) no_space();
167 c = getc(f);
168 if (c == EOF)
170 line[i] = '\n';
171 saw_eof = 1;
172 cptr = line;
173 return;
179 static char *
180 dup_line(void)
182 char *p, *s, *t;
184 if (line == 0) return (0);
185 s = line;
186 while (*s != '\n') ++s;
187 p = MALLOC(s - line + 1);
188 if (p == 0) no_space();
190 s = line;
191 t = p;
192 while ((*t++ = *s++) != '\n') continue;
193 return (p);
197 static void
198 skip_comment(void)
200 const char *s;
202 int st_lineno = lineno;
203 char *st_line = dup_line();
204 char *st_cptr = st_line + (cptr - line);
206 s = cptr + 2;
207 for (;;)
209 if (*s == '*' && s[1] == '/')
211 cptr = s + 2;
212 FREE(st_line);
213 return;
215 if (*s == '\n')
217 get_line();
218 if (line == NULL)
219 unterminated_comment(st_lineno, st_line, st_cptr);
220 s = cptr;
222 else
223 ++s;
228 static int
229 nextc(void)
231 const char *s;
233 if (line == 0)
235 get_line();
236 if (line == 0)
237 return (EOF);
240 s = cptr;
241 for (;;)
243 switch (*s)
245 case '\n':
246 get_line();
247 if (line == 0) return (EOF);
248 s = cptr;
249 break;
251 case ' ':
252 case '\t':
253 case '\f':
254 case '\r':
255 case '\v':
256 case ',':
257 case ';':
258 ++s;
259 break;
261 case '\\':
262 cptr = s;
263 return ('%');
265 case '/':
266 if (s[1] == '*')
268 cptr = s;
269 skip_comment();
270 s = cptr;
271 break;
273 else if (s[1] == '/')
275 get_line();
276 if (line == 0) return (EOF);
277 s = cptr;
278 break;
280 /* fall through */
282 default:
283 cptr = s;
284 return (*s);
290 static int
291 keyword(void)
293 int c;
294 const char *t_cptr = cptr;
296 c = *++cptr;
297 if (isalpha(c))
299 cinc = 0;
300 for (;;)
302 if (isalpha(c))
304 if (isupper(c)) c = tolower(c);
305 cachec(c);
307 else if (isdigit(c) || c == '_' || c == '.' || c == '$')
308 cachec(c);
309 else
310 break;
311 c = *++cptr;
313 cachec(NUL);
315 if (strcmp(cache, "token") == 0 || strcmp(cache, "term") == 0)
316 return (TOKEN);
317 if (strcmp(cache, "type") == 0)
318 return (TYPE);
319 if (strcmp(cache, "left") == 0)
320 return (LEFT);
321 if (strcmp(cache, "right") == 0)
322 return (RIGHT);
323 if (strcmp(cache, "nonassoc") == 0 || strcmp(cache, "binary") == 0)
324 return (NONASSOC);
325 if (strcmp(cache, "start") == 0)
326 return (START);
327 if (strcmp(cache, "union") == 0)
328 return (UNION);
329 if (strcmp(cache, "ident") == 0)
330 return (IDENT);
331 if (strcmp(cache, "expect") == 0)
332 return (EXPECT);
334 else
336 ++cptr;
337 if (c == '{')
338 return (TEXT);
339 if (c == '%' || c == '\\')
340 return (MARK);
341 if (c == '<')
342 return (LEFT);
343 if (c == '>')
344 return (RIGHT);
345 if (c == '0')
346 return (TOKEN);
347 if (c == '2')
348 return (NONASSOC);
350 syntax_error(lineno, line, t_cptr);
351 /*NOTREACHED*/
352 return (0);
356 static void
357 copy_ident(void)
359 int c;
360 FILE *f = output_file;
362 c = nextc();
363 if (c == EOF) unexpected_EOF();
364 if (c != '"') syntax_error(lineno, line, cptr);
365 ++outline;
366 fprintf(f, "#ident \"");
367 for (;;)
369 c = *++cptr;
370 if (c == '\n')
372 fprintf(f, "\"\n");
373 return;
375 putc(c, f);
376 if (c == '"')
378 putc('\n', f);
379 ++cptr;
380 return;
/*
 * copy_text - copy a literal code section to text_file.
 *
 * Copies input characters until a '%' or '\' immediately followed by '}'
 * is found outside of strings, character constants and comments.  A #line
 * directive is written first unless lflag is set.  If the input ends
 * before the terminator, unterminated_text() is called with the position
 * saved on entry.
 */
386 static void
387 copy_text(void)
389 int c;
390 int quote;
391 FILE *f = text_file;
/* need_newline is non-zero while the current output line has content, so */
/* that a newline can be added before returning. */
392 int need_newline = 0;
/* Saved for error reporting.  The -2 presumably backs up over the two */
/* characters of the directive that keyword() consumed -- TODO confirm. */
393 int t_lineno = lineno;
394 char *t_line = dup_line();
395 const char *t_cptr = t_line + (cptr - line - 2);
/* Nothing else on the directive's line: start copying at the next line. */
397 if (*cptr == '\n')
399 get_line();
400 if (line == 0)
401 unterminated_text(t_lineno, t_line, t_cptr);
403 if (!lflag) fprintf(f, line_format, lineno, input_file_name);
405 loop:
406 c = *cptr++;
407 switch (c)
409 case '\n':
410 next_line:
411 putc('\n', f);
412 need_newline = 0;
413 get_line();
414 if (line) goto loop;
415 unterminated_text(t_lineno, t_line, t_cptr);
/* String or character constant: copied verbatim up to the matching quote. */
/* A backslash-newline continues the constant onto the next input line; a */
/* bare newline inside it is an error. */
417 case '\'':
418 case '"':
420 int s_lineno = lineno;
421 char *s_line = dup_line();
422 const char *s_cptr = s_line + (cptr - line - 1);
424 quote = c;
425 putc(c, f);
426 for (;;)
428 c = *cptr++;
429 putc(c, f);
430 if (c == quote)
432 need_newline = 1;
433 FREE(s_line);
434 goto loop;
436 if (c == '\n')
437 unterminated_string(s_lineno, s_line, s_cptr);
438 if (c == '\\')
440 c = *cptr++;
441 putc(c, f);
442 if (c == '\n')
444 get_line();
445 if (line == 0)
446 unterminated_string(s_lineno, s_line, s_cptr);
452 case '/':
453 putc(c, f);
454 need_newline = 1;
455 c = *cptr;
/* A comment running to end of line is rewritten as a block comment; any */
/* star-slash inside it is written as "* " so the block is not closed early. */
456 if (c == '/')
458 putc('*', f);
459 while ((c = *++cptr) != '\n')
461 if (c == '*' && cptr[1] == '/')
462 fprintf(f, "* ");
463 else
464 putc(c, f);
466 fprintf(f, "*/");
467 goto next_line;
/* Block comment: copied verbatim, possibly across several input lines. */
469 if (c == '*')
471 int c_lineno = lineno;
472 char *c_line = dup_line();
473 const char *c_cptr = c_line + (cptr - line - 1);
475 putc('*', f);
476 ++cptr;
477 for (;;)
479 c = *cptr++;
480 putc(c, f);
481 if (c == '*' && *cptr == '/')
483 putc('/', f);
484 ++cptr;
485 FREE(c_line);
486 goto loop;
488 if (c == '\n')
490 get_line();
491 if (line == 0)
492 unterminated_comment(c_lineno, c_line, c_cptr);
496 need_newline = 1;
497 goto loop;
/* '%' or '\' followed by '}' ends the section; otherwise it is ordinary */
/* text and falls through to the default case. */
499 case '%':
500 case '\\':
501 if (*cptr == '}')
503 if (need_newline) putc('\n', f);
504 ++cptr;
505 FREE(t_line);
506 return;
508 /* fall through */
510 default:
511 putc(c, f);
512 need_newline = 1;
513 goto loop;
/*
 * copy_union - copy the body of a %union declaration.
 *
 * Writes "typedef union", the brace-delimited body and " YYSTYPE;" to
 * text_file.  When dflag is set, "typedef union" and the body are also
 * written to union_file (the trailing " YYSTYPE;" is not).  A second
 * %union is reported through over_unionized().  If the input ends before
 * the closing brace, unterminated_union() is called with the position
 * saved on entry.
 */
518 static void
519 copy_union(void)
521 int c;
522 int quote;
523 int depth;
/* Saved for error reporting.  The -6 presumably backs up to the start of */
/* the directive that keyword() consumed -- TODO confirm. */
524 int u_lineno = lineno;
525 char *u_line = dup_line();
526 const char *u_cptr = u_line + (cptr - line - 6);
528 if (unionized) over_unionized(cptr - 6);
529 unionized = 1;
531 if (!lflag)
532 fprintf(text_file, line_format, lineno, input_file_name);
534 fprintf(text_file, "typedef union");
535 if (dflag) fprintf(union_file, "typedef union");
/* depth counts unmatched '{'; the union ends when it returns to zero. */
537 depth = 0;
538 loop:
/* Every character is echoed before it is examined. */
539 c = *cptr++;
540 putc(c, text_file);
541 if (dflag) putc(c, union_file);
542 switch (c)
544 case '\n':
545 next_line:
546 get_line();
547 if (line == 0) unterminated_union(u_lineno, u_line, u_cptr);
548 goto loop;
550 case '{':
551 ++depth;
552 goto loop;
554 case '}':
555 if (--depth == 0)
557 fprintf(text_file, " YYSTYPE;\n");
558 FREE(u_line);
559 return;
561 goto loop;
/* String or character constant: copied verbatim up to the matching quote. */
/* A backslash-newline continues onto the next input line; a bare newline */
/* inside the constant is an error. */
563 case '\'':
564 case '"':
566 int s_lineno = lineno;
567 char *s_line = dup_line();
568 const char *s_cptr = s_line + (cptr - line - 1);
570 quote = c;
571 for (;;)
573 c = *cptr++;
574 putc(c, text_file);
575 if (dflag) putc(c, union_file);
576 if (c == quote)
578 FREE(s_line);
579 goto loop;
581 if (c == '\n')
582 unterminated_string(s_lineno, s_line, s_cptr);
583 if (c == '\\')
585 c = *cptr++;
586 putc(c, text_file);
587 if (dflag) putc(c, union_file);
588 if (c == '\n')
590 get_line();
591 if (line == 0)
592 unterminated_string(s_lineno, s_line, s_cptr);
598 case '/':
599 c = *cptr;
/* A comment running to end of line is rewritten as a block comment; any */
/* star-slash inside it is written as "* " so the block is not closed early. */
600 if (c == '/')
602 putc('*', text_file);
603 if (dflag) putc('*', union_file);
604 while ((c = *++cptr) != '\n')
606 if (c == '*' && cptr[1] == '/')
608 fprintf(text_file, "* ");
609 if (dflag) fprintf(union_file, "* ");
611 else
613 putc(c, text_file);
614 if (dflag) putc(c, union_file);
617 fprintf(text_file, "*/\n");
618 if (dflag) fprintf(union_file, "*/\n");
619 goto next_line;
/* Block comment: copied verbatim, possibly across several input lines. */
621 if (c == '*')
623 int c_lineno = lineno;
624 char *c_line = dup_line();
625 const char *c_cptr = c_line + (cptr - line - 1);
627 putc('*', text_file);
628 if (dflag) putc('*', union_file);
629 ++cptr;
630 for (;;)
632 c = *cptr++;
633 putc(c, text_file);
634 if (dflag) putc(c, union_file);
635 if (c == '*' && *cptr == '/')
637 putc('/', text_file);
638 if (dflag) putc('/', union_file);
639 ++cptr;
640 FREE(c_line);
641 goto loop;
643 if (c == '\n')
645 get_line();
646 if (line == 0)
647 unterminated_comment(c_lineno, c_line, c_cptr);
651 goto loop;
653 default:
654 goto loop;
/*
 * hexval - numeric value of a hexadecimal digit.
 *
 * Returns 0..15 for '0'-'9', 'A'-'F' and 'a'-'f', and -1 for any other
 * character.
 */
static int
hexval(int c)
{
    return ('0' <= c && c <= '9') ? (c - '0')
         : ('A' <= c && c <= 'F') ? (c - 'A' + 10)
         : ('a' <= c && c <= 'f') ? (c - 'a' + 10)
         : -1;
}
/*
 * get_literal - scan a quoted literal token and return its symbol.
 *
 * Called with cptr on the opening quote.  The literal's characters are
 * decoded (C-style escapes handled) into `cache`, copied aside, and then
 * re-encoded into `cache` as a printable, quoted name which is passed to
 * lookup().  The symbol is marked as a terminal; a one-character literal
 * whose value is still UNDEFINED gets that character's code as its value.
 */
672 static bucket *
673 get_literal(void)
675 int c, quote;
676 int i;
677 int n;
678 char *s;
679 bucket *bp;
/* Saved position of the opening quote, for unterminated_string(). */
680 int s_lineno = lineno;
681 char *s_line = dup_line();
682 const char *s_cptr = s_line + (cptr - line);
/* Pass 1: decode the literal's characters into cache[0..cinc-1]. */
684 quote = *cptr++;
685 cinc = 0;
686 for (;;)
688 c = *cptr++;
689 if (c == quote) break;
690 if (c == '\n') unterminated_string(s_lineno, s_line, s_cptr);
691 if (c == '\\')
/* c_cptr marks the backslash, for illegal_character() reports. */
693 const char *c_cptr = cptr - 1;
695 c = *cptr++;
696 switch (c)
/* Backslash-newline: continue the literal on the next input line. */
698 case '\n':
699 get_line();
700 if (line == 0) unterminated_string(s_lineno, s_line, s_cptr);
701 continue;
/* Octal escape: up to three octal digits. */
703 case '0': case '1': case '2': case '3':
704 case '4': case '5': case '6': case '7':
705 n = c - '0';
706 c = *cptr;
707 if (IS_OCTAL(c))
709 n = (n << 3) + (c - '0');
710 c = *++cptr;
711 if (IS_OCTAL(c))
713 n = (n << 3) + (c - '0');
714 ++cptr;
717 if (n > MAXCHAR) illegal_character(c_cptr);
718 c = n;
719 break;
/* Hex escape: at least one hex digit, value limited to MAXCHAR. */
721 case 'x':
722 c = *cptr++;
723 n = hexval(c);
724 if (n < 0 || n >= 16)
725 illegal_character(c_cptr);
726 for (;;)
728 c = *cptr;
729 i = hexval(c);
730 if (i < 0 || i >= 16) break;
731 ++cptr;
732 n = (n << 4) + i;
733 if (n > MAXCHAR) illegal_character(c_cptr);
735 c = n;
736 break;
/* Single-letter escapes; any other escaped character stands for itself. */
738 case 'a': c = 7; break;
739 case 'b': c = '\b'; break;
740 case 'f': c = '\f'; break;
741 case 'n': c = '\n'; break;
742 case 'r': c = '\r'; break;
743 case 't': c = '\t'; break;
744 case 'v': c = '\v'; break;
747 cachec(c);
749 FREE(s_line);
/* Copy the decoded bytes aside so cache can be reused for the name. */
/* NOTE(review): n is 0 for an empty literal, so this requests a */
/* zero-byte allocation -- confirm MALLOC(0) cannot trip no_space(). */
751 n = cinc;
752 s = MALLOC(n);
753 if (s == 0) no_space();
755 for (i = 0; i < n; ++i)
756 s[i] = cache[i];
/* Pass 2: build the symbol name.  One-character literals are quoted with */
/* single quotes, all others with double quotes. */
758 cinc = 0;
759 if (n == 1)
760 cachec('\'');
761 else
762 cachec('"');
764 for (i = 0; i < n; ++i)
766 c = ((unsigned char *)s)[i];
/* Backslashes and the enclosing quote character are escaped. */
767 if (c == '\\' || c == cache[0])
769 cachec('\\');
770 cachec(c);
772 else if (isprint(c))
773 cachec(c);
774 else
/* Non-printable: a letter escape if one exists, else three octal digits. */
776 cachec('\\');
777 switch (c)
779 case 7: cachec('a'); break;
780 case '\b': cachec('b'); break;
781 case '\f': cachec('f'); break;
782 case '\n': cachec('n'); break;
783 case '\r': cachec('r'); break;
784 case '\t': cachec('t'); break;
785 case '\v': cachec('v'); break;
786 default:
787 cachec(((c >> 6) & 7) + '0');
788 cachec(((c >> 3) & 7) + '0');
789 cachec((c & 7) + '0');
790 break;
795 if (n == 1)
796 cachec('\'');
797 else
798 cachec('"');
800 cachec(NUL);
801 bp = lookup(cache);
802 bp->class = TERM;
803 if (n == 1 && bp->value == UNDEFINED)
804 bp->value = *(unsigned char *)s;
805 FREE(s);
807 return (bp);
/*
 * is_reserved - tell whether `name` is a symbol name reserved by yacc.
 *
 * Reserved names are ".", "$accept", "$end" and any name of the form
 * "$$" followed by one or more decimal digits and nothing else.
 *
 * name: NUL-terminated symbol name (not modified).
 * Returns 1 if the name is reserved, 0 otherwise.
 */
static int
is_reserved(char *name)
{
    const char *p;

    if (strcmp(name, ".") == 0 ||
        strcmp(name, "$accept") == 0 ||
        strcmp(name, "$end") == 0)
        return (1);

    /*
     * The <ctype.h> classifiers are only defined for values representable
     * as unsigned char (or EOF), so plain char must be converted first;
     * otherwise bytes >= 0x80 are undefined behaviour where char is signed.
     */
    if (name[0] == '$' && name[1] == '$' &&
        isdigit((unsigned char)name[2])) {
        p = name + 3;
        while (isdigit((unsigned char)*p))
            ++p;
        if (*p == '\0')
            return (1);
    }

    return (0);
}
832 static bucket *
833 get_name(void)
835 int c;
837 cinc = 0;
838 for (c = *cptr; IS_IDENT(c); c = *++cptr)
839 cachec(c);
840 cachec(NUL);
842 if (is_reserved(cache)) used_reserved(cache);
844 return (lookup(cache));
848 static int
849 get_number(void)
851 int c;
852 int n;
854 n = 0;
855 for (c = *cptr; isdigit(c); c = *++cptr)
856 n = 10*n + (c - '0');
858 return (n);
862 static char *
863 get_tag(void)
865 int c;
866 int i;
867 char *s;
868 int t_lineno = lineno;
869 char *t_line = dup_line();
870 const char *t_cptr = t_line + (cptr - line);
872 ++cptr;
873 c = nextc();
874 if (c == EOF) unexpected_EOF();
875 if (!isalpha(c) && c != '_' && c != '$')
876 illegal_tag(t_lineno, t_line, t_cptr);
878 cinc = 0;
879 do { cachec(c); c = *++cptr; } while (IS_IDENT(c));
880 cachec(NUL);
882 c = nextc();
883 if (c == EOF) unexpected_EOF();
884 if (c != '>')
885 illegal_tag(t_lineno, t_line, t_cptr);
886 ++cptr;
888 for (i = 0; i < ntags; ++i)
890 if (strcmp(cache, tag_table[i]) == 0)
891 return (tag_table[i]);
894 if (ntags >= tagmax)
896 tagmax += 16;
897 tag_table = (char **)
898 (tag_table ? REALLOC(tag_table, tagmax*sizeof(char *))
899 : MALLOC(tagmax*sizeof(char *)));
900 if (tag_table == 0) no_space();
903 s = MALLOC(cinc);
904 if (s == 0) no_space();
905 strcpy(s, cache);
906 tag_table[ntags] = s;
907 ++ntags;
908 FREE(t_line);
909 return (s);
913 static void
914 declare_tokens(int assoc)
916 int c;
917 bucket *bp;
918 int value;
919 char *tag = 0;
921 if (assoc != TOKEN) ++prec;
923 c = nextc();
924 if (c == EOF) unexpected_EOF();
925 if (c == '<')
927 tag = get_tag();
928 c = nextc();
929 if (c == EOF) unexpected_EOF();
932 for (;;)
934 if (isalpha(c) || c == '_' || c == '.' || c == '$')
935 bp = get_name();
936 else if (c == '\'' || c == '"')
937 bp = get_literal();
938 else
939 return;
941 if (bp == goal) tokenized_start(bp->name);
942 bp->class = TERM;
944 if (tag)
946 if (bp->tag && tag != bp->tag)
947 retyped_warning(bp->name);
948 bp->tag = tag;
951 if (assoc != TOKEN)
953 if (bp->prec && prec != bp->prec)
954 reprec_warning(bp->name);
955 bp->assoc = assoc;
956 bp->prec = prec;
959 c = nextc();
960 if (c == EOF) unexpected_EOF();
961 value = UNDEFINED;
962 if (isdigit(c))
964 value = get_number();
965 if (bp->value != UNDEFINED && value != bp->value)
966 revalued_warning(bp->name);
967 bp->value = value;
968 c = nextc();
969 if (c == EOF) unexpected_EOF();
976 * %expect requires special handling
977 * as it really isn't part of the yacc
978 * grammar only a flag for yacc proper.
980 static void
981 declare_expect(int assoc)
983 int c;
985 if (assoc != EXPECT) ++prec;
988 * Stay away from nextc - doesn't
989 * detect EOL and will read to EOF.
991 c = *++cptr;
992 if (c == EOF) unexpected_EOF();
994 for(;;)
996 if (isdigit(c))
998 SRexpect = get_number();
999 break;
1002 * Looking for number before EOL.
1003 * Spaces, tabs, and numbers are ok,
1004 * words, punc., etc. are syntax errors.
1006 else if (c == '\n' || isalpha(c) || !isspace(c))
1008 syntax_error(lineno, line, cptr);
1010 else
1012 c = *++cptr;
1013 if (c == EOF) unexpected_EOF();
1019 static void
1020 declare_types(void)
1022 int c;
1023 bucket *bp;
1024 char *tag;
1026 c = nextc();
1027 if (c == EOF) unexpected_EOF();
1028 if (c != '<') syntax_error(lineno, line, cptr);
1029 tag = get_tag();
1031 for (;;)
1033 c = nextc();
1034 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1035 bp = get_name();
1036 else if (c == '\'' || c == '"')
1037 bp = get_literal();
1038 else
1039 return;
1041 if (bp->tag && tag != bp->tag)
1042 retyped_warning(bp->name);
1043 bp->tag = tag;
1048 static void
1049 declare_start(void)
1051 int c;
1052 bucket *bp;
1054 c = nextc();
1055 if (c == EOF) unexpected_EOF();
1056 if (!isalpha(c) && c != '_' && c != '.' && c != '$')
1057 syntax_error(lineno, line, cptr);
1058 bp = get_name();
1059 if (bp->class == TERM)
1060 terminal_start(bp->name);
1061 if (goal && goal != bp)
1062 restarted_warning();
1063 goal = bp;
1067 static void
1068 read_declarations(void)
1070 int c, k;
1072 cache_size = 256;
1073 cache = MALLOC(cache_size);
1074 if (cache == 0) no_space();
1076 for (;;)
1078 c = nextc();
1079 if (c == EOF) unexpected_EOF();
1080 if (c != '%') syntax_error(lineno, line, cptr);
1081 switch (k = keyword())
1083 case MARK:
1084 return;
1086 case IDENT:
1087 copy_ident();
1088 break;
1090 case TEXT:
1091 copy_text();
1092 break;
1094 case UNION:
1095 copy_union();
1096 break;
1098 case TOKEN:
1099 case LEFT:
1100 case RIGHT:
1101 case NONASSOC:
1102 declare_tokens(k);
1103 break;
1105 case EXPECT:
1106 declare_expect(k);
1107 break;
1109 case TYPE:
1110 declare_types();
1111 break;
1113 case START:
1114 declare_start();
1115 break;
1121 static void
1122 initialize_grammar(void)
1124 nitems = 4;
1125 maxitems = 300;
1126 pitem = (bucket **) MALLOC(maxitems*sizeof(bucket *));
1127 if (pitem == 0) no_space();
1128 pitem[0] = 0;
1129 pitem[1] = 0;
1130 pitem[2] = 0;
1131 pitem[3] = 0;
1133 nrules = 3;
1134 maxrules = 100;
1135 plhs = (bucket **) MALLOC(maxrules*sizeof(bucket *));
1136 if (plhs == 0) no_space();
1137 plhs[0] = 0;
1138 plhs[1] = 0;
1139 plhs[2] = 0;
1140 rprec = (short *) MALLOC(maxrules*sizeof(short));
1141 if (rprec == 0) no_space();
1142 rprec[0] = 0;
1143 rprec[1] = 0;
1144 rprec[2] = 0;
1145 rassoc = (char *) MALLOC(maxrules*sizeof(char));
1146 if (rassoc == 0) no_space();
1147 rassoc[0] = TOKEN;
1148 rassoc[1] = TOKEN;
1149 rassoc[2] = TOKEN;
1153 static void
1154 expand_items(void)
1156 maxitems += 300;
1157 pitem = (bucket **) REALLOC(pitem, maxitems*sizeof(bucket *));
1158 if (pitem == 0) no_space();
1162 static void
1163 expand_rules(void)
1165 maxrules += 100;
1166 plhs = (bucket **) REALLOC(plhs, maxrules*sizeof(bucket *));
1167 if (plhs == 0) no_space();
1168 rprec = (short *) REALLOC(rprec, maxrules*sizeof(short));
1169 if (rprec == 0) no_space();
1170 rassoc = (char *) REALLOC(rassoc, maxrules*sizeof(char));
1171 if (rassoc == 0) no_space();
1175 static void
1176 advance_to_start(void)
1178 int c;
1179 bucket *bp;
1180 const char *s_cptr;
1181 int s_lineno;
1183 for (;;)
1185 c = nextc();
1186 if (c != '%') break;
1187 s_cptr = cptr;
1188 switch (keyword())
1190 case MARK:
1191 no_grammar();
1193 case TEXT:
1194 copy_text();
1195 break;
1197 case START:
1198 declare_start();
1199 break;
1201 default:
1202 syntax_error(lineno, line, s_cptr);
1206 c = nextc();
1207 if (!isalpha(c) && c != '_' && c != '.' && c != '_')
1208 syntax_error(lineno, line, cptr);
1209 bp = get_name();
1210 if (goal == 0)
1212 if (bp->class == TERM)
1213 terminal_start(bp->name);
1214 goal = bp;
1217 s_lineno = lineno;
1218 c = nextc();
1219 if (c == EOF) unexpected_EOF();
1220 if (c != ':') syntax_error(lineno, line, cptr);
1221 start_rule(bp, s_lineno);
1222 ++cptr;
1226 static void
1227 start_rule(bucket *bp, int s_lineno)
1229 if (bp->class == TERM)
1230 terminal_lhs(s_lineno);
1231 bp->class = NONTERM;
1232 if (nrules >= maxrules)
1233 expand_rules();
1234 plhs[nrules] = bp;
1235 rprec[nrules] = UNDEFINED;
1236 rassoc[nrules] = TOKEN;
1240 static void
1241 end_rule(void)
1243 int i;
1245 if (!last_was_action && plhs[nrules]->tag)
1247 for (i = nitems - 1; pitem[i]; --i) continue;
1248 if (pitem[i+1] == 0 || pitem[i+1]->tag != plhs[nrules]->tag)
1249 default_action_warning();
1252 last_was_action = 0;
1253 if (nitems >= maxitems) expand_items();
1254 pitem[nitems] = 0;
1255 ++nitems;
1256 ++nrules;
1260 static void
1261 insert_empty_rule(void)
1263 bucket *bp, **bpp;
1265 assert(cache);
1266 sprintf(cache, "$$%d", ++gensym);
1267 bp = make_bucket(cache);
1268 last_symbol->next = bp;
1269 last_symbol = bp;
1270 bp->tag = plhs[nrules]->tag;
1271 bp->class = NONTERM;
1273 if ((nitems += 2) > maxitems)
1274 expand_items();
1275 bpp = pitem + nitems - 1;
1276 *bpp-- = bp;
1277 while ((bpp[0] = bpp[-1])) --bpp;
1279 if (++nrules >= maxrules)
1280 expand_rules();
1281 plhs[nrules] = plhs[nrules-1];
1282 plhs[nrules-1] = bp;
1283 rprec[nrules] = rprec[nrules-1];
1284 rprec[nrules-1] = 0;
1285 rassoc[nrules] = rassoc[nrules-1];
1286 rassoc[nrules-1] = TOKEN;
1290 static void
1291 add_symbol(void)
1293 int c;
1294 bucket *bp;
1295 int s_lineno = lineno;
1297 c = *cptr;
1298 if (c == '\'' || c == '"')
1299 bp = get_literal();
1300 else
1301 bp = get_name();
1303 c = nextc();
1304 if (c == ':')
1306 end_rule();
1307 start_rule(bp, s_lineno);
1308 ++cptr;
1309 return;
1312 if (last_was_action)
1313 insert_empty_rule();
1314 last_was_action = 0;
1316 if (++nitems > maxitems)
1317 expand_items();
1318 pitem[nitems-1] = bp;
1322 static void
1323 copy_action(void)
1325 int c;
1326 int i, n;
1327 int depth;
1328 int quote;
1329 char *tag;
1330 FILE *f = action_file;
1331 int a_lineno = lineno;
1332 char *a_line = dup_line();
1333 char *a_cptr = a_line + (cptr - line);
1335 if (last_was_action)
1336 insert_empty_rule();
1337 last_was_action = 1;
1339 fprintf(f, "case %d:\n", nrules - 2);
1340 if (!lflag)
1341 fprintf(f, line_format, lineno, input_file_name);
1342 if (*cptr == '=') ++cptr;
1344 n = 0;
1345 for (i = nitems - 1; pitem[i]; --i) ++n;
1347 depth = 0;
1348 loop:
1349 c = *cptr;
1350 if (c == '$')
1352 if (cptr[1] == '<')
1354 int d_lineno = lineno;
1355 char *d_line = dup_line();
1356 char *d_cptr = d_line + (cptr - line);
1358 ++cptr;
1359 tag = get_tag();
1360 c = *cptr;
1361 if (c == '$')
1363 fprintf(f, "yyval.%s", tag);
1364 ++cptr;
1365 FREE(d_line);
1366 goto loop;
1368 else if (isdigit(c))
1370 i = get_number();
1371 if (i > n) dollar_warning(d_lineno, i);
1372 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1373 FREE(d_line);
1374 goto loop;
1376 else if (c == '-' && isdigit(cptr[1]))
1378 ++cptr;
1379 i = -get_number() - n;
1380 fprintf(f, "yyvsp[%d].%s", i, tag);
1381 FREE(d_line);
1382 goto loop;
1384 else
1385 dollar_error(d_lineno, d_line, d_cptr);
1387 else if (cptr[1] == '$')
1389 if (ntags)
1391 tag = plhs[nrules]->tag;
1392 if (tag == 0) untyped_lhs();
1393 fprintf(f, "yyval.%s", tag);
1395 else
1396 fprintf(f, "yyval");
1397 cptr += 2;
1398 goto loop;
1400 else if (isdigit(cptr[1]))
1402 ++cptr;
1403 i = get_number();
1404 if (ntags)
1406 if (i <= 0 || i > n)
1407 unknown_rhs(i);
1408 tag = pitem[nitems + i - n - 1]->tag;
1409 if (tag == 0) untyped_rhs(i, pitem[nitems + i - n - 1]->name);
1410 fprintf(f, "yyvsp[%d].%s", i - n, tag);
1412 else
1414 if (i > n)
1415 dollar_warning(lineno, i);
1416 fprintf(f, "yyvsp[%d]", i - n);
1418 goto loop;
1420 else if (cptr[1] == '-')
1422 cptr += 2;
1423 i = get_number();
1424 if (ntags)
1425 unknown_rhs(-i);
1426 fprintf(f, "yyvsp[%d]", -i - n);
1427 goto loop;
1430 if (isalpha(c) || c == '_' || c == '$')
1434 putc(c, f);
1435 c = *++cptr;
1436 } while (isalnum(c) || c == '_' || c == '$');
1437 goto loop;
1439 putc(c, f);
1440 ++cptr;
1441 switch (c)
1443 case '\n':
1444 next_line:
1445 get_line();
1446 if (line) goto loop;
1447 unterminated_action(a_lineno, a_line, a_cptr);
1449 case ';':
1450 if (depth > 0) goto loop;
1451 fprintf(f, "\nbreak;\n");
1452 return;
1454 case '{':
1455 ++depth;
1456 goto loop;
1458 case '}':
1459 if (--depth > 0) goto loop;
1460 fprintf(f, "\nbreak;\n");
1461 return;
1463 case '\'':
1464 case '"':
1466 int s_lineno = lineno;
1467 char *s_line = dup_line();
1468 char *s_cptr = s_line + (cptr - line - 1);
1470 quote = c;
1471 for (;;)
1473 c = *cptr++;
1474 putc(c, f);
1475 if (c == quote)
1477 FREE(s_line);
1478 goto loop;
1480 if (c == '\n')
1481 unterminated_string(s_lineno, s_line, s_cptr);
1482 if (c == '\\')
1484 c = *cptr++;
1485 putc(c, f);
1486 if (c == '\n')
1488 get_line();
1489 if (line == 0)
1490 unterminated_string(s_lineno, s_line, s_cptr);
1496 case '/':
1497 c = *cptr;
1498 if (c == '/')
1500 putc('*', f);
1501 while ((c = *++cptr) != '\n')
1503 if (c == '*' && cptr[1] == '/')
1504 fprintf(f, "* ");
1505 else
1506 putc(c, f);
1508 fprintf(f, "*/\n");
1509 goto next_line;
1511 if (c == '*')
1513 int c_lineno = lineno;
1514 char *c_line = dup_line();
1515 char *c_cptr = c_line + (cptr - line - 1);
1517 putc('*', f);
1518 ++cptr;
1519 for (;;)
1521 c = *cptr++;
1522 putc(c, f);
1523 if (c == '*' && *cptr == '/')
1525 putc('/', f);
1526 ++cptr;
1527 FREE(c_line);
1528 goto loop;
1530 if (c == '\n')
1532 get_line();
1533 if (line == 0)
1534 unterminated_comment(c_lineno, c_line, c_cptr);
1538 goto loop;
1540 default:
1541 goto loop;
1546 static int
1547 mark_symbol(void)
1549 int c;
1550 bucket *bp = NULL;
1552 c = cptr[1];
1553 if (c == '%' || c == '\\')
1555 cptr += 2;
1556 return (1);
1559 if (c == '=')
1560 cptr += 2;
1561 else if ((c == 'p' || c == 'P') &&
1562 ((c = cptr[2]) == 'r' || c == 'R') &&
1563 ((c = cptr[3]) == 'e' || c == 'E') &&
1564 ((c = cptr[4]) == 'c' || c == 'C') &&
1565 ((c = cptr[5], !IS_IDENT(c))))
1566 cptr += 5;
1567 else
1568 syntax_error(lineno, line, cptr);
1570 c = nextc();
1571 if (isalpha(c) || c == '_' || c == '.' || c == '$')
1572 bp = get_name();
1573 else if (c == '\'' || c == '"')
1574 bp = get_literal();
1575 else
1577 syntax_error(lineno, line, cptr);
1578 /*NOTREACHED*/
1581 if (rprec[nrules] != UNDEFINED && bp->prec != rprec[nrules])
1582 prec_redeclared();
1584 rprec[nrules] = bp->prec;
1585 rassoc[nrules] = bp->assoc;
1586 return (0);
1590 static void
1591 read_grammar(void)
1593 int c;
1595 initialize_grammar();
1596 advance_to_start();
1598 for (;;)
1600 c = nextc();
1601 if (c == EOF) break;
1602 if (isalpha(c) || c == '_' || c == '.' || c == '$' || c == '\'' ||
1603 c == '"')
1604 add_symbol();
1605 else if (c == '{' || c == '=')
1606 copy_action();
1607 else if (c == '|')
1609 end_rule();
1610 start_rule(plhs[nrules-1], 0);
1611 ++cptr;
1613 else if (c == '%')
1615 if (mark_symbol()) break;
1617 else
1618 syntax_error(lineno, line, cptr);
1620 end_rule();
1624 static void
1625 free_tags(void)
1627 int i;
1629 if (tag_table == 0) return;
1631 for (i = 0; i < ntags; ++i)
1633 assert(tag_table[i]);
1634 FREE(tag_table[i]);
1636 FREE(tag_table);
1640 static void
1641 pack_names(void)
1643 bucket *bp;
1644 char *p, *s, *t;
1646 name_pool_size = 13; /* 13 == sizeof("$end") + sizeof("$accept") */
1647 for (bp = first_symbol; bp; bp = bp->next)
1648 name_pool_size += strlen(bp->name) + 1;
1649 name_pool = MALLOC(name_pool_size);
1650 if (name_pool == 0) no_space();
1652 strcpy(name_pool, "$accept");
1653 strcpy(name_pool+8, "$end");
1654 t = name_pool + 13;
1655 for (bp = first_symbol; bp; bp = bp->next)
1657 p = t;
1658 s = bp->name;
1659 while ((*t++ = *s++)) continue;
1660 FREE(bp->name);
1661 bp->name = p;
1666 static void
1667 check_symbols(void)
1669 bucket *bp;
1671 if (goal->class == UNKNOWN)
1672 undefined_goal(goal->name);
1674 for (bp = first_symbol; bp; bp = bp->next)
1676 if (bp->class == UNKNOWN)
1678 undefined_symbol_warning(bp->name);
1679 bp->class = TERM;
/*
 * pack_symbols - number the symbols and build the symbol_* arrays.
 *
 * Terminals get indices 1..ntokens-1 (index 0 is "$end"), start_symbol
 * (== ntokens) is "$accept", the goal symbol follows it and the remaining
 * non-terminals come after.  Terminals without an explicit value are
 * given values from 257 upward, skipping values already in use.
 */
1685 static void
1686 pack_symbols(void)
1688 bucket *bp;
1689 bucket **v;
1690 int i, j, k, n;
/* Count symbols; the initial 2 and 1 account for the two built-in */
/* symbols installed at index 0 and start_symbol further down. */
1692 nsyms = 2;
1693 ntokens = 1;
1694 for (bp = first_symbol; bp; bp = bp->next)
1696 ++nsyms;
1697 if (bp->class == TERM) ++ntokens;
1699 start_symbol = ntokens;
1700 nvars = nsyms - ntokens;
1702 symbol_name = (char **) MALLOC(nsyms*sizeof(char *));
1703 if (symbol_name == 0) no_space();
1704 symbol_value = (short *) MALLOC(nsyms*sizeof(short));
1705 if (symbol_value == 0) no_space();
1706 symbol_prec = (short *) MALLOC(nsyms*sizeof(short));
1707 if (symbol_prec == 0) no_space();
1708 symbol_assoc = MALLOC(nsyms);
1709 if (symbol_assoc == 0) no_space();
/* v is a temporary array: terminals in slots 1..ntokens-1, the other */
/* symbols in slots start_symbol+1..nsyms-1, both in list order. */
1711 v = (bucket **) MALLOC(nsyms*sizeof(bucket *));
1712 if (v == 0) no_space();
1714 v[0] = 0;
1715 v[start_symbol] = 0;
1717 i = 1;
1718 j = start_symbol + 1;
1719 for (bp = first_symbol; bp; bp = bp->next)
1721 if (bp->class == TERM)
1722 v[i++] = bp;
1723 else
1724 v[j++] = bp;
1726 assert(i == ntokens && j == nsyms);
1728 for (i = 1; i < ntokens; ++i)
1729 v[i]->index = i;
/* The goal symbol gets the first index after start_symbol; the others */
/* are numbered after it.  i == ntokens here, so ++i starts the scan at */
/* start_symbol + 1. */
1731 goal->index = start_symbol + 1;
1732 k = start_symbol + 2;
1733 while (++i < nsyms)
1734 if (v[i] != goal)
1736 v[i]->index = k;
1737 ++k;
/* Non-terminal values: 0 for the goal, 1, 2, ... for the rest. */
1740 goal->value = 0;
1741 k = 1;
1742 for (i = start_symbol + 1; i < nsyms; ++i)
1744 if (v[i] != goal)
1746 v[i]->value = k;
1747 ++k;
/* Collect the explicit token values above 256, kept sorted by insertion, */
/* in symbol_value[0..k-1] (used as scratch space at this point). */
1751 k = 0;
1752 for (i = 1; i < ntokens; ++i)
1754 n = v[i]->value;
1755 if (n > 256)
1757 for (j = k++; j > 0 && symbol_value[j-1] > n; --j)
1758 symbol_value[j] = symbol_value[j-1];
1759 symbol_value[j] = n;
/* The first terminal defaults to 256. */
/* NOTE(review): presumably v[1] is the predefined error token -- confirm. */
1763 if (v[1]->value == UNDEFINED)
1764 v[1]->value = 256;
/* Give the remaining unvalued terminals the next free value from 257 up, */
/* stepping over values recorded in the sorted scratch list. */
1766 j = 0;
1767 n = 257;
1768 for (i = 2; i < ntokens; ++i)
1770 if (v[i]->value == UNDEFINED)
1772 while (j < k && n == symbol_value[j])
1774 while (++j < k && n == symbol_value[j]) continue;
1775 ++n;
1777 v[i]->value = n;
1778 ++n;
/* Fill the final tables.  name_pool + 8 is "$end" and name_pool is */
/* "$accept", as laid out by pack_names(). */
1782 symbol_name[0] = name_pool + 8;
1783 symbol_value[0] = 0;
1784 symbol_prec[0] = 0;
1785 symbol_assoc[0] = TOKEN;
1786 for (i = 1; i < ntokens; ++i)
1788 symbol_name[i] = v[i]->name;
1789 symbol_value[i] = v[i]->value;
1790 symbol_prec[i] = v[i]->prec;
1791 symbol_assoc[i] = v[i]->assoc;
1793 symbol_name[start_symbol] = name_pool;
1794 symbol_value[start_symbol] = -1;
1795 symbol_prec[start_symbol] = 0;
1796 symbol_assoc[start_symbol] = TOKEN;
/* Non-terminals are stored at their assigned index, not at their v slot. */
1797 for (++i; i < nsyms; ++i)
1799 k = v[i]->index;
1800 symbol_name[k] = v[i]->name;
1801 symbol_value[k] = v[i]->value;
1802 symbol_prec[k] = v[i]->prec;
1803 symbol_assoc[k] = v[i]->assoc;
1806 FREE(v);
1810 static void
1811 pack_grammar(void)
1813 int i, j;
1814 int assoc, loc_prec;
1816 ritem = (short *) MALLOC(nitems*sizeof(short));
1817 if (ritem == 0) no_space();
1818 rlhs = (short *) MALLOC(nrules*sizeof(short));
1819 if (rlhs == 0) no_space();
1820 rrhs = (short *) MALLOC((nrules+1)*sizeof(short));
1821 if (rrhs == 0) no_space();
1822 rprec = (short *) REALLOC(rprec, nrules*sizeof(short));
1823 if (rprec == 0) no_space();
1824 rassoc = REALLOC(rassoc, nrules);
1825 if (rassoc == 0) no_space();
1827 ritem[0] = -1;
1828 ritem[1] = goal->index;
1829 ritem[2] = 0;
1830 ritem[3] = -2;
1831 rlhs[0] = 0;
1832 rlhs[1] = 0;
1833 rlhs[2] = start_symbol;
1834 rrhs[0] = 0;
1835 rrhs[1] = 0;
1836 rrhs[2] = 1;
1838 j = 4;
1839 for (i = 3; i < nrules; ++i)
1841 rlhs[i] = plhs[i]->index;
1842 rrhs[i] = j;
1843 assoc = TOKEN;
1844 loc_prec = 0;
1845 while (pitem[j])
1847 ritem[j] = pitem[j]->index;
1848 if (pitem[j]->class == TERM)
1850 loc_prec = pitem[j]->prec;
1851 assoc = pitem[j]->assoc;
1853 ++j;
1855 ritem[j] = -i;
1856 ++j;
1857 if (rprec[i] == UNDEFINED)
1859 rprec[i] = loc_prec;
1860 rassoc[i] = assoc;
1863 rrhs[i] = j;
1865 FREE(plhs);
1866 FREE(pitem);
1870 static void
1871 print_grammar(void)
1873 int i, j, k;
1874 int spacing = 0;
1875 FILE *f = verbose_file;
1877 if (!vflag) return;
1879 k = 1;
1880 for (i = 2; i < nrules; ++i)
1882 if (rlhs[i] != rlhs[i-1])
1884 if (i != 2) fprintf(f, "\n");
1885 fprintf(f, "%4d %s :", i - 2, symbol_name[rlhs[i]]);
1886 spacing = strlen(symbol_name[rlhs[i]]) + 1;
1888 else
1890 fprintf(f, "%4d ", i - 2);
1891 j = spacing;
1892 while (--j >= 0) putc(' ', f);
1893 putc('|', f);
1896 while (ritem[k] >= 0)
1898 fprintf(f, " %s", symbol_name[ritem[k]]);
1899 ++k;
1901 ++k;
1902 putc('\n', f);
1907 void
1908 reader(void)
1910 write_section(banner);
1911 create_symbol_table();
1912 read_declarations();
1913 read_grammar();
1914 free_symbol_table();
1915 free_tags();
1916 pack_names();
1917 check_symbols();
1918 pack_symbols();
1919 pack_grammar();
1920 free_symbols();
1921 print_grammar();