Update URLs to prefer https: to http:
[bison.git] / src / symtab.c
blobbe075d2412b79cf2367b252dcba36df2d336cf4b
1 /* Symbol table manager for Bison.
3 Copyright (C) 1984, 1989, 2000-2002, 2004-2015, 2018-2021 Free
4 Software Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
21 #include <config.h>
22 #include "symtab.h"
24 #include "system.h"
26 #include <assure.h>
27 #include <fstrcmp.h>
28 #include <hash.h>
29 #include <quote.h>
31 #include "complain.h"
32 #include "getargs.h"
33 #include "gram.h"
34 #include "intprops.h"
36 /** Undefined token code. */
37 #define CODE_UNDEFINED (-1)
39 /* Undefined symbol number. */
40 #define NUMBER_UNDEFINED (-1)
43 static struct hash_table *symbol_table = NULL;
44 static struct hash_table *semantic_type_table = NULL;
46 /*----------------------------------------------------------------.
47 | Symbols sorted by tag. Allocated by table_sort, after which no |
48 | more symbols should be created. |
49 `----------------------------------------------------------------*/
51 static symbol **symbols_sorted = NULL;
52 static semantic_type **semantic_types_sorted = NULL;
55 /*------------------------.
56 | Distinguished symbols. |
57 `------------------------*/
59 symbol *errtoken = NULL;
60 symbol *undeftoken = NULL;
61 symbol *eoftoken = NULL;
62 symbol *acceptsymbol = NULL;
63 symbol *startsymbol = NULL;
64 location startsymbol_loc;
66 /* Precedence relation graph. */
67 static symgraph **prec_nodes;
69 /* Store which associativity is used. */
70 static bool *used_assoc = NULL;
72 bool tag_seen = false;
75 /* Whether SYM was defined by the user. */
77 static bool
78 symbol_is_user_defined (symbol *sym)
80 const bool eof_is_user_defined
81 = !eoftoken->alias || STRNEQ (eoftoken->alias->tag, "$end");
82 return sym->tag[0] != '$'
83 && (eof_is_user_defined || (sym != eoftoken && sym->alias != errtoken))
84 && sym != errtoken && sym->alias != errtoken
85 && sym != undeftoken && sym->alias != undeftoken;
89 /*--------------------------.
90 | Create a new sym_content. |
91 `--------------------------*/
93 static sym_content *
94 sym_content_new (symbol *s)
96 sym_content *res = xmalloc (sizeof *res);
98 res->symbol = s;
100 res->type_name = NULL;
101 res->type_loc = empty_loc;
102 for (int i = 0; i < CODE_PROPS_SIZE; ++i)
103 code_props_none_init (&res->props[i]);
105 res->number = NUMBER_UNDEFINED;
106 res->prec_loc = empty_loc;
107 res->prec = 0;
108 res->assoc = undef_assoc;
109 res->code = CODE_UNDEFINED;
111 res->class = unknown_sym;
112 res->status = undeclared;
114 return res;
117 /*---------------------------------.
118 | Create a new symbol, named TAG. |
119 `---------------------------------*/
121 static symbol *
122 symbol_new (uniqstr tag, location loc)
124 symbol *res = xmalloc (sizeof *res);
125 uniqstr_assert (tag);
127 /* If the tag is not a string (starts with a double quote), check
128 that it is valid for Yacc. */
129 if (tag[0] != '\"' && tag[0] != '\'' && strchr (tag, '-'))
130 complain (&loc, Wyacc,
131 _("POSIX Yacc forbids dashes in symbol names: %s"), tag);
133 res->tag = tag;
134 res->location = loc;
135 res->translatable = false;
136 res->location_of_lhs = false;
137 res->alias = NULL;
138 res->content = sym_content_new (res);
139 res->is_alias = false;
140 return res;
143 /*--------------------.
144 | Free a sym_content. |
145 `--------------------*/
147 static void
148 sym_content_free (sym_content *sym)
150 free (sym);
154 /*---------------------------------------------------------.
155 | Free a symbol and its associated content if appropriate. |
156 `---------------------------------------------------------*/
158 static void
159 symbol_free (void *ptr)
161 symbol *sym = (symbol *)ptr;
162 if (!sym->is_alias)
163 sym_content_free (sym->content);
164 free (sym);
168 /* If needed, swap first and second so that first has the earliest
169 location (according to location_cmp).
171 Many symbol features (e.g., token codes) are not assigned during
172 parsing, but in a second step, via a traversal of the symbol table
173 sorted on tag.
175 However, error messages make more sense if we keep the first
176 declaration first.
179 static void
180 symbols_sort (const symbol **first, const symbol **second)
182 if (0 < location_cmp ((*first)->location, (*second)->location))
184 const symbol* tmp = *first;
185 *first = *second;
186 *second = tmp;
190 /* Likewise, for locations. */
192 static void
193 locations_sort (location *first, location *second)
195 if (0 < location_cmp (*first, *second))
197 location tmp = *first;
198 *first = *second;
199 *second = tmp;
203 char const *
204 code_props_type_string (code_props_type kind)
206 switch (kind)
208 case destructor:
209 return "%destructor";
210 case printer:
211 return "%printer";
213 abort ();
217 /*----------------------------------------.
218 | Create a new semantic type, named TAG. |
219 `----------------------------------------*/
221 static semantic_type *
222 semantic_type_new (uniqstr tag, const location *loc)
224 semantic_type *res = xmalloc (sizeof *res);
226 uniqstr_assert (tag);
227 res->tag = tag;
228 res->location = loc ? *loc : empty_loc;
229 res->status = undeclared;
230 for (int i = 0; i < CODE_PROPS_SIZE; ++i)
231 code_props_none_init (&res->props[i]);
233 return res;
237 /*-----------------.
238 | Print a symbol. |
239 `-----------------*/
241 #define SYMBOL_INT_ATTR_PRINT(Attr) \
242 if (s->content) \
243 fprintf (f, " %s = %d", #Attr, s->content->Attr)
245 #define SYMBOL_STR_ATTR_PRINT(Attr) \
246 if (s->content && s->content->Attr) \
247 fprintf (f, " %s { %s }", #Attr, s->content->Attr)
249 #define SYMBOL_CODE_PRINT(Attr) \
250 if (s->content && s->content->props[Attr].code) \
251 fprintf (f, " %s { %s }", #Attr, s->content->props[Attr].code)
253 void
254 symbol_print (symbol const *s, FILE *f)
256 if (s)
258 symbol_class c = s->content->class;
259 fprintf (f, "%s: %s",
260 c == unknown_sym ? "unknown"
261 : c == pct_type_sym ? "%type"
262 : c == token_sym ? "token"
263 : c == nterm_sym ? "nterm"
264 : NULL, /* abort. */
265 s->tag);
266 putc (' ', f);
267 location_print (s->location, f);
268 SYMBOL_INT_ATTR_PRINT (code);
269 SYMBOL_INT_ATTR_PRINT (number);
270 SYMBOL_STR_ATTR_PRINT (type_name);
271 SYMBOL_CODE_PRINT (destructor);
272 SYMBOL_CODE_PRINT (printer);
274 else
275 fputs ("<NULL>", f);
278 #undef SYMBOL_ATTR_PRINT
279 #undef SYMBOL_CODE_PRINT
282 /*----------------------------------.
283 | Whether S is a valid identifier. |
284 `----------------------------------*/
286 static bool
287 is_identifier (uniqstr s)
289 static char const alphanum[26 + 26 + 1 + 10] =
290 "abcdefghijklmnopqrstuvwxyz"
291 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
293 "0123456789";
294 if (!s || ! memchr (alphanum, *s, sizeof alphanum - 10))
295 return false;
296 for (++s; *s; ++s)
297 if (! memchr (alphanum, *s, sizeof alphanum))
298 return false;
299 return true;
303 /*-----------------------------------------------.
304 | Get the identifier associated to this symbol. |
305 `-----------------------------------------------*/
307 uniqstr
308 symbol_id_get (symbol const *sym)
310 if (sym->alias)
311 sym = sym->alias;
312 return is_identifier (sym->tag) ? sym->tag : 0;
316 /*------------------------------------------------------------------.
317 | Complain that S's WHAT is redeclared at SECOND, and was first set |
318 | at FIRST. |
319 `------------------------------------------------------------------*/
321 static void
322 complain_symbol_redeclared (symbol *s, const char *what, location first,
323 location second)
325 locations_sort (&first, &second);
326 complain (&second, complaint, _("%s redeclaration for %s"), what, s->tag);
327 subcomplain (&first, complaint, _("previous declaration"));
330 static void
331 complain_semantic_type_redeclared (semantic_type *s, const char *what, location first,
332 location second)
334 locations_sort (&first, &second);
335 complain (&second, complaint, _("%s redeclaration for <%s>"), what, s->tag);
336 subcomplain (&first, complaint, _("previous declaration"));
339 static void
340 complain_class_redeclared (symbol *sym, symbol_class class, location second)
342 complain (&second, complaint,
343 class == token_sym
344 ? _("symbol %s redeclared as a token")
345 : _("symbol %s redeclared as a nonterminal"), sym->tag);
346 if (!location_empty (sym->location))
347 subcomplain (&sym->location, complaint, _("previous definition"));
350 static const symbol *
351 symbol_from_uniqstr_fuzzy (const uniqstr key)
353 aver (symbols_sorted);
354 #define FSTRCMP_THRESHOLD 0.6
355 double best_similarity = FSTRCMP_THRESHOLD;
356 const symbol *res = NULL;
357 size_t count = hash_get_n_entries (symbol_table);
358 for (int i = 0; i < count; ++i)
360 symbol *sym = symbols_sorted[i];
361 if (STRNEQ (key, sym->tag)
362 && (sym->content->status == declared
363 || sym->content->status == undeclared))
365 double similarity = fstrcmp_bounded (key, sym->tag, best_similarity);
366 if (best_similarity < similarity)
368 res = sym;
369 best_similarity = similarity;
373 return res;
376 static void
377 complain_symbol_undeclared (const symbol *sym)
379 assert (sym->content->status != declared);
380 const symbol *best = symbol_from_uniqstr_fuzzy (sym->tag);
381 if (best)
383 complain (&sym->location,
384 sym->content->status == needed ? complaint : Wother,
385 _("symbol %s is used, but is not defined as a token"
386 " and has no rules; did you mean %s?"),
387 quote_n (0, sym->tag),
388 quote_n (1, best->tag));
389 if (feature_flag & feature_caret)
390 location_caret_suggestion (sym->location, best->tag, stderr);
392 else
393 complain (&sym->location,
394 sym->content->status == needed ? complaint : Wother,
395 _("symbol %s is used, but is not defined as a token"
396 " and has no rules"),
397 quote (sym->tag));
400 void
401 symbol_location_as_lhs_set (symbol *sym, location loc)
403 if (!sym->location_of_lhs)
405 sym->location = loc;
406 sym->location_of_lhs = true;
411 /*-----------------------------------------------------------------.
412 | Set the TYPE_NAME associated with SYM. Does nothing if passed 0 |
413 | as TYPE_NAME. |
414 `-----------------------------------------------------------------*/
416 void
417 symbol_type_set (symbol *sym, uniqstr type_name, location loc)
419 if (type_name)
421 tag_seen = true;
422 if (sym->content->type_name)
423 complain_symbol_redeclared (sym, "%type",
424 sym->content->type_loc, loc);
425 else
427 uniqstr_assert (type_name);
428 sym->content->type_name = type_name;
429 sym->content->type_loc = loc;
434 /*--------------------------------------------------------.
435 | Set the DESTRUCTOR or PRINTER associated with the SYM. |
436 `--------------------------------------------------------*/
438 void
439 symbol_code_props_set (symbol *sym, code_props_type kind,
440 code_props const *code)
442 if (sym->content->props[kind].code)
443 complain_symbol_redeclared (sym, code_props_type_string (kind),
444 sym->content->props[kind].location,
445 code->location);
446 else
447 sym->content->props[kind] = *code;
451 /*-----------------------------------------------------.
452 | Set the DESTRUCTOR or PRINTER associated with TYPE. |
453 `-----------------------------------------------------*/
455 void
456 semantic_type_code_props_set (semantic_type *type,
457 code_props_type kind,
458 code_props const *code)
460 if (type->props[kind].code)
461 complain_semantic_type_redeclared (type, code_props_type_string (kind),
462 type->props[kind].location,
463 code->location);
464 else
465 type->props[kind] = *code;
468 /*---------------------------------------------------.
469 | Get the computed %destructor or %printer for SYM. |
470 `---------------------------------------------------*/
472 code_props *
473 symbol_code_props_get (symbol *sym, code_props_type kind)
475 /* Per-symbol code props. */
476 if (sym->content->props[kind].code)
477 return &sym->content->props[kind];
479 /* Per-type code props. */
480 if (sym->content->type_name)
482 code_props *code =
483 &semantic_type_get (sym->content->type_name, NULL)->props[kind];
484 if (code->code)
485 return code;
488 /* Apply default code props's only to user-defined symbols. */
489 if (symbol_is_user_defined (sym))
491 code_props *code = &semantic_type_get (sym->content->type_name ? "*" : "",
492 NULL)->props[kind];
493 if (code->code)
494 return code;
496 return &code_props_none;
499 /*-----------------------------------------------------------------.
500 | Set the PRECEDENCE associated with SYM. Does nothing if invoked |
501 | with UNDEF_ASSOC as ASSOC. |
502 `-----------------------------------------------------------------*/
504 void
505 symbol_precedence_set (symbol *sym, int prec, assoc a, location loc)
507 if (a != undef_assoc)
509 sym_content *s = sym->content;
510 if (s->prec)
511 complain_symbol_redeclared (sym, assoc_to_string (a),
512 s->prec_loc, loc);
513 else
515 s->prec = prec;
516 s->assoc = a;
517 s->prec_loc = loc;
521 /* Only terminals have a precedence. */
522 symbol_class_set (sym, token_sym, loc, false);
526 /*------------------------------------.
527 | Set the CLASS associated with SYM. |
528 `------------------------------------*/
530 static void
531 complain_pct_type_on_token (location *loc)
533 complain (loc, Wyacc,
534 _("POSIX yacc reserves %%type to nonterminals"));
537 void
538 symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring)
540 aver (class != unknown_sym);
541 sym_content *s = sym->content;
542 if (class == pct_type_sym)
544 if (s->class == token_sym)
545 complain_pct_type_on_token (&loc);
546 else if (s->class == unknown_sym)
547 s->class = class;
549 else if (s->class != unknown_sym && s->class != pct_type_sym
550 && s->class != class)
551 complain_class_redeclared (sym, class, loc);
552 else
554 if (class == token_sym && s->class == pct_type_sym)
555 complain_pct_type_on_token (&sym->location);
557 s->class = class;
559 if (declaring)
561 if (s->status == declared)
563 complain (&loc, Wother,
564 _("symbol %s redeclared"), sym->tag);
565 subcomplain (&sym->location, Wother,
566 _("previous declaration"));
568 else
570 sym->location = loc;
571 s->status = declared;
578 /*----------------------------.
579 | Set the token code of SYM. |
580 `----------------------------*/
582 void
583 symbol_code_set (symbol *sym, int code, location loc)
585 int *codep = &sym->content->code;
586 if (sym->content->class != token_sym)
587 complain (&loc, complaint,
588 _("nonterminals cannot be given a token code"));
589 else if (*codep != CODE_UNDEFINED
590 && *codep != code)
591 complain (&loc, complaint, _("redefining code of token %s"),
592 sym->tag);
593 else if (code == INT_MAX)
594 complain (&loc, complaint, _("code of token %s too large"),
595 sym->tag);
596 else
598 *codep = code;
599 /* User defined $end token? */
600 if (code == 0 && !eoftoken)
602 eoftoken = sym->content->symbol;
603 eoftoken->content->number = 0;
609 /*----------------------------------------------------------.
610 | If SYM is not defined, report an error, and consider it a |
611 | nonterminal. |
612 `----------------------------------------------------------*/
614 static void
615 symbol_check_defined (symbol *sym)
617 sym_content *s = sym->content;
618 if (s->class == unknown_sym || s->class == pct_type_sym)
620 complain_symbol_undeclared (sym);
621 s->class = nterm_sym;
624 if (s->number == NUMBER_UNDEFINED)
625 s->number = s->class == token_sym ? ntokens++ : nnterms++;
627 if (s->class == token_sym
628 && sym->tag[0] == '"'
629 && !sym->is_alias)
630 complain (&sym->location, Wdangling_alias,
631 _("string literal %s not attached to a symbol"),
632 sym->tag);
634 for (int i = 0; i < 2; ++i)
635 symbol_code_props_get (sym, i)->is_used = true;
637 /* Set the semantic type status associated to the current symbol to
638 'declared' so that we could check semantic types unnecessary uses. */
639 if (s->type_name)
641 semantic_type *sem_type = semantic_type_get (s->type_name, NULL);
642 if (sem_type)
643 sem_type->status = declared;
647 static void
648 semantic_type_check_defined (semantic_type *sem_type)
650 /* <*> and <> do not have to be "declared". */
651 if (sem_type->status == declared
652 || !*sem_type->tag
653 || STREQ (sem_type->tag, "*"))
655 for (int i = 0; i < 2; ++i)
656 if (sem_type->props[i].kind != CODE_PROPS_NONE
657 && ! sem_type->props[i].is_used)
658 complain (&sem_type->location, Wother,
659 _("useless %s for type <%s>"),
660 code_props_type_string (i), sem_type->tag);
662 else
663 complain (&sem_type->location, Wother,
664 _("type <%s> is used, but is not associated to any symbol"),
665 sem_type->tag);
668 /*-------------------------------------------------------------------.
669 | Merge the properties (precedence, associativity, etc.) of SYM, and |
670 | its string-named alias STR; check consistency. |
671 `-------------------------------------------------------------------*/
673 static void
674 symbol_merge_properties (symbol *sym, symbol *str)
676 if (str->content->type_name != sym->content->type_name)
678 if (str->content->type_name)
679 symbol_type_set (sym,
680 str->content->type_name, str->content->type_loc);
681 else
682 symbol_type_set (str,
683 sym->content->type_name, sym->content->type_loc);
687 for (int i = 0; i < CODE_PROPS_SIZE; ++i)
688 if (str->content->props[i].code)
689 symbol_code_props_set (sym, i, &str->content->props[i]);
690 else if (sym->content->props[i].code)
691 symbol_code_props_set (str, i, &sym->content->props[i]);
693 if (sym->content->prec || str->content->prec)
695 if (str->content->prec)
696 symbol_precedence_set (sym, str->content->prec, str->content->assoc,
697 str->content->prec_loc);
698 else
699 symbol_precedence_set (str, sym->content->prec, sym->content->assoc,
700 sym->content->prec_loc);
704 void
705 symbol_make_alias (symbol *sym, symbol *str, location loc)
707 if (sym->content->class != token_sym)
708 complain (&loc, complaint,
709 _("nonterminals cannot be given a string alias"));
710 else if (str->alias)
711 complain (&loc, Wother,
712 _("symbol %s used more than once as a literal string"), str->tag);
713 else if (sym->alias)
714 complain (&loc, Wother,
715 _("symbol %s given more than one literal string"), sym->tag);
716 else
718 symbol_merge_properties (sym, str);
719 sym_content_free (str->content);
720 str->content = sym->content;
721 str->content->symbol = str;
722 str->is_alias = true;
723 str->alias = sym;
724 sym->alias = str;
729 /*-------------------------------------------------------------------.
730 | Assign a symbol number, and write the definition of the token name |
731 | into FDEFINES. Put in SYMBOLS. |
732 `-------------------------------------------------------------------*/
734 static void
735 symbol_pack (symbol *sym)
737 aver (sym->content->number != NUMBER_UNDEFINED);
738 if (sym->content->class == nterm_sym)
739 sym->content->number += ntokens;
741 symbols[sym->content->number] = sym->content->symbol;
744 static void
745 complain_code_redeclared (int num, const symbol *first, const symbol *second)
747 symbols_sort (&first, &second);
748 complain (&second->location, complaint,
749 _("code %d reassigned to token %s"),
750 num, second->tag);
751 subcomplain (&first->location, complaint,
752 _("previous declaration for %s"),
753 first->tag);
756 /*-------------------------------------------------.
757 | Put SYM in TOKEN_TRANSLATIONS if it is a token. |
758 `-------------------------------------------------*/
760 static void
761 symbol_translation (const symbol *sym)
763 if (sym->content->class == token_sym && !sym->is_alias)
765 /* A token whose translation has already been set? */
766 if (token_translations[sym->content->code]
767 != undeftoken->content->number)
768 complain_code_redeclared
769 (sym->content->code,
770 symbols[token_translations[sym->content->code]], sym);
771 else
772 token_translations[sym->content->code]
773 = sym->content->number;
778 /*---------------------------------------.
779 | Symbol and semantic type hash tables. |
780 `---------------------------------------*/
782 /* Initial capacity of symbol and semantic type hash table. */
783 #define HT_INITIAL_CAPACITY 257
785 static inline bool
786 hash_compare_symbol (const symbol *m1, const symbol *m2)
788 /* Since tags are unique, we can compare the pointers themselves. */
789 return UNIQSTR_EQ (m1->tag, m2->tag);
792 static inline bool
793 hash_compare_semantic_type (const semantic_type *m1, const semantic_type *m2)
795 /* Since names are unique, we can compare the pointers themselves. */
796 return UNIQSTR_EQ (m1->tag, m2->tag);
799 static bool
800 hash_symbol_comparator (void const *m1, void const *m2)
802 return hash_compare_symbol (m1, m2);
805 static bool
806 hash_semantic_type_comparator (void const *m1, void const *m2)
808 return hash_compare_semantic_type (m1, m2);
811 static inline size_t
812 hash_symbol (const symbol *m, size_t tablesize)
814 /* Since tags are unique, we can hash the pointer itself. */
815 return ((uintptr_t) m->tag) % tablesize;
818 static inline size_t
819 hash_semantic_type (const semantic_type *m, size_t tablesize)
821 /* Since names are unique, we can hash the pointer itself. */
822 return ((uintptr_t) m->tag) % tablesize;
825 static size_t
826 hash_symbol_hasher (void const *m, size_t tablesize)
828 return hash_symbol (m, tablesize);
831 static size_t
832 hash_semantic_type_hasher (void const *m, size_t tablesize)
834 return hash_semantic_type (m, tablesize);
837 /*-------------------------------.
838 | Create the symbol hash table. |
839 `-------------------------------*/
841 void
842 symbols_new (void)
844 symbol_table = hash_xinitialize (HT_INITIAL_CAPACITY,
845 NULL,
846 hash_symbol_hasher,
847 hash_symbol_comparator,
848 symbol_free);
850 /* Construct the acceptsymbol symbol. */
851 acceptsymbol = symbol_get ("$accept", empty_loc);
852 acceptsymbol->content->class = nterm_sym;
853 acceptsymbol->content->number = nnterms++;
855 /* Construct the YYerror/"error" token */
856 errtoken = symbol_get ("YYerror", empty_loc);
857 errtoken->content->class = token_sym;
858 errtoken->content->number = ntokens++;
860 symbol *alias = symbol_get ("error", empty_loc);
861 symbol_class_set (alias, token_sym, empty_loc, false);
862 symbol_make_alias (errtoken, alias, empty_loc);
865 /* Construct the YYUNDEF/"$undefined" token that represents all
866 undefined literal tokens. It is always symbol number 2. */
867 undeftoken = symbol_get ("YYUNDEF", empty_loc);
868 undeftoken->content->class = token_sym;
869 undeftoken->content->number = ntokens++;
871 symbol *alias = symbol_get ("$undefined", empty_loc);
872 symbol_class_set (alias, token_sym, empty_loc, false);
873 symbol_make_alias (undeftoken, alias, empty_loc);
876 semantic_type_table = hash_xinitialize (HT_INITIAL_CAPACITY,
877 NULL,
878 hash_semantic_type_hasher,
879 hash_semantic_type_comparator,
880 free);
884 /*----------------------------------------------------------------.
885 | Find the symbol named KEY, and return it. If it does not exist |
886 | yet, create it. |
887 `----------------------------------------------------------------*/
889 symbol *
890 symbol_from_uniqstr (const uniqstr key, location loc)
892 symbol probe;
894 probe.tag = key;
895 symbol *res = hash_lookup (symbol_table, &probe);
897 if (!res)
899 /* First insertion in the hash. */
900 aver (!symbols_sorted);
901 res = symbol_new (key, loc);
902 hash_xinsert (symbol_table, res);
904 return res;
908 /*-----------------------------------------------------------------------.
909 | Find the semantic type named KEY, and return it. If it does not exist |
910 | yet, create it. |
911 `-----------------------------------------------------------------------*/
913 semantic_type *
914 semantic_type_from_uniqstr (const uniqstr key, const location *loc)
916 semantic_type probe;
918 probe.tag = key;
919 semantic_type *res = hash_lookup (semantic_type_table, &probe);
921 if (!res)
923 /* First insertion in the hash. */
924 res = semantic_type_new (key, loc);
925 hash_xinsert (semantic_type_table, res);
927 return res;
931 /*----------------------------------------------------------------.
932 | Find the symbol named KEY, and return it. If it does not exist |
933 | yet, create it. |
934 `----------------------------------------------------------------*/
936 symbol *
937 symbol_get (const char *key, location loc)
939 return symbol_from_uniqstr (uniqstr_new (key), loc);
943 /*-----------------------------------------------------------------------.
944 | Find the semantic type named KEY, and return it. If it does not exist |
945 | yet, create it. |
946 `-----------------------------------------------------------------------*/
948 semantic_type *
949 semantic_type_get (const char *key, const location *loc)
951 return semantic_type_from_uniqstr (uniqstr_new (key), loc);
955 /*------------------------------------------------------------------.
956 | Generate a dummy nonterminal, whose name cannot conflict with the |
957 | user's names. |
958 `------------------------------------------------------------------*/
960 symbol *
961 dummy_symbol_get (location loc)
963 /* Incremented for each generated symbol. */
964 static int dummy_count = 0;
965 char buf[32];
966 int len = snprintf (buf, sizeof buf, "$@%d", ++dummy_count);
967 assure (len < sizeof buf);
968 symbol *sym = symbol_get (buf, loc);
969 sym->content->class = nterm_sym;
970 return sym;
973 bool
974 symbol_is_dummy (symbol const *sym)
976 return sym->tag[0] == '@' || (sym->tag[0] == '$' && sym->tag[1] == '@');
979 /*-------------------.
980 | Free the symbols. |
981 `-------------------*/
983 void
984 symbols_free (void)
986 hash_free (symbol_table);
987 hash_free (semantic_type_table);
988 free (symbols);
989 free (symbols_sorted);
990 free (semantic_types_sorted);
994 static int
995 symbol_cmp (void const *a, void const *b)
997 return location_cmp ((*(symbol * const *)a)->location,
998 (*(symbol * const *)b)->location);
1001 /* Store in *SORTED an array of pointers to the symbols contained in
1002 TABLE, sorted by order of appearance (i.e., by location). */
1004 static void
1005 table_sort (struct hash_table *table, symbol ***sorted)
1007 aver (!*sorted);
1008 size_t count = hash_get_n_entries (table);
1009 *sorted = xnmalloc (count + 1, sizeof **sorted);
1010 hash_get_entries (table, (void**)*sorted, count);
1011 qsort (*sorted, count, sizeof **sorted, symbol_cmp);
1012 (*sorted)[count] = NULL;
1015 /*--------------------------------------------------------------.
1016 | Check that all the symbols are defined. Report any undefined |
1017 | symbols and consider them nonterminals. |
1018 `--------------------------------------------------------------*/
1020 void
1021 symbols_check_defined (void)
1023 table_sort (symbol_table, &symbols_sorted);
1024 /* semantic_type, like symbol, starts with a 'tag' field and then a
1025 'location' field. And here we only deal with arrays/hashes of
1026 pointers, sizeof is not an issue.
1028 So instead of implementing table_sort (and symbol_cmp) once for
1029 each type, let's lie a bit to the typing system, and treat
1030 'semantic_type' as if it were 'symbol'. */
1031 table_sort (semantic_type_table, (symbol ***) &semantic_types_sorted);
1033 for (int i = 0; symbols_sorted[i]; ++i)
1034 symbol_check_defined (symbols_sorted[i]);
1035 for (int i = 0; semantic_types_sorted[i]; ++i)
1036 semantic_type_check_defined (semantic_types_sorted[i]);
1039 /*------------------------------------------------------------------.
1040 | Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1041 | number. |
1042 `------------------------------------------------------------------*/
1044 static void
1045 symbols_token_translations_init (void)
1047 bool code_256_available_p = true;
1049 /* Find the highest token code, and whether 256, the POSIX preferred
1050 token code for the error token, is used. */
1051 max_code = 0;
1052 for (int i = 0; i < ntokens; ++i)
1054 sym_content *sym = symbols[i]->content;
1055 if (sym->code != CODE_UNDEFINED)
1057 if (sym->code > max_code)
1058 max_code = sym->code;
1059 if (sym->code == 256)
1060 code_256_available_p = false;
1064 /* If 256 is not used, assign it to error, to follow POSIX. */
1065 if (code_256_available_p
1066 && errtoken->content->code == CODE_UNDEFINED)
1067 errtoken->content->code = 256;
1069 /* Set the missing codes. */
1070 if (max_code < 256)
1071 max_code = 256;
1073 for (int i = 0; i < ntokens; ++i)
1075 sym_content *sym = symbols[i]->content;
1076 if (sym->code == CODE_UNDEFINED)
1078 IGNORE_TYPE_LIMITS_BEGIN
1079 if (INT_ADD_WRAPV (max_code, 1, &max_code))
1080 complain (NULL, fatal, _("token number too large"));
1081 IGNORE_TYPE_LIMITS_END
1082 sym->code = max_code;
1084 if (sym->code > max_code)
1085 max_code = sym->code;
1088 token_translations = xnmalloc (max_code + 1,
1089 sizeof *token_translations);
1091 /* Initialize all entries for literal tokens to the internal token
1092 number for $undefined, which represents all invalid inputs. */
1093 for (int i = 0; i < max_code + 1; ++i)
1094 token_translations[i] = undeftoken->content->number;
1095 for (int i = 0; symbols_sorted[i]; ++i)
1096 symbol_translation (symbols_sorted[i]);
1100 /* Whether some symbol requires internationalization. */
1101 static bool
1102 has_translations (void)
1104 for (const void *entry = hash_get_first (symbol_table);
1105 entry;
1106 entry = hash_get_next (symbol_table, entry))
1108 const symbol *sym = (const symbol *) entry;
1109 if (sym->translatable)
1110 return true;
1112 return false;
1115 /*----------------------------------------------------------------.
1116 | Assign symbol numbers, and write definition of token names into |
1117 | FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1118 `----------------------------------------------------------------*/
1120 void
1121 symbols_pack (void)
1123 symbols = xcalloc (nsyms, sizeof *symbols);
1124 for (int i = 0; symbols_sorted[i]; ++i)
1125 symbol_pack (symbols_sorted[i]);
1127 /* Aliases leave empty slots in symbols, so remove them. */
1129 int nsyms_old = nsyms;
1130 for (int writei = 0, readi = 0; readi < nsyms_old; readi += 1)
1132 if (symbols[readi] == NULL)
1134 nsyms -= 1;
1135 ntokens -= 1;
1137 else
1139 symbols[writei] = symbols[readi];
1140 symbols[writei]->content->number = writei;
1141 writei += 1;
1145 symbols = xnrealloc (symbols, nsyms, sizeof *symbols);
1147 symbols_token_translations_init ();
1149 if (startsymbol->content->class == unknown_sym)
1150 complain (&startsymbol_loc, fatal,
1151 _("the start symbol %s is undefined"),
1152 startsymbol->tag);
1153 else if (startsymbol->content->class == token_sym)
1154 complain (&startsymbol_loc, fatal,
1155 _("the start symbol %s is a token"),
1156 startsymbol->tag);
1158 // If some user tokens are internationalized, the internal ones
1159 // should be too.
1160 if (has_translations ())
1162 const bool eof_is_user_defined
1163 = !eoftoken->alias || STRNEQ (eoftoken->alias->tag, "$end");
1164 if (!eof_is_user_defined)
1165 eoftoken->alias->translatable = true;
1166 undeftoken->alias->translatable = true;
1167 errtoken->alias->translatable = true;
1171 /*---------------------------------.
1172 | Initialize relation graph nodes. |
1173 `---------------------------------*/
1175 static void
1176 init_prec_nodes (void)
1178 prec_nodes = xcalloc (nsyms, sizeof *prec_nodes);
1179 for (int i = 0; i < nsyms; ++i)
1181 prec_nodes[i] = xmalloc (sizeof *prec_nodes[i]);
1182 symgraph *s = prec_nodes[i];
1183 s->id = i;
1184 s->succ = 0;
1185 s->pred = 0;
1189 /*----------------.
1190 | Create a link. |
1191 `----------------*/
1193 static symgraphlink *
1194 symgraphlink_new (graphid id, symgraphlink *next)
1196 symgraphlink *res = xmalloc (sizeof *res);
1197 res->id = id;
1198 res->next = next;
1199 return res;
1203 /*------------------------------------------------------------------.
1204 | Register the second symbol of the precedence relation, and return |
1205 | whether this relation is new. Use only in register_precedence. |
1206 `------------------------------------------------------------------*/
1208 static bool
1209 register_precedence_second_symbol (symgraphlink **first, graphid sym)
1211 if (!*first || sym < (*first)->id)
1212 *first = symgraphlink_new (sym, *first);
1213 else
1215 symgraphlink *slist = *first;
1217 while (slist->next && slist->next->id <= sym)
1218 slist = slist->next;
1220 if (slist->id == sym)
1221 /* Relation already present. */
1222 return false;
1224 slist->next = symgraphlink_new (sym, slist->next);
1226 return true;
1229 /*------------------------------------------------------------------.
1230 | Register a new relation between symbols as used. The first symbol |
1231 | has a greater precedence than the second one. |
1232 `------------------------------------------------------------------*/
1234 void
1235 register_precedence (graphid first, graphid snd)
1237 if (!prec_nodes)
1238 init_prec_nodes ();
1239 register_precedence_second_symbol (&(prec_nodes[first]->succ), snd);
1240 register_precedence_second_symbol (&(prec_nodes[snd]->pred), first);
1244 /*---------------------------------------.
1245 | Deep clear a linked / adjacency list). |
1246 `---------------------------------------*/
1248 static void
1249 linkedlist_free (symgraphlink *node)
1251 if (node)
1253 while (node->next)
1255 symgraphlink *tmp = node->next;
1256 free (node);
1257 node = tmp;
1259 free (node);
1263 /*----------------------------------------------.
1264 | Clear and destroy association tracking table. |
1265 `----------------------------------------------*/
1267 static void
1268 assoc_free (void)
1270 for (int i = 0; i < nsyms; ++i)
1272 linkedlist_free (prec_nodes[i]->pred);
1273 linkedlist_free (prec_nodes[i]->succ);
1274 free (prec_nodes[i]);
1276 free (prec_nodes);
1279 /*---------------------------------------.
1280 | Initialize association tracking table. |
1281 `---------------------------------------*/
1283 static void
1284 init_assoc (void)
1286 used_assoc = xcalloc (nsyms, sizeof *used_assoc);
1287 for (graphid i = 0; i < nsyms; ++i)
1288 used_assoc[i] = false;
1291 /*------------------------------------------------------------------.
1292 | Test if the associativity for the symbols is defined and useless. |
1293 `------------------------------------------------------------------*/
1295 static inline bool
1296 is_assoc_useless (symbol *s)
1298 return s
1299 && s->content->assoc != undef_assoc
1300 && s->content->assoc != precedence_assoc
1301 && !used_assoc[s->content->number];
1304 /*-------------------------------.
1305 | Register a used associativity. |
1306 `-------------------------------*/
1308 void
1309 register_assoc (graphid i, graphid j)
1311 if (!used_assoc)
1312 init_assoc ();
1313 used_assoc[i] = true;
1314 used_assoc[j] = true;
1317 /*--------------------------------------------------.
1318 | Print a warning for unused precedence relations. |
1319 `--------------------------------------------------*/
1321 void
1322 print_precedence_warnings (void)
1324 if (!prec_nodes)
1325 init_prec_nodes ();
1326 if (!used_assoc)
1327 init_assoc ();
1328 for (int i = 0; i < nsyms; ++i)
1330 symbol *s = symbols[i];
1331 if (s
1332 && s->content->prec != 0
1333 && !prec_nodes[i]->pred
1334 && !prec_nodes[i]->succ)
1336 if (is_assoc_useless (s))
1337 complain (&s->content->prec_loc, Wprecedence,
1338 _("useless precedence and associativity for %s"), s->tag);
1339 else if (s->content->assoc == precedence_assoc)
1340 complain (&s->content->prec_loc, Wprecedence,
1341 _("useless precedence for %s"), s->tag);
1343 else if (is_assoc_useless (s))
1344 complain (&s->content->prec_loc, Wprecedence,
1345 _("useless associativity for %s, use %%precedence"), s->tag);
1347 free (used_assoc);
1348 assoc_free ();