tests: grep -E is not portable
[bison.git] / src / symtab.c
blob8aea627addc7afe12e68dc7cbc4de2cd0e60e87f
1 /* Symbol table manager for Bison.
3 Copyright (C) 1984, 1989, 2000-2002, 2004-2015, 2018-2021 Free
4 Software Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
21 #include <config.h>
22 #include "symtab.h"
24 #include "system.h"
26 #include <assure.h>
27 #include <fstrcmp.h>
28 #include <hash.h>
29 #include <quote.h>
31 #include "complain.h"
32 #include "getargs.h"
33 #include "gram.h"
34 #include "intprops.h"
36 /** Undefined token code. */
37 #define CODE_UNDEFINED (-1)
39 /* Undefined symbol number. */
40 #define NUMBER_UNDEFINED (-1)
43 static struct hash_table *symbol_table = NULL;
44 static struct hash_table *semantic_type_table = NULL;
46 /*----------------------------------------------------------------.
47 | Symbols sorted by tag. Allocated by table_sort, after which no |
48 | more symbols should be created. |
49 `----------------------------------------------------------------*/
51 static symbol **symbols_sorted = NULL;
52 static semantic_type **semantic_types_sorted = NULL;
55 /*------------------------.
56 | Distinguished symbols. |
57 `------------------------*/
59 symbol *errtoken = NULL;
60 symbol *undeftoken = NULL;
61 symbol *eoftoken = NULL;
62 symbol *acceptsymbol = NULL;
64 /* Precedence relation graph. */
65 static symgraph **prec_nodes;
67 /* Store which associativity is used. */
68 static bool *used_assoc = NULL;
70 bool tag_seen = false;
73 /* Whether SYM was defined by the user. */
75 static bool
76 symbol_is_user_defined (symbol *sym)
78 const bool eof_is_user_defined
79 = !eoftoken->alias || STRNEQ (eoftoken->alias->tag, "$end");
80 return sym->tag[0] != '$'
81 && (eof_is_user_defined || (sym != eoftoken && sym->alias != errtoken))
82 && sym != errtoken && sym->alias != errtoken
83 && sym != undeftoken && sym->alias != undeftoken;
87 /*--------------------------.
88 | Create a new sym_content. |
89 `--------------------------*/
91 static sym_content *
92 sym_content_new (symbol *s)
94 sym_content *res = xmalloc (sizeof *res);
96 res->symbol = s;
98 res->type_name = NULL;
99 res->type_loc = empty_loc;
100 for (int i = 0; i < CODE_PROPS_SIZE; ++i)
101 code_props_none_init (&res->props[i]);
103 res->number = NUMBER_UNDEFINED;
104 res->prec_loc = empty_loc;
105 res->prec = 0;
106 res->assoc = undef_assoc;
107 res->code = CODE_UNDEFINED;
109 res->class = unknown_sym;
110 res->status = undeclared;
112 return res;
115 /*---------------------------------.
116 | Create a new symbol, named TAG. |
117 `---------------------------------*/
119 static symbol *
120 symbol_new (uniqstr tag, location loc)
122 symbol *res = xmalloc (sizeof *res);
123 uniqstr_assert (tag);
125 /* If the tag is not a string (starts with a double quote), check
126 that it is valid for Yacc. */
127 if (tag[0] != '\"' && tag[0] != '\'' && strchr (tag, '-'))
128 complain (&loc, Wyacc,
129 _("POSIX Yacc forbids dashes in symbol names: %s"), tag);
131 res->tag = tag;
132 res->location = loc;
133 res->translatable = false;
134 res->location_of_lhs = false;
135 res->alias = NULL;
136 res->content = sym_content_new (res);
137 res->is_alias = false;
138 return res;
141 /*--------------------.
142 | Free a sym_content. |
143 `--------------------*/
145 static void
146 sym_content_free (sym_content *sym)
148 free (sym);
152 /*---------------------------------------------------------.
153 | Free a symbol and its associated content if appropriate. |
154 `---------------------------------------------------------*/
156 static void
157 symbol_free (void *ptr)
159 symbol *sym = (symbol *)ptr;
160 if (!sym->is_alias)
161 sym_content_free (sym->content);
162 free (sym);
166 /* If needed, swap first and second so that first has the earliest
167 location (according to location_cmp).
169 Many symbol features (e.g., token codes) are not assigned during
170 parsing, but in a second step, via a traversal of the symbol table
171 sorted on tag.
173 However, error messages make more sense if we keep the first
174 declaration first.
177 static void
178 symbols_sort (const symbol **first, const symbol **second)
180 if (0 < location_cmp ((*first)->location, (*second)->location))
182 const symbol* tmp = *first;
183 *first = *second;
184 *second = tmp;
188 /* Likewise, for locations. */
190 static void
191 locations_sort (location *first, location *second)
193 if (0 < location_cmp (*first, *second))
195 location tmp = *first;
196 *first = *second;
197 *second = tmp;
201 char const *
202 code_props_type_string (code_props_type kind)
204 switch (kind)
206 case destructor:
207 return "%destructor";
208 case printer:
209 return "%printer";
211 abort ();
215 /*----------------------------------------.
216 | Create a new semantic type, named TAG. |
217 `----------------------------------------*/
219 static semantic_type *
220 semantic_type_new (uniqstr tag, const location *loc)
222 semantic_type *res = xmalloc (sizeof *res);
224 uniqstr_assert (tag);
225 res->tag = tag;
226 res->location = loc ? *loc : empty_loc;
227 res->status = undeclared;
228 for (int i = 0; i < CODE_PROPS_SIZE; ++i)
229 code_props_none_init (&res->props[i]);
231 return res;
235 /*-----------------.
236 | Print a symbol. |
237 `-----------------*/
239 #define SYMBOL_INT_ATTR_PRINT(Attr) \
240 if (s->content) \
241 fprintf (f, " %s = %d", #Attr, s->content->Attr)
243 #define SYMBOL_STR_ATTR_PRINT(Attr) \
244 if (s->content && s->content->Attr) \
245 fprintf (f, " %s { %s }", #Attr, s->content->Attr)
247 #define SYMBOL_CODE_PRINT(Attr) \
248 if (s->content && s->content->props[Attr].code) \
249 fprintf (f, " %s { %s }", #Attr, s->content->props[Attr].code)
251 void
252 symbol_print (symbol const *s, FILE *f)
254 if (s)
256 symbol_class c = s->content->class;
257 fprintf (f, "%s: %s",
258 c == unknown_sym ? "unknown"
259 : c == pct_type_sym ? "%type"
260 : c == token_sym ? "token"
261 : c == nterm_sym ? "nterm"
262 : NULL, /* abort. */
263 s->tag);
264 putc (' ', f);
265 location_print (s->location, f);
266 SYMBOL_INT_ATTR_PRINT (code);
267 SYMBOL_INT_ATTR_PRINT (number);
268 SYMBOL_STR_ATTR_PRINT (type_name);
269 SYMBOL_CODE_PRINT (destructor);
270 SYMBOL_CODE_PRINT (printer);
272 else
273 fputs ("<NULL>", f);
276 #undef SYMBOL_ATTR_PRINT
277 #undef SYMBOL_CODE_PRINT
280 /*----------------------------------.
281 | Whether S is a valid identifier. |
282 `----------------------------------*/
284 static bool
285 is_identifier (uniqstr s)
287 static char const alphanum[26 + 26 + 1 + 10] =
288 "abcdefghijklmnopqrstuvwxyz"
289 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
291 "0123456789";
292 if (!s || ! memchr (alphanum, *s, sizeof alphanum - 10))
293 return false;
294 for (++s; *s; ++s)
295 if (! memchr (alphanum, *s, sizeof alphanum))
296 return false;
297 return true;
301 /*-----------------------------------------------.
302 | Get the identifier associated to this symbol. |
303 `-----------------------------------------------*/
305 uniqstr
306 symbol_id_get (symbol const *sym)
308 // There's one weird case: YYerror is the alias, and error is the
309 // base symbol. Return YYerror in that case.
310 if (sym->alias && is_identifier (sym->alias->tag))
311 return sym->alias->tag;
312 else if (is_identifier (sym->tag))
313 return sym->tag;
314 else
315 return NULL;
319 /*------------------------------------------------------------------.
320 | Complain that S's WHAT is redeclared at SECOND, and was first set |
321 | at FIRST. |
322 `------------------------------------------------------------------*/
324 static void
325 complain_symbol_redeclared (symbol *s, const char *what, location first,
326 location second)
328 locations_sort (&first, &second);
329 complain (&second, complaint, _("%s redeclaration for %s"), what, s->tag);
330 subcomplain (&first, complaint, _("previous declaration"));
333 static void
334 complain_semantic_type_redeclared (semantic_type *s, const char *what, location first,
335 location second)
337 locations_sort (&first, &second);
338 complain (&second, complaint, _("%s redeclaration for <%s>"), what, s->tag);
339 subcomplain (&first, complaint, _("previous declaration"));
342 static void
343 complain_class_redeclared (symbol *sym, symbol_class class, location second)
345 complain (&second, complaint,
346 class == token_sym
347 ? _("symbol %s redeclared as a token")
348 : _("symbol %s redeclared as a nonterminal"), sym->tag);
349 if (!location_empty (sym->location))
350 subcomplain (&sym->location, complaint, _("previous definition"));
353 static const symbol *
354 symbol_from_uniqstr_fuzzy (const uniqstr key)
356 aver (symbols_sorted);
357 #define FSTRCMP_THRESHOLD 0.6
358 double best_similarity = FSTRCMP_THRESHOLD;
359 const symbol *res = NULL;
360 size_t count = hash_get_n_entries (symbol_table);
361 for (int i = 0; i < count; ++i)
363 symbol *sym = symbols_sorted[i];
364 if (STRNEQ (key, sym->tag)
365 && (sym->content->status == declared
366 || sym->content->status == undeclared))
368 double similarity = fstrcmp_bounded (key, sym->tag, best_similarity);
369 if (best_similarity < similarity)
371 res = sym;
372 best_similarity = similarity;
376 return res;
379 static void
380 complain_symbol_undeclared (const symbol *sym)
382 assert (sym->content->status != declared);
383 const symbol *best = symbol_from_uniqstr_fuzzy (sym->tag);
384 if (best)
386 complain (&sym->location,
387 sym->content->status == needed ? complaint : Wother,
388 _("symbol %s is used, but is not defined as a token"
389 " and has no rules; did you mean %s?"),
390 quote_n (0, sym->tag),
391 quote_n (1, best->tag));
392 if (feature_flag & feature_caret)
393 location_caret_suggestion (sym->location, best->tag, stderr);
395 else
396 complain (&sym->location,
397 sym->content->status == needed ? complaint : Wother,
398 _("symbol %s is used, but is not defined as a token"
399 " and has no rules"),
400 quote (sym->tag));
403 void
404 symbol_location_as_lhs_set (symbol *sym, location loc)
406 if (!sym->location_of_lhs)
408 sym->location = loc;
409 sym->location_of_lhs = true;
414 /*-----------------------------------------------------------------.
415 | Set the TYPE_NAME associated with SYM. Does nothing if passed 0 |
416 | as TYPE_NAME. |
417 `-----------------------------------------------------------------*/
419 void
420 symbol_type_set (symbol *sym, uniqstr type_name, location loc)
422 if (type_name)
424 tag_seen = true;
425 if (sym->content->type_name)
426 complain_symbol_redeclared (sym, "%type",
427 sym->content->type_loc, loc);
428 else
430 uniqstr_assert (type_name);
431 sym->content->type_name = type_name;
432 sym->content->type_loc = loc;
437 /*--------------------------------------------------------.
438 | Set the DESTRUCTOR or PRINTER associated with the SYM. |
439 `--------------------------------------------------------*/
441 void
442 symbol_code_props_set (symbol *sym, code_props_type kind,
443 code_props const *code)
445 if (sym->content->props[kind].code)
446 complain_symbol_redeclared (sym, code_props_type_string (kind),
447 sym->content->props[kind].location,
448 code->location);
449 else
450 sym->content->props[kind] = *code;
454 /*-----------------------------------------------------.
455 | Set the DESTRUCTOR or PRINTER associated with TYPE. |
456 `-----------------------------------------------------*/
458 void
459 semantic_type_code_props_set (semantic_type *type,
460 code_props_type kind,
461 code_props const *code)
463 if (type->props[kind].code)
464 complain_semantic_type_redeclared (type, code_props_type_string (kind),
465 type->props[kind].location,
466 code->location);
467 else
468 type->props[kind] = *code;
471 /*---------------------------------------------------.
472 | Get the computed %destructor or %printer for SYM. |
473 `---------------------------------------------------*/
475 code_props *
476 symbol_code_props_get (symbol *sym, code_props_type kind)
478 /* Per-symbol code props. */
479 if (sym->content->props[kind].code)
480 return &sym->content->props[kind];
482 /* Per-type code props. */
483 if (sym->content->type_name)
485 code_props *code =
486 &semantic_type_get (sym->content->type_name, NULL)->props[kind];
487 if (code->code)
488 return code;
491 /* Apply default code props's only to user-defined symbols. */
492 if (symbol_is_user_defined (sym))
494 code_props *code = &semantic_type_get (sym->content->type_name ? "*" : "",
495 NULL)->props[kind];
496 if (code->code)
497 return code;
499 return &code_props_none;
502 /*-----------------------------------------------------------------.
503 | Set the PRECEDENCE associated with SYM. Does nothing if invoked |
504 | with UNDEF_ASSOC as ASSOC. |
505 `-----------------------------------------------------------------*/
507 void
508 symbol_precedence_set (symbol *sym, int prec, assoc a, location loc)
510 if (a != undef_assoc)
512 sym_content *s = sym->content;
513 if (s->prec)
514 complain_symbol_redeclared (sym, assoc_to_string (a),
515 s->prec_loc, loc);
516 else
518 s->prec = prec;
519 s->assoc = a;
520 s->prec_loc = loc;
524 /* Only terminals have a precedence. */
525 symbol_class_set (sym, token_sym, loc, false);
529 /*------------------------------------.
530 | Set the CLASS associated with SYM. |
531 `------------------------------------*/
533 static void
534 complain_pct_type_on_token (location *loc)
536 complain (loc, Wyacc,
537 _("POSIX yacc reserves %%type to nonterminals"));
540 void
541 symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring)
543 aver (class != unknown_sym);
544 sym_content *s = sym->content;
545 if (class == pct_type_sym)
547 if (s->class == token_sym)
548 complain_pct_type_on_token (&loc);
549 else if (s->class == unknown_sym)
550 s->class = class;
552 else if (s->class != unknown_sym && s->class != pct_type_sym
553 && s->class != class)
554 complain_class_redeclared (sym, class, loc);
555 else
557 if (class == token_sym && s->class == pct_type_sym)
558 complain_pct_type_on_token (&sym->location);
560 s->class = class;
562 if (declaring)
564 if (s->status == declared)
566 complain (&loc, Wother,
567 _("symbol %s redeclared"), sym->tag);
568 subcomplain (&sym->location, Wother,
569 _("previous declaration"));
571 else
573 sym->location = loc;
574 s->status = declared;
581 /*----------------------------.
582 | Set the token code of SYM. |
583 `----------------------------*/
585 void
586 symbol_code_set (symbol *sym, int code, location loc)
588 int *codep = &sym->content->code;
589 if (sym->content->class != token_sym)
590 complain (&loc, complaint,
591 _("nonterminals cannot be given a token code"));
592 else if (*codep != CODE_UNDEFINED
593 && *codep != code)
594 complain (&loc, complaint, _("redefining code of token %s"),
595 sym->tag);
596 else if (code == INT_MAX)
597 complain (&loc, complaint, _("code of token %s too large"),
598 sym->tag);
599 else
601 *codep = code;
602 /* User defined $end token? */
603 if (code == 0 && !eoftoken)
605 eoftoken = sym->content->symbol;
606 eoftoken->content->number = 0;
612 /*----------------------------------------------------------.
613 | If SYM is not defined, report an error, and consider it a |
614 | nonterminal. |
615 `----------------------------------------------------------*/
617 static void
618 symbol_check_defined (symbol *sym)
620 sym_content *s = sym->content;
621 if (s->class == unknown_sym || s->class == pct_type_sym)
623 complain_symbol_undeclared (sym);
624 s->class = nterm_sym;
627 if (s->number == NUMBER_UNDEFINED)
628 s->number = s->class == token_sym ? ntokens++ : nnterms++;
630 if (s->class == token_sym
631 && sym->tag[0] == '"'
632 && !sym->is_alias)
633 complain (&sym->location, Wdangling_alias,
634 _("string literal %s not attached to a symbol"),
635 sym->tag);
637 for (int i = 0; i < 2; ++i)
638 symbol_code_props_get (sym, i)->is_used = true;
640 /* Set the semantic type status associated to the current symbol to
641 'declared' so that we could check semantic types unnecessary uses. */
642 if (s->type_name)
644 semantic_type *sem_type = semantic_type_get (s->type_name, NULL);
645 if (sem_type)
646 sem_type->status = declared;
650 static void
651 semantic_type_check_defined (semantic_type *sem_type)
653 /* <*> and <> do not have to be "declared". */
654 if (sem_type->status == declared
655 || !*sem_type->tag
656 || STREQ (sem_type->tag, "*"))
658 for (int i = 0; i < 2; ++i)
659 if (sem_type->props[i].kind != CODE_PROPS_NONE
660 && ! sem_type->props[i].is_used)
661 complain (&sem_type->location, Wother,
662 _("useless %s for type <%s>"),
663 code_props_type_string (i), sem_type->tag);
665 else
666 complain (&sem_type->location, Wother,
667 _("type <%s> is used, but is not associated to any symbol"),
668 sem_type->tag);
671 /*-------------------------------------------------------------------.
672 | Merge the properties (precedence, associativity, etc.) of SYM, and |
673 | its string-named alias STR; check consistency. |
674 `-------------------------------------------------------------------*/
676 static void
677 symbol_merge_properties (symbol *sym, symbol *str)
679 if (str->content->type_name != sym->content->type_name)
681 if (str->content->type_name)
682 symbol_type_set (sym,
683 str->content->type_name, str->content->type_loc);
684 else
685 symbol_type_set (str,
686 sym->content->type_name, sym->content->type_loc);
690 for (int i = 0; i < CODE_PROPS_SIZE; ++i)
691 if (str->content->props[i].code)
692 symbol_code_props_set (sym, i, &str->content->props[i]);
693 else if (sym->content->props[i].code)
694 symbol_code_props_set (str, i, &sym->content->props[i]);
696 if (sym->content->prec || str->content->prec)
698 if (str->content->prec)
699 symbol_precedence_set (sym, str->content->prec, str->content->assoc,
700 str->content->prec_loc);
701 else
702 symbol_precedence_set (str, sym->content->prec, sym->content->assoc,
703 sym->content->prec_loc);
707 void
708 symbol_make_alias (symbol *sym, symbol *str, location loc)
710 if (sym->content->class != token_sym)
711 complain (&loc, complaint,
712 _("nonterminals cannot be given a string alias"));
713 else if (str->alias)
714 complain (&loc, Wother,
715 _("symbol %s used more than once as a literal string"), str->tag);
716 else if (sym->alias)
717 complain (&loc, Wother,
718 _("symbol %s given more than one literal string"), sym->tag);
719 else
721 symbol_merge_properties (sym, str);
722 sym_content_free (str->content);
723 str->content = sym->content;
724 str->content->symbol = str;
725 str->is_alias = true;
726 str->alias = sym;
727 sym->alias = str;
732 /*-------------------------------------------------------------------.
733 | Assign a symbol number, and write the definition of the token name |
734 | into FDEFINES. Put in SYMBOLS. |
735 `-------------------------------------------------------------------*/
737 static void
738 symbol_pack (symbol *sym)
740 aver (sym->content->number != NUMBER_UNDEFINED);
741 if (sym->content->class == nterm_sym)
742 sym->content->number += ntokens;
744 symbols[sym->content->number] = sym->content->symbol;
747 static void
748 complain_code_redeclared (int num, const symbol *first, const symbol *second)
750 symbols_sort (&first, &second);
751 complain (&second->location, complaint,
752 _("code %d reassigned to token %s"),
753 num, second->tag);
754 subcomplain (&first->location, complaint,
755 _("previous declaration for %s"),
756 first->tag);
759 /*-------------------------------------------------.
760 | Put SYM in TOKEN_TRANSLATIONS if it is a token. |
761 `-------------------------------------------------*/
763 static void
764 symbol_translation (const symbol *sym)
766 if (sym->content->class == token_sym && !sym->is_alias)
768 /* A token whose translation has already been set? */
769 if (token_translations[sym->content->code]
770 != undeftoken->content->number)
771 complain_code_redeclared
772 (sym->content->code,
773 symbols[token_translations[sym->content->code]], sym);
774 else
775 token_translations[sym->content->code]
776 = sym->content->number;
781 /*---------------------------------------.
782 | Symbol and semantic type hash tables. |
783 `---------------------------------------*/
785 /* Initial capacity of symbol and semantic type hash table. */
786 #define HT_INITIAL_CAPACITY 257
788 static inline bool
789 hash_compare_symbol (const symbol *m1, const symbol *m2)
791 /* Since tags are unique, we can compare the pointers themselves. */
792 return UNIQSTR_EQ (m1->tag, m2->tag);
795 static inline bool
796 hash_compare_semantic_type (const semantic_type *m1, const semantic_type *m2)
798 /* Since names are unique, we can compare the pointers themselves. */
799 return UNIQSTR_EQ (m1->tag, m2->tag);
802 static bool
803 hash_symbol_comparator (void const *m1, void const *m2)
805 return hash_compare_symbol (m1, m2);
808 static bool
809 hash_semantic_type_comparator (void const *m1, void const *m2)
811 return hash_compare_semantic_type (m1, m2);
814 static inline size_t
815 hash_symbol (const symbol *m, size_t tablesize)
817 /* Since tags are unique, we can hash the pointer itself. */
818 return ((uintptr_t) m->tag) % tablesize;
821 static inline size_t
822 hash_semantic_type (const semantic_type *m, size_t tablesize)
824 /* Since names are unique, we can hash the pointer itself. */
825 return ((uintptr_t) m->tag) % tablesize;
828 static size_t
829 hash_symbol_hasher (void const *m, size_t tablesize)
831 return hash_symbol (m, tablesize);
834 static size_t
835 hash_semantic_type_hasher (void const *m, size_t tablesize)
837 return hash_semantic_type (m, tablesize);
840 /*-------------------------------.
841 | Create the symbol hash table. |
842 `-------------------------------*/
844 void
845 symbols_new (void)
847 symbol_table = hash_xinitialize (HT_INITIAL_CAPACITY,
848 NULL,
849 hash_symbol_hasher,
850 hash_symbol_comparator,
851 symbol_free);
853 /* Construct the acceptsymbol symbol. */
854 acceptsymbol = symbol_get ("$accept", empty_loc);
855 acceptsymbol->content->class = nterm_sym;
856 acceptsymbol->content->number = nnterms++;
858 /* Construct the YYerror/"error" token */
859 errtoken = symbol_get ("YYerror", empty_loc);
860 errtoken->content->class = token_sym;
861 errtoken->content->number = ntokens++;
863 symbol *alias = symbol_get ("error", empty_loc);
864 symbol_class_set (alias, token_sym, empty_loc, false);
865 symbol_make_alias (errtoken, alias, empty_loc);
868 /* Construct the YYUNDEF/"$undefined" token that represents all
869 undefined literal tokens. It is always symbol number 2. */
870 undeftoken = symbol_get ("YYUNDEF", empty_loc);
871 undeftoken->content->class = token_sym;
872 undeftoken->content->number = ntokens++;
874 symbol *alias = symbol_get ("$undefined", empty_loc);
875 symbol_class_set (alias, token_sym, empty_loc, false);
876 symbol_make_alias (undeftoken, alias, empty_loc);
879 semantic_type_table = hash_xinitialize (HT_INITIAL_CAPACITY,
880 NULL,
881 hash_semantic_type_hasher,
882 hash_semantic_type_comparator,
883 free);
887 /*----------------------------------------------------------------.
888 | Find the symbol named KEY, and return it. If it does not exist |
889 | yet, create it. |
890 `----------------------------------------------------------------*/
892 symbol *
893 symbol_from_uniqstr (const uniqstr key, location loc)
895 symbol probe;
897 probe.tag = key;
898 symbol *res = hash_lookup (symbol_table, &probe);
900 if (!res)
902 /* First insertion in the hash. */
903 aver (!symbols_sorted);
904 res = symbol_new (key, loc);
905 hash_xinsert (symbol_table, res);
907 return res;
911 /*-----------------------------------------------------------------------.
912 | Find the semantic type named KEY, and return it. If it does not exist |
913 | yet, create it. |
914 `-----------------------------------------------------------------------*/
916 semantic_type *
917 semantic_type_from_uniqstr (const uniqstr key, const location *loc)
919 semantic_type probe;
921 probe.tag = key;
922 semantic_type *res = hash_lookup (semantic_type_table, &probe);
924 if (!res)
926 /* First insertion in the hash. */
927 res = semantic_type_new (key, loc);
928 hash_xinsert (semantic_type_table, res);
930 return res;
934 /*----------------------------------------------------------------.
935 | Find the symbol named KEY, and return it. If it does not exist |
936 | yet, create it. |
937 `----------------------------------------------------------------*/
939 symbol *
940 symbol_get (const char *key, location loc)
942 return symbol_from_uniqstr (uniqstr_new (key), loc);
946 /*-----------------------------------------------------------------------.
947 | Find the semantic type named KEY, and return it. If it does not exist |
948 | yet, create it. |
949 `-----------------------------------------------------------------------*/
951 semantic_type *
952 semantic_type_get (const char *key, const location *loc)
954 return semantic_type_from_uniqstr (uniqstr_new (key), loc);
958 /*------------------------------------------------------------------.
959 | Generate a dummy nonterminal, whose name cannot conflict with the |
960 | user's names. |
961 `------------------------------------------------------------------*/
963 symbol *
964 dummy_symbol_get (location loc)
966 /* Incremented for each generated symbol. */
967 static int dummy_count = 0;
968 char buf[32];
969 int len = snprintf (buf, sizeof buf, "$@%d", ++dummy_count);
970 assure (len < sizeof buf);
971 symbol *sym = symbol_get (buf, loc);
972 sym->content->class = nterm_sym;
973 return sym;
976 bool
977 symbol_is_dummy (symbol const *sym)
979 return sym->tag[0] == '@' || (sym->tag[0] == '$' && sym->tag[1] == '@');
982 /*-------------------.
983 | Free the symbols. |
984 `-------------------*/
986 void
987 symbols_free (void)
989 hash_free (symbol_table);
990 hash_free (semantic_type_table);
991 free (symbols);
992 free (symbols_sorted);
993 free (semantic_types_sorted);
997 static int
998 symbol_cmp (void const *a, void const *b)
1000 return location_cmp ((*(symbol * const *)a)->location,
1001 (*(symbol * const *)b)->location);
1004 /* Store in *SORTED an array of pointers to the symbols contained in
1005 TABLE, sorted by order of appearance (i.e., by location). */
1007 static void
1008 table_sort (struct hash_table *table, symbol ***sorted)
1010 aver (!*sorted);
1011 size_t count = hash_get_n_entries (table);
1012 *sorted = xnmalloc (count + 1, sizeof **sorted);
1013 hash_get_entries (table, (void**)*sorted, count);
1014 qsort (*sorted, count, sizeof **sorted, symbol_cmp);
1015 (*sorted)[count] = NULL;
1018 /*--------------------------------------------------------------.
1019 | Check that all the symbols are defined. Report any undefined |
1020 | symbols and consider them nonterminals. |
1021 `--------------------------------------------------------------*/
1023 void
1024 symbols_check_defined (void)
1026 table_sort (symbol_table, &symbols_sorted);
1027 /* semantic_type, like symbol, starts with a 'tag' field and then a
1028 'location' field. And here we only deal with arrays/hashes of
1029 pointers, sizeof is not an issue.
1031 So instead of implementing table_sort (and symbol_cmp) once for
1032 each type, let's lie a bit to the typing system, and treat
1033 'semantic_type' as if it were 'symbol'. */
1034 table_sort (semantic_type_table, (symbol ***) &semantic_types_sorted);
1036 for (int i = 0; symbols_sorted[i]; ++i)
1037 symbol_check_defined (symbols_sorted[i]);
1038 for (int i = 0; semantic_types_sorted[i]; ++i)
1039 semantic_type_check_defined (semantic_types_sorted[i]);
1042 /*------------------------------------------------------------------.
1043 | Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
1044 | number. |
1045 `------------------------------------------------------------------*/
1047 static void
1048 symbols_token_translations_init (void)
1050 bool code_256_available_p = true;
1052 /* Find the highest token code, and whether 256, the POSIX preferred
1053 token code for the error token, is used. */
1054 max_code = 0;
1055 for (int i = 0; i < ntokens; ++i)
1057 sym_content *sym = symbols[i]->content;
1058 if (sym->code != CODE_UNDEFINED)
1060 if (sym->code > max_code)
1061 max_code = sym->code;
1062 if (sym->code == 256)
1063 code_256_available_p = false;
1067 /* If 256 is not used, assign it to error, to follow POSIX. */
1068 if (code_256_available_p
1069 && errtoken->content->code == CODE_UNDEFINED)
1070 errtoken->content->code = 256;
1072 /* Set the missing codes. */
1073 if (max_code < 256)
1074 max_code = 256;
1076 for (int i = 0; i < ntokens; ++i)
1078 sym_content *sym = symbols[i]->content;
1079 if (sym->code == CODE_UNDEFINED)
1081 IGNORE_TYPE_LIMITS_BEGIN
1082 if (INT_ADD_WRAPV (max_code, 1, &max_code))
1083 complain (NULL, fatal, _("token number too large"));
1084 IGNORE_TYPE_LIMITS_END
1085 sym->code = max_code;
1087 if (sym->code > max_code)
1088 max_code = sym->code;
1091 token_translations = xnmalloc (max_code + 1,
1092 sizeof *token_translations);
1094 /* Initialize all entries for literal tokens to the internal token
1095 number for $undefined, which represents all invalid inputs. */
1096 for (int i = 0; i < max_code + 1; ++i)
1097 token_translations[i] = undeftoken->content->number;
1098 for (int i = 0; symbols_sorted[i]; ++i)
1099 symbol_translation (symbols_sorted[i]);
1103 /* Whether some symbol requires internationalization. */
1104 static bool
1105 has_translations (void)
1107 for (const void *entry = hash_get_first (symbol_table);
1108 entry;
1109 entry = hash_get_next (symbol_table, entry))
1111 const symbol *sym = (const symbol *) entry;
1112 if (sym->translatable)
1113 return true;
1115 return false;
1118 /*----------------------------------------------------------------.
1119 | Assign symbol numbers, and write definition of token names into |
1120 | FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
1121 `----------------------------------------------------------------*/
1123 void
1124 symbols_pack (void)
1126 symbols = xcalloc (nsyms, sizeof *symbols);
1127 for (int i = 0; symbols_sorted[i]; ++i)
1128 symbol_pack (symbols_sorted[i]);
1130 /* Aliases leave empty slots in symbols, so remove them. */
1132 int nsyms_old = nsyms;
1133 for (int writei = 0, readi = 0; readi < nsyms_old; readi += 1)
1135 if (symbols[readi] == NULL)
1137 nsyms -= 1;
1138 ntokens -= 1;
1140 else
1142 symbols[writei] = symbols[readi];
1143 symbols[writei]->content->number = writei;
1144 writei += 1;
1148 symbols = xnrealloc (symbols, nsyms, sizeof *symbols);
1150 symbols_token_translations_init ();
1152 // If some user tokens are internationalized, the internal ones
1153 // should be too.
1154 if (has_translations ())
1156 const bool eof_is_user_defined
1157 = !eoftoken->alias || STRNEQ (eoftoken->alias->tag, "$end");
1158 if (!eof_is_user_defined)
1159 eoftoken->alias->translatable = true;
1160 undeftoken->alias->translatable = true;
1161 errtoken->alias->translatable = true;
1165 /*---------------------------------.
1166 | Initialize relation graph nodes. |
1167 `---------------------------------*/
1169 static void
1170 init_prec_nodes (void)
1172 prec_nodes = xcalloc (nsyms, sizeof *prec_nodes);
1173 for (int i = 0; i < nsyms; ++i)
1175 prec_nodes[i] = xmalloc (sizeof *prec_nodes[i]);
1176 symgraph *s = prec_nodes[i];
1177 s->id = i;
1178 s->succ = 0;
1179 s->pred = 0;
1183 /*----------------.
1184 | Create a link. |
1185 `----------------*/
1187 static symgraphlink *
1188 symgraphlink_new (graphid id, symgraphlink *next)
1190 symgraphlink *res = xmalloc (sizeof *res);
1191 res->id = id;
1192 res->next = next;
1193 return res;
1197 /*------------------------------------------------------------------.
1198 | Register the second symbol of the precedence relation, and return |
1199 | whether this relation is new. Use only in register_precedence. |
1200 `------------------------------------------------------------------*/
1202 static bool
1203 register_precedence_second_symbol (symgraphlink **first, graphid sym)
1205 if (!*first || sym < (*first)->id)
1206 *first = symgraphlink_new (sym, *first);
1207 else
1209 symgraphlink *slist = *first;
1211 while (slist->next && slist->next->id <= sym)
1212 slist = slist->next;
1214 if (slist->id == sym)
1215 /* Relation already present. */
1216 return false;
1218 slist->next = symgraphlink_new (sym, slist->next);
1220 return true;
1223 /*------------------------------------------------------------------.
1224 | Register a new relation between symbols as used. The first symbol |
1225 | has a greater precedence than the second one. |
1226 `------------------------------------------------------------------*/
1228 void
1229 register_precedence (graphid first, graphid snd)
1231 if (!prec_nodes)
1232 init_prec_nodes ();
1233 register_precedence_second_symbol (&(prec_nodes[first]->succ), snd);
1234 register_precedence_second_symbol (&(prec_nodes[snd]->pred), first);
1238 /*---------------------------------------.
1239 | Deep clear a linked / adjacency list). |
1240 `---------------------------------------*/
1242 static void
1243 linkedlist_free (symgraphlink *node)
1245 if (node)
1247 while (node->next)
1249 symgraphlink *tmp = node->next;
1250 free (node);
1251 node = tmp;
1253 free (node);
1257 /*----------------------------------------------.
1258 | Clear and destroy association tracking table. |
1259 `----------------------------------------------*/
1261 static void
1262 assoc_free (void)
1264 for (int i = 0; i < nsyms; ++i)
1266 linkedlist_free (prec_nodes[i]->pred);
1267 linkedlist_free (prec_nodes[i]->succ);
1268 free (prec_nodes[i]);
1270 free (prec_nodes);
1273 /*---------------------------------------.
1274 | Initialize association tracking table. |
1275 `---------------------------------------*/
1277 static void
1278 init_assoc (void)
1280 used_assoc = xcalloc (nsyms, sizeof *used_assoc);
1281 for (graphid i = 0; i < nsyms; ++i)
1282 used_assoc[i] = false;
1285 /*------------------------------------------------------------------.
1286 | Test if the associativity for the symbols is defined and useless. |
1287 `------------------------------------------------------------------*/
1289 static inline bool
1290 is_assoc_useless (symbol *s)
1292 return s
1293 && s->content->assoc != undef_assoc
1294 && s->content->assoc != precedence_assoc
1295 && !used_assoc[s->content->number];
1298 /*-------------------------------.
1299 | Register a used associativity. |
1300 `-------------------------------*/
1302 void
1303 register_assoc (graphid i, graphid j)
1305 if (!used_assoc)
1306 init_assoc ();
1307 used_assoc[i] = true;
1308 used_assoc[j] = true;
1311 /*--------------------------------------------------.
1312 | Print a warning for unused precedence relations. |
1313 `--------------------------------------------------*/
1315 void
1316 print_precedence_warnings (void)
1318 if (!prec_nodes)
1319 init_prec_nodes ();
1320 if (!used_assoc)
1321 init_assoc ();
1322 for (int i = 0; i < nsyms; ++i)
1324 symbol *s = symbols[i];
1325 if (s
1326 && s->content->prec != 0
1327 && !prec_nodes[i]->pred
1328 && !prec_nodes[i]->succ)
1330 if (is_assoc_useless (s))
1331 complain (&s->content->prec_loc, Wprecedence,
1332 _("useless precedence and associativity for %s"), s->tag);
1333 else if (s->content->assoc == precedence_assoc)
1334 complain (&s->content->prec_loc, Wprecedence,
1335 _("useless precedence for %s"), s->tag);
1337 else if (is_assoc_useless (s))
1338 complain (&s->content->prec_loc, Wprecedence,
1339 _("useless associativity for %s, use %%precedence"), s->tag);
1341 free (used_assoc);
1342 assoc_free ();