gnulib: update
[bison.git] / src / reduce.c
blob112915bf993f3eee5c1c3e82d514af3f52b54c7f
1 /* Grammar reduction for Bison.
3 Copyright (C) 1988-1989, 2000-2003, 2005-2015, 2018-2021 Free
4 Software Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
22 /* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
25 /* Don't eliminate unreachable terminals: They may be used by the
26 user's parser. */
28 #include <config.h>
29 #include "system.h"
31 #include <bitset.h>
33 #include "complain.h"
34 #include "files.h"
35 #include "getargs.h"
36 #include "gram.h"
37 #include "print-xml.h"
38 #include "reader.h"
39 #include "reduce.h"
40 #include "symtab.h"
42 /* Set of nonterminals whose language is not empty. */
43 static bitset N;
45 /* Set of rules that have no useless nonterminals in their RHS. */
46 static bitset P;
48 /* Set of accessible symbols. */
49 static bitset V;
51 /* Set of symbols used to define rule precedence (so they are
52 'useless', but no warning should be issued). */
53 static bitset V1;
55 int nuseless_productions;
56 int nuseless_nonterminals;
58 #define bitset_swap(Lhs, Rhs) \
59 do { \
60 bitset lhs__ = Lhs; \
61 Lhs = Rhs; \
62 Rhs = lhs__; \
63 } while (0)
65 /*-------------------------------------------------------------------.
66 | Another way to do this would be with a set for each production and |
67 | then do subset tests against N0, but even for the C grammar the |
68 | whole reducing process takes only 2 seconds on my 8Mhz AT. |
69 `-------------------------------------------------------------------*/
71 static bool
72 useful_production (rule_number r, bitset N0)
74 /* A production is useful if all of the nonterminals in its appear
75 in the set of useful nonterminals. */
77 for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
78 if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
79 return false;
80 return true;
84 /*-----------------------------------------------------------------.
85 | Compute N, the set of nonterminals whose language is not empty. |
86 | |
87 | Remember that rules are 1-origin, symbols are 0-origin. |
88 `-----------------------------------------------------------------*/
90 static void
91 useless_nonterminals (void)
93 /* N is set as built. Np is set being built this iteration. P is
94 set of all productions which have a RHS all in N. */
96 bitset Np = bitset_create (nnterms, BITSET_FIXED);
98 /* The set being computed is a set of nonterminals which can derive
99 the empty string or strings consisting of all terminals. At each
100 iteration a nonterminal is added to the set if there is a
101 production with that nonterminal as its LHS for which all the
102 nonterminals in its RHS are already in the set. Iterate until
103 the set being computed remains unchanged. Any nonterminals not
104 in the set at that point are useless in that they will never be
105 used in deriving a sentence of the language.
107 This iteration doesn't use any special traversal over the
108 productions. A set is kept of all productions for which all the
109 nonterminals in the RHS are in useful. Only productions not in
110 this set are scanned on each iteration. At the end, this set is
111 saved to be used when finding useful productions: only
112 productions in this set will appear in the final grammar. */
114 while (1)
116 bitset_copy (Np, N);
117 for (rule_number r = 0; r < nrules; ++r)
118 if (!bitset_test (P, r)
119 && useful_production (r, N))
121 bitset_set (Np, rules[r].lhs->number - ntokens);
122 bitset_set (P, r);
124 if (bitset_equal_p (N, Np))
125 break;
126 bitset_swap (N, Np);
128 bitset_free (N);
129 N = Np;
133 static void
134 inaccessable_symbols (void)
136 /* Find out which productions are reachable and which symbols are
137 used. Starting with an empty set of productions and a set of
138 symbols which only has the start symbol in it, iterate over all
139 productions until the set of productions remains unchanged for an
140 iteration. For each production which has a LHS in the set of
141 reachable symbols, add the production to the set of reachable
142 productions, and add all of the nonterminals in the RHS of the
143 production to the set of reachable symbols.
145 Consider only the (partially) reduced grammar which has only
146 nonterminals in N and productions in P.
148 The result is the set P of productions in the reduced grammar,
149 and the set V of symbols in the reduced grammar.
151 Although this algorithm also computes the set of terminals which
152 are reachable, no terminal will be deleted from the grammar. Some
153 terminals might not be in the grammar but might be generated by
154 semantic routines, and so the user might want them available with
155 specified numbers. (Is this true?) However, the nonreachable
156 terminals are printed (if running in verbose mode) so that the
157 user can know. */
159 bitset Vp = bitset_create (nsyms, BITSET_FIXED);
160 bitset Pp = bitset_create (nrules, BITSET_FIXED);
162 /* If the start symbol isn't useful, then nothing will be useful. */
163 if (bitset_test (N, acceptsymbol->content->number - ntokens))
165 bitset_set (V, acceptsymbol->content->number);
167 while (1)
169 bitset_copy (Vp, V);
170 for (rule_number r = 0; r < nrules; ++r)
171 if (!bitset_test (Pp, r)
172 && bitset_test (P, r)
173 && bitset_test (V, rules[r].lhs->number))
175 for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
176 if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
177 bitset_set (Vp, *rhsp);
178 bitset_set (Pp, r);
180 if (bitset_equal_p (V, Vp))
181 break;
182 bitset_swap (V, Vp);
186 bitset_free (V);
187 V = Vp;
189 /* These tokens (numbered 0, 1, and 2) are internal to Bison.
190 Consider them useful. */
191 bitset_set (V, eoftoken->content->number); /* end-of-input token */
192 bitset_set (V, errtoken->content->number); /* error token */
193 bitset_set (V, undeftoken->content->number); /* some undefined token */
195 bitset_free (P);
196 P = Pp;
198 int nuseful_productions = bitset_count (P);
199 nuseless_productions = nrules - nuseful_productions;
201 int nuseful_nonterminals = 0;
202 for (symbol_number i = ntokens; i < nsyms; ++i)
203 nuseful_nonterminals += bitset_test (V, i);
204 nuseless_nonterminals = nnterms - nuseful_nonterminals;
206 /* A token that was used in %prec should not be warned about. */
207 for (rule_number r = 0; r < nrules; ++r)
208 if (rules[r].precsym != 0)
209 bitset_set (V1, rules[r].precsym->number);
213 /*-------------------------------------------------------------------.
214 | Put the useless productions at the end of RULES, and adjust NRULES |
215 | accordingly. |
216 `-------------------------------------------------------------------*/
218 static void
219 reduce_grammar_tables (void)
221 /* Report and flag useless productions. */
223 for (rule_number r = 0; r < nrules; ++r)
224 rules[r].useful = bitset_test (P, r);
225 grammar_rules_useless_report (_("rule useless in grammar"));
228 /* Map the nonterminals to their new index: useful first, useless
229 afterwards. Kept for later report. */
231 int useful = 0;
232 int useless = nrules - nuseless_productions;
233 rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
234 for (rule_number r = 0; r < nrules; ++r)
235 rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
236 free (rules);
237 rules = rules_sorted;
239 /* Renumber the rules markers in RITEMS. */
240 for (rule_number r = 0; r < nrules; ++r)
242 item_number *rhsp = rules[r].rhs;
243 for (/* Nothing. */; 0 <= *rhsp; ++rhsp)
244 continue;
245 *rhsp = rule_number_as_item_number (r);
246 rules[r].number = r;
248 nrules -= nuseless_productions;
251 /* Adjust NRITEMS. */
252 for (rule_number r = nrules; r < nrules + nuseless_productions; ++r)
253 nritems -= rule_rhs_length (&rules[r]) + 1;
257 /*------------------------------.
258 | Remove useless nonterminals. |
259 `------------------------------*/
261 symbol_number *nterm_map = NULL;
263 static void
264 nonterminals_reduce (void)
266 nterm_map = xnmalloc (nnterms, sizeof *nterm_map);
267 /* Map the nonterminals to their new index: useful first, useless
268 afterwards. Kept for later report. */
270 symbol_number n = ntokens;
271 for (symbol_number i = ntokens; i < nsyms; ++i)
272 if (bitset_test (V, i))
273 nterm_map[i - ntokens] = n++;
274 for (symbol_number i = ntokens; i < nsyms; ++i)
275 if (!bitset_test (V, i))
277 nterm_map[i - ntokens] = n++;
278 if (symbols[i]->content->status != used
279 && symbols[i] != acceptsymbol)
280 complain (&symbols[i]->location, Wother,
281 _("nonterminal useless in grammar: %s"),
282 symbols[i]->tag);
286 /* Shuffle elements of tables indexed by symbol number. */
288 symbol **symbols_sorted = xnmalloc (nnterms, sizeof *symbols_sorted);
289 for (symbol_number i = ntokens; i < nsyms; ++i)
290 symbols[i]->content->number = nterm_map[i - ntokens];
291 for (symbol_number i = ntokens; i < nsyms; ++i)
292 symbols_sorted[nterm_map[i - ntokens] - ntokens] = symbols[i];
293 for (symbol_number i = ntokens; i < nsyms; ++i)
294 symbols[i] = symbols_sorted[i - ntokens];
295 free (symbols_sorted);
298 /* Update nonterminal numbers in the RHS of the rules. LHS are
299 pointers to the symbol structure, they don't need renumbering. */
301 for (rule_number r = 0; r < nrules; ++r)
302 for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
303 if (ISVAR (*rhsp))
304 *rhsp = symbol_number_as_item_number (nterm_map[*rhsp - ntokens]);
305 acceptsymbol->content->number = nterm_map[acceptsymbol->content->number - ntokens];
308 nsyms -= nuseless_nonterminals;
309 nnterms -= nuseless_nonterminals;
313 /*------------------------------------------------------------------.
314 | Output the detailed results of the reductions. For FILE.output. |
315 `------------------------------------------------------------------*/
317 void
318 reduce_output (FILE *out)
320 if (nuseless_nonterminals)
322 fprintf (out, "%s\n\n", _("Nonterminals useless in grammar"));
323 for (int i = 0; i < nuseless_nonterminals; ++i)
324 fprintf (out, " %s\n", symbols[nsyms + i]->tag);
325 fputs ("\n\n", out);
329 bool b = false;
330 for (int i = 0; i < ntokens; ++i)
331 if (reduce_token_unused_in_grammar (i))
333 if (!b)
334 fprintf (out, "%s\n\n", _("Terminals unused in grammar"));
335 b = true;
336 fprintf (out, " %s\n", symbols[i]->tag);
338 if (b)
339 fputs ("\n\n", out);
342 if (nuseless_productions)
343 grammar_rules_partial_print (out, _("Rules useless in grammar"),
344 rule_useless_in_grammar_p);
348 /*-------------------------------.
349 | Report the results to STDERR. |
350 `-------------------------------*/
352 static void
353 reduce_print (void)
355 if (nuseless_nonterminals)
356 complain (NULL, Wother, ngettext ("%d nonterminal useless in grammar",
357 "%d nonterminals useless in grammar",
358 nuseless_nonterminals),
359 nuseless_nonterminals);
360 if (nuseless_productions)
361 complain (NULL, Wother, ngettext ("%d rule useless in grammar",
362 "%d rules useless in grammar",
363 nuseless_productions),
364 nuseless_productions);
367 void
368 reduce_grammar (void)
370 /* Allocate the global sets used to compute the reduced grammar */
372 N = bitset_create (nnterms, BITSET_FIXED);
373 P = bitset_create (nrules, BITSET_FIXED);
374 V = bitset_create (nsyms, BITSET_FIXED);
375 V1 = bitset_create (nsyms, BITSET_FIXED);
377 useless_nonterminals ();
378 inaccessable_symbols ();
380 /* Did we reduce something? */
381 if (nuseless_nonterminals || nuseless_productions)
383 reduce_print ();
385 // Check that start symbols have non-empty languages.
386 bool failure = false;
387 for (symbol_list *list = start_symbols; list; list = list->next)
388 if (!bitset_test (N, list->content.sym->content->number - ntokens))
390 failure = true;
391 complain (&list->sym_loc, complaint,
392 _("start symbol %s does not derive any sentence"),
393 list->content.sym->tag);
395 if (failure)
396 exit (EXIT_FAILURE);
398 /* First reduce the nonterminals, as they renumber themselves in the
399 whole grammar. If you change the order, nonterms would be
400 renumbered only in the reduced grammar. */
401 if (nuseless_nonterminals)
402 nonterminals_reduce ();
403 if (nuseless_productions)
404 reduce_grammar_tables ();
407 if (trace_flag & trace_grammar)
409 grammar_dump (stderr, "Reduced Grammar");
411 fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals"
412 ", and %d productions.\n",
413 grammar_file, ntokens, nnterms, nrules);
417 bool
418 reduce_token_unused_in_grammar (symbol_number i)
420 aver (i < ntokens);
421 return !bitset_test (V, i) && !bitset_test (V1, i);
424 bool
425 reduce_nonterminal_useless_in_grammar (const sym_content *sym)
427 symbol_number n = sym->number;
428 aver (ntokens <= n && n < nsyms + nuseless_nonterminals);
429 return nsyms <= n;
432 /*-----------------------------------------------------------.
433 | Free the global sets used to compute the reduced grammar. |
434 `-----------------------------------------------------------*/
436 void
437 reduce_free (void)
439 bitset_free (N);
440 bitset_free (V);
441 bitset_free (V1);
442 bitset_free (P);
443 free (nterm_map);
444 nterm_map = NULL;