1 /* Grammar reduction for Bison.
3 Copyright (C) 1988-1989, 2000-2003, 2005-2015, 2018-2021 Free
4 Software Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
22 /* Reduce the grammar: Find and eliminate unreachable terminals,
23 nonterminals, and productions. David S. Bakin. */
25 /* Don't eliminate unreachable terminals: They may be used by the
37 #include "print-xml.h"
42 /* Set of nonterminals whose language is not empty. */
45 /* Set of rules that have no useless nonterminals in their RHS. */
48 /* Set of accessible symbols. */
51 /* Set of symbols used to define rule precedence (so they are
52 'useless', but no warning should be issued). */
55 int nuseless_productions
;
56 int nuseless_nonterminals
;
58 #define bitset_swap(Lhs, Rhs) \
65 /*-------------------------------------------------------------------.
66 | Another way to do this would be with a set for each production and |
67 | then do subset tests against N0, but even for the C grammar the |
68 | whole reducing process takes only 2 seconds on my 8Mhz AT. |
69 `-------------------------------------------------------------------*/
72 useful_production (rule_number r
, bitset N0
)
74 /* A production is useful if all of the nonterminals in its appear
75 in the set of useful nonterminals. */
77 for (item_number
*rhsp
= rules
[r
].rhs
; 0 <= *rhsp
; ++rhsp
)
78 if (ISVAR (*rhsp
) && !bitset_test (N0
, *rhsp
- ntokens
))
84 /*-----------------------------------------------------------------.
85 | Compute N, the set of nonterminals whose language is not empty. |
87 | Remember that rules are 1-origin, symbols are 0-origin. |
88 `-----------------------------------------------------------------*/
91 useless_nonterminals (void)
93 /* N is set as built. Np is set being built this iteration. P is
94 set of all productions which have a RHS all in N. */
96 bitset Np
= bitset_create (nnterms
, BITSET_FIXED
);
98 /* The set being computed is a set of nonterminals which can derive
99 the empty string or strings consisting of all terminals. At each
100 iteration a nonterminal is added to the set if there is a
101 production with that nonterminal as its LHS for which all the
102 nonterminals in its RHS are already in the set. Iterate until
103 the set being computed remains unchanged. Any nonterminals not
104 in the set at that point are useless in that they will never be
105 used in deriving a sentence of the language.
107 This iteration doesn't use any special traversal over the
108 productions. A set is kept of all productions for which all the
109 nonterminals in the RHS are in useful. Only productions not in
110 this set are scanned on each iteration. At the end, this set is
111 saved to be used when finding useful productions: only
112 productions in this set will appear in the final grammar. */
117 for (rule_number r
= 0; r
< nrules
; ++r
)
118 if (!bitset_test (P
, r
)
119 && useful_production (r
, N
))
121 bitset_set (Np
, rules
[r
].lhs
->number
- ntokens
);
124 if (bitset_equal_p (N
, Np
))
134 inaccessable_symbols (void)
136 /* Find out which productions are reachable and which symbols are
137 used. Starting with an empty set of productions and a set of
138 symbols which only has the start symbol in it, iterate over all
139 productions until the set of productions remains unchanged for an
140 iteration. For each production which has a LHS in the set of
141 reachable symbols, add the production to the set of reachable
142 productions, and add all of the nonterminals in the RHS of the
143 production to the set of reachable symbols.
145 Consider only the (partially) reduced grammar which has only
146 nonterminals in N and productions in P.
148 The result is the set P of productions in the reduced grammar,
149 and the set V of symbols in the reduced grammar.
151 Although this algorithm also computes the set of terminals which
152 are reachable, no terminal will be deleted from the grammar. Some
153 terminals might not be in the grammar but might be generated by
154 semantic routines, and so the user might want them available with
155 specified numbers. (Is this true?) However, the nonreachable
156 terminals are printed (if running in verbose mode) so that the
159 bitset Vp
= bitset_create (nsyms
, BITSET_FIXED
);
160 bitset Pp
= bitset_create (nrules
, BITSET_FIXED
);
162 /* If the start symbol isn't useful, then nothing will be useful. */
163 if (bitset_test (N
, acceptsymbol
->content
->number
- ntokens
))
165 bitset_set (V
, acceptsymbol
->content
->number
);
170 for (rule_number r
= 0; r
< nrules
; ++r
)
171 if (!bitset_test (Pp
, r
)
172 && bitset_test (P
, r
)
173 && bitset_test (V
, rules
[r
].lhs
->number
))
175 for (item_number
*rhsp
= rules
[r
].rhs
; 0 <= *rhsp
; ++rhsp
)
176 if (ISTOKEN (*rhsp
) || bitset_test (N
, *rhsp
- ntokens
))
177 bitset_set (Vp
, *rhsp
);
180 if (bitset_equal_p (V
, Vp
))
189 /* These tokens (numbered 0, 1, and 2) are internal to Bison.
190 Consider them useful. */
191 bitset_set (V
, eoftoken
->content
->number
); /* end-of-input token */
192 bitset_set (V
, errtoken
->content
->number
); /* error token */
193 bitset_set (V
, undeftoken
->content
->number
); /* some undefined token */
198 int nuseful_productions
= bitset_count (P
);
199 nuseless_productions
= nrules
- nuseful_productions
;
201 int nuseful_nonterminals
= 0;
202 for (symbol_number i
= ntokens
; i
< nsyms
; ++i
)
203 nuseful_nonterminals
+= bitset_test (V
, i
);
204 nuseless_nonterminals
= nnterms
- nuseful_nonterminals
;
206 /* A token that was used in %prec should not be warned about. */
207 for (rule_number r
= 0; r
< nrules
; ++r
)
208 if (rules
[r
].precsym
!= 0)
209 bitset_set (V1
, rules
[r
].precsym
->number
);
213 /*-------------------------------------------------------------------.
214 | Put the useless productions at the end of RULES, and adjust NRULES |
216 `-------------------------------------------------------------------*/
219 reduce_grammar_tables (void)
221 /* Report and flag useless productions. */
223 for (rule_number r
= 0; r
< nrules
; ++r
)
224 rules
[r
].useful
= bitset_test (P
, r
);
225 grammar_rules_useless_report (_("rule useless in grammar"));
228 /* Map the nonterminals to their new index: useful first, useless
229 afterwards. Kept for later report. */
232 int useless
= nrules
- nuseless_productions
;
233 rule
*rules_sorted
= xnmalloc (nrules
, sizeof *rules_sorted
);
234 for (rule_number r
= 0; r
< nrules
; ++r
)
235 rules_sorted
[rules
[r
].useful
? useful
++ : useless
++] = rules
[r
];
237 rules
= rules_sorted
;
239 /* Renumber the rules markers in RITEMS. */
240 for (rule_number r
= 0; r
< nrules
; ++r
)
242 item_number
*rhsp
= rules
[r
].rhs
;
243 for (/* Nothing. */; 0 <= *rhsp
; ++rhsp
)
245 *rhsp
= rule_number_as_item_number (r
);
248 nrules
-= nuseless_productions
;
251 /* Adjust NRITEMS. */
252 for (rule_number r
= nrules
; r
< nrules
+ nuseless_productions
; ++r
)
253 nritems
-= rule_rhs_length (&rules
[r
]) + 1;
257 /*------------------------------.
258 | Remove useless nonterminals. |
259 `------------------------------*/
261 symbol_number
*nterm_map
= NULL
;
264 nonterminals_reduce (void)
266 nterm_map
= xnmalloc (nnterms
, sizeof *nterm_map
);
267 /* Map the nonterminals to their new index: useful first, useless
268 afterwards. Kept for later report. */
270 symbol_number n
= ntokens
;
271 for (symbol_number i
= ntokens
; i
< nsyms
; ++i
)
272 if (bitset_test (V
, i
))
273 nterm_map
[i
- ntokens
] = n
++;
274 for (symbol_number i
= ntokens
; i
< nsyms
; ++i
)
275 if (!bitset_test (V
, i
))
277 nterm_map
[i
- ntokens
] = n
++;
278 if (symbols
[i
]->content
->status
!= used
279 && symbols
[i
] != acceptsymbol
)
280 complain (&symbols
[i
]->location
, Wother
,
281 _("nonterminal useless in grammar: %s"),
286 /* Shuffle elements of tables indexed by symbol number. */
288 symbol
**symbols_sorted
= xnmalloc (nnterms
, sizeof *symbols_sorted
);
289 for (symbol_number i
= ntokens
; i
< nsyms
; ++i
)
290 symbols
[i
]->content
->number
= nterm_map
[i
- ntokens
];
291 for (symbol_number i
= ntokens
; i
< nsyms
; ++i
)
292 symbols_sorted
[nterm_map
[i
- ntokens
] - ntokens
] = symbols
[i
];
293 for (symbol_number i
= ntokens
; i
< nsyms
; ++i
)
294 symbols
[i
] = symbols_sorted
[i
- ntokens
];
295 free (symbols_sorted
);
298 /* Update nonterminal numbers in the RHS of the rules. LHS are
299 pointers to the symbol structure, they don't need renumbering. */
301 for (rule_number r
= 0; r
< nrules
; ++r
)
302 for (item_number
*rhsp
= rules
[r
].rhs
; 0 <= *rhsp
; ++rhsp
)
304 *rhsp
= symbol_number_as_item_number (nterm_map
[*rhsp
- ntokens
]);
305 acceptsymbol
->content
->number
= nterm_map
[acceptsymbol
->content
->number
- ntokens
];
308 nsyms
-= nuseless_nonterminals
;
309 nnterms
-= nuseless_nonterminals
;
313 /*------------------------------------------------------------------.
314 | Output the detailed results of the reductions. For FILE.output. |
315 `------------------------------------------------------------------*/
318 reduce_output (FILE *out
)
320 if (nuseless_nonterminals
)
322 fprintf (out
, "%s\n\n", _("Nonterminals useless in grammar"));
323 for (int i
= 0; i
< nuseless_nonterminals
; ++i
)
324 fprintf (out
, " %s\n", symbols
[nsyms
+ i
]->tag
);
330 for (int i
= 0; i
< ntokens
; ++i
)
331 if (reduce_token_unused_in_grammar (i
))
334 fprintf (out
, "%s\n\n", _("Terminals unused in grammar"));
336 fprintf (out
, " %s\n", symbols
[i
]->tag
);
342 if (nuseless_productions
)
343 grammar_rules_partial_print (out
, _("Rules useless in grammar"),
344 rule_useless_in_grammar_p
);
348 /*-------------------------------.
349 | Report the results to STDERR. |
350 `-------------------------------*/
355 if (nuseless_nonterminals
)
356 complain (NULL
, Wother
, ngettext ("%d nonterminal useless in grammar",
357 "%d nonterminals useless in grammar",
358 nuseless_nonterminals
),
359 nuseless_nonterminals
);
360 if (nuseless_productions
)
361 complain (NULL
, Wother
, ngettext ("%d rule useless in grammar",
362 "%d rules useless in grammar",
363 nuseless_productions
),
364 nuseless_productions
);
368 reduce_grammar (void)
370 /* Allocate the global sets used to compute the reduced grammar */
372 N
= bitset_create (nnterms
, BITSET_FIXED
);
373 P
= bitset_create (nrules
, BITSET_FIXED
);
374 V
= bitset_create (nsyms
, BITSET_FIXED
);
375 V1
= bitset_create (nsyms
, BITSET_FIXED
);
377 useless_nonterminals ();
378 inaccessable_symbols ();
380 /* Did we reduce something? */
381 if (nuseless_nonterminals
|| nuseless_productions
)
385 // Check that start symbols have non-empty languages.
386 bool failure
= false;
387 for (symbol_list
*list
= start_symbols
; list
; list
= list
->next
)
388 if (!bitset_test (N
, list
->content
.sym
->content
->number
- ntokens
))
391 complain (&list
->sym_loc
, complaint
,
392 _("start symbol %s does not derive any sentence"),
393 list
->content
.sym
->tag
);
398 /* First reduce the nonterminals, as they renumber themselves in the
399 whole grammar. If you change the order, nonterms would be
400 renumbered only in the reduced grammar. */
401 if (nuseless_nonterminals
)
402 nonterminals_reduce ();
403 if (nuseless_productions
)
404 reduce_grammar_tables ();
407 if (trace_flag
& trace_grammar
)
409 grammar_dump (stderr
, "Reduced Grammar");
411 fprintf (stderr
, "reduced %s defines %d terminals, %d nonterminals"
412 ", and %d productions.\n",
413 grammar_file
, ntokens
, nnterms
, nrules
);
418 reduce_token_unused_in_grammar (symbol_number i
)
421 return !bitset_test (V
, i
) && !bitset_test (V1
, i
);
425 reduce_nonterminal_useless_in_grammar (const sym_content
*sym
)
427 symbol_number n
= sym
->number
;
428 aver (ntokens
<= n
&& n
< nsyms
+ nuseless_nonterminals
);
432 /*-----------------------------------------------------------.
433 | Free the global sets used to compute the reduced grammar. |
434 `-----------------------------------------------------------*/