gnulib: update
[bison.git] / src / lr0.c
blobbe5085eadb3067b631b5fb4106cc3fc7c60d94ba
1 /* Generate the LR(0) parser states for Bison.
3 Copyright (C) 1984, 1986, 1989, 2000-2002, 2004-2015, 2018-2021 Free
4 Software Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
22 /* See comments in state.h for the data structures that represent it.
23 The entry point is generate_states. */
25 #include <config.h>
26 #include "system.h"
28 #include <bitset.h>
30 #include "closure.h"
31 #include "complain.h"
32 #include "getargs.h"
33 #include "gram.h"
34 #include "lalr.h"
35 #include "lr0.h"
36 #include "reader.h"
37 #include "reduce.h"
38 #include "state.h"
39 #include "symtab.h"
41 typedef struct state_list
43 struct state_list *next;
44 state *state;
45 } state_list;
47 static state_list *first_state = NULL;
48 static state_list *last_state = NULL;
50 /* Print CORE for debugging. */
51 static void
52 core_print (size_t core_size, item_index *core, FILE *out)
54 for (int i = 0; i < core_size; ++i)
56 item_print (ritem + core[i], NULL, out);
57 fputc ('\n', out);
61 /*-----------------------------------------------------------------.
62 | A state was just discovered by transitioning on SYM from another |
63 | state. Queue this state for later examination, in order to find |
64 | its outgoing transitions. Return it. |
65 `-----------------------------------------------------------------*/
67 static state *
68 state_list_append (symbol_number sym, size_t core_size, item_index *core)
70 state_list *node = xmalloc (sizeof *node);
71 state *res = state_new (sym, core_size, core);
73 if (trace_flag & trace_automaton)
74 fprintf (stderr, "state_list_append (state = %d, symbol = %d (%s))\n",
75 nstates, sym, symbols[sym]->tag);
77 node->next = NULL;
78 node->state = res;
80 if (!first_state)
81 first_state = node;
82 if (last_state)
83 last_state->next = node;
84 last_state = node;
86 return res;
89 /* Symbols that can be "shifted" (including nonterminals) from the
90 current state. */
91 bitset shift_symbol;
93 static rule **redset;
94 /* For the current state, the list of pointers to states that can be
95 reached via a shift/goto. Could be indexed by the reaching symbol,
96 but labels of incoming transitions can be recovered by the state
97 itself. */
98 static state **shiftset;
101 /* KERNEL_BASE[symbol-number] -> list of item indices (offsets inside
102 RITEM) of length KERNEL_SIZE[symbol-number]. */
103 static item_index **kernel_base;
104 static int *kernel_size;
106 /* A single dimension array that serves as storage for
107 KERNEL_BASE. */
108 static item_index *kernel_items;
111 static void
112 allocate_itemsets (void)
114 /* Count the number of occurrences of all the symbols in RITEMS.
115 Note that useless productions (hence useless nonterminals) are
116 browsed too, hence we need to allocate room for _all_ the
117 symbols. */
118 size_t count = 0;
119 size_t *symbol_count = xcalloc (nsyms + nuseless_nonterminals,
120 sizeof *symbol_count);
122 for (rule_number r = 0; r < nrules; ++r)
123 for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
125 symbol_number sym = item_number_as_symbol_number (*rhsp);
126 count += 1;
127 symbol_count[sym] += 1;
130 /* See comments before new_itemsets. All the vectors of items
131 live inside KERNEL_ITEMS. The number of active items after
132 some symbol S cannot be more than the number of times that S
133 appears as an item, which is SYMBOL_COUNT[S].
134 We allocate that much space for each symbol. */
136 kernel_base = xnmalloc (nsyms, sizeof *kernel_base);
137 kernel_items = xnmalloc (count, sizeof *kernel_items);
139 count = 0;
140 for (symbol_number i = 0; i < nsyms; i++)
142 kernel_base[i] = kernel_items + count;
143 count += symbol_count[i];
146 free (symbol_count);
147 kernel_size = xnmalloc (nsyms, sizeof *kernel_size);
150 /* Print the current kernel (in KERNEL_BASE). */
151 static void
152 kernel_print (FILE *out)
154 for (symbol_number i = 0; i < nsyms; ++i)
155 if (kernel_size[i])
157 fprintf (out, "kernel[%s] =\n", symbols[i]->tag);
158 core_print (kernel_size[i], kernel_base[i], out);
162 /* Make sure the kernel is in sane state. */
163 static void
164 kernel_check (void)
166 for (symbol_number i = 0; i < nsyms - 1; ++i)
167 assert (kernel_base[i] + kernel_size[i] <= kernel_base[i + 1]);
170 static void
171 allocate_storage (void)
173 allocate_itemsets ();
175 shiftset = xnmalloc (nsyms, sizeof *shiftset);
176 redset = xnmalloc (nrules, sizeof *redset);
177 state_hash_new ();
178 shift_symbol = bitset_create (nsyms, BITSET_FIXED);
182 static void
183 free_storage (void)
185 bitset_free (shift_symbol);
186 free (redset);
187 free (shiftset);
188 free (kernel_base);
189 free (kernel_size);
190 free (kernel_items);
191 state_hash_free ();
197 /*------------------------------------------------------------------.
198 | Find which term/nterm symbols can be "shifted" in S, and for each |
199 | one record which items would be active after that transition. |
200 | Uses the contents of itemset. |
202 | shift_symbol is a bitset of the term/nterm symbols that can be |
203 | shifted. For each symbol in the grammar, kernel_base[symbol] |
204 | points to a vector of item numbers activated if that symbol is |
205 | shifted, and kernel_size[symbol] is their numbers. |
207 | itemset is sorted on item index in ritem, which is sorted on rule |
208 | number. Compute each kernel_base[symbol] with the same sort. |
209 `------------------------------------------------------------------*/
211 static void
212 new_itemsets (state *s)
214 if (trace_flag & trace_automaton)
215 fprintf (stderr, "new_itemsets: begin: state = %d\n", s->number);
217 memset (kernel_size, 0, nsyms * sizeof *kernel_size);
219 bitset_zero (shift_symbol);
221 if (trace_flag & trace_automaton)
223 fprintf (stderr, "initial kernel:\n");
224 kernel_print (stderr);
227 for (size_t i = 0; i < nitemset; ++i)
228 if (item_number_is_symbol_number (ritem[itemset[i]]))
230 if (trace_flag & trace_automaton)
232 fputs ("working on: ", stderr);
233 item_print (ritem + itemset[i], NULL, stderr);
234 fputc ('\n', stderr);
236 symbol_number sym = item_number_as_symbol_number (ritem[itemset[i]]);
237 bitset_set (shift_symbol, sym);
238 kernel_base[sym][kernel_size[sym]] = itemset[i] + 1;
239 kernel_size[sym]++;
242 if (trace_flag & trace_automaton)
244 fprintf (stderr, "final kernel:\n");
245 kernel_print (stderr);
246 fprintf (stderr, "new_itemsets: end: state = %d\n\n", s->number);
248 kernel_check ();
253 /*--------------------------------------------------------------.
254 | Find the state we would get to (from the current state) by |
255 | shifting SYM. Create a new state if no equivalent one exists |
256 | already. Used by append_states. |
257 `--------------------------------------------------------------*/
259 static state *
260 get_state (symbol_number sym, size_t core_size, item_index *core)
262 if (trace_flag & trace_automaton)
264 fprintf (stderr, "Entering get_state, symbol = %d (%s), core:\n",
265 sym, symbols[sym]->tag);
266 core_print (core_size, core, stderr);
267 fputc ('\n', stderr);
270 state *s = state_hash_lookup (core_size, core);
271 if (!s)
272 s = state_list_append (sym, core_size, core);
274 if (trace_flag & trace_automaton)
275 fprintf (stderr, "Exiting get_state => %d\n", s->number);
277 return s;
280 /*---------------------------------------------------------------.
281 | Use the information computed by new_itemsets to find the state |
282 | numbers reached by each shift transition from S. |
284 | SHIFTSET is set up as a vector of those states. |
285 `---------------------------------------------------------------*/
287 static void
288 append_states (state *s)
290 if (trace_flag & trace_automaton)
291 fprintf (stderr, "append_states: begin: state = %d\n", s->number);
293 bitset_iterator iter;
294 symbol_number sym;
295 int i = 0;
296 BITSET_FOR_EACH (iter, shift_symbol, sym, 0)
298 shiftset[i] = get_state (sym, kernel_size[sym], kernel_base[sym]);
299 ++i;
302 if (trace_flag & trace_automaton)
303 fprintf (stderr, "append_states: end: state = %d\n", s->number);
307 /*----------------------------------------------------------------.
308 | Find which rules can be used for reduction transitions from the |
309 | current state and make a reductions structure for the state to |
310 | record their rule numbers. |
311 `----------------------------------------------------------------*/
313 static void
314 save_reductions (state *s)
316 int count = 0;
318 /* Find and count the active items that represent ends of rules. */
319 for (size_t i = 0; i < nitemset; ++i)
321 item_number item = ritem[itemset[i]];
322 if (item_number_is_rule_number (item))
324 rule_number r = item_number_as_rule_number (item);
325 redset[count++] = &rules[r];
326 if (r == 0)
328 /* This is "reduce 0", i.e., accept. */
329 aver (!final_state);
330 final_state = s;
335 if (trace_flag & trace_automaton)
337 fprintf (stderr, "reduction[%d] = {\n", s->number);
338 for (int i = 0; i < count; ++i)
340 rule_print (redset[i], NULL, stderr);
341 fputc ('\n', stderr);
343 fputs ("}\n", stderr);
346 /* Make a reductions structure and copy the data into it. */
347 state_reductions_set (s, count, redset);
351 /*---------------.
352 | Build STATES. |
353 `---------------*/
355 static void
356 set_states (void)
358 states = xcalloc (nstates, sizeof *states);
360 while (first_state)
362 state_list *this = first_state;
364 /* Pessimization, but simplification of the code: make sure all
365 the states have valid transitions and reductions members,
366 even if reduced to 0. It is too soon for errs, which are
367 computed later, but set_conflicts. */
368 state *s = this->state;
369 if (!s->transitions)
370 state_transitions_set (s, 0, 0);
371 if (!s->reductions)
372 state_reductions_set (s, 0, 0);
374 states[s->number] = s;
376 first_state = this->next;
377 free (this);
379 first_state = NULL;
380 last_state = NULL;
384 /*-------------------------------------------------------------------.
385 | Compute the LR(0) parser states (see state.h for details) from the |
386 | grammar. |
387 `-------------------------------------------------------------------*/
389 void
390 generate_states (void)
392 allocate_storage ();
393 closure_new (nritems);
395 /* Create the initial state, whose accessing symbol (by convention)
396 is 0, aka $end. */
398 /* The items of its core: beginning of all the rules of $accept. */
399 kernel_size[0] = 0;
400 for (rule_number r = 0; r < nrules && rules[r].lhs->symbol == acceptsymbol; ++r)
401 kernel_base[0][kernel_size[0]++] = rules[r].rhs - ritem;
402 state_list_append (0, kernel_size[0], kernel_base[0]);
405 /* States are queued when they are created; process them all. */
406 for (state_list *list = first_state; list; list = list->next)
408 state *s = list->state;
409 if (trace_flag & trace_automaton)
410 fprintf (stderr, "Processing state %d (reached by %s)\n",
411 s->number,
412 symbols[s->accessing_symbol]->tag);
413 /* Set up itemset for the transitions out of this state. itemset gets a
414 vector of all the items that could be accepted next. */
415 closure (s->items, s->nitems);
416 /* Record the reductions allowed out of this state. */
417 save_reductions (s);
418 /* Find the itemsets of the states that shifts/gotos can reach. */
419 new_itemsets (s);
420 /* Find or create the core structures for those states. */
421 append_states (s);
423 /* Create the shifts structures for the shifts to those states,
424 now that the state numbers transitioning to are known. */
425 state_transitions_set (s, bitset_count (shift_symbol), shiftset);
428 /* discard various storage */
429 free_storage ();
431 /* Set up STATES. */
432 set_states ();