gnulib: update
[bison.git] / src / lalr.c
blob7dda0a2bda946f1b63de24baf844b3b2e6eaa2a7
1 /* Compute lookahead criteria for Bison.
3 Copyright (C) 1984, 1986, 1989, 2000-2015, 2018-2021 Free Software
4 Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <https://www.gnu.org/licenses/>. */
22 /* Find which rules need lookahead in each state, and which lookahead
23 tokens they accept. */
25 #include <config.h>
26 #include "system.h"
28 #include <bitset.h>
29 #include <bitsetv.h>
31 #include "complain.h"
32 #include "derives.h"
33 #include "getargs.h"
34 #include "gram.h"
35 #include "lalr.h"
36 #include "lr0.h"
37 #include "muscle-tab.h"
38 #include "nullable.h"
39 #include "reader.h"
40 #include "relation.h"
41 #include "symtab.h"
43 goto_number *goto_map = NULL;
44 goto_number ngotos = 0;
45 state_number *from_state = NULL;
46 state_number *to_state = NULL;
47 bitsetv goto_follows = NULL;
49 /* Linked list of goto numbers. */
50 typedef struct goto_list
52 struct goto_list *next;
53 goto_number value;
54 } goto_list;
56 static goto_list *
57 goto_list_new (goto_number value, struct goto_list *next)
59 goto_list *res = xmalloc (sizeof *res);
60 res->next = next;
61 res->value = value;
62 return res;
65 /* LA is an nLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule
66 LArule[l] is applicable in the appropriate state when the next
67 token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j,
68 it is a conflict. */
70 static bitsetv LA = NULL;
71 size_t nLA;
74 /* "(p, A) includes (p', B)" iff
75 B → βAγ, γ nullable, and p'-- β --> p (i.e., state p' reaches p on label β).
77 Definition p.621 [DeRemer 1982].
79 INCLUDES[(p, A)] = [(p', B),...] */
80 static goto_number **includes;
82 /* "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
84 Definition p.621 [DeRemer 1982]. */
85 static goto_list **lookback;
87 static void
88 goto_print (goto_number i, FILE *out)
90 const state_number src = from_state[i];
91 const state_number dst = to_state[i];
92 symbol_number var = states[dst]->accessing_symbol;
93 fprintf (out,
94 "goto[%zu] = (%d, %s, %d)", i, src, symbols[var]->tag, dst);
97 void
98 set_goto_map (void)
100 /* Count the number of gotos (ngotos) per nterm (goto_map). */
101 if (trace_flag & trace_automaton)
102 fprintf (stderr, "nnterms: %d\n", nnterms);
103 goto_map = xcalloc (nnterms + 1, sizeof *goto_map);
104 ngotos = 0;
105 for (state_number s = 0; s < nstates; ++s)
107 transitions *trans = states[s]->transitions;
108 for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
110 ngotos++;
111 /* Abort if (ngotos + 1) would overflow. */
112 aver (ngotos != GOTO_NUMBER_MAXIMUM);
113 goto_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
117 goto_number *temp_map = xnmalloc (nnterms + 1, sizeof *temp_map);
119 goto_number k = 0;
120 for (symbol_number i = ntokens; i < nsyms; ++i)
122 temp_map[i - ntokens] = k;
123 k += goto_map[i - ntokens];
126 for (symbol_number i = ntokens; i < nsyms; ++i)
127 goto_map[i - ntokens] = temp_map[i - ntokens];
129 goto_map[nsyms - ntokens] = ngotos;
130 temp_map[nsyms - ntokens] = ngotos;
133 from_state = xcalloc (ngotos, sizeof *from_state);
134 to_state = xcalloc (ngotos, sizeof *to_state);
136 for (state_number s = 0; s < nstates; ++s)
138 const transitions *trans = states[s]->transitions;
139 for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
141 goto_number k = temp_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
142 from_state[k] = s;
143 to_state[k] = trans->states[i]->number;
147 free (temp_map);
149 if (trace_flag & trace_automaton)
151 for (int i = 0; i < nnterms; ++i)
152 fprintf (stderr, "goto_map[%d (%s)] = %ld .. %ld\n",
153 i, symbols[ntokens + i]->tag,
154 goto_map[i], goto_map[i+1] - 1);
155 for (int i = 0; i < ngotos; ++i)
157 goto_print (i, stderr);
158 fputc ('\n', stderr);
164 goto_number
165 map_goto (state_number src, symbol_number sym)
167 goto_number low = goto_map[sym - ntokens];
168 assert (goto_map[sym - ntokens] != goto_map[sym - ntokens + 1]);
169 goto_number high = goto_map[sym - ntokens + 1] - 1;
171 for (;;)
173 aver (low <= high);
174 goto_number middle = (low + high) / 2;
175 state_number s = from_state[middle];
176 if (s == src)
177 return middle;
178 else if (s < src)
179 low = middle + 1;
180 else
181 high = middle - 1;
185 /* Print FOLLOWS for debugging. */
186 static void
187 follows_print (const char* title, FILE *out)
189 fprintf (out, "%s:\n", title);
190 for (goto_number i = 0; i < ngotos; ++i)
192 fputs (" FOLLOWS[", out);
193 goto_print (i, out);
194 fputs ("] =", out);
195 bitset_iterator iter;
196 symbol_number sym;
197 BITSET_FOR_EACH (iter, goto_follows[i], sym, 0)
198 fprintf (out, " %s", symbols[sym]->tag);
199 fputc ('\n', out);
201 fputc ('\n', out);
204 /* Build goto_follows. */
205 static void
206 initialize_goto_follows (void)
208 goto_number **reads = xnmalloc (ngotos, sizeof *reads);
209 goto_number *edge = xnmalloc (ngotos, sizeof *edge);
211 goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
213 for (goto_number i = 0; i < ngotos; ++i)
215 state_number dst = to_state[i];
216 const transitions *trans = states[dst]->transitions;
218 int j;
219 FOR_EACH_SHIFT (trans, j)
220 bitset_set (goto_follows[i], TRANSITION_SYMBOL (trans, j));
222 /* Gotos outgoing from DST. */
223 goto_number nedges = 0;
224 for (; j < trans->num; ++j)
226 symbol_number sym = TRANSITION_SYMBOL (trans, j);
227 if (nullable[sym - ntokens])
229 assert (nedges < ngotos);
230 edge[nedges++] = map_goto (dst, sym);
234 if (nedges == 0)
235 reads[i] = NULL;
236 else
238 reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
239 memcpy (reads[i], edge, nedges * sizeof edge[0]);
240 reads[i][nedges] = END_NODE;
243 if (trace_flag & trace_automaton)
245 follows_print ("follows after shifts", stderr);
246 relation_print ("reads", reads, ngotos, goto_print, stderr);
249 relation_digraph (reads, ngotos, goto_follows);
250 if (trace_flag & trace_automaton)
251 follows_print ("follows after read", stderr);
253 for (goto_number i = 0; i < ngotos; ++i)
254 free (reads[i]);
255 free (reads);
256 free (edge);
260 /* Find the state which LOOKBACK[LOOKBACK_INDEX] is about. */
261 static const state *
262 lookback_find_state (int lookback_index)
264 state *res = NULL;
265 for (int j = 0; j < nstates; ++j)
266 if (states[j]->reductions
267 && states[j]->reductions->lookaheads)
269 if (states[j]->reductions->lookaheads - LA > lookback_index)
270 /* Went too far. */
271 break;
272 else
273 res = states[j];
275 /* Pacify "potential null pointer dereference" warning. */
276 if (!res)
277 abort ();
278 return res;
281 /* Print LOOKBACK for debugging. */
282 static void
283 lookback_print (FILE *out)
285 fputs ("lookback:\n", out);
286 for (int i = 0; i < nLA; ++i)
287 if (lookback[i])
289 fprintf (out, " %3d = ", i);
290 const state *s = lookback_find_state (i);
291 int rnum = i - (s->reductions->lookaheads - LA);
292 const rule *r = s->reductions->rules[rnum];
293 fprintf (out, "(%3d, ", s->number);
294 rule_print (r, NULL, out);
295 fputs (") ->", out);
296 for (goto_list *sp = lookback[i]; sp; sp = sp->next)
298 fputc (' ', out);
299 goto_print (sp->value, out);
301 fputc ('\n', out);
303 fputc ('\n', out);
306 /* Add (S, R) -> GOTONO to LOOKBACK.
308 "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
310 The goto number GOTONO, whose source is S (which is
311 inconsistent), */
312 static void
313 add_lookback_edge (state *s, rule const *r, goto_number gotono)
315 int ri = state_reduction_find (s, r);
316 int idx = (s->reductions->lookaheads - LA) + ri;
317 lookback[idx] = goto_list_new (gotono, lookback[idx]);
321 /* Compute INCLUDES and LOOKBACK. Corresponds to step E in Sec. 6 of
322 [DeRemer 1982]. */
323 static void
324 build_relations (void)
326 goto_number *edge = xnmalloc (ngotos, sizeof *edge);
327 state_number *path = xnmalloc (ritem_longest_rhs () + 1, sizeof *path);
329 includes = xnmalloc (ngotos, sizeof *includes);
331 /* For each goto (from SRC to DST labeled by nterm VAR), iterate
332 over each rule with VAR as LHS, and find the path PATH from SRC
333 labeled with the RHS of the rule. */
334 for (goto_number i = 0; i < ngotos; ++i)
336 const state_number src = from_state[i];
337 const state_number dst = to_state[i];
338 symbol_number var = states[dst]->accessing_symbol;
340 /* Size of EDGE. */
341 int nedges = 0;
342 for (rule **rulep = derives[var - ntokens]; *rulep; ++rulep)
344 rule const *r = *rulep;
345 state *s = states[src];
346 path[0] = s->number;
348 /* Length of PATH. */
349 int length = 1;
350 for (item_number const *rp = r->rhs; 0 <= *rp; rp++)
352 symbol_number sym = item_number_as_symbol_number (*rp);
353 s = transitions_to (s, sym);
354 path[length++] = s->number;
357 /* S is the end of PATH. */
358 if (!s->consistent)
359 add_lookback_edge (s, r, i);
361 /* Walk back PATH from penultimate to beginning.
363 The "0 <= p" part is actually useless: each rhs ends in a
364 rule number (for which ISVAR(...) is false), and there is
365 a sentinel (ritem[-1]=0) before the first rhs. */
366 for (int p = length - 2; 0 <= p && ISVAR (r->rhs[p]); --p)
368 symbol_number sym = item_number_as_symbol_number (r->rhs[p]);
369 goto_number g = map_goto (path[p], sym);
370 /* Insert G if not already in EDGE.
371 FIXME: linear search. A bitset instead? */
373 bool found = false;
374 for (int j = 0; !found && j < nedges; ++j)
375 found = edge[j] == g;
376 if (!found)
378 assert (nedges < ngotos);
379 edge[nedges++] = g;
382 if (!nullable[sym - ntokens])
383 break;
387 if (trace_flag & trace_automaton)
389 goto_print (i, stderr);
390 fputs (" edges = ", stderr);
391 for (int j = 0; j < nedges; ++j)
393 fputc (' ', stderr);
394 goto_print (edge[j], stderr);
396 fputc ('\n', stderr);
399 if (nedges == 0)
400 includes[i] = NULL;
401 else
403 includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
404 for (int j = 0; j < nedges; ++j)
405 includes[i][j] = edge[j];
406 includes[i][nedges] = END_NODE;
410 free (edge);
411 free (path);
413 relation_transpose (&includes, ngotos);
414 if (trace_flag & trace_automaton)
415 relation_print ("includes", includes, ngotos, goto_print, stderr);
418 /* Compute FOLLOWS from INCLUDES, and free INCLUDES. */
419 static void
420 compute_follows (void)
422 relation_digraph (includes, ngotos, goto_follows);
423 if (trace_flag & trace_sets)
424 follows_print ("follows after includes", stderr);
425 for (goto_number i = 0; i < ngotos; ++i)
426 free (includes[i]);
427 free (includes);
431 static void
432 compute_lookaheads (void)
434 if (trace_flag & trace_automaton)
435 lookback_print (stderr);
437 for (size_t i = 0; i < nLA; ++i)
438 for (goto_list *sp = lookback[i]; sp; sp = sp->next)
439 bitset_or (LA[i], LA[i], goto_follows[sp->value]);
441 /* Free LOOKBACK. */
442 for (size_t i = 0; i < nLA; ++i)
443 LIST_FREE (goto_list, lookback[i]);
444 free (lookback);
448 /*------------------------------------------------------.
449 | Count the number of lookahead tokens required for S. |
450 `------------------------------------------------------*/
452 static int
453 state_lookaheads_count (state *s, bool default_reduction_only_for_accept)
455 const reductions *reds = s->reductions;
456 const transitions *trans = s->transitions;
458 /* Transitions are only disabled during conflict resolution, and that
459 hasn't happened yet, so there should be no need to check that
460 transition 0 hasn't been disabled before checking if it is a shift.
461 However, this check was performed at one time, so we leave it as an
462 aver. */
463 aver (trans->num == 0 || !TRANSITION_IS_DISABLED (trans, 0));
465 /* We need a lookahead either to distinguish different reductions
466 (i.e., there are two or more), or to distinguish a reduction from a
467 shift. Otherwise, it is straightforward, and the state is
468 'consistent'. However, do not treat a state with any reductions as
469 consistent unless it is the accepting state (because there is never
470 a lookahead token that makes sense there, and so no lookahead token
471 should be read) if the user has otherwise disabled default
472 reductions. */
473 s->consistent =
474 !(reds->num > 1
475 || (reds->num == 1 && trans->num && TRANSITION_IS_SHIFT (trans, 0))
476 || (reds->num == 1 && !rule_is_initial (reds->rules[0])
477 && default_reduction_only_for_accept));
479 return s->consistent ? 0 : reds->num;
483 /*----------------------------------------------.
484 | Compute LA, NLA, and the lookaheads members. |
485 `----------------------------------------------*/
487 void
488 initialize_LA (void)
490 bool default_reduction_only_for_accept;
492 char *default_reductions =
493 muscle_percent_define_get ("lr.default-reduction");
494 default_reduction_only_for_accept = STREQ (default_reductions, "accepting");
495 free (default_reductions);
498 /* Compute the total number of reductions requiring a lookahead. */
499 nLA = 0;
500 for (state_number i = 0; i < nstates; ++i)
501 nLA += state_lookaheads_count (states[i],
502 default_reduction_only_for_accept);
503 /* Avoid having to special case 0. */
504 if (!nLA)
505 nLA = 1;
507 bitsetv pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
509 /* Initialize the members LOOKAHEADS for each state whose reductions
510 require lookahead tokens. */
511 for (state_number i = 0; i < nstates; ++i)
513 int count = state_lookaheads_count (states[i],
514 default_reduction_only_for_accept);
515 if (count)
517 states[i]->reductions->lookaheads = pLA;
518 pLA += count;
524 /*---------------------------------------------.
525 | Output the lookahead tokens for each state. |
526 `---------------------------------------------*/
528 static void
529 lookaheads_print (FILE *out)
531 fputs ("Lookaheads:\n", out);
532 for (state_number i = 0; i < nstates; ++i)
534 const reductions *reds = states[i]->reductions;
535 if (reds->num)
537 fprintf (out, " State %d:\n", i);
538 for (int j = 0; j < reds->num; ++j)
540 fprintf (out, " rule %d:", reds->rules[j]->number);
541 if (reds->lookaheads)
543 bitset_iterator iter;
544 int k;
545 BITSET_FOR_EACH (iter, reds->lookaheads[j], k, 0)
546 fprintf (out, " %s", symbols[k]->tag);
548 fputc ('\n', out);
552 fputc ('\n', out);
555 void
556 lalr (void)
558 if (trace_flag & trace_automaton)
560 fputc ('\n', stderr);
561 begin_use_class ("trace0", stderr);
562 fprintf (stderr, "lalr: begin");
563 end_use_class ("trace0", stderr);
564 fputc ('\n', stderr);
566 initialize_LA ();
567 set_goto_map ();
568 initialize_goto_follows ();
569 lookback = xcalloc (nLA, sizeof *lookback);
570 build_relations ();
571 compute_follows ();
572 compute_lookaheads ();
574 if (trace_flag & trace_sets)
575 lookaheads_print (stderr);
576 if (trace_flag & trace_automaton)
578 begin_use_class ("trace0", stderr);
579 fprintf (stderr, "lalr: done");
580 end_use_class ("trace0", stderr);
581 fputc ('\n', stderr);
586 void
587 lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
589 goto_number ngotos_reachable = 0;
590 symbol_number nonterminal = 0;
591 aver (nsyms == nnterms + ntokens);
593 for (goto_number i = 0; i < ngotos; ++i)
595 while (i == goto_map[nonterminal])
596 goto_map[nonterminal++] = ngotos_reachable;
597 /* If old_to_new[from_state[i]] = nstates_old, remove this goto
598 entry. */
599 if (old_to_new[from_state[i]] != nstates_old)
601 /* from_state[i] is not removed, so it and thus to_state[i] are
602 reachable, so to_state[i] != nstates_old. */
603 aver (old_to_new[to_state[i]] != nstates_old);
604 from_state[ngotos_reachable] = old_to_new[from_state[i]];
605 to_state[ngotos_reachable] = old_to_new[to_state[i]];
606 ++ngotos_reachable;
609 while (nonterminal <= nnterms)
611 aver (ngotos == goto_map[nonterminal]);
612 goto_map[nonterminal++] = ngotos_reachable;
614 ngotos = ngotos_reachable;
618 void
619 lalr_free (void)
621 for (state_number s = 0; s < nstates; ++s)
622 states[s]->reductions->lookaheads = NULL;
623 bitsetv_free (LA);