doc: refer to cex from sections dealing with conflicts
[bison.git] / src / lalr.c
blob0f5d7147a31b7514014a12a702a2408128c6e7a3
1 /* Compute lookahead criteria for Bison.
3 Copyright (C) 1984, 1986, 1989, 2000-2015, 2018-2020 Free Software
4 Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 /* Find which rules need lookahead in each state, and which lookahead
23 tokens they accept. */
25 #include <config.h>
26 #include "system.h"
28 #include <bitset.h>
29 #include <bitsetv.h>
31 #include "complain.h"
32 #include "derives.h"
33 #include "getargs.h"
34 #include "gram.h"
35 #include "lalr.h"
36 #include "lr0.h"
37 #include "muscle-tab.h"
38 #include "nullable.h"
39 #include "reader.h"
40 #include "relation.h"
41 #include "symtab.h"
43 /* goto_map[nterm - NTOKENS] -> number of gotos. */
44 goto_number *goto_map = NULL;
45 goto_number ngotos = 0;
46 state_number *from_state = NULL;
47 state_number *to_state = NULL;
48 bitsetv goto_follows = NULL;
50 /* Linked list of goto numbers. */
51 typedef struct goto_list
53 struct goto_list *next;
54 goto_number value;
55 } goto_list;
57 static goto_list *
58 goto_list_new (goto_number value, struct goto_list *next)
60 goto_list *res = xmalloc (sizeof *res);
61 res->next = next;
62 res->value = value;
63 return res;
66 /* LA is an nLA by NTOKENS matrix of bits. LA[l, i] is 1 if the rule
67 LArule[l] is applicable in the appropriate state when the next
68 token is symbol i. If LA[l, i] and LA[l, j] are both 1 for i != j,
69 it is a conflict. */
71 static bitsetv LA = NULL;
72 size_t nLA;
75 /* "(p, A) includes (p', B)" iff
76 B → βAγ, γ nullable, and p'-- β --> p (i.e., state p' reaches p on label β).
78 Definition p.621 [DeRemer 1982].
80 INCLUDES[(p, A)] = [(p', B),...] */
81 static goto_number **includes;
83 /* "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
85 Definition p.621 [DeRemer 1982]. */
86 static goto_list **lookback;
88 static void
89 goto_print (goto_number i, FILE *out)
91 const state_number src = from_state[i];
92 const state_number dst = to_state[i];
93 symbol_number var = states[dst]->accessing_symbol;
94 fprintf (out,
95 "goto[%zu] = (%d, %s, %d)", i, src, symbols[var]->tag, dst);
98 void
99 set_goto_map (void)
101 /* Count the number of gotos (ngotos) per nterm (goto_map). */
102 goto_map = xcalloc (nnterms + 1, sizeof *goto_map);
103 ngotos = 0;
104 for (state_number s = 0; s < nstates; ++s)
106 transitions *trans = states[s]->transitions;
107 for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
109 ngotos++;
110 /* Abort if (ngotos + 1) would overflow. */
111 aver (ngotos != GOTO_NUMBER_MAXIMUM);
112 goto_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
116 goto_number *temp_map = xnmalloc (nnterms + 1, sizeof *temp_map);
118 goto_number k = 0;
119 for (symbol_number i = ntokens; i < nsyms; ++i)
121 temp_map[i - ntokens] = k;
122 k += goto_map[i - ntokens];
125 for (symbol_number i = ntokens; i < nsyms; ++i)
126 goto_map[i - ntokens] = temp_map[i - ntokens];
128 goto_map[nsyms - ntokens] = ngotos;
129 temp_map[nsyms - ntokens] = ngotos;
132 from_state = xcalloc (ngotos, sizeof *from_state);
133 to_state = xcalloc (ngotos, sizeof *to_state);
135 for (state_number s = 0; s < nstates; ++s)
137 const transitions *trans = states[s]->transitions;
138 for (int i = trans->num - 1; 0 <= i && TRANSITION_IS_GOTO (trans, i); --i)
140 goto_number k = temp_map[TRANSITION_SYMBOL (trans, i) - ntokens]++;
141 from_state[k] = s;
142 to_state[k] = trans->states[i]->number;
146 free (temp_map);
148 if (trace_flag & trace_automaton)
149 for (int i = 0; i < ngotos; ++i)
151 goto_print (i, stderr);
152 fputc ('\n', stderr);
157 goto_number
158 map_goto (state_number src, symbol_number sym)
160 goto_number low = goto_map[sym - ntokens];
161 goto_number high = goto_map[sym - ntokens + 1] - 1;
163 for (;;)
165 aver (low <= high);
166 goto_number middle = (low + high) / 2;
167 state_number s = from_state[middle];
168 if (s == src)
169 return middle;
170 else if (s < src)
171 low = middle + 1;
172 else
173 high = middle - 1;
177 /* Print FOLLOWS for debugging. */
178 static void
179 follows_print (const char* title, FILE *out)
181 fprintf (out, "%s:\n", title);
182 for (goto_number i = 0; i < ngotos; ++i)
184 fputs (" FOLLOWS[", out);
185 goto_print (i, out);
186 fputs ("] =", out);
187 bitset_iterator iter;
188 symbol_number sym;
189 BITSET_FOR_EACH (iter, goto_follows[i], sym, 0)
190 fprintf (out, " %s", symbols[sym]->tag);
191 fputc ('\n', out);
193 fputc ('\n', out);
196 /* Build goto_follows. */
197 static void
198 initialize_goto_follows (void)
200 goto_number **reads = xnmalloc (ngotos, sizeof *reads);
201 goto_number *edge = xnmalloc (ngotos, sizeof *edge);
203 goto_follows = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
205 for (goto_number i = 0; i < ngotos; ++i)
207 state_number dst = to_state[i];
208 const transitions *trans = states[dst]->transitions;
210 int j;
211 FOR_EACH_SHIFT (trans, j)
212 bitset_set (goto_follows[i], TRANSITION_SYMBOL (trans, j));
214 /* Gotos outgoing from DST. */
215 goto_number nedges = 0;
216 for (; j < trans->num; ++j)
218 symbol_number sym = TRANSITION_SYMBOL (trans, j);
219 if (nullable[sym - ntokens])
221 assert (nedges < ngotos);
222 edge[nedges++] = map_goto (dst, sym);
226 if (nedges == 0)
227 reads[i] = NULL;
228 else
230 reads[i] = xnmalloc (nedges + 1, sizeof reads[i][0]);
231 memcpy (reads[i], edge, nedges * sizeof edge[0]);
232 reads[i][nedges] = END_NODE;
235 if (trace_flag & trace_automaton)
237 follows_print ("follows after shifts", stderr);
238 relation_print ("reads", reads, ngotos, goto_print, stderr);
241 relation_digraph (reads, ngotos, goto_follows);
242 if (trace_flag & trace_automaton)
243 follows_print ("follows after read", stderr);
245 for (goto_number i = 0; i < ngotos; ++i)
246 free (reads[i]);
247 free (reads);
248 free (edge);
252 /* Find the state which LOOKBACK[LOOKBACK_INDEX] is about. */
253 static const state *
254 lookback_find_state (int lookback_index)
256 state *res = NULL;
257 for (int j = 0; j < nstates; ++j)
258 if (states[j]->reductions
259 && states[j]->reductions->lookaheads)
261 if (states[j]->reductions->lookaheads - LA > lookback_index)
262 /* Went too far. */
263 break;
264 else
265 res = states[j];
267 /* Pacify "potential null pointer dereference" warning. */
268 if (!res)
269 abort ();
270 return res;
273 /* Print LOOKBACK for debugging. */
274 static void
275 lookback_print (FILE *out)
277 fputs ("lookback:\n", out);
278 for (int i = 0; i < nLA; ++i)
279 if (lookback[i])
281 fprintf (out, " %3d = ", i);
282 const state *s = lookback_find_state (i);
283 int rnum = i - (s->reductions->lookaheads - LA);
284 const rule *r = s->reductions->rules[rnum];
285 fprintf (out, "(%3d, ", s->number);
286 rule_print (r, NULL, out);
287 fputs (") ->", out);
288 for (goto_list *sp = lookback[i]; sp; sp = sp->next)
290 fputc (' ', out);
291 goto_print (sp->value, out);
293 fputc ('\n', out);
295 fputc ('\n', out);
298 /* Add (S, R) -> GOTONO to LOOKBACK.
300 "(q, A → ω) lookback (p, A)" iff state p reaches state q on label ω.
302 The goto number GOTONO, whose source is S (which is
303 inconsistent), */
304 static void
305 add_lookback_edge (state *s, rule const *r, goto_number gotono)
307 int ri = state_reduction_find (s, r);
308 int idx = (s->reductions->lookaheads - LA) + ri;
309 lookback[idx] = goto_list_new (gotono, lookback[idx]);
313 /* Compute INCLUDES and LOOKBACK. Corresponds to step E in Sec. 6 of
314 [DeRemer 1982]. */
315 static void
316 build_relations (void)
318 goto_number *edge = xnmalloc (ngotos, sizeof *edge);
319 state_number *path = xnmalloc (ritem_longest_rhs () + 1, sizeof *path);
321 includes = xnmalloc (ngotos, sizeof *includes);
323 /* For each goto (from SRC to DST labeled by nterm VAR), iterate
324 over each rule with VAR as LHS, and find the path PATH from SRC
325 labeled with the RHS of the rule. */
326 for (goto_number i = 0; i < ngotos; ++i)
328 const state_number src = from_state[i];
329 const state_number dst = to_state[i];
330 symbol_number var = states[dst]->accessing_symbol;
332 /* Size of EDGE. */
333 int nedges = 0;
334 for (rule **rulep = derives[var - ntokens]; *rulep; ++rulep)
336 rule const *r = *rulep;
337 state *s = states[src];
338 path[0] = s->number;
340 /* Length of PATH. */
341 int length = 1;
342 for (item_number const *rp = r->rhs; 0 <= *rp; rp++)
344 symbol_number sym = item_number_as_symbol_number (*rp);
345 s = transitions_to (s, sym);
346 path[length++] = s->number;
349 /* S is the end of PATH. */
350 if (!s->consistent)
351 add_lookback_edge (s, r, i);
353 /* Walk back PATH from penultimate to beginning.
355 The "0 <= p" part is actually useless: each rhs ends in a
356 rule number (for which ISVAR(...) is false), and there is
357 a sentinel (ritem[-1]=0) before the first rhs. */
358 for (int p = length - 2; 0 <= p && ISVAR (r->rhs[p]); --p)
360 symbol_number sym = item_number_as_symbol_number (r->rhs[p]);
361 goto_number g = map_goto (path[p], sym);
362 /* Insert G if not already in EDGE.
363 FIXME: linear search. A bitset instead? */
365 bool found = false;
366 for (int j = 0; !found && j < nedges; ++j)
367 found = edge[j] == g;
368 if (!found)
370 assert (nedges < ngotos);
371 edge[nedges++] = g;
374 if (!nullable[sym - ntokens])
375 break;
379 if (trace_flag & trace_automaton)
381 goto_print (i, stderr);
382 fputs (" edges = ", stderr);
383 for (int j = 0; j < nedges; ++j)
385 fputc (' ', stderr);
386 goto_print (edge[j], stderr);
388 fputc ('\n', stderr);
391 if (nedges == 0)
392 includes[i] = NULL;
393 else
395 includes[i] = xnmalloc (nedges + 1, sizeof includes[i][0]);
396 for (int j = 0; j < nedges; ++j)
397 includes[i][j] = edge[j];
398 includes[i][nedges] = END_NODE;
402 free (edge);
403 free (path);
405 relation_transpose (&includes, ngotos);
406 if (trace_flag & trace_automaton)
407 relation_print ("includes", includes, ngotos, goto_print, stderr);
410 /* Compute FOLLOWS from INCLUDES, and free INCLUDES. */
411 static void
412 compute_follows (void)
414 relation_digraph (includes, ngotos, goto_follows);
415 if (trace_flag & trace_sets)
416 follows_print ("follows after includes", stderr);
417 for (goto_number i = 0; i < ngotos; ++i)
418 free (includes[i]);
419 free (includes);
423 static void
424 compute_lookaheads (void)
426 if (trace_flag & trace_automaton)
427 lookback_print (stderr);
429 for (size_t i = 0; i < nLA; ++i)
430 for (goto_list *sp = lookback[i]; sp; sp = sp->next)
431 bitset_or (LA[i], LA[i], goto_follows[sp->value]);
433 /* Free LOOKBACK. */
434 for (size_t i = 0; i < nLA; ++i)
435 LIST_FREE (goto_list, lookback[i]);
436 free (lookback);
440 /*------------------------------------------------------.
441 | Count the number of lookahead tokens required for S. |
442 `------------------------------------------------------*/
444 static int
445 state_lookaheads_count (state *s, bool default_reduction_only_for_accept)
447 const reductions *reds = s->reductions;
448 const transitions *trans = s->transitions;
450 /* Transitions are only disabled during conflict resolution, and that
451 hasn't happened yet, so there should be no need to check that
452 transition 0 hasn't been disabled before checking if it is a shift.
453 However, this check was performed at one time, so we leave it as an
454 aver. */
455 aver (trans->num == 0 || !TRANSITION_IS_DISABLED (trans, 0));
457 /* We need a lookahead either to distinguish different reductions
458 (i.e., there are two or more), or to distinguish a reduction from a
459 shift. Otherwise, it is straightforward, and the state is
460 'consistent'. However, do not treat a state with any reductions as
461 consistent unless it is the accepting state (because there is never
462 a lookahead token that makes sense there, and so no lookahead token
463 should be read) if the user has otherwise disabled default
464 reductions. */
465 s->consistent =
466 !(reds->num > 1
467 || (reds->num == 1 && trans->num && TRANSITION_IS_SHIFT (trans, 0))
468 || (reds->num == 1 && reds->rules[0]->number != 0
469 && default_reduction_only_for_accept));
471 return s->consistent ? 0 : reds->num;
475 /*----------------------------------------------.
476 | Compute LA, NLA, and the lookaheads members. |
477 `----------------------------------------------*/
479 void
480 initialize_LA (void)
482 bool default_reduction_only_for_accept;
484 char *default_reductions =
485 muscle_percent_define_get ("lr.default-reduction");
486 default_reduction_only_for_accept = STREQ (default_reductions, "accepting");
487 free (default_reductions);
490 /* Compute the total number of reductions requiring a lookahead. */
491 nLA = 0;
492 for (state_number i = 0; i < nstates; ++i)
493 nLA += state_lookaheads_count (states[i],
494 default_reduction_only_for_accept);
495 /* Avoid having to special case 0. */
496 if (!nLA)
497 nLA = 1;
499 bitsetv pLA = LA = bitsetv_create (nLA, ntokens, BITSET_FIXED);
501 /* Initialize the members LOOKAHEADS for each state whose reductions
502 require lookahead tokens. */
503 for (state_number i = 0; i < nstates; ++i)
505 int count = state_lookaheads_count (states[i],
506 default_reduction_only_for_accept);
507 if (count)
509 states[i]->reductions->lookaheads = pLA;
510 pLA += count;
516 /*---------------------------------------------.
517 | Output the lookahead tokens for each state. |
518 `---------------------------------------------*/
520 static void
521 lookaheads_print (FILE *out)
523 fputs ("Lookaheads:\n", out);
524 for (state_number i = 0; i < nstates; ++i)
526 const reductions *reds = states[i]->reductions;
527 if (reds->num)
529 fprintf (out, " State %d:\n", i);
530 for (int j = 0; j < reds->num; ++j)
532 fprintf (out, " rule %d:", reds->rules[j]->number);
533 if (reds->lookaheads)
535 bitset_iterator iter;
536 int k;
537 BITSET_FOR_EACH (iter, reds->lookaheads[j], k, 0)
538 fprintf (out, " %s", symbols[k]->tag);
540 fputc ('\n', out);
544 fputc ('\n', out);
547 void
548 lalr (void)
550 if (trace_flag & trace_automaton)
552 fputc ('\n', stderr);
553 begin_use_class ("trace0", stderr);
554 fprintf (stderr, "lalr: begin");
555 end_use_class ("trace0", stderr);
556 fputc ('\n', stderr);
558 initialize_LA ();
559 set_goto_map ();
560 initialize_goto_follows ();
561 lookback = xcalloc (nLA, sizeof *lookback);
562 build_relations ();
563 compute_follows ();
564 compute_lookaheads ();
566 if (trace_flag & trace_sets)
567 lookaheads_print (stderr);
568 if (trace_flag & trace_automaton)
570 begin_use_class ("trace0", stderr);
571 fprintf (stderr, "lalr: done");
572 end_use_class ("trace0", stderr);
573 fputc ('\n', stderr);
578 void
579 lalr_update_state_numbers (state_number old_to_new[], state_number nstates_old)
581 goto_number ngotos_reachable = 0;
582 symbol_number nonterminal = 0;
583 aver (nsyms == nnterms + ntokens);
585 for (goto_number i = 0; i < ngotos; ++i)
587 while (i == goto_map[nonterminal])
588 goto_map[nonterminal++] = ngotos_reachable;
589 /* If old_to_new[from_state[i]] = nstates_old, remove this goto
590 entry. */
591 if (old_to_new[from_state[i]] != nstates_old)
593 /* from_state[i] is not removed, so it and thus to_state[i] are
594 reachable, so to_state[i] != nstates_old. */
595 aver (old_to_new[to_state[i]] != nstates_old);
596 from_state[ngotos_reachable] = old_to_new[from_state[i]];
597 to_state[ngotos_reachable] = old_to_new[to_state[i]];
598 ++ngotos_reachable;
601 while (nonterminal <= nnterms)
603 aver (ngotos == goto_map[nonterminal]);
604 goto_map[nonterminal++] = ngotos_reachable;
606 ngotos = ngotos_reachable;
610 void
611 lalr_free (void)
613 for (state_number s = 0; s < nstates; ++s)
614 states[s]->reductions->lookaheads = NULL;
615 bitsetv_free (LA);