/* IELR main implementation.

   Copyright (C) 2009-2015, 2018-2021 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

#include <config.h>

#include "ielr.h"

#include "system.h"

#include <bitset.h>
#include <timevar.h>

#include "AnnotationList.h"
#include "derives.h"
#include "getargs.h"
#include "lalr.h"
#include "muscle-tab.h"
#include "nullable.h"
#include "relation.h"
#include "state.h"
#include "symtab.h"

/** Records the value of the \%define variable lr.type.  */
typedef enum
{
  LR_TYPE__LR0,
  LR_TYPE__LALR,
  LR_TYPE__IELR,
  LR_TYPE__CANONICAL_LR
} LrType;

/* The user's requested LR type.  */
static LrType
lr_type_get (void)
{
  char *type = muscle_percent_define_get ("lr.type");
  LrType res;
  if (STREQ (type, "lr""(0)"))
    res = LR_TYPE__LR0;
  else if (STREQ (type, "lalr"))
    res = LR_TYPE__LALR;
  else if (STREQ (type, "ielr"))
    res = LR_TYPE__IELR;
  else if (STREQ (type, "canonical-lr"))
    res = LR_TYPE__CANONICAL_LR;
  else
    {
      aver (false);
      abort ();
    }
  free (type);
  return res;
}
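
/* Illustrative note (not from the original sources): a grammar containing
   '%define lr.type ielr' makes this function return LR_TYPE__IELR; the
   accepted values are "lr(0)", "lalr", "ielr", and "canonical-lr", exactly
   the strings tested above.  */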

/**
 * \post:
 *   - \c result = a new \c bitset of size \c ::nritems such that any bit \c i
 *     is set iff <tt>ritem[i]</tt> is a nonterminal after which all ritems in
 *     the same RHS are nullable nonterminals.  In other words, the follows of
 *     a goto on <tt>ritem[i]</tt> include the lookahead set of the item.
 */
static bitset
ielr_compute_ritem_sees_lookahead_set (void)
{
  bitset result = bitset_create (nritems, BITSET_FIXED);
  int i = nritems-1;
  while (0 < i)
    {
      --i;
      // Walk the RHS right to left, as long as the item is a symbol, that
      // symbol is a nonterminal, and it is nullable.
      while (!item_number_is_rule_number (ritem[i])
             && ISVAR (ritem[i])
             && nullable [item_number_as_symbol_number (ritem[i]) - ntokens])
        bitset_set (result, i--);
      if (!item_number_is_rule_number (ritem[i]) && ISVAR (ritem[i]))
        bitset_set (result, i--);
      // Flush the remainder of the RHS.
      while (!item_number_is_rule_number (ritem[i]) && 0 < i)
        --i;
    }
  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "ritem_sees_lookahead_set (indexes of ritems): ");
      bitset_dump (stderr, result);
      fprintf (stderr, "\n");
    }
  return result;
}
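
/* Illustrative example (not from the original sources): given the rule

     A: x B C;

   where x is a token and B and C are nullable nonterminals, the bits for
   the positions of B and C in ritem are set: everything to their right in
   the RHS derives the empty string, so a goto on B (or on C) sees the
   lookahead set of the item.  The bit for the position of x stays clear
   because x is not a nonterminal.  */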

/**
 * \pre:
 *   - \c ritem_sees_lookahead_set was computed by
 *     \c ielr_compute_ritem_sees_lookahead_set.
 * \post:
 *   - Each of \c *edgesp and \c *edge_countsp is a new array of size
 *     \c ::ngotos.
 *   - <tt>(*edgesp)[i]</tt> points to a \c goto_number array of size
 *     <tt>(*edge_countsp)[i]+1</tt>.
 *   - In such a \c goto_number array, the last element is \c ::END_NODE.
 *   - All remaining elements are the indices of the gotos to which there is an
 *     internal follow edge from goto \c i.
 *   - There is an internal follow edge from goto \c i to goto \c j iff both:
 *     - The from states of gotos \c i and \c j are the same.
 *     - The transition nonterminal for goto \c i appears as the first RHS
 *       symbol of at least one production for which both:
 *       - The LHS is the transition symbol of goto \c j.
 *       - All other RHS symbols are nullable nonterminals.
 *     - In other words, the follows of goto \c i include the follows of
 *       goto \c j and it's an internal edge because the from states are the
 *       same.
 */
static void
ielr_compute_internal_follow_edges (bitset ritem_sees_lookahead_set,
                                    goto_number ***edgesp, int **edge_countsp)
{
  *edgesp = xnmalloc (ngotos, sizeof **edgesp);
  *edge_countsp = xnmalloc (ngotos, sizeof **edge_countsp);
  {
    bitset sources = bitset_create (ngotos, BITSET_FIXED);
    for (goto_number i = 0; i < ngotos; ++i)
      (*edge_countsp)[i] = 0;
    for (goto_number i = 0; i < ngotos; ++i)
      {
        int nsources = 0;
        {
          for (rule **rulep = derives[states[to_state[i]]->accessing_symbol
                                      - ntokens];
               *rulep;
               ++rulep)
            {
              /* If there is at least one RHS symbol, if the first RHS symbol
                 is a nonterminal, and if all remaining RHS symbols (if any)
                 are nullable nonterminals, create an edge from the LHS
                 symbol's goto to the first RHS symbol's goto such that the RHS
                 symbol's goto will be the source of the edge after the
                 eventual relation_transpose below.

                 Unlike in ielr_compute_always_follows, I use a bitset for
                 edges rather than an array because it is possible that
                 multiple RHS's with the same first symbol could fit and thus
                 that we could end up with redundant edges.  With the
                 possibility of redundant edges, it's hard to know ahead of
                 time how large to make such an array.  Another possible
                 redundancy is that source and destination might be the same
                 goto.  Eliminating all these possible redundancies now might
                 possibly help performance a little.  I have not proven any of
                 this, but I'm guessing the bitset shouldn't entail much of a
                 performance penalty, if any.  */
              if (bitset_test (ritem_sees_lookahead_set,
                               (*rulep)->rhs - ritem))
                {
                  goto_number source =
                    map_goto (from_state[i],
                              item_number_as_symbol_number (*(*rulep)->rhs));
                  if (i != source && !bitset_test (sources, source))
                    {
                      bitset_set (sources, source);
                      ++nsources;
                      ++(*edge_countsp)[source];
                    }
                }
            }
        }
        if (nsources == 0)
          (*edgesp)[i] = NULL;
        else
          {
            (*edgesp)[i] = xnmalloc (nsources + 1, sizeof *(*edgesp)[i]);
            {
              bitset_iterator biter_source;
              bitset_bindex source;
              int j = 0;
              BITSET_FOR_EACH (biter_source, sources, source, 0)
                (*edgesp)[i][j++] = source;
            }
            (*edgesp)[i][nsources] = END_NODE;
          }
        bitset_zero (sources);
      }
    bitset_free (sources);
  }

  relation_transpose (edgesp, ngotos);

  if (trace_flag & trace_ielr)
    relation_print ("internal_follow_edges", *edgesp, ngotos, NULL, stderr);
}
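
/* Illustrative example (not from the original sources): given the rule

     A: B C;

   where C is a nullable nonterminal, a state with gotos on both A and B
   gets an internal follow edge connecting them: whatever token can follow
   A in that state can also follow B there, so the follows of the goto on B
   include the follows of the goto on A.  */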

/**
 * \pre:
 *   - \c ritem_sees_lookahead_set was computed by
 *     \c ielr_compute_ritem_sees_lookahead_set.
 *   - \c internal_follow_edges was computed by
 *     \c ielr_compute_internal_follow_edges.
 * \post:
 *   - \c *follow_kernel_itemsp is a new \c bitsetv in which the number of rows
 *     is \c ngotos and the number of columns is the maximum number of kernel
 *     items in any state.
 *   - <tt>(*follow_kernel_itemsp)[i][j]</tt> is set iff the follows of goto
 *     \c i include the lookahead set of item \c j in the from state of goto
 *     \c i.
 *   - Thus, <tt>(*follow_kernel_itemsp)[i][j]</tt> is always unset if there is
 *     no item \c j in the from state of goto \c i.
 */
static void
ielr_compute_follow_kernel_items (bitset ritem_sees_lookahead_set,
                                  goto_number **internal_follow_edges,
                                  bitsetv *follow_kernel_itemsp)
{
  {
    size_t max_nitems = 0;
    for (state_number i = 0; i < nstates; ++i)
      if (states[i]->nitems > max_nitems)
        max_nitems = states[i]->nitems;
    *follow_kernel_itemsp = bitsetv_create (ngotos, max_nitems, BITSET_FIXED);
  }
  for (goto_number i = 0; i < ngotos; ++i)
    {
      size_t nitems = states[from_state[i]]->nitems;
      item_index *items = states[from_state[i]]->items;
      for (size_t j = 0; j < nitems; ++j)
        /* If this item has this goto and if all subsequent symbols in this
           RHS (if any) are nullable nonterminals, then record this item as
           one whose lookahead set is included in this goto's follows.  */
        if (item_number_is_symbol_number (ritem[items[j]])
            && item_number_as_symbol_number (ritem[items[j]])
               == states[to_state[i]]->accessing_symbol
            && bitset_test (ritem_sees_lookahead_set, items[j]))
          bitset_set ((*follow_kernel_itemsp)[i], j);
    }
  relation_digraph (internal_follow_edges, ngotos, *follow_kernel_itemsp);

  if (trace_flag & trace_ielr)
    {
      fprintf (stderr, "follow_kernel_items:\n");
      debug_bitsetv (*follow_kernel_itemsp);
    }
}
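
/* Illustrative example (not from the original sources): if the from state
   of a goto on A contains the kernel item

     X: x . A B

   where B is a nullable nonterminal, the bit for that item is set in the
   goto's row: every lookahead of the item can follow A here because B
   derives the empty string.  relation_digraph then closes the rows over
   the internal follow edges computed above.  */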

/**
 * \pre
 *   - \c *edgesp and \c edge_counts were computed by
 *     \c ielr_compute_internal_follow_edges.
 * \post
 *   - \c *always_followsp is a new \c bitsetv with \c ngotos rows and
 *     \c ntokens columns.
 *   - <tt>(*always_followsp)[i][j]</tt> is set iff token \c j is an always
 *     follow (that is, it's computed by internal and successor edges) of goto
 *     \c i.
 *   - Rows of \c *edgesp have been realloc'ed and extended to include
 *     successor follow edges.  \c edge_counts has not been updated.
 */
static void
ielr_compute_always_follows (goto_number ***edgesp,
                             int const edge_counts[],
                             bitsetv *always_followsp)
{
  *always_followsp = bitsetv_create (ngotos, ntokens, BITSET_FIXED);
  {
    goto_number *edge_array = xnmalloc (ngotos, sizeof *edge_array);
    for (goto_number i = 0; i < ngotos; ++i)
      {
        goto_number nedges = edge_counts[i];
        {
          int j;
          transitions *trans = states[to_state[i]]->transitions;
          FOR_EACH_SHIFT (trans, j)
            bitset_set ((*always_followsp)[i], TRANSITION_SYMBOL (trans, j));
          for (; j < trans->num; ++j)
            {
              symbol_number sym = TRANSITION_SYMBOL (trans, j);
              if (nullable[sym - ntokens])
                edge_array[nedges++] = map_goto (to_state[i], sym);
            }
        }
        if (nedges - edge_counts[i])
          {
            (*edgesp)[i] =
              xnrealloc ((*edgesp)[i], nedges + 1, sizeof *(*edgesp)[i]);
            memcpy ((*edgesp)[i] + edge_counts[i], edge_array + edge_counts[i],
                    (nedges - edge_counts[i]) * sizeof *(*edgesp)[i]);
            (*edgesp)[i][nedges] = END_NODE;
          }
      }
    free (edge_array);
  }
  relation_digraph (*edgesp, ngotos, *always_followsp);

  if (trace_flag & trace_ielr)
    {
      relation_print ("always follow edges", *edgesp, ngotos, NULL, stderr);
      fprintf (stderr, "always_follows:\n");
      debug_bitsetv (*always_followsp);
    }
}

/**
 * \post
 *   - \c result is a new array of size \c ::nstates.
 *   - <tt>result[i]</tt> is an array of pointers to the predecessor
 *     <tt>state</tt>s of state \c i.
 *   - The last element of such an array is \c NULL.
 */
static state ***
ielr_compute_predecessors (void)
{
  int *predecessor_counts = xnmalloc (nstates, sizeof *predecessor_counts);
  state ***res = xnmalloc (nstates, sizeof *res);
  for (state_number i = 0; i < nstates; ++i)
    predecessor_counts[i] = 0;
  for (state_number i = 0; i < nstates; ++i)
    for (int j = 0; j < states[i]->transitions->num; ++j)
      ++predecessor_counts[states[i]->transitions->states[j]->number];
  for (state_number i = 0; i < nstates; ++i)
    {
      res[i] = xnmalloc (predecessor_counts[i]+1, sizeof *res[i]);
      res[i][predecessor_counts[i]] = NULL;
      predecessor_counts[i] = 0;
    }
  for (state_number i = 0; i < nstates; ++i)
    for (int j = 0; j < states[i]->transitions->num; ++j)
      {
        state_number k = states[i]->transitions->states[j]->number;
        res[k][predecessor_counts[k]++] = states[i];
      }
  free (predecessor_counts);
  return res;
}
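
/* Design note (not from the original sources): the two passes over the
   transitions form a counting allocation.  The first pass counts the
   predecessors of each state so that each row of res can be sized exactly,
   and the counters are then reset to serve as fill cursors for the second
   pass.  */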

/**
 * \post
 *   - \c *follow_kernel_itemsp and \c *always_followsp were computed by
 *     \c ielr_compute_follow_kernel_items and
 *     \c ielr_compute_always_follows.
 *   - Iff <tt>predecessorsp != NULL</tt>, then \c *predecessorsp was computed
 *     by \c ielr_compute_predecessors.
 */
static void
ielr_compute_auxiliary_tables (bitsetv *follow_kernel_itemsp,
                               bitsetv *always_followsp,
                               state ****predecessorsp)
{
  goto_number **edges;
  int *edge_counts;

  bitset ritem_sees_lookahead_set = ielr_compute_ritem_sees_lookahead_set ();
  ielr_compute_internal_follow_edges (ritem_sees_lookahead_set,
                                      &edges, &edge_counts);
  ielr_compute_follow_kernel_items (ritem_sees_lookahead_set, edges,
                                    follow_kernel_itemsp);
  bitset_free (ritem_sees_lookahead_set);

  ielr_compute_always_follows (&edges, edge_counts, always_followsp);
  for (int i = 0; i < ngotos; ++i)
    free (edges[i]);
  free (edges);
  free (edge_counts);
  if (predecessorsp)
    *predecessorsp = ielr_compute_predecessors ();
}

/**
 * \note
 *   - FIXME: It might be an interesting experiment to compare the space and
 *     time efficiency of computing \c item_lookahead_sets either:
 *     - Fully up front.
 *     - Just-in-time, as implemented below.
 *     - Not at all.  That is, just let annotations continue even when
 *       unnecessary.
 */
bool
ielr_item_has_lookahead (state *s, symbol_number lhs, size_t item,
                         symbol_number lookahead, state ***predecessors,
                         bitset **item_lookahead_sets)
{
  if (!item_lookahead_sets[s->number])
    {
      item_lookahead_sets[s->number] =
        xnmalloc (s->nitems, sizeof item_lookahead_sets[s->number][0]);
      for (size_t i = 0; i < s->nitems; ++i)
        item_lookahead_sets[s->number][i] = NULL;
    }
  if (!item_lookahead_sets[s->number][item])
    {
      item_lookahead_sets[s->number][item] =
        bitset_create (ntokens, BITSET_FIXED);
      /* If this kernel item is the beginning of a RHS, it must be the kernel
         item in the start state, and so its LHS has no follows and no goto to
         check.  If, instead, this kernel item is the successor of the start
         state's kernel item, there are still no follows and no goto.  This
         situation is fortunate because we want to avoid the - 2 below in both
         cases.

         Actually, IELR(1) should never invoke this function for either of
         those cases because (1) follow_kernel_items will never reference a
         kernel item for this RHS because the end token blocks sight of the
         lookahead set from the RHS's only nonterminal, and (2) no reduction
         has a lookback dependency on this lookahead set.  Nevertheless, I
         didn't change this test to an aver just in case the usage of this
         function evolves to need those two cases.  In both cases, the current
         implementation returns the right result.  */
      const rule *r = item_rule (&ritem[s->items[item]]);
      const bool is_successor_of_initial_item
        = rule_is_initial (r) && &ritem[s->items[item]] == r->rhs + 1;
      aver (!is_successor_of_initial_item);
      if (!is_successor_of_initial_item)
        {
          /* If the LHS symbol of this item isn't known (because this is a
             top-level invocation), go get it.  */
          if (!lhs)
            lhs = r->lhs->number;
          /* If this kernel item is next to the beginning of the RHS, then
             check all predecessors' goto follows for the LHS.  */
          if (item_number_is_rule_number (ritem[s->items[item] - 2]))
            {
              aver (lhs != acceptsymbol->content->number);
              for (state **predecessor = predecessors[s->number];
                   *predecessor;
                   ++predecessor)
                bitset_or (item_lookahead_sets[s->number][item],
                           item_lookahead_sets[s->number][item],
                           goto_follows[map_goto ((*predecessor)->number,
                                                  lhs)]);
            }
          /* If this kernel item is later in the RHS, then check all
             predecessor items' lookahead sets.  */
          else
            {
              for (state **predecessor = predecessors[s->number];
                   *predecessor;
                   ++predecessor)
                {
                  size_t predecessor_item;
                  for (predecessor_item = 0;
                       predecessor_item < (*predecessor)->nitems;
                       ++predecessor_item)
                    if ((*predecessor)->items[predecessor_item]
                        == s->items[item] - 1)
                      break;
                  aver (predecessor_item != (*predecessor)->nitems);
                  ielr_item_has_lookahead (*predecessor, lhs,
                                           predecessor_item, 0 /*irrelevant*/,
                                           predecessors, item_lookahead_sets);
                  bitset_or (item_lookahead_sets[s->number][item],
                             item_lookahead_sets[s->number][item],
                             item_lookahead_sets[(*predecessor)->number]
                                                [predecessor_item]);
                }
            }
        }
    }
  return bitset_test (item_lookahead_sets[s->number][item], lookahead);
}
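
/* Illustrative example (not from the original sources): for a kernel item

     X: x y . z

   in state s, the lookahead set is the union, over every predecessor p of
   s, of the lookahead set of the item "X: x . y z" in p, computed
   recursively.  For

     X: x . y z

   (dot next to the beginning of the RHS), it is instead the union of the
   follows of the goto on X in every predecessor of s.  */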

/**
 * \pre
 *   - \c follow_kernel_items, \c always_follows, and \c predecessors
 *     were computed by \c ielr_compute_auxiliary_tables.
 * \post
 *   - Each of <tt>*inadequacy_listsp</tt> and <tt>*annotation_listsp</tt>
 *     points to a new array of size \c ::nstates.
 *   - For <tt>0 <= i < ::nstates</tt>:
 *     - <tt>(*inadequacy_listsp)[i]</tt> contains the \c InadequacyList head
 *       node for <tt>states[i]</tt>.
 *     - <tt>(*annotation_listsp)[i]</tt> contains the \c AnnotationList head
 *       node for <tt>states[i]</tt>.
 *   - <tt>*max_annotationsp</tt> is the maximum number of annotations per
 *     state.
 */
static void
ielr_compute_annotation_lists (bitsetv follow_kernel_items,
                               bitsetv always_follows, state ***predecessors,
                               AnnotationIndex *max_annotationsp,
                               InadequacyList ***inadequacy_listsp,
                               AnnotationList ***annotation_listsp,
                               struct obstack *annotations_obstackp)
{
  bitset **item_lookahead_sets =
    xnmalloc (nstates, sizeof *item_lookahead_sets);
  AnnotationIndex *annotation_counts =
    xnmalloc (nstates, sizeof *annotation_counts);
  ContributionIndex max_contributions = 0;
  int total_annotations = 0;

  *inadequacy_listsp = xnmalloc (nstates, sizeof **inadequacy_listsp);
  *annotation_listsp = xnmalloc (nstates, sizeof **annotation_listsp);
  for (state_number i = 0; i < nstates; ++i)
    {
      item_lookahead_sets[i] = NULL;
      (*inadequacy_listsp)[i] = NULL;
      (*annotation_listsp)[i] = NULL;
      annotation_counts[i] = 0;
    }
  {
    InadequacyListNodeCount inadequacy_list_node_count = 0;
    for (state_number i = 0; i < nstates; ++i)
      AnnotationList__compute_from_inadequacies (
        states[i], follow_kernel_items, always_follows, predecessors,
        item_lookahead_sets, *inadequacy_listsp, *annotation_listsp,
        annotation_counts, &max_contributions, annotations_obstackp,
        &inadequacy_list_node_count);
  }
  *max_annotationsp = 0;
  for (state_number i = 0; i < nstates; ++i)
    {
      if (annotation_counts[i] > *max_annotationsp)
        *max_annotationsp = annotation_counts[i];
      total_annotations += annotation_counts[i];
    }
  if (trace_flag & trace_ielr)
    {
      for (state_number i = 0; i < nstates; ++i)
        {
          fprintf (stderr, "Inadequacy annotations for state %d:\n", i);
          AnnotationList__debug ((*annotation_listsp)[i],
                                 states[i]->nitems, 2);
        }
      fprintf (stderr, "Number of LR(0)/LALR(1) states: %d\n", nstates);
      fprintf (stderr, "Average number of annotations per state: %f\n",
               (float)total_annotations/nstates);
      fprintf (stderr, "Max number of annotations per state: %d\n",
               *max_annotationsp);
      fprintf (stderr, "Max number of contributions per annotation: %d\n",
               max_contributions);
    }
  for (state_number i = 0; i < nstates; ++i)
    if (item_lookahead_sets[i])
      {
        for (size_t j = 0; j < states[i]->nitems; ++j)
          if (item_lookahead_sets[i][j])
            bitset_free (item_lookahead_sets[i][j]);
        free (item_lookahead_sets[i]);
      }
  free (item_lookahead_sets);
  free (annotation_counts);
}

typedef struct state_list
{
  struct state_list *next;
  state *state;

  /** Has this state been recomputed as a successor of another state?  */
  bool recomputedAsSuccessor;

  /**
   * \c NULL iff all lookahead sets are empty.  <tt>lookaheads[i] = NULL</tt>
   * iff the lookahead set on item \c i is empty.
   */
  bitset *lookaheads;

  /**
   * nextIsocore is the next state in a circular linked list of all states
   * with the same core.  The one originally computed by generate_states in
   * lr0.c is lr0Isocore.
   */
  struct state_list *lr0Isocore;
  struct state_list *nextIsocore;
} state_list;
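
/* Illustrative note (not from the original sources): ielr_split_states
   initializes each state's list node with lr0Isocore and nextIsocore
   pointing at the node itself, a one-element ring; when ielr_compute_state
   cannot merge, it splices the newly split state into the ring of isocores
   of the original LR(0) state.  */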

MAYBE_UNUSED static void
state_list_print_ (const state_list *s, FILE *out, const char *sep)
{
  if (s)
    {
      fprintf (out, "%s%d", sep, s->state->number);
      state_list_print_ (s->next, out, " ");
    }
}

MAYBE_UNUSED static void
state_list_print (const state_list *s, FILE *out)
{
  fprintf (out, "{");
  state_list_print_ (s, out, "");
  fprintf (out, "}");
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - <tt>n->class = nterm_sym</tt>.
 * \post
 *   - \c follow_set contains the follow set for the goto on nonterminal \c n
 *     in state \c s based on the lookaheads stored in <tt>s->lookaheads</tt>.
 */
static void
ielr_compute_goto_follow_set (bitsetv follow_kernel_items,
                              bitsetv always_follows, state_list *s,
                              sym_content *n, bitset follow_set)
{
  goto_number n_goto = map_goto (s->lr0Isocore->state->number, n->number);
  bitset_copy (follow_set, always_follows[n_goto]);
  if (s->lookaheads)
    {
      bitset_iterator biter_item;
      bitset_bindex item;
      BITSET_FOR_EACH (biter_item, follow_kernel_items[n_goto], item, 0)
        if (s->lookaheads[item])
          bitset_or (follow_set, follow_set, s->lookaheads[item]);
    }
}
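
/* Equivalently (illustrative summary, not from the original sources): the
   computed follow set is always_follows[n_goto] unioned with
   s->lookaheads[i] for every kernel item i of s whose bit is set in row
   n_goto of follow_kernel_items.  */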

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - \c lookahead_filter was computed by
 *     \c AnnotationList__computeLookaheadFilter for the original LR(0) isocore
 *     of \c t.
 *   - The number of rows in \c lookaheads is at least the number of items in
 *     \c t, and the number of columns is \c ::ntokens.
 * \post
 *   - <tt>lookaheads[i][j]</tt> is set iff both:
 *     - <tt>lookahead_filter[i][j]</tt> is set.
 *     - The isocore of \c t that will be the transition successor of \c s will
 *       inherit from \c s token \c j into the lookahead set of item \c i.
 */
static void
ielr_compute_lookaheads (bitsetv follow_kernel_items, bitsetv always_follows,
                         state_list *s, state *t, bitsetv lookahead_filter,
                         bitsetv lookaheads)
{
  size_t s_item = 0;
  bitsetv_zero (lookaheads);
  for (size_t t_item = 0; t_item < t->nitems; ++t_item)
    {
      /* If this kernel item is the beginning of a RHS, it must be the
         kernel item in the start state, but t is supposed to be a successor
         state.  If, instead, this kernel item is the successor of the start
         state's kernel item, the lookahead set is empty.  This second case is
         a special case to avoid the - 2 below, but the next successor can be
         handled fine without special casing it.  */
      aver (t->items[t_item] != 0);
      const rule *r = item_rule (&ritem[t->items[t_item]]);
      const bool is_successor_of_initial_item
        = rule_is_initial (r) && &ritem[t->items[t_item]] == r->rhs + 1;
      if (!is_successor_of_initial_item
          && !bitset_empty_p (lookahead_filter[t_item]))
        {
          /* Is this kernel item next to the beginning of the RHS?  */
          if (item_number_is_rule_number (ritem[t->items[t_item] - 2]))
            ielr_compute_goto_follow_set (
              follow_kernel_items, always_follows, s,
              r->lhs,
              lookaheads[t_item]);
          else if (s->lookaheads)
            {
              /* We don't have to start the s item search at the beginning
                 every time because items from both states are sorted by their
                 indices in ritem.  */
              for (; s_item < s->state->nitems; ++s_item)
                if (s->state->items[s_item] == t->items[t_item] - 1)
                  break;
              aver (s_item != s->state->nitems);
              if (s->lookaheads[s_item])
                bitset_copy (lookaheads[t_item], s->lookaheads[s_item]);
            }
          bitset_and (lookaheads[t_item],
                      lookaheads[t_item], lookahead_filter[t_item]);
        }
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - Either:
 *     - <tt>annotation_lists = NULL</tt> and all bits in work2 are set.
 *     - \c annotation_lists was computed by \c ielr_compute_annotation_lists.
 *   - The number of rows in each of \c lookaheads and \c work2 is the maximum
 *     number of items in any state.  The number of columns in each is
 *     \c ::ntokens.
 *   - \c lookaheads was computed by \c ielr_compute_lookaheads for \c t.
 *   - \c ::nstates is the total number of states, some not yet fully computed,
 *     in the list ending at \c *last_statep.  It is at least the number of
 *     original LR(0) states.
 *   - The size of \c work1 is at least the number of annotations for the LR(0)
 *     isocore of \c t.
 * \post
 *   - Either:
 *     - In the case that <tt>annotation_lists != NULL</tt>,
 *       <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if
 *       permitted by the annotations for the original LR(0) isocore of \c t.
 *       If this changed the lookaheads in that isocore, those changes were
 *       propagated to all already computed transition successors recursively
 *       possibly resulting in the splitting of some of those successors.
 *     - In the case that <tt>annotation_lists = NULL</tt>,
 *       <tt>lookaheads \@pre</tt> was merged with some isocore of \c t if the
 *       isocore's lookahead sets were identical to those specified by
 *       <tt>lookaheads \@pre</tt>.
 *     - If no such merge was permitted, a new isocore of \c t containing
 *       <tt>lookaheads \@pre</tt> was appended to the state list whose
 *       previous tail was <tt>*last_statep \@pre</tt> and \c ::nstates was
 *       incremented.  It was also appended to \c t's isocore list.
 *   - <tt>*tp</tt> = the isocore of \c t into which
 *     <tt>lookaheads \@pre</tt> was placed/merged.
 *   - \c lookaheads, \c work1, and \c work2 may have been altered.
 */
static void
ielr_compute_state (bitsetv follow_kernel_items, bitsetv always_follows,
                    AnnotationList **annotation_lists, state *t,
                    bitsetv lookaheads, state_list **last_statep,
                    ContributionIndex work1[], bitsetv work2, state **tp)
{
  state_list *lr0_isocore = t->state_list->lr0Isocore;
  state_list **this_isocorep;

  /* Determine whether there's an isocore of t with which these lookaheads can
     be merged.  */
  {
    if (annotation_lists)
      {
        AnnotationIndex ai;
        AnnotationList *a;
        for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
             a;
             ++ai, a = a->next)
          work1[ai] =
            AnnotationList__computeDominantContribution (
              a, lr0_isocore->state->nitems, lookaheads, false);
      }
    for (this_isocorep = &t->state_list;
         this_isocorep == &t->state_list || *this_isocorep != t->state_list;
         this_isocorep = &(*this_isocorep)->nextIsocore)
      {
        if (!(*this_isocorep)->recomputedAsSuccessor)
          break;
        if (annotation_lists)
          {
            AnnotationIndex ai;
            AnnotationList *a;
            for (ai = 0, a = annotation_lists[lr0_isocore->state->number];
                 a;
                 ++ai, a = a->next)
              {
                if (work1[ai] != ContributionIndex__none)
                  {
                    /* This isocore compatibility test depends on the fact
                       that, if the dominant contributions are the same for the
                       two isocores, then merging their lookahead sets will not
                       produce a state with a different dominant
                       contribution.  */
                    ContributionIndex ci =
                      AnnotationList__computeDominantContribution (
                        a, lr0_isocore->state->nitems,
                        (*this_isocorep)->lookaheads, false);
                    if (ci != ContributionIndex__none && work1[ai] != ci)
                      break;
                  }
              }
            if (!a)
              break;
          }
        else
          {
            size_t i;
            for (i = 0; i < t->nitems; ++i)
              {
                if (!(*this_isocorep)->lookaheads
                    || !(*this_isocorep)->lookaheads[i])
                  {
                    if (!bitset_empty_p (lookaheads[i]))
                      break;
                  }
                /* bitset_equal_p uses the size of the first argument,
                   so lookaheads[i] must be the second argument.  */
                else if (!bitset_equal_p ((*this_isocorep)->lookaheads[i],
                                          lookaheads[i]))
                  break;
              }
            if (i == t->nitems)
              break;
          }
      }
  }

  bool has_lookaheads = false;
  for (size_t i = 0; i < lr0_isocore->state->nitems; ++i)
    if (!bitset_empty_p (lookaheads[i]))
      {
        has_lookaheads = true;
        break;
      }

  /* Merge with an existing isocore.  */
  if (this_isocorep == &t->state_list || *this_isocorep != t->state_list)
    {
      bool new_lookaheads = false;
      *tp = (*this_isocorep)->state;

      /* Merge lookaheads into the state and record whether any of them are
         actually new.  */
      if (has_lookaheads)
        {
          if (!(*this_isocorep)->lookaheads)
            {
              (*this_isocorep)->lookaheads =
                xnmalloc (t->nitems, sizeof (*this_isocorep)->lookaheads);
              for (size_t i = 0; i < t->nitems; ++i)
                (*this_isocorep)->lookaheads[i] = NULL;
            }
          for (size_t i = 0; i < t->nitems; ++i)
            if (!bitset_empty_p (lookaheads[i]))
              {
                if (!(*this_isocorep)->lookaheads[i])
                  (*this_isocorep)->lookaheads[i] =
                    bitset_create (ntokens, BITSET_FIXED);
                bitset_andn (lookaheads[i],
                             lookaheads[i], (*this_isocorep)->lookaheads[i]);
                bitset_or ((*this_isocorep)->lookaheads[i],
                           lookaheads[i], (*this_isocorep)->lookaheads[i]);
                if (!bitset_empty_p (lookaheads[i]))
                  new_lookaheads = true;
              }
        }

      /* If new lookaheads were merged, propagate those lookaheads to the
         successors, possibly splitting them.  If *tp is being recomputed for
         the first time, this isn't necessary because the main
         ielr_split_states loop will handle the successors later.  */
      if (!(*this_isocorep)->recomputedAsSuccessor)
        (*this_isocorep)->recomputedAsSuccessor = true;
      else if (new_lookaheads)
        {
          /* When merging demands identical lookahead sets, it is impossible
             to merge new lookaheads.  */
          aver (annotation_lists);
          for (int i = 0; i < (*tp)->transitions->num; ++i)
            {
              state *t2 = (*tp)->transitions->states[i];
              /* At any time, there's at most one state for which we have so
                 far initially recomputed only some of its successors in the
                 main ielr_split_states loop.  Because we recompute successors
                 in order, we can just stop at the first such successor.  Of
                 course, *tp might be a state some of whose successors have
                 been recomputed as successors of other states rather than as
                 successors of *tp.  It's fine if we go ahead and propagate to
                 some of those.  We'll propagate to them again (but stop when
                 we see nothing changes) and to the others when we reach *tp in
                 the main ielr_split_states loop later.  */
              if (!t2->state_list->recomputedAsSuccessor)
                break;
              AnnotationList__computeLookaheadFilter (
                annotation_lists[t2->state_list->lr0Isocore->state->number],
                t2->nitems, work2);
              ielr_compute_lookaheads (follow_kernel_items, always_follows,
                                       (*this_isocorep), t2, work2,
                                       lookaheads);
              /* FIXME: If splitting t2 here, it's possible that lookaheads
                 that had already propagated from *tp to t2 will be left in t2
                 after *tp has been removed as t2's predecessor:
                 - We're going to recompute all lookaheads in phase 4, so these
                   extra lookaheads won't appear in the final parser table.
                 - If t2 has just one annotation, then these extra lookaheads
                   cannot alter the dominating contribution to the associated
                   inadequacy and thus cannot needlessly prevent a future merge
                   of some new state with t2.  We can be sure of this because:
                   - The fact that we're splitting t2 now means that some
                     predecessors (at least one) other than *tp must be
                     propagating contributions to t2.
                   - The fact that t2 was merged in the first place means that,
                     if *tp propagated any contributions, the dominating
                     contribution must be the same as that from those other
                     predecessors.
                   - Thus, if some new state to be merged with t2 in the future
                     proves to be compatible with the contributions propagated
                     by the other predecessors, it will also be compatible with
                     the contributions made by the extra lookaheads left behind
                     by *tp.
                 - However, if t2 has more than one annotation and these extra
                   lookaheads contribute to one of their inadequacies, it's
                   possible these extra lookaheads may needlessly prevent a
                   future merge with t2.  For example:
                   - Let's say there's an inadequacy A that makes the split
                     necessary as follows:
                     - There's currently just one other predecessor and it
                       propagates to t2 some contributions to inadequacy A.
                     - The new lookaheads that we were attempting to propagate
                       from *tp to t2 made contributions to inadequacy A with a
                       different dominating contribution than those from that
                       other predecessor.
                     - The extra lookaheads either make no contribution to
                       inadequacy A or have the same dominating contribution as
                       the contributions from the other predecessor.  Either
                       way, as explained above, they can't prevent a future
                       merge.
                   - Let's say there's an inadequacy B that causes the trouble
                     with future merges as follows:
                     - The extra lookaheads make contributions to inadequacy B.
                     - Those extra contributions did not prevent the original
                       merge to create t2 because the other predecessor
                       propagates to t2 no contributions to inadequacy B.
                     - Thus, those extra contributions may prevent a future
                       merge with t2 even though the merge would be fine if *tp
                       had not left them behind.
                 - Is the latter case common enough to worry about?
                 - Perhaps we should track all predecessors and iterate them
                   now to recreate t2 without those extra lookaheads.  */
              ielr_compute_state (follow_kernel_items, always_follows,
                                  annotation_lists, t2, lookaheads,
                                  last_statep, work1, work2,
                                  &(*tp)->transitions->states[i]);
            }
        }
    }

  /* Create a new isocore.  */
  else
    {
      state_list *old_isocore = *this_isocorep;
      (*last_statep)->next = *this_isocorep = xmalloc (sizeof **last_statep);
      *last_statep = *this_isocorep;
      (*last_statep)->state = *tp = state_new_isocore (t);
      (*tp)->state_list = *last_statep;
      (*last_statep)->recomputedAsSuccessor = true;
      (*last_statep)->next = NULL;
      (*last_statep)->lookaheads = NULL;
      if (has_lookaheads)
        {
          (*last_statep)->lookaheads =
            xnmalloc (t->nitems, sizeof (*last_statep)->lookaheads);
          for (size_t i = 0; i < t->nitems; ++i)
            {
              if (bitset_empty_p (lookaheads[i]))
                (*last_statep)->lookaheads[i] = NULL;
              else
                {
                  (*last_statep)->lookaheads[i] =
                    bitset_create (ntokens, BITSET_FIXED);
                  bitset_copy ((*last_statep)->lookaheads[i], lookaheads[i]);
                }
            }
        }
      (*last_statep)->lr0Isocore = lr0_isocore;
      (*last_statep)->nextIsocore = old_isocore;
    }
}

/**
 * \pre
 *   - \c follow_kernel_items and \c always_follows were computed by
 *     \c ielr_compute_auxiliary_tables.
 *   - Either:
 *     - <tt>annotation_lists = NULL</tt> and <tt>max_annotations=0</tt>.
 *     - \c annotation_lists and \c max_annotations were computed by
 *       \c ielr_compute_annotation_lists.
 * \post
 *   - \c ::states is of size \c ::nstates (which might be greater than
 *     <tt>::nstates \@pre</tt>) and no longer contains any LR(1)-relative
 *     inadequacy.  \c annotation_lists was used to determine state
 *     compatibility or, if <tt>annotation_lists = NULL</tt>, the canonical
 *     LR(1) state compatibility test was used.
 *   - If <tt>annotation_lists = NULL</tt>, reduction lookahead sets were
 *     computed in all states.  tv_ielr_phase4 was pushed while they were
 *     computed from item lookahead sets.
 */
static void
ielr_split_states (bitsetv follow_kernel_items, bitsetv always_follows,
                   AnnotationList **annotation_lists,
                   AnnotationIndex max_annotations)
{
  state_list *first_state;
  state_list *last_state;
  bitsetv lookahead_filter = NULL;
  bitsetv lookaheads;

  /* Set up state list and some reusable bitsets.  */
  {
    size_t max_nitems = 0;
    state_list **nodep = &first_state;
    for (state_number i = 0; i < nstates; ++i)
      {
        *nodep = states[i]->state_list = last_state = xmalloc (sizeof **nodep);
        (*nodep)->state = states[i];
        (*nodep)->recomputedAsSuccessor = false;
        (*nodep)->lookaheads = NULL;
        (*nodep)->lr0Isocore = *nodep;
        (*nodep)->nextIsocore = *nodep;
        nodep = &(*nodep)->next;
        if (states[i]->nitems > max_nitems)
          max_nitems = states[i]->nitems;
      }
    *nodep = NULL;
    lookahead_filter = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
    if (!annotation_lists)
      bitsetv_ones (lookahead_filter);
    lookaheads = bitsetv_create (max_nitems, ntokens, BITSET_FIXED);
  }

  /* Recompute states.  */
  {
    ContributionIndex *work = xnmalloc (max_annotations, sizeof *work);
    for (state_list *this_state = first_state;
         this_state;
         this_state = this_state->next)
      {
        const state *s = this_state->state;
        for (int i = 0; i < s->transitions->num; ++i)
          {
            state *t = s->transitions->states[i];
            if (annotation_lists)
              AnnotationList__computeLookaheadFilter (
                annotation_lists[t->state_list->lr0Isocore->state->number],
                t->nitems, lookahead_filter);
            ielr_compute_lookaheads (follow_kernel_items, always_follows,
                                     this_state, t, lookahead_filter,
                                     lookaheads);
            ielr_compute_state (follow_kernel_items, always_follows,
                                annotation_lists, t, lookaheads, &last_state,
                                work, lookahead_filter,
                                &s->transitions->states[i]);
          }
      }
    free (work);
  }

  bitsetv_free (lookahead_filter);
  bitsetv_free (lookaheads);

  /* Store states back in the states array.  */
  states = xnrealloc (states, nstates, sizeof *states);
  for (state_list *node = first_state; node; node = node->next)
    states[node->state->number] = node->state;

  /* In the case of canonical LR(1), copy item lookahead sets to reduction
     lookahead sets.  */
  if (!annotation_lists)
    {
      timevar_push (tv_ielr_phase4);
      initialize_LA ();
      for (state_list *node = first_state; node; node = node->next)
        if (!node->state->consistent)
          {
            size_t i = 0;
            item_index *itemset = node->state->items;
            for (size_t r = 0; r < node->state->reductions->num; ++r)
              {
                rule *this_rule = node->state->reductions->rules[r];
                bitset lookahead_set =
                  node->state->reductions->lookaheads[r];
                if (item_number_is_rule_number (*this_rule->rhs))
                  ielr_compute_goto_follow_set (follow_kernel_items,
                                                always_follows, node,
                                                this_rule->lhs, lookahead_set);
                else if (node->lookaheads)
                  {
                    /* We don't need to start the kernel item search back at
                       i=0 because both items and reductions are sorted on rule
                       number.  */
                    while (!item_number_is_rule_number (ritem[itemset[i]])
                           || item_number_as_rule_number (ritem[itemset[i]])
                              != this_rule->number)
                      {
                        ++i;
                        aver (i < node->state->nitems);
                      }
                    if (node->lookaheads[i])
                      bitset_copy (lookahead_set, node->lookaheads[i]);
                  }
              }
          }
      timevar_pop (tv_ielr_phase4);
    }

  /* Free state list.  */
  while (first_state)
    {
      state_list *node = first_state;
      if (node->lookaheads)
        {
          for (size_t i = 0; i < node->state->nitems; ++i)
            if (node->lookaheads[i])
              bitset_free (node->lookaheads[i]);
          free (node->lookaheads);
        }
      first_state = node->next;
      free (node);
    }
}

void
ielr (void)
{
  LrType lr_type = lr_type_get ();

  /* Phase 0: LALR(1).  */
  switch (lr_type)
    {
    case LR_TYPE__LR0:
      timevar_push (tv_lalr);
      set_goto_map ();
      timevar_pop (tv_lalr);
      return;

    case LR_TYPE__CANONICAL_LR:
      timevar_push (tv_lalr);
      set_goto_map ();
      timevar_pop (tv_lalr);
      break;

    case LR_TYPE__LALR:
      timevar_push (tv_lalr);
      lalr ();
      bitsetv_free (goto_follows);
      timevar_pop (tv_lalr);
      return;

    case LR_TYPE__IELR:
      timevar_push (tv_lalr);
      lalr ();
      timevar_pop (tv_lalr);
      break;
    }

  {
    bitsetv follow_kernel_items;
    bitsetv always_follows;
    InadequacyList **inadequacy_lists = NULL;
    AnnotationList **annotation_lists = NULL;
    struct obstack annotations_obstack;
    AnnotationIndex max_annotations = 0;

    {
      /* Phase 1: Compute Auxiliary Tables.  */
      state ***predecessors;
      timevar_push (tv_ielr_phase1);
      ielr_compute_auxiliary_tables (
        &follow_kernel_items, &always_follows,
        lr_type == LR_TYPE__CANONICAL_LR ? NULL : &predecessors);
      timevar_pop (tv_ielr_phase1);

      /* Phase 2: Compute Annotations.  */
      timevar_push (tv_ielr_phase2);
      if (lr_type != LR_TYPE__CANONICAL_LR)
        {
          obstack_init (&annotations_obstack);
          ielr_compute_annotation_lists (follow_kernel_items, always_follows,
                                         predecessors, &max_annotations,
                                         &inadequacy_lists, &annotation_lists,
                                         &annotations_obstack);
          for (state_number i = 0; i < nstates; ++i)
            free (predecessors[i]);
          free (predecessors);
          bitsetv_free (goto_follows);
          lalr_free ();
        }
      timevar_pop (tv_ielr_phase2);
    }

    /* Phase 3: Split States.  */
    timevar_push (tv_ielr_phase3);
    {
      state_number nstates_lr0 = nstates;
      ielr_split_states (follow_kernel_items, always_follows,
                         annotation_lists, max_annotations);
      if (inadequacy_lists)
        for (state_number i = 0; i < nstates_lr0; ++i)
          InadequacyList__delete (inadequacy_lists[i]);
    }
    free (inadequacy_lists);
    if (annotation_lists)
      obstack_free (&annotations_obstack, NULL);
    free (annotation_lists);
    bitsetv_free (follow_kernel_items);
    bitsetv_free (always_follows);
    timevar_pop (tv_ielr_phase3);
  }

  /* Phase 4: Compute Reduction Lookaheads.  */
  timevar_push (tv_ielr_phase4);
  free (goto_map);
  free (from_state);
  free (to_state);
  if (lr_type == LR_TYPE__CANONICAL_LR)
    {
      /* Reduction lookaheads are computed in ielr_split_states above
         but are timed as part of phase 4.  */
      set_goto_map ();
    }
  else
    {
      lalr ();
      bitsetv_free (goto_follows);
    }
  timevar_pop (tv_ielr_phase4);
}
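
/* Usage note (illustrative summary, not from the original sources): ielr ()
   is invoked once the LR(0) automaton exists.  With '%define lr.type ielr'
   the work above proceeds as: LALR(1) lookaheads (phase 0), auxiliary tables
   (phase 1), inadequacy annotations (phase 2), state splitting (phase 3),
   and recomputation of the reduction lookaheads on the split automaton
   (phase 4).  With '%define lr.type canonical-lr', the annotations are
   skipped and ielr_split_states merges only isocores whose lookahead sets
   are identical.  */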