glr2.cc: prefer unnamed namespace to 'static'
[bison.git] / src / parse-simulation.c
blob57b095122c59c27bd53d1fbba6c1f313bf6a4873
1 /* Parser simulator for unifying counterexample search
3 Copyright (C) 2020-2021 Free Software Foundation, Inc.
5 This file is part of Bison, the GNU Compiler Compiler.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <https://www.gnu.org/licenses/>. */
20 #include <config.h>
22 #include "parse-simulation.h"
24 #include <gl_linked_list.h>
25 #include <gl_xlist.h>
26 #include <stdlib.h>
28 #include "lssi.h"
29 #include "nullable.h"
31 struct parse_state
33 // Path of state-items the parser has traversed.
34 struct si_chunk
36 // Elements newly added in this chunk.
37 state_item_list contents;
38 // Properties of the linked list this chunk represents.
39 const state_item *head_elt;
40 const state_item *tail_elt;
41 size_t total_size;
42 } state_items;
43 // List of derivations of the symbols.
44 struct deriv_chunk
46 derivation_list contents;
47 const derivation *head_elt;
48 const derivation *tail_elt;
49 size_t total_size;
50 } derivs;
51 struct parse_state *parent;
52 int reference_count;
53 // Incremented during productions, decremented during reductions.
54 int depth;
55 // Whether the contents of the chunks should be prepended or
56 // appended to the list the chunks represent.
57 bool prepend;
58 // Causes chunk contents to be freed when the reference count is
59 // one. Used when only the chunk metadata will be needed.
60 bool free_contents_early;
64 static void
65 ps_si_prepend (parse_state *ps, const state_item *si)
67 struct si_chunk *sic = &ps->state_items;
68 gl_list_add_first (sic->contents, si);
69 sic->head_elt = si;
70 ++sic->total_size;
71 if (!sic->tail_elt)
72 sic->tail_elt = si;
75 static void
76 ps_si_append (parse_state *ps, const state_item *si)
78 struct si_chunk *sic = &ps->state_items;
79 gl_list_add_last (sic->contents, si);
80 sic->tail_elt = si;
81 ++sic->total_size;
82 if (!sic->head_elt)
83 sic->head_elt = si;
86 static void
87 ps_derivs_prepend (parse_state *ps, derivation *d)
89 struct deriv_chunk *dc = &ps->derivs;
90 derivation_list_prepend (dc->contents, d);
91 dc->head_elt = d;
92 ++dc->total_size;
93 if (!dc->tail_elt)
94 dc->tail_elt = d;
97 static void
98 ps_derivs_append (parse_state *ps, derivation *d)
100 struct deriv_chunk *dc = &ps->derivs;
101 derivation_list_append (dc->contents, d);
102 dc->tail_elt = d;
103 ++dc->total_size;
104 if (!dc->head_elt)
105 dc->head_elt = d;
108 static int allocs = 0;
109 static int frees = 0;
111 static parse_state *
112 empty_parse_state (void)
114 parse_state *res = xcalloc (1, sizeof *res);
115 res->state_items.contents
116 = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
117 res->derivs.contents = derivation_list_new ();
118 ++allocs;
119 return res;
122 parse_state *
123 new_parse_state (const state_item *si)
125 parse_state *res = empty_parse_state ();
126 ps_si_append (res, si);
127 ps_derivs_append (res, derivation_dot ());
128 return res;
131 static parse_state *
132 copy_parse_state (bool prepend, parse_state *parent)
134 parse_state *res = xmalloc (sizeof *res);
135 *res = *parent;
136 res->state_items.contents
137 = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
138 res->derivs.contents = derivation_list_new ();
139 res->parent = parent;
140 res->prepend = prepend;
141 res->reference_count = 0;
142 res->free_contents_early = false;
143 parse_state_retain (parent);
144 ++allocs;
145 return res;
148 bool
149 parse_state_derivation_completed (const parse_state *ps)
151 return ps->derivs.total_size == 1;
154 derivation *
155 parse_state_derivation (const parse_state *ps)
157 return (derivation *) ps->derivs.head_elt;
160 const state_item *
161 parse_state_head (const parse_state *ps)
163 return ps->state_items.head_elt;
166 const state_item *
167 parse_state_tail (const parse_state *ps)
169 return ps->state_items.tail_elt;
173 parse_state_length (const parse_state *ps)
175 return ps->state_items.total_size;
179 parse_state_depth (const parse_state *ps)
181 return ps->depth;
184 void
185 parse_state_retain (parse_state *ps)
187 ++ps->reference_count;
190 void
191 parse_state_free_contents_early (parse_state *ps)
193 ps->free_contents_early = true;
196 void
197 free_parse_state (parse_state *original_ps)
199 bool free_contents = true;
200 parse_state *parent_ps = NULL;
201 for (parse_state *ps = original_ps; ps && free_contents; ps = parent_ps)
203 --ps->reference_count;
204 free_contents = (ps->reference_count == 1 && ps->free_contents_early)
205 || (ps->reference_count == 0 && !ps->free_contents_early);
206 // need to keep the parse state around for visited hash set,
207 // but its contents and parent can be freed
208 if (free_contents)
210 if (ps->state_items.contents)
211 gl_list_free (ps->state_items.contents);
212 if (ps->derivs.contents)
213 derivation_list_free (ps->derivs.contents);
215 parent_ps = ps->parent;
216 if (ps->reference_count <= 0)
218 free (ps);
219 ++frees;
224 size_t
225 parse_state_hasher (const parse_state *ps, size_t max)
227 const struct si_chunk *sis = &ps->state_items;
228 return ((state_item *) sis->head_elt - state_items +
229 (state_item *) sis->tail_elt - state_items + sis->total_size) % max;
232 bool
233 parse_state_comparator (const parse_state *ps1, const parse_state *ps2)
235 const struct si_chunk *sis1 = &ps1->state_items;
236 const struct si_chunk *sis2 = &ps2->state_items;
237 return sis1->head_elt == sis2->head_elt
238 && sis1->tail_elt == sis2->tail_elt
239 && sis1->total_size == sis2->total_size;
243 void
244 parse_state_completed_steps (const parse_state *ps, int *shifts, int *productions)
246 // traverse to the root parse_state,
247 // which will have a list of all completed productions.
248 const parse_state *root_ps = ps;
249 while (root_ps->parent)
250 root_ps = root_ps->parent;
252 state_item_list sis = root_ps->state_items.contents;
253 int count = 0;
255 state_item *last = NULL;
256 state_item *next = NULL;
257 for (gl_list_iterator_t it = gl_list_iterator (sis);
258 state_item_list_next (&it, &next);
261 if (last && last->state == next->state)
262 ++count;
263 last = next;
265 *productions = count;
266 *shifts = root_ps->state_items.total_size - count;
269 typedef void (*chunk_append_fn) (gl_list_t, const void *);
271 // A version of gl_list_add_last which has the chunk_append_fn
272 // signature.
273 static void
274 list_add_last (gl_list_t list, const void *elt)
276 gl_list_add_last (list, elt);
279 // takes an array of n gl_lists and flattens them into two list
280 // based off of the index split
281 static void
282 list_flatten_and_split (gl_list_t *list, gl_list_t *rets, int split, int n,
283 chunk_append_fn append_fn)
285 int ret_index = 0;
286 int ret_array = 0;
287 for (int i = 0; i < n; ++i)
289 const void *p = NULL;
290 gl_list_iterator_t it = gl_list_iterator (list[i]);
291 while (gl_list_iterator_next (&it, &p, NULL))
292 if (p)
294 gl_list_t l = (gl_list_t) p;
295 const void *si = NULL;
296 gl_list_iterator_t it2 = gl_list_iterator (l);
297 while (gl_list_iterator_next (&it2, &si, NULL))
299 if (ret_index++ == split)
300 ++ret_array;
301 if (rets[ret_array])
302 append_fn (rets[ret_array], si);
304 gl_list_iterator_free (&it2);
306 gl_list_iterator_free (&it);
310 static parse_state_list
311 parse_state_list_new (void)
313 return gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
314 (gl_listelement_dispose_fn)free_parse_state,
315 true);
318 static void
319 parse_state_list_append (parse_state_list pl, parse_state *ps)
321 parse_state_retain (ps);
322 gl_list_add_last (pl, ps);
325 // Emulates a reduction on a parse state by popping some amount of
326 // derivations and state_items off of the parse_state and returning
327 // the result in ret. Returns the derivation of what's popped.
328 static derivation_list
329 parser_pop (parse_state *ps, int deriv_index,
330 int si_index, parse_state *ret)
332 // prepend sis, append sis, prepend derivs, append derivs
333 gl_list_t chunks[4];
334 for (int i = 0; i < 4; ++i)
335 chunks[i] = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
336 for (parse_state *pn = ps; pn != NULL; pn = pn->parent)
337 if (pn->prepend)
339 gl_list_add_last (chunks[0], pn->state_items.contents);
340 gl_list_add_last (chunks[2], pn->derivs.contents);
342 else
344 gl_list_add_first (chunks[1], pn->state_items.contents);
345 gl_list_add_first (chunks[3], pn->derivs.contents);
347 derivation_list popped_derivs = derivation_list_new ();
348 gl_list_t ret_chunks[4] = { ret->state_items.contents, NULL,
349 ret->derivs.contents, popped_derivs
351 list_flatten_and_split (chunks, ret_chunks, si_index, 2,
352 list_add_last);
353 list_flatten_and_split (chunks + 2, ret_chunks + 2, deriv_index, 2,
354 (chunk_append_fn)derivation_list_append);
355 size_t s_size = gl_list_size (ret->state_items.contents);
356 ret->state_items.total_size = s_size;
357 if (s_size > 0)
359 ret->state_items.tail_elt = gl_list_get_at (ret->state_items.contents,
360 s_size - 1);
361 ret->state_items.head_elt =
362 gl_list_get_at (ret->state_items.contents, 0);
364 else
366 ret->state_items.tail_elt = NULL;
367 ret->state_items.head_elt = NULL;
369 size_t d_size = gl_list_size (ret->derivs.contents);
370 ret->derivs.total_size = d_size;
371 if (d_size > 0)
373 ret->derivs.tail_elt = gl_list_get_at (ret->derivs.contents,
374 d_size - 1);
375 ret->derivs.head_elt = gl_list_get_at (ret->derivs.contents, 0);
377 else
379 ret->derivs.tail_elt = NULL;
380 ret->derivs.head_elt = NULL;
382 for (int i = 0; i < 4; ++i)
383 gl_list_free (chunks[i]);
384 return popped_derivs;
387 void
388 parse_state_lists (parse_state *ps, state_item_list *sitems,
389 derivation_list *derivs)
391 parse_state *temp = empty_parse_state ();
392 size_t si_size = ps->state_items.total_size;
393 size_t deriv_size = ps->derivs.total_size;
394 derivation_list dl = parser_pop (ps, si_size, deriv_size, temp);
395 *sitems = temp->state_items.contents;
396 *derivs = temp->derivs.contents;
397 // prevent the return lists from being freed
398 temp->state_items.contents = NULL;
399 temp->derivs.contents = NULL;
400 free_parse_state (temp);
401 derivation_list_free (dl);
405 * Compute the parse states that result from taking a transition on
406 * nullable symbols whenever possible from the given state_item.
408 static void
409 nullable_closure (parse_state *ps, state_item *si, parse_state_list state_list)
411 parse_state *current_ps = ps;
412 state_item_number prev_sin = si - state_items;
413 for (state_item_number sin = si->trans; sin != -1;
414 prev_sin = sin, sin = state_items[sin].trans)
416 state_item *psi = &state_items[prev_sin];
417 symbol_number sp = item_number_as_symbol_number (*psi->item);
418 if (ISTOKEN (sp) || !nullable[sp - ntokens])
419 break;
421 state_item *nsi = &state_items[sin];
422 current_ps = copy_parse_state (false, current_ps);
423 ps_si_append (current_ps, nsi);
424 ps_derivs_append (current_ps,
425 derivation_new (sp, derivation_list_new (),
426 state_item_rule (nsi)));
427 parse_state_list_append (state_list, current_ps);
431 parse_state_list
432 simulate_transition (parse_state *ps)
434 const state_item *si = ps->state_items.tail_elt;
435 symbol_number sym = item_number_as_symbol_number (*si->item);
436 // Transition on the same next symbol, taking nullable
437 // symbols into account.
438 parse_state_list result = parse_state_list_new ();
439 state_item_number si_next = si->trans;
440 // Check for disabled transition, shouldn't happen as any
441 // state_items that lead to these should be disabled.
442 if (si_next < 0)
443 return result;
444 parse_state *next_ps = copy_parse_state (false, ps);
445 ps_si_append (next_ps, &state_items[si_next]);
446 ps_derivs_append (next_ps, derivation_new_leaf (sym));
447 parse_state_list_append (result, next_ps);
449 nullable_closure (next_ps, &state_items[si_next], result);
450 return result;
454 * Determine if the given symbols are equal or their first sets
455 * intersect.
457 static bool
458 compatible (symbol_number sym1, symbol_number sym2)
460 if (sym1 == sym2)
461 return true;
462 if (ISTOKEN (sym1) && ISVAR (sym2))
463 return bitset_test (FIRSTS (sym2), sym1);
464 else if (ISVAR (sym1) && ISTOKEN (sym2))
465 return bitset_test (FIRSTS (sym1), sym2);
466 else if (ISVAR (sym1) && ISVAR (sym2))
467 return !bitset_disjoint_p (FIRSTS (sym1), FIRSTS (sym2));
468 else
469 return false;
472 parse_state_list
473 simulate_production (parse_state *ps, symbol_number compat_sym)
475 parse_state_list result = parse_state_list_new ();
476 const state_item *si = parse_state_tail (ps);
477 if (si->prods)
479 bitset_iterator biter;
480 state_item_number sin;
481 BITSET_FOR_EACH (biter, si->prods, sin, 0)
483 // Take production step only if lhs is not nullable and
484 // if first rhs symbol is compatible with compat_sym
485 state_item *next = &state_items[sin];
486 item_number *itm1 = next->item;
487 if (!compatible (*itm1, compat_sym) || !production_allowed (si, next))
488 continue;
489 parse_state *next_ps = copy_parse_state (false, ps);
490 ps_si_append (next_ps, next);
491 parse_state_list_append (result, next_ps);
492 if (next_ps->depth >= 0)
493 ++next_ps->depth;
494 nullable_closure (next_ps, next, result);
497 return result;
500 // simulates a reduction on the given parse state, conflict_item is the
501 // item associated with ps's conflict. symbol_set is a lookahead set this
502 // reduction must be compatible with
503 parse_state_list
504 simulate_reduction (parse_state *ps, int rule_len, bitset symbol_set)
506 parse_state_list result = parse_state_list_new ();
508 int s_size = ps->state_items.total_size;
509 int d_size = ps->derivs.total_size;
510 if (ps->depth >= 0)
511 d_size--; // account for dot
512 parse_state *new_root = empty_parse_state ();
513 derivation_list popped_derivs =
514 parser_pop (ps, d_size - rule_len,
515 s_size - rule_len - 1, new_root);
517 // update derivation
518 state_item *si = (state_item *) ps->state_items.tail_elt;
519 const rule *r = item_rule (si->item);
520 symbol_number lhs = r->lhs->number;
521 derivation *deriv = derivation_new (lhs, popped_derivs, state_item_rule (si));
522 --new_root->depth;
523 ps_derivs_append (new_root, deriv);
525 if (s_size != rule_len + 1)
527 state_item *tail = (state_item *) new_root->state_items.tail_elt;
528 ps_si_append (new_root, &state_items[tail->trans]);
529 parse_state_list_append (result, new_root);
531 else
533 // The head state_item is a production item, so we need to prepend
534 // with possible source state-items.
535 const state_item *head = ps->state_items.head_elt;
536 state_item_list prev = lssi_reverse_production (head, symbol_set);
537 // TODO: better understand what causes this case.
538 if (gl_list_size (prev) == 0)
540 // new_root needs to have an RC of 1 to be freed correctly here.
541 parse_state_retain (new_root);
542 free_parse_state (new_root);
544 else
546 state_item *psis = NULL;
547 for (gl_list_iterator_t it = gl_list_iterator (prev);
548 state_item_list_next (&it, &psis);
551 // Prepend the result from the reverse production.
552 parse_state *copy = copy_parse_state (true, new_root);
553 ps_si_prepend (copy, psis);
555 // Append the left hand side to the end of the parser state
556 copy = copy_parse_state (false, copy);
557 struct si_chunk *sis = &copy->state_items;
558 const state_item *tail = sis->tail_elt;
559 ps_si_append (copy, &state_items[tail->trans]);
560 parse_state_list_append (result, copy);
561 nullable_closure (copy, (state_item *) sis->tail_elt, result);
564 gl_list_free (prev);
566 return result;
569 parse_state_list
570 parser_prepend (parse_state *ps)
572 parse_state_list res = parse_state_list_new ();
573 const state_item *head = ps->state_items.head_elt;
574 symbol_number prepend_sym =
575 item_number_as_symbol_number (*(head->item - 1));
576 bitset_iterator biter;
577 state_item_number sin;
578 BITSET_FOR_EACH (biter, head->revs, sin, 0)
580 parse_state *copy = copy_parse_state (true, ps);
581 ps_si_prepend (copy, &state_items[sin]);
582 if (SI_TRANSITION (head))
583 ps_derivs_prepend (copy, derivation_new_leaf (prepend_sym));
584 parse_state_list_append (res, copy);
586 return res;
589 void
590 print_parse_state (parse_state *ps)
592 FILE *out = stderr;
593 fprintf (out, "(size %zu depth %d rc %d)\n",
594 ps->state_items.total_size, ps->depth, ps->reference_count);
595 state_item_print (ps->state_items.head_elt, out, "");
596 state_item_print (ps->state_items.tail_elt, out, "");
597 if (ps->derivs.total_size > 0)
598 derivation_print (ps->derivs.head_elt, out, "");
599 putc ('\n', out);