1 /* Parser simulator for unifying counterexample search
3 Copyright (C) 2020-2021 Free Software Foundation, Inc.
5 This file is part of Bison, the GNU Compiler Compiler.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <https://www.gnu.org/licenses/>. */
22 #include "parse-simulation.h"
24 #include <gl_linked_list.h>
33 // Path of state-items the parser has traversed.
36 // Elements newly added in this chunk.
37 state_item_list contents
;
38 // Properties of the linked list this chunk represents.
39 const state_item
*head_elt
;
40 const state_item
*tail_elt
;
43 // List of derivations of the symbols.
46 derivation_list contents
;
47 const derivation
*head_elt
;
48 const derivation
*tail_elt
;
51 struct parse_state
*parent
;
53 // Incremented during productions, decremented during reductions.
55 // Whether the contents of the chunks should be prepended or
56 // appended to the list the chunks represent.
58 // Causes chunk contents to be freed when the reference count is
59 // one. Used when only the chunk metadata will be needed.
60 bool free_contents_early
;
65 ps_si_prepend (parse_state
*ps
, const state_item
*si
)
67 struct si_chunk
*sic
= &ps
->state_items
;
68 gl_list_add_first (sic
->contents
, si
);
76 ps_si_append (parse_state
*ps
, const state_item
*si
)
78 struct si_chunk
*sic
= &ps
->state_items
;
79 gl_list_add_last (sic
->contents
, si
);
87 ps_derivs_prepend (parse_state
*ps
, derivation
*d
)
89 struct deriv_chunk
*dc
= &ps
->derivs
;
90 derivation_list_prepend (dc
->contents
, d
);
98 ps_derivs_append (parse_state
*ps
, derivation
*d
)
100 struct deriv_chunk
*dc
= &ps
->derivs
;
101 derivation_list_append (dc
->contents
, d
);
108 static int allocs
= 0;
109 static int frees
= 0;
112 empty_parse_state (void)
114 parse_state
*res
= xcalloc (1, sizeof *res
);
115 res
->state_items
.contents
116 = gl_list_create_empty (GL_LINKED_LIST
, NULL
, NULL
, NULL
, true);
117 res
->derivs
.contents
= derivation_list_new ();
123 new_parse_state (const state_item
*si
)
125 parse_state
*res
= empty_parse_state ();
126 ps_si_append (res
, si
);
127 ps_derivs_append (res
, derivation_dot ());
132 copy_parse_state (bool prepend
, parse_state
*parent
)
134 parse_state
*res
= xmalloc (sizeof *res
);
136 res
->state_items
.contents
137 = gl_list_create_empty (GL_LINKED_LIST
, NULL
, NULL
, NULL
, true);
138 res
->derivs
.contents
= derivation_list_new ();
139 res
->parent
= parent
;
140 res
->prepend
= prepend
;
141 res
->reference_count
= 0;
142 res
->free_contents_early
= false;
143 parse_state_retain (parent
);
149 parse_state_derivation_completed (const parse_state
*ps
)
151 return ps
->derivs
.total_size
== 1;
155 parse_state_derivation (const parse_state
*ps
)
157 return (derivation
*) ps
->derivs
.head_elt
;
161 parse_state_head (const parse_state
*ps
)
163 return ps
->state_items
.head_elt
;
167 parse_state_tail (const parse_state
*ps
)
169 return ps
->state_items
.tail_elt
;
173 parse_state_length (const parse_state
*ps
)
175 return ps
->state_items
.total_size
;
179 parse_state_depth (const parse_state
*ps
)
185 parse_state_retain (parse_state
*ps
)
187 ++ps
->reference_count
;
191 parse_state_free_contents_early (parse_state
*ps
)
193 ps
->free_contents_early
= true;
197 free_parse_state (parse_state
*original_ps
)
199 bool free_contents
= true;
200 parse_state
*parent_ps
= NULL
;
201 for (parse_state
*ps
= original_ps
; ps
&& free_contents
; ps
= parent_ps
)
203 --ps
->reference_count
;
204 free_contents
= (ps
->reference_count
== 1 && ps
->free_contents_early
)
205 || (ps
->reference_count
== 0 && !ps
->free_contents_early
);
206 // need to keep the parse state around for visited hash set,
207 // but its contents and parent can be freed
210 if (ps
->state_items
.contents
)
211 gl_list_free (ps
->state_items
.contents
);
212 if (ps
->derivs
.contents
)
213 derivation_list_free (ps
->derivs
.contents
);
215 parent_ps
= ps
->parent
;
216 if (ps
->reference_count
<= 0)
225 parse_state_hasher (const parse_state
*ps
, size_t max
)
227 const struct si_chunk
*sis
= &ps
->state_items
;
228 return ((state_item
*) sis
->head_elt
- state_items
+
229 (state_item
*) sis
->tail_elt
- state_items
+ sis
->total_size
) % max
;
233 parse_state_comparator (const parse_state
*ps1
, const parse_state
*ps2
)
235 const struct si_chunk
*sis1
= &ps1
->state_items
;
236 const struct si_chunk
*sis2
= &ps2
->state_items
;
237 return sis1
->head_elt
== sis2
->head_elt
238 && sis1
->tail_elt
== sis2
->tail_elt
239 && sis1
->total_size
== sis2
->total_size
;
244 parse_state_completed_steps (const parse_state
*ps
, int *shifts
, int *productions
)
246 // traverse to the root parse_state,
247 // which will have a list of all completed productions.
248 const parse_state
*root_ps
= ps
;
249 while (root_ps
->parent
)
250 root_ps
= root_ps
->parent
;
252 state_item_list sis
= root_ps
->state_items
.contents
;
255 state_item
*last
= NULL
;
256 state_item
*next
= NULL
;
257 for (gl_list_iterator_t it
= gl_list_iterator (sis
);
258 state_item_list_next (&it
, &next
);
261 if (last
&& last
->state
== next
->state
)
265 *productions
= count
;
266 *shifts
= root_ps
->state_items
.total_size
- count
;
269 typedef void (*chunk_append_fn
) (gl_list_t
, const void *);
271 // A version of gl_list_add_last which has the chunk_append_fn
274 list_add_last (gl_list_t list
, const void *elt
)
276 gl_list_add_last (list
, elt
);
279 // takes an array of n gl_lists and flattens them into two list
280 // based off of the index split
282 list_flatten_and_split (gl_list_t
*list
, gl_list_t
*rets
, int split
, int n
,
283 chunk_append_fn append_fn
)
287 for (int i
= 0; i
< n
; ++i
)
289 const void *p
= NULL
;
290 gl_list_iterator_t it
= gl_list_iterator (list
[i
]);
291 while (gl_list_iterator_next (&it
, &p
, NULL
))
294 gl_list_t l
= (gl_list_t
) p
;
295 const void *si
= NULL
;
296 gl_list_iterator_t it2
= gl_list_iterator (l
);
297 while (gl_list_iterator_next (&it2
, &si
, NULL
))
299 if (ret_index
++ == split
)
302 append_fn (rets
[ret_array
], si
);
304 gl_list_iterator_free (&it2
);
306 gl_list_iterator_free (&it
);
310 static parse_state_list
311 parse_state_list_new (void)
313 return gl_list_create_empty (GL_LINKED_LIST
, NULL
, NULL
,
314 (gl_listelement_dispose_fn
)free_parse_state
,
319 parse_state_list_append (parse_state_list pl
, parse_state
*ps
)
321 parse_state_retain (ps
);
322 gl_list_add_last (pl
, ps
);
325 // Emulates a reduction on a parse state by popping some amount of
326 // derivations and state_items off of the parse_state and returning
327 // the result in ret. Returns the derivation of what's popped.
328 static derivation_list
329 parser_pop (parse_state
*ps
, int deriv_index
,
330 int si_index
, parse_state
*ret
)
332 // prepend sis, append sis, prepend derivs, append derivs
334 for (int i
= 0; i
< 4; ++i
)
335 chunks
[i
] = gl_list_create_empty (GL_LINKED_LIST
, NULL
, NULL
, NULL
, true);
336 for (parse_state
*pn
= ps
; pn
!= NULL
; pn
= pn
->parent
)
339 gl_list_add_last (chunks
[0], pn
->state_items
.contents
);
340 gl_list_add_last (chunks
[2], pn
->derivs
.contents
);
344 gl_list_add_first (chunks
[1], pn
->state_items
.contents
);
345 gl_list_add_first (chunks
[3], pn
->derivs
.contents
);
347 derivation_list popped_derivs
= derivation_list_new ();
348 gl_list_t ret_chunks
[4] = { ret
->state_items
.contents
, NULL
,
349 ret
->derivs
.contents
, popped_derivs
351 list_flatten_and_split (chunks
, ret_chunks
, si_index
, 2,
353 list_flatten_and_split (chunks
+ 2, ret_chunks
+ 2, deriv_index
, 2,
354 (chunk_append_fn
)derivation_list_append
);
355 size_t s_size
= gl_list_size (ret
->state_items
.contents
);
356 ret
->state_items
.total_size
= s_size
;
359 ret
->state_items
.tail_elt
= gl_list_get_at (ret
->state_items
.contents
,
361 ret
->state_items
.head_elt
=
362 gl_list_get_at (ret
->state_items
.contents
, 0);
366 ret
->state_items
.tail_elt
= NULL
;
367 ret
->state_items
.head_elt
= NULL
;
369 size_t d_size
= gl_list_size (ret
->derivs
.contents
);
370 ret
->derivs
.total_size
= d_size
;
373 ret
->derivs
.tail_elt
= gl_list_get_at (ret
->derivs
.contents
,
375 ret
->derivs
.head_elt
= gl_list_get_at (ret
->derivs
.contents
, 0);
379 ret
->derivs
.tail_elt
= NULL
;
380 ret
->derivs
.head_elt
= NULL
;
382 for (int i
= 0; i
< 4; ++i
)
383 gl_list_free (chunks
[i
]);
384 return popped_derivs
;
388 parse_state_lists (parse_state
*ps
, state_item_list
*sitems
,
389 derivation_list
*derivs
)
391 parse_state
*temp
= empty_parse_state ();
392 size_t si_size
= ps
->state_items
.total_size
;
393 size_t deriv_size
= ps
->derivs
.total_size
;
394 derivation_list dl
= parser_pop (ps
, si_size
, deriv_size
, temp
);
395 *sitems
= temp
->state_items
.contents
;
396 *derivs
= temp
->derivs
.contents
;
397 // prevent the return lists from being freed
398 temp
->state_items
.contents
= NULL
;
399 temp
->derivs
.contents
= NULL
;
400 free_parse_state (temp
);
401 derivation_list_free (dl
);
405 * Compute the parse states that result from taking a transition on
406 * nullable symbols whenever possible from the given state_item.
409 nullable_closure (parse_state
*ps
, state_item
*si
, parse_state_list state_list
)
411 parse_state
*current_ps
= ps
;
412 state_item_number prev_sin
= si
- state_items
;
413 for (state_item_number sin
= si
->trans
; sin
!= -1;
414 prev_sin
= sin
, sin
= state_items
[sin
].trans
)
416 state_item
*psi
= &state_items
[prev_sin
];
417 symbol_number sp
= item_number_as_symbol_number (*psi
->item
);
418 if (ISTOKEN (sp
) || !nullable
[sp
- ntokens
])
421 state_item
*nsi
= &state_items
[sin
];
422 current_ps
= copy_parse_state (false, current_ps
);
423 ps_si_append (current_ps
, nsi
);
424 ps_derivs_append (current_ps
,
425 derivation_new (sp
, derivation_list_new (),
426 state_item_rule (nsi
)));
427 parse_state_list_append (state_list
, current_ps
);
432 simulate_transition (parse_state
*ps
)
434 const state_item
*si
= ps
->state_items
.tail_elt
;
435 symbol_number sym
= item_number_as_symbol_number (*si
->item
);
436 // Transition on the same next symbol, taking nullable
437 // symbols into account.
438 parse_state_list result
= parse_state_list_new ();
439 state_item_number si_next
= si
->trans
;
440 // Check for disabled transition, shouldn't happen as any
441 // state_items that lead to these should be disabled.
444 parse_state
*next_ps
= copy_parse_state (false, ps
);
445 ps_si_append (next_ps
, &state_items
[si_next
]);
446 ps_derivs_append (next_ps
, derivation_new_leaf (sym
));
447 parse_state_list_append (result
, next_ps
);
449 nullable_closure (next_ps
, &state_items
[si_next
], result
);
454 * Determine if the given symbols are equal or their first sets
458 compatible (symbol_number sym1
, symbol_number sym2
)
462 if (ISTOKEN (sym1
) && ISVAR (sym2
))
463 return bitset_test (FIRSTS (sym2
), sym1
);
464 else if (ISVAR (sym1
) && ISTOKEN (sym2
))
465 return bitset_test (FIRSTS (sym1
), sym2
);
466 else if (ISVAR (sym1
) && ISVAR (sym2
))
467 return !bitset_disjoint_p (FIRSTS (sym1
), FIRSTS (sym2
));
473 simulate_production (parse_state
*ps
, symbol_number compat_sym
)
475 parse_state_list result
= parse_state_list_new ();
476 const state_item
*si
= parse_state_tail (ps
);
479 bitset_iterator biter
;
480 state_item_number sin
;
481 BITSET_FOR_EACH (biter
, si
->prods
, sin
, 0)
483 // Take production step only if lhs is not nullable and
484 // if first rhs symbol is compatible with compat_sym
485 state_item
*next
= &state_items
[sin
];
486 item_number
*itm1
= next
->item
;
487 if (!compatible (*itm1
, compat_sym
) || !production_allowed (si
, next
))
489 parse_state
*next_ps
= copy_parse_state (false, ps
);
490 ps_si_append (next_ps
, next
);
491 parse_state_list_append (result
, next_ps
);
492 if (next_ps
->depth
>= 0)
494 nullable_closure (next_ps
, next
, result
);
500 // simulates a reduction on the given parse state, conflict_item is the
501 // item associated with ps's conflict. symbol_set is a lookahead set this
502 // reduction must be compatible with
504 simulate_reduction (parse_state
*ps
, int rule_len
, bitset symbol_set
)
506 parse_state_list result
= parse_state_list_new ();
508 int s_size
= ps
->state_items
.total_size
;
509 int d_size
= ps
->derivs
.total_size
;
511 d_size
--; // account for dot
512 parse_state
*new_root
= empty_parse_state ();
513 derivation_list popped_derivs
=
514 parser_pop (ps
, d_size
- rule_len
,
515 s_size
- rule_len
- 1, new_root
);
518 state_item
*si
= (state_item
*) ps
->state_items
.tail_elt
;
519 const rule
*r
= item_rule (si
->item
);
520 symbol_number lhs
= r
->lhs
->number
;
521 derivation
*deriv
= derivation_new (lhs
, popped_derivs
, state_item_rule (si
));
523 ps_derivs_append (new_root
, deriv
);
525 if (s_size
!= rule_len
+ 1)
527 state_item
*tail
= (state_item
*) new_root
->state_items
.tail_elt
;
528 ps_si_append (new_root
, &state_items
[tail
->trans
]);
529 parse_state_list_append (result
, new_root
);
533 // The head state_item is a production item, so we need to prepend
534 // with possible source state-items.
535 const state_item
*head
= ps
->state_items
.head_elt
;
536 state_item_list prev
= lssi_reverse_production (head
, symbol_set
);
537 // TODO: better understand what causes this case.
538 if (gl_list_size (prev
) == 0)
540 // new_root needs to have an RC of 1 to be freed correctly here.
541 parse_state_retain (new_root
);
542 free_parse_state (new_root
);
546 state_item
*psis
= NULL
;
547 for (gl_list_iterator_t it
= gl_list_iterator (prev
);
548 state_item_list_next (&it
, &psis
);
551 // Prepend the result from the reverse production.
552 parse_state
*copy
= copy_parse_state (true, new_root
);
553 ps_si_prepend (copy
, psis
);
555 // Append the left hand side to the end of the parser state
556 copy
= copy_parse_state (false, copy
);
557 struct si_chunk
*sis
= ©
->state_items
;
558 const state_item
*tail
= sis
->tail_elt
;
559 ps_si_append (copy
, &state_items
[tail
->trans
]);
560 parse_state_list_append (result
, copy
);
561 nullable_closure (copy
, (state_item
*) sis
->tail_elt
, result
);
570 parser_prepend (parse_state
*ps
)
572 parse_state_list res
= parse_state_list_new ();
573 const state_item
*head
= ps
->state_items
.head_elt
;
574 symbol_number prepend_sym
=
575 item_number_as_symbol_number (*(head
->item
- 1));
576 bitset_iterator biter
;
577 state_item_number sin
;
578 BITSET_FOR_EACH (biter
, head
->revs
, sin
, 0)
580 parse_state
*copy
= copy_parse_state (true, ps
);
581 ps_si_prepend (copy
, &state_items
[sin
]);
582 if (SI_TRANSITION (head
))
583 ps_derivs_prepend (copy
, derivation_new_leaf (prepend_sym
));
584 parse_state_list_append (res
, copy
);
590 print_parse_state (parse_state
*ps
)
593 fprintf (out
, "(size %zu depth %d rc %d)\n",
594 ps
->state_items
.total_size
, ps
->depth
, ps
->reference_count
);
595 state_item_print (ps
->state_items
.head_elt
, out
, "");
596 state_item_print (ps
->state_items
.tail_elt
, out
, "");
597 if (ps
->derivs
.total_size
> 0)
598 derivation_print (ps
->derivs
.head_elt
, out
, "");