cex: improve readability of the subsections
[bison.git] / src / counterexample.c
blob7df75d6481dd3818d24460f6e8b84436b1e01570
1 /* Conflict counterexample generation
3 Copyright (C) 2020 Free Software Foundation, Inc.
5 This file is part of Bison, the GNU Compiler Compiler.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 #include <config.h>
22 #include "counterexample.h"
24 #include "system.h"
26 #include <gl_linked_list.h>
27 #include <gl_rbtreehash_list.h>
28 #include <hash.h>
29 #include <stdlib.h>
30 #include <textstyle.h>
31 #include <time.h>
33 #include "closure.h"
34 #include "complain.h"
35 #include "derivation.h"
36 #include "getargs.h"
37 #include "gram.h"
38 #include "lalr.h"
39 #include "lssi.h"
40 #include "nullable.h"
41 #include "parse-simulation.h"
44 #define TIME_LIMIT_ENFORCED true
45 /** If set to false, only consider the states on the shortest
46 * lookahead-sensitive path when constructing a unifying counterexample. */
47 #define EXTENDED_SEARCH false
49 /* costs for making various steps in a search */
50 #define PRODUCTION_COST 50
51 #define REDUCE_COST 1
52 #define SHIFT_COST 1
53 #define UNSHIFT_COST 1
54 #define EXTENDED_COST 10000
56 /** The time limit before printing an assurance message to the user to
57 * indicate that the search is still running. */
58 #define ASSURANCE_LIMIT 2.0f
60 /* The time limit before giving up looking for unifying counterexample. */
61 #define TIME_LIMIT 5.0f
63 #define CUMULATIVE_TIME_LIMIT 120.0f
65 // This is the fastest way to get the tail node from the gl_list API.
66 static gl_list_node_t
67 list_get_end (gl_list_t list)
69 gl_list_node_t sentinel = gl_list_add_last (list, NULL);
70 gl_list_node_t res = gl_list_previous_node (list, sentinel);
71 gl_list_remove_node (list, sentinel);
72 return res;
75 typedef struct
77 derivation *d1;
78 derivation *d2;
79 bool shift_reduce;
80 bool unifying;
81 bool timeout;
82 } counterexample;
84 static counterexample *
85 new_counterexample (derivation *d1, derivation *d2,
86 bool shift_reduce,
87 bool u, bool t)
89 counterexample *res = xmalloc (sizeof *res);
90 res->shift_reduce = shift_reduce;
91 if (shift_reduce)
93 // Display the shift first.
94 res->d1 = d2;
95 res->d2 = d1;
97 else
99 res->d1 = d1;
100 res->d2 = d2;
102 res->unifying = u;
103 res->timeout = t;
104 return res;
107 static void
108 free_counterexample (counterexample *cex)
110 derivation_free (cex->d1);
111 derivation_free (cex->d2);
112 free (cex);
115 static void
116 print_counterexample (const counterexample *cex, FILE *out, const char *prefix)
118 const bool flat = getenv ("YYFLAT");
119 fprintf (out, flat ? " %s%-20s " : " %s%s: ",
120 prefix, cex->unifying ? _("Example") : _("First example"));
121 derivation_print_leaves (cex->d1, out);
122 fprintf (out, flat ? " %s%-20s " : " %s%s",
123 prefix, cex->shift_reduce ? _("Shift derivation") : _("First derivation"));
124 derivation_print (cex->d1, out, prefix);
126 // If we output to the terminal (via stderr) and we have color
127 // support, display unifying examples a second time, as color allows
128 // to see the differences.
129 if (!cex->unifying || is_styled (stderr))
131 fprintf (out, flat ? " %s%-20s " : " %s%s: ",
132 prefix, cex->unifying ? _("Example") : _("Second example"));
133 derivation_print_leaves (cex->d2, out);
135 fprintf (out, flat ? " %s%-20s " : " %s%s",
136 prefix, cex->shift_reduce ? _("Reduce derivation") : _("Second derivation"));
137 derivation_print (cex->d2, out, prefix);
139 if (out != stderr)
140 putc ('\n', out);
145 * NON UNIFYING COUNTER EXAMPLES
149 // Search node for BFS on state items
150 struct si_bfs_node;
151 typedef struct si_bfs_node
153 state_item_number si;
154 struct si_bfs_node *parent;
155 int reference_count;
156 } si_bfs_node;
158 static si_bfs_node *
159 si_bfs_new (state_item_number si, si_bfs_node *parent)
161 si_bfs_node *res = xcalloc (1, sizeof *res);
162 res->si = si;
163 res->parent = parent;
164 res->reference_count = 1;
165 if (parent)
166 ++parent->reference_count;
167 return res;
170 static bool
171 si_bfs_contains (const si_bfs_node *n, state_item_number sin)
173 for (const si_bfs_node *search = n; search != NULL; search = search->parent)
174 if (search->si == sin)
175 return true;
176 return false;
179 static void
180 si_bfs_free (si_bfs_node *n)
182 if (n == NULL)
183 return;
184 --n->reference_count;
185 if (n->reference_count == 0)
187 si_bfs_free (n->parent);
188 free (n);
192 typedef gl_list_t si_bfs_node_list;
195 * start is a state_item such that conflict_sym is an element of FIRSTS of the
196 * nonterminal after the dot in start. Because of this, we should be able to
197 * find a production item starting with conflict_sym by only searching productions
198 * of the nonterminal and shifting over nullable nonterminals
200 * this returns the derivation of the productions that lead to conflict_sym
202 static inline derivation_list
203 expand_to_conflict (state_item_number start, symbol_number conflict_sym)
205 si_bfs_node *init = si_bfs_new (start, NULL);
207 si_bfs_node_list queue
208 = gl_list_create (GL_LINKED_LIST, NULL, NULL,
209 (gl_listelement_dispose_fn) si_bfs_free,
210 true, 1, (const void **) &init);
211 si_bfs_node *node = NULL;
212 // breadth-first search for a path of productions to the conflict symbol
213 while (gl_list_size (queue) > 0)
215 node = (si_bfs_node *) gl_list_get_at (queue, 0);
216 state_item *silast = &state_items[node->si];
217 symbol_number sym = item_number_as_symbol_number (*silast->item);
218 if (sym == conflict_sym)
219 break;
220 if (ISVAR (sym))
222 // add each production to the search
223 bitset_iterator biter;
224 state_item_number sin;
225 bitset sib = silast->prods;
226 BITSET_FOR_EACH (biter, sib, sin, 0)
228 // ignore productions already in the path
229 if (si_bfs_contains (node, sin))
230 continue;
231 si_bfs_node *next = si_bfs_new (sin, node);
232 gl_list_add_last (queue, next);
234 // for nullable nonterminals, add its goto to the search
235 if (nullable[sym - ntokens])
237 si_bfs_node *next = si_bfs_new (silast->trans, node);
238 gl_list_add_last (queue, next);
241 gl_list_remove_at (queue, 0);
243 if (gl_list_size (queue) == 0)
245 gl_list_free (queue);
246 fputs ("Error expanding derivation\n", stderr);
247 abort ();
250 derivation *dinit = derivation_new_leaf (conflict_sym);
251 derivation_list result = derivation_list_new ();
252 derivation_list_append (result, dinit);
253 // iterate backwards through the generated path to create a derivation
254 // of the conflict symbol containing derivations of each production step.
256 for (si_bfs_node *n = node; n != NULL; n = n->parent)
258 state_item *si = &state_items[n->si];
259 item_number *pos = si->item;
260 if (SI_PRODUCTION (si))
262 item_number *i = NULL;
263 for (i = pos + 1; !item_number_is_rule_number (*i); ++i)
264 derivation_list_append (result, derivation_new_leaf (*i));
265 symbol_number lhs =
266 rules[item_number_as_rule_number (*i)].lhs->number;
267 derivation *deriv = derivation_new (lhs, result);
268 result = derivation_list_new ();
269 derivation_list_append (result, deriv);
271 else
273 symbol_number sym = item_number_as_symbol_number (*(pos - 1));
274 derivation *deriv = derivation_new_leaf (sym);
275 derivation_list_prepend (result, deriv);
278 gl_list_free (queue);
279 derivation_free ((derivation*)gl_list_get_at (result, 0));
280 gl_list_remove_at (result, 0);
281 return result;
285 * Complete derivations for any pending productions in the given
286 * sequence of state-items. For example, the input could be a path
287 * of states that would give us the following input:
288 * Stmt ::= [lval ::= [VAR] '=' e ::=[ e::=['0'] '+' •
289 * So to complete the derivation of Stmt, we need an output like:
290 * Stmt ::= [lval ::= [VAR] '=' e ::=[ e::=['0'] '+' • e ] ';' ]
292 static derivation *
293 complete_diverging_example (symbol_number conflict_sym,
294 state_item_list path, derivation_list derivs)
296 // The idea is to transfer each pending symbol on the productions
297 // associated with the given StateItems to the resulting derivation.
298 derivation_list result = derivation_list_new ();
299 bool lookahead_required = false;
300 if (!derivs)
302 derivs = derivation_list_new ();
303 gl_list_add_last (result, derivation_dot ());
304 lookahead_required = true;
307 gl_list_node_t deriv = list_get_end (derivs);
309 // We go backwards through the path to create the derivation tree bottom-up.
310 // Effectively this loops through each production once, and generates a
311 // derivation of the left hand side by appending all of the rhs symbols.
312 // this becomes the derivation of the nonterminal after the dot in the
313 // next production, and all of the other symbols of the rule are added as normal.
314 for (gl_list_node_t state_node = list_get_end (path);
315 state_node != NULL;
316 state_node = gl_list_previous_node (path, state_node))
318 state_item *si = (state_item *) gl_list_node_value (path, state_node);
319 item_number *item = si->item;
320 item_number pos = *item;
321 // symbols after dot
322 if (gl_list_size (result) == 1 && !item_number_is_rule_number (pos)
323 && gl_list_get_at (result, 0) == derivation_dot ())
325 derivation_list_append (result,
326 derivation_new_leaf (item_number_as_symbol_number (pos)));
327 lookahead_required = false;
329 item_number *i = item;
330 // go through each symbol after the dot in the current rule, and
331 // add each symbol to its derivation.
332 for (state_item_number nsi = si - state_items;
333 !item_number_is_rule_number (*i);
334 ++i, nsi = state_items[nsi].trans)
336 // if the item is a reduction, we could skip to the wrong rule
337 // by starting at i + 1, so this continue is necessary
338 if (i == item)
339 continue;
340 symbol_number sym = item_number_as_symbol_number (*i);
341 if (!lookahead_required || sym == conflict_sym)
343 derivation_list_append (result, derivation_new_leaf (sym));
344 lookahead_required = false;
345 continue;
347 // Since PATH is a path to the conflict state-item,
348 // for a reduce conflict item, we will want to have a derivation
349 // that shows the conflict symbol from its lookahead set being used.
351 // Since reductions have the dot at the end of the item,
352 // this loop will be first executed on the last item in the path
353 // that's not a reduction. When that happens,
354 // the symbol after the dot should be a nonterminal,
355 // and we can look through successive nullable nonterminals
356 // for one with the conflict symbol in its first set.
357 if (bitset_test (FIRSTS (sym), conflict_sym))
359 lookahead_required = false;
360 derivation_list next_derivs =
361 expand_to_conflict (nsi, conflict_sym);
362 derivation *d = NULL;
363 for (gl_list_iterator_t it = gl_list_iterator (next_derivs);
364 derivation_list_next (&it, &d);)
365 derivation_list_append (result, d);
366 i += gl_list_size (next_derivs) - 1;
367 derivation_list_free (next_derivs);
369 else if (nullable[sym - ntokens])
371 derivation *d = derivation_new_leaf (sym);
372 derivation_list_append (result, d);
374 else
376 // We found a path to the conflict item, and despite it
377 // having the conflict symbol in its lookahead, no example
378 // containing the symbol after the conflict item
379 // can be found.
380 derivation_list_append (result, derivation_new_leaf (1));
381 lookahead_required = false;
384 const rule *r = &rules[item_number_as_rule_number (*i)];
385 // add derivations for symbols before dot
386 for (i = item - 1; !item_number_is_rule_number (*i) && i >= ritem; i--)
388 gl_list_node_t p = gl_list_previous_node (path, state_node);
389 if (p)
390 state_node = p;
391 if (deriv)
393 const void *tmp_deriv = gl_list_node_value (derivs, deriv);
394 deriv = gl_list_previous_node (derivs, deriv);
395 derivation_list_prepend (result, (derivation*)tmp_deriv);
397 else
398 derivation_list_prepend (result, derivation_new_leaf (*i));
400 // completing the derivation
401 derivation *new_deriv = derivation_new (r->lhs->number, result);
402 result = derivation_list_new ();
403 derivation_list_append (result, new_deriv);
405 derivation *res = (derivation *) gl_list_get_at (result, 0);
406 derivation_retain (res);
407 derivation_list_free (result);
408 derivation_list_free (derivs);
409 return res;
412 /* Iterate backwards through the shifts of the path in the reduce
413 conflict, and find a path of shifts from the shift conflict that
414 goes through the same states. */
415 static state_item_list
416 nonunifying_shift_path (state_item_list reduce_path, state_item *shift_conflict)
418 gl_list_node_t tmp = gl_list_add_last (reduce_path, NULL);
419 gl_list_node_t next_node = gl_list_previous_node (reduce_path, tmp);
420 gl_list_node_t node = gl_list_previous_node (reduce_path, next_node);
421 gl_list_remove_node (reduce_path, tmp);
422 state_item *si = shift_conflict;
423 state_item_list result =
424 gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
425 // FIXME: bool paths_merged;
426 for (; node != NULL; next_node = node,
427 node = gl_list_previous_node (reduce_path, node))
429 state_item *refsi =
430 (state_item *) gl_list_node_value (reduce_path, node);
431 state_item *nextrefsi =
432 (state_item *) gl_list_node_value (reduce_path, next_node);
433 if (nextrefsi == si)
435 gl_list_add_first (result, refsi);
436 si = refsi;
437 continue;
439 // skip reduction items
440 if (nextrefsi->item != refsi->item + 1 && refsi->item != ritem)
441 continue;
443 // bfs to find a shift to the right state
444 si_bfs_node *init = si_bfs_new (si - state_items, NULL);
445 si_bfs_node_list queue
446 = gl_list_create (GL_LINKED_LIST, NULL, NULL,
447 (gl_listelement_dispose_fn) si_bfs_free,
448 true, 1, (const void **) &init);
449 si_bfs_node *sis = NULL;
450 state_item *prevsi = NULL;
451 while (gl_list_size (queue) > 0)
453 sis = (si_bfs_node *) gl_list_get_at (queue, 0);
454 // if we end up in the start state, the shift couldn't be found.
455 if (sis->si == 0)
456 break;
458 state_item *search_si = &state_items[sis->si];
459 // if the current state-item is a production item,
460 // its reverse production items get added to the queue.
461 // Otherwise, look for a reverse transition to the target state.
462 bitset rsi = search_si->revs;
463 bitset_iterator biter;
464 state_item_number sin;
465 BITSET_FOR_EACH (biter, rsi, sin, 0)
467 prevsi = &state_items[sin];
468 if (SI_TRANSITION (search_si))
470 if (prevsi->state == refsi->state)
471 goto search_end;
473 else if (!si_bfs_contains (sis, sin))
475 si_bfs_node *prevsis = si_bfs_new (sin, sis);
476 gl_list_add_last (queue, prevsis);
479 gl_list_remove_at (queue, 0);
481 search_end:
482 // prepend path to shift we found
483 if (sis)
485 gl_list_node_t ln = gl_list_add_first (result, &state_items[sis->si]);
486 for (si_bfs_node *n = sis->parent; n; n = n->parent)
487 ln = gl_list_add_after (result, ln, &state_items[n->si]);
490 si = prevsi;
491 gl_list_free (queue);
493 if (trace_flag & trace_cex)
495 fputs ("SHIFT ITEM PATH:\n", stderr);
496 state_item *sip = NULL;
497 for (gl_list_iterator_t it = gl_list_iterator (result);
498 state_item_list_next (&it, &sip);
500 print_state_item (sip, stderr, "");
502 return result;
507 * Construct a nonunifying counterexample from the shortest
508 * lookahead-sensitive path.
510 static counterexample *
511 example_from_path (bool shift_reduce,
512 state_item_number itm2,
513 state_item_list shortest_path, symbol_number next_sym)
515 derivation *deriv1 =
516 complete_diverging_example (next_sym, shortest_path, NULL);
517 state_item_list path_2
518 = shift_reduce
519 ? nonunifying_shift_path (shortest_path, &state_items [itm2])
520 : shortest_path_from_start (itm2, next_sym);
521 derivation *deriv2 = complete_diverging_example (next_sym, path_2, NULL);
522 gl_list_free (path_2);
523 return new_counterexample (deriv1, deriv2, shift_reduce, false, true);
528 * UNIFYING COUNTER EXAMPLES
532 /* A search state keeps track of two parser simulations,
533 * one starting at each conflict. Complexity is a metric
534 * which sums different parser actions with varying weights.
536 typedef struct
538 parse_state *states[2];
539 int complexity;
540 } search_state;
542 static search_state *
543 initial_search_state (state_item *conflict1, state_item *conflict2)
545 search_state *res = xmalloc (sizeof *res);
546 res->states[0] = new_parse_state (conflict1);
547 res->states[1] = new_parse_state (conflict2);
548 parse_state_retain (res->states[0]);
549 parse_state_retain (res->states[1]);
550 res->complexity = 0;
551 return res;
554 static search_state *
555 new_search_state (parse_state *ps1, parse_state *ps2, int complexity)
557 search_state *res = xmalloc (sizeof *res);
558 res->states[0] = ps1;
559 res->states[1] = ps2;
560 parse_state_retain (res->states[0]);
561 parse_state_retain (res->states[1]);
562 res->complexity = complexity;
563 return res;
566 static search_state *
567 copy_search_state (search_state *parent)
569 search_state *res = xmalloc (sizeof *res);
570 *res = *parent;
571 parse_state_retain (res->states[0]);
572 parse_state_retain (res->states[1]);
573 return res;
576 static void
577 search_state_free_children (search_state *ss)
579 free_parse_state (ss->states[0]);
580 free_parse_state (ss->states[1]);
583 static void
584 search_state_free (search_state *ss)
586 if (ss == NULL)
587 return;
588 search_state_free_children (ss);
589 free (ss);
592 /* For debugging traces. */
593 static void
594 search_state_print (search_state *ss)
596 fputs ("CONFLICT 1 ", stderr);
597 print_parse_state (ss->states[0]);
598 fputs ("CONFLICT 2 ", stderr);
599 print_parse_state (ss->states[1]);
600 putc ('\n', stderr);
603 typedef gl_list_t search_state_list;
605 static inline bool
606 search_state_list_next (gl_list_iterator_t *it, search_state **ss)
608 const void *p = NULL;
609 bool res = gl_list_iterator_next (it, &p, NULL);
610 if (res)
611 *ss = (search_state*) p;
612 else
613 gl_list_iterator_free (it);
614 return res;
618 * When a search state is copied, this is used to
619 * directly set one of the parse states
621 static inline void
622 ss_set_parse_state (search_state *ss, int idx, parse_state *ps)
624 free_parse_state (ss->states[idx]);
625 ss->states[idx] = ps;
626 parse_state_retain (ps);
630 * Construct a nonunifying example from a search state
631 * which has its parse states unified at the beginning
632 * but not the end of the example.
634 static counterexample *
635 complete_diverging_examples (search_state *ss,
636 symbol_number next_sym,
637 bool shift_reduce)
639 derivation *new_derivs[2];
640 for (int i = 0; i < 2; ++i)
642 state_item_list sitems;
643 derivation_list derivs;
644 parse_state_lists (ss->states[i], &sitems, &derivs);
645 new_derivs[i] = complete_diverging_example (next_sym, sitems, derivs);
646 gl_list_free (sitems);
648 return new_counterexample (new_derivs[0], new_derivs[1],
649 shift_reduce, false, true);
653 * Search states are stored in bundles with those that
654 * share the same complexity. This is so the priority
655 * queue takes less overhead.
657 typedef struct
659 search_state_list states;
660 int complexity;
661 } search_state_bundle;
663 static void
664 ssb_free (search_state_bundle *ssb)
666 gl_list_free (ssb->states);
667 free (ssb);
670 static size_t
671 ssb_hasher (search_state_bundle *ssb)
673 return ssb->complexity;
676 static int
677 ssb_comp (const search_state_bundle *s1, const search_state_bundle *s2)
679 return s1->complexity - s2->complexity;
682 static bool
683 ssb_equals (const search_state_bundle *s1, const search_state_bundle *s2)
685 return s1->complexity == s2->complexity;
688 typedef gl_list_t ssb_list;
690 static size_t
691 visited_hasher (const search_state *ss, size_t max)
693 return (parse_state_hasher (ss->states[0], max)
694 + parse_state_hasher (ss->states[1], max)) % max;
697 static bool
698 visited_comparator (const search_state *ss1, const search_state *ss2)
700 return parse_state_comparator (ss1->states[0], ss2->states[0])
701 && parse_state_comparator (ss1->states[1], ss2->states[1]);
704 /* Priority queue for search states with minimal complexity. */
705 static ssb_list ssb_queue;
706 static Hash_table *visited;
707 /* The set of parser states on the shortest lookahead-sensitive path. */
708 static bitset scp_set = NULL;
709 /* The set of parser states used for the conflict reduction rule. */
710 static bitset rpp_set = NULL;
712 static void
713 ssb_append (search_state *ss)
715 if (hash_lookup (visited, ss))
717 search_state_free (ss);
718 return;
720 hash_xinsert (visited, ss);
721 // if states are only referenced by the visited set,
722 // their contents should be freed as we only need
723 // the metadata necessary to compute a hash.
724 parse_state_free_contents_early (ss->states[0]);
725 parse_state_free_contents_early (ss->states[1]);
726 parse_state_retain (ss->states[0]);
727 parse_state_retain (ss->states[1]);
728 search_state_bundle *ssb = xmalloc (sizeof *ssb);
729 ssb->complexity = ss->complexity;
730 gl_list_node_t n = gl_list_search (ssb_queue, ssb);
731 if (!n)
733 ssb->states =
734 gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
735 (gl_listelement_dispose_fn)search_state_free_children,
736 true);
737 gl_sortedlist_add (ssb_queue, (gl_listelement_compar_fn) ssb_comp, ssb);
739 else
741 free (ssb);
742 ssb = (search_state_bundle *) gl_list_node_value (ssb_queue, n);
744 gl_list_add_last (ssb->states, ss);
748 * The following functions perform various actions on parse states
749 * and assign complexities to the newly generated search states.
751 static void
752 production_step (search_state *ss, int parser_state)
754 const state_item *other_si = parse_state_tail (ss->states[1 - parser_state]);
755 symbol_number other_sym = item_number_as_symbol_number (*other_si->item);
756 parse_state_list prods =
757 simulate_production (ss->states[parser_state], other_sym);
758 int complexity = ss->complexity + PRODUCTION_COST;
760 parse_state *ps = NULL;
761 for (gl_list_iterator_t it = gl_list_iterator (prods);
762 parse_state_list_next (&it, &ps);
765 search_state *copy = copy_search_state (ss);
766 ss_set_parse_state (copy, parser_state, ps);
767 copy->complexity = complexity;
768 ssb_append (copy);
770 gl_list_free (prods);
773 static inline int
774 reduction_cost (const parse_state *ps)
776 int shifts;
777 int productions;
778 parse_state_completed_steps (ps, &shifts, &productions);
779 return SHIFT_COST * shifts + PRODUCTION_COST * productions;
782 static search_state_list
783 reduction_step (search_state *ss, const item_number *conflict_item,
784 int parser_state, int rule_len)
786 (void) conflict_item; // FIXME: Unused
787 search_state_list result =
788 gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, 1);
790 parse_state *ps = ss->states[parser_state];
791 const state_item *si = parse_state_tail (ps);
792 bitset symbol_set = si->lookahead;
793 parse_state *other = ss->states[1 - parser_state];
794 const state_item *other_si = parse_state_tail (other);
795 // if the other state can transition on a symbol,
796 // the reduction needs to have that symbol in its lookahead
797 if (item_number_is_symbol_number (*other_si->item))
799 symbol_number other_sym =
800 item_number_as_symbol_number (*other_si->item);
801 if (!intersect_symbol (other_sym, symbol_set))
802 return result;
803 symbol_set = bitset_create (nsyms, BITSET_FIXED);
804 bitset_set (symbol_set, other_sym);
807 // FIXME: search_state *new_root = copy_search_state (ss);
808 parse_state_list reduced =
809 simulate_reduction (ps, rule_len, symbol_set);
810 parse_state *reduced_ps = NULL;
811 for (gl_list_iterator_t it = gl_list_iterator (reduced);
812 parse_state_list_next (&it, &reduced_ps);
815 search_state *copy = copy_search_state (ss);
816 ss_set_parse_state (copy, parser_state, reduced_ps);
817 int r_cost = reduction_cost (reduced_ps);
818 copy->complexity += r_cost + PRODUCTION_COST + 2 * SHIFT_COST;
819 gl_list_add_last (result, copy);
821 gl_list_free (reduced);
822 if (symbol_set != si->lookahead)
823 bitset_free (symbol_set);
824 return result;
828 * Attempt to prepend the given symbol to this search state, respecting
829 * the given subsequent next symbol on each path. If a reverse transition
830 * cannot be made on both states, possible reverse productions are prepended
832 static void
833 search_state_prepend (search_state *ss, symbol_number sym, bitset guide)
835 (void) sym; // FIXME: Unused.
836 const state_item *si1src = parse_state_head (ss->states[0]);
837 const state_item *si2src = parse_state_head (ss->states[1]);
839 bool prod1 = SI_PRODUCTION (si1src);
840 // If one can make a reverse transition and the other can't, only apply
841 // the reverse productions that the other state can make in an attempt to
842 // make progress.
843 if (prod1 != SI_PRODUCTION (si2src))
845 int prod_state = prod1 ? 0 : 1;
846 parse_state_list prev = parser_prepend (ss->states[prod_state]);
847 parse_state *ps = NULL;
848 for (gl_list_iterator_t iter = gl_list_iterator (prev);
849 parse_state_list_next (&iter, &ps);
852 const state_item *psi = parse_state_head (ps);
853 bool guided = bitset_test (guide, psi->state->number);
854 if (!guided && !EXTENDED_SEARCH)
855 continue;
857 search_state *copy = copy_search_state (ss);
858 ss_set_parse_state (copy, prod_state, ps);
859 copy->complexity += PRODUCTION_COST;
860 if (!guided)
861 copy->complexity += EXTENDED_COST;
862 ssb_append (copy);
864 gl_list_free (prev);
865 return;
867 // The parse state heads are either both production items or both
868 // transition items. So all prepend options will either be
869 // reverse transitions or reverse productions
870 int complexity_cost = prod1 ? PRODUCTION_COST : UNSHIFT_COST;
871 complexity_cost *= 2;
873 parse_state_list prev1 = parser_prepend (ss->states[0]);
874 parse_state_list prev2 = parser_prepend (ss->states[1]);
876 // loop through each pair of possible prepend states and append search
877 // states for each pair where the parser states correspond to the same
878 // parsed input.
879 parse_state *ps1 = NULL;
880 for (gl_list_iterator_t iter1 = gl_list_iterator (prev1);
881 parse_state_list_next (&iter1, &ps1);
884 const state_item *psi1 = parse_state_head (ps1);
885 bool guided1 = bitset_test (guide, psi1->state->number);
886 if (!guided1 && !EXTENDED_SEARCH)
887 continue;
889 parse_state *ps2 = NULL;
890 for (gl_list_iterator_t iter2 = gl_list_iterator (prev2);
891 parse_state_list_next (&iter2, &ps2);
894 const state_item *psi2 = parse_state_head (ps2);
896 bool guided2 = bitset_test (guide, psi2->state->number);
897 if (!guided2 && !EXTENDED_SEARCH)
898 continue;
899 // Only consider prepend state items that share the same state.
900 if (psi1->state != psi2->state)
901 continue;
903 int complexity = ss->complexity;
904 if (prod1)
905 complexity += PRODUCTION_COST * 2;
906 else
907 complexity += UNSHIFT_COST * 2;
908 // penalty for not being along the guide path
909 if (!guided1 || !guided2)
910 complexity += EXTENDED_COST;
911 ssb_append (new_search_state (ps1, ps2, complexity));
914 gl_list_free (prev1);
915 gl_list_free (prev2);
919 * Determine if the productions associated with the given parser items have
920 * the same prefix up to the dot.
922 static bool
923 have_common_prefix (const item_number *itm1, const item_number *itm2)
925 int i = 0;
926 for (; !item_number_is_rule_number (itm1[i]); ++i)
927 if (itm1[i] != itm2[i])
928 return false;
929 return item_number_is_rule_number (itm2[i]);
933 * The start and end locations of an item in ritem.
935 static const item_number *
936 item_rule_start (const item_number *item)
938 const item_number *res = NULL;
939 for (res = item;
940 ritem < res && item_number_is_symbol_number (*(res - 1));
941 --res)
942 continue;
943 return res;
946 static const item_number *
947 item_rule_end (const item_number *item)
949 const item_number *res = NULL;
950 for (res = item; item_number_is_symbol_number (*res); ++res)
951 continue;
952 return res;
956 * Perform the appropriate possible parser actions
957 * on a search state and add the results to the
958 * search state priority queue.
960 static inline void
961 generate_next_states (search_state *ss, state_item *conflict1,
962 state_item *conflict2)
964 // Compute the successor configurations.
965 parse_state *ps1 = ss->states[0];
966 parse_state *ps2 = ss->states[1];
967 const state_item *si1 = parse_state_tail (ps1);
968 const state_item *si2 = parse_state_tail (ps2);
969 bool si1reduce = item_number_is_rule_number (*si1->item);
970 bool si2reduce = item_number_is_rule_number (*si2->item);
971 if (!si1reduce && !si2reduce)
973 // Transition if both paths end at the same symbol
974 if (*si1->item == *si2->item)
976 int complexity = ss->complexity + 2 * SHIFT_COST;
977 parse_state_list trans1 = simulate_transition (ps1);
978 parse_state_list trans2 = simulate_transition (ps2);
979 parse_state *tps1 = NULL;
980 parse_state *tps2 = NULL;
981 for (gl_list_iterator_t it1 = gl_list_iterator (trans1);
982 parse_state_list_next (&it1, &tps1);
984 for (gl_list_iterator_t it2 = gl_list_iterator (trans2);
985 parse_state_list_next (&it2, &tps2);
987 ssb_append (new_search_state (tps1, tps2, complexity));
988 gl_list_free (trans1);
989 gl_list_free (trans2);
992 // Take production steps if possible.
993 production_step (ss, 0);
994 production_step (ss, 1);
996 // One of the states requires a reduction
997 else
999 const item_number *rhs1 = item_rule_start (si1->item);
1000 const item_number *rhe1 = item_rule_end (si1->item);
1001 int len1 = rhe1 - rhs1;
1002 int size1 = parse_state_length (ps1);
1003 bool ready1 = si1reduce && len1 < size1;
1005 const item_number *rhs2 = item_rule_start (si2->item);
1006 const item_number *rhe2 = item_rule_end (si2->item);
1007 int len2 = rhe2 - rhs2;
1008 int size2 = parse_state_length (ps2);
1009 bool ready2 = si2reduce && len2 < size2;
1010 // If there is a path ready for reduction without being
1011 // prepended further, reduce.
1012 if (ready1 && ready2)
1014 search_state_list reduced1 = reduction_step (ss, conflict1->item, 0, len1);
1015 gl_list_add_last (reduced1, ss);
1016 search_state *red1 = NULL;
1017 for (gl_list_iterator_t iter = gl_list_iterator (reduced1);
1018 search_state_list_next (&iter, &red1);
1021 search_state_list reduced2 =
1022 reduction_step (red1, conflict2->item, 1, len2);
1023 search_state *red2 = NULL;
1024 for (gl_list_iterator_t iter2 = gl_list_iterator (reduced2);
1025 search_state_list_next (&iter2, &red2);
1027 ssb_append (red2);
1028 // Avoid duplicates.
1029 if (red1 != ss)
1030 ssb_append (red1);
1031 gl_list_free (reduced2);
1033 gl_list_free (reduced1);
1035 else if (ready1)
1037 search_state_list reduced1 = reduction_step (ss, conflict1->item, 0, len1);
1038 search_state *red1 = NULL;
1039 for (gl_list_iterator_t iter = gl_list_iterator (reduced1);
1040 search_state_list_next (&iter, &red1);
1042 ssb_append (red1);
1043 gl_list_free (reduced1);
1045 else if (ready2)
1047 search_state_list reduced2 = reduction_step (ss, conflict2->item, 1, len2);
1048 search_state *red2 = NULL;
1049 for (gl_list_iterator_t iter2 = gl_list_iterator (reduced2);
1050 search_state_list_next (&iter2, &red2);
1052 ssb_append (red2);
1053 gl_list_free (reduced2);
1055 /* Both states end with a reduction, yet they don't have enough symbols
1056 * to reduce. This means symbols are missing from the beginning of the
1057 * rule, so we must prepend */
1058 else
1060 const symbol_number sym
1061 = si1reduce && !ready1
1062 ? *(rhe1 - size1)
1063 : *(rhe2 - size2);
1064 search_state_prepend (ss, sym,
1065 parse_state_depth (ss->states[0]) >= 0
1066 ? rpp_set : scp_set);
1072 * Perform the actual counterexample search,
1073 * keeps track of what stage of the search algorithm
1074 * we are at and gives the appropriate counterexample
1075 * type based off of time constraints.
1077 static counterexample *
1078 unifying_example (state_item_number itm1,
1079 state_item_number itm2,
1080 bool shift_reduce,
1081 state_item_list reduce_path, symbol_number next_sym)
1083 state_item *conflict1 = &state_items[itm1];
1084 state_item *conflict2 = &state_items[itm2];
1085 search_state *initial = initial_search_state (conflict1, conflict2);
1086 ssb_queue = gl_list_create_empty (GL_RBTREEHASH_LIST,
1087 (gl_listelement_equals_fn) ssb_equals,
1088 (gl_listelement_hashcode_fn) ssb_hasher,
1089 (gl_listelement_dispose_fn) ssb_free,
1090 false);
1091 visited =
1092 hash_initialize (32, NULL, (Hash_hasher) visited_hasher,
1093 (Hash_comparator) visited_comparator,
1094 (Hash_data_freer) search_state_free);
1095 ssb_append (initial);
1096 time_t start = time (NULL);
1097 bool assurance_printed = false;
1098 search_state *stage3result = NULL;
1099 counterexample *cex = NULL;
1100 while (gl_list_size (ssb_queue) > 0)
1102 const search_state_bundle *ssb = gl_list_get_at (ssb_queue, 0);
1104 search_state *ss = NULL;
1105 for (gl_list_iterator_t it = gl_list_iterator (ssb->states);
1106 search_state_list_next (&it, &ss);
1109 if (trace_flag & trace_cex)
1110 search_state_print (ss);
1111 // Stage 1/2 completing the rules containing the conflicts
1112 parse_state *ps1 = ss->states[0];
1113 parse_state *ps2 = ss->states[1];
1114 if (parse_state_depth (ps1) < 0 && parse_state_depth (ps2) < 0)
1116 // Stage 3: reduce and shift conflict items completed.
1117 const state_item *si1src = parse_state_head (ps1);
1118 const state_item *si2src = parse_state_head (ps2);
1119 if (item_rule (si1src->item)->lhs == item_rule (si2src->item)->lhs
1120 && have_common_prefix (si1src->item, si2src->item))
1122 // Stage 4: both paths share a prefix
1123 derivation *d1 = parse_state_derivation (ps1);
1124 derivation *d2 = parse_state_derivation (ps2);
1125 if (parse_state_derivation_completed (ps1)
1126 && parse_state_derivation_completed (ps2))
1128 // Once we have two derivations for the same symbol,
1129 // we've found a unifying counterexample.
1130 cex = new_counterexample (d1, d2, shift_reduce, true, false);
1131 derivation_retain (d1);
1132 derivation_retain (d2);
1133 goto cex_search_end;
1135 if (!stage3result)
1136 stage3result = copy_search_state (ss);
1139 if (TIME_LIMIT_ENFORCED)
1141 float time_passed = difftime (time (NULL), start);
1142 if (!assurance_printed && time_passed > ASSURANCE_LIMIT
1143 && stage3result)
1145 fputs ("Productions leading up to the conflict state found. "
1146 "Still finding a possible unifying counterexample...",
1147 stderr);
1148 assurance_printed = true;
1150 if (time_passed > TIME_LIMIT)
1152 fprintf (stderr, "time limit exceeded: %f\n", time_passed);
1153 goto cex_search_end;
1156 generate_next_states (ss, conflict1, conflict2);
1158 gl_sortedlist_remove (ssb_queue,
1159 (gl_listelement_compar_fn) ssb_comp, ssb);
1161 cex_search_end:;
1162 if (!cex)
1164 // No unifying counterexamples
1165 // If a search state from Stage 3 is available, use it
1166 // to construct a more compact nonunifying counterexample.
1167 if (stage3result)
1168 cex = complete_diverging_examples (stage3result, next_sym, shift_reduce);
1169 // Otherwise, construct a nonunifying counterexample that
1170 // begins from the start state using the shortest
1171 // lookahead-sensitive path to the reduce item.
1172 else
1173 cex = example_from_path (shift_reduce, itm2, reduce_path, next_sym);
1175 gl_list_free (ssb_queue);
1176 hash_free (visited);
1177 if (stage3result)
1178 search_state_free (stage3result);
1179 return cex;
1182 static time_t cumulative_time;
1184 void
1185 counterexample_init (void)
1187 time (&cumulative_time);
1188 scp_set = bitset_create (nstates, BITSET_FIXED);
1189 rpp_set = bitset_create (nstates, BITSET_FIXED);
1190 state_items_init ();
1194 void
1195 counterexample_free (void)
1197 if (scp_set)
1199 bitset_free (scp_set);
1200 bitset_free (rpp_set);
1201 state_items_free ();
1206 * Report a counterexample for conflict on symbol next_sym
1207 * between the given state-items
1209 static void
1210 counterexample_report (state_item_number itm1, state_item_number itm2,
1211 symbol_number next_sym, bool shift_reduce,
1212 FILE *out, const char *prefix)
1214 // Compute the shortest lookahead-sensitive path and associated sets of
1215 // parser states.
1216 state_item_list shortest_path = shortest_path_from_start (itm1, next_sym);
1217 bool reduce_prod_reached = false;
1218 const rule *reduce_rule = item_rule (state_items[itm1].item);
1220 bitset_zero (scp_set);
1221 bitset_zero (rpp_set);
1223 state_item *si = NULL;
1224 for (gl_list_iterator_t it = gl_list_iterator (shortest_path);
1225 state_item_list_next (&it, &si);
1228 bitset_set (scp_set, si->state->number);
1229 reduce_prod_reached = reduce_prod_reached
1230 || item_rule (si->item) == reduce_rule;
1231 if (reduce_prod_reached)
1232 bitset_set (rpp_set, si->state->number);
1234 time_t t = time (NULL);
1235 counterexample *cex
1236 = difftime (t, cumulative_time) < CUMULATIVE_TIME_LIMIT
1237 ? unifying_example (itm1, itm2, shift_reduce, shortest_path, next_sym)
1238 : example_from_path (shift_reduce, itm2, shortest_path, next_sym);
1240 gl_list_free (shortest_path);
1241 print_counterexample (cex, out, prefix);
1242 free_counterexample (cex);
1246 // ITM1 denotes a shift, ITM2 a reduce.
1247 static void
1248 counterexample_report_shift_reduce (state_item_number itm1, state_item_number itm2,
1249 symbol_number next_sym,
1250 FILE *out, const char *prefix)
1252 if (out == stderr)
1253 complain (NULL, Wcounterexamples,
1254 _("shift/reduce conflict on token %s"), symbols[next_sym]->tag);
1255 else
1257 fputs (prefix, out);
1258 fprintf (out, _("shift/reduce conflict on token %s"), symbols[next_sym]->tag);
1259 fprintf (out, "%s\n", _(":"));
1261 // In the report, print the items.
1262 if (out != stderr || trace_flag & trace_cex)
1264 print_state_item (&state_items[itm1], out, prefix);
1265 print_state_item (&state_items[itm2], out, prefix);
1267 counterexample_report (itm1, itm2, next_sym, true, out, prefix);
1270 static void
1271 counterexample_report_reduce_reduce (state_item_number itm1, state_item_number itm2,
1272 bitset conflict_syms,
1273 FILE *out, const char *prefix)
1276 struct obstack obstack;
1277 obstack_init (&obstack);
1278 bitset_iterator biter;
1279 state_item_number sym;
1280 const char *sep = "";
1281 BITSET_FOR_EACH (biter, conflict_syms, sym, 0)
1283 obstack_printf (&obstack, "%s%s", sep, symbols[sym]->tag);
1284 sep = ", ";
1286 char *tokens = obstack_finish0 (&obstack);
1287 if (out == stderr)
1288 complain (NULL, Wcounterexamples,
1289 ngettext ("reduce/reduce conflict on token %s",
1290 "reduce/reduce conflict on tokens %s",
1291 bitset_count (conflict_syms)),
1292 tokens);
1293 else
1295 fputs (prefix, out);
1296 fprintf (out,
1297 ngettext ("reduce/reduce conflict on token %s",
1298 "reduce/reduce conflict on tokens %s",
1299 bitset_count (conflict_syms)),
1300 tokens);
1301 fprintf (out, "%s\n", _(":"));
1303 obstack_free (&obstack, NULL);
1305 // In the report, print the items.
1306 if (out != stderr || trace_flag & trace_cex)
1308 print_state_item (&state_items[itm1], out, prefix);
1309 print_state_item (&state_items[itm2], out, prefix);
1311 counterexample_report (itm1, itm2, bitset_first (conflict_syms),
1312 false, out, prefix);
1315 static state_item_number
1316 find_state_item_number (const rule *r, state_number sn)
1318 for (state_item_number i = state_item_map[sn]; i < state_item_map[sn + 1]; ++i)
1319 if (!SI_DISABLED (i)
1320 && item_number_as_rule_number (*state_items[i].item) == r->number)
1321 return i;
1322 abort ();
1325 void
1326 counterexample_report_state (const state *s, FILE *out, const char *prefix)
1328 const state_number sn = s->number;
1329 const reductions *reds = s->reductions;
1330 bitset lookaheads = bitset_create (ntokens, BITSET_FIXED);
1331 for (int i = 0; i < reds->num; ++i)
1333 const rule *r1 = reds->rules[i];
1334 const state_item_number c1 = find_state_item_number (r1, sn);
1335 for (state_item_number c2 = state_item_map[sn]; c2 < state_item_map[sn + 1]; ++c2)
1336 if (!SI_DISABLED (c2))
1338 item_number conf = *state_items[c2].item;
1339 if (item_number_is_symbol_number (conf)
1340 && bitset_test (reds->lookaheads[i], conf))
1341 counterexample_report_shift_reduce (c1, c2, conf, out, prefix);
1343 for (int j = i+1; j < reds->num; ++j)
1345 const rule *r2 = reds->rules[j];
1346 // Conflicts: common lookaheads.
1347 bitset_intersection (lookaheads,
1348 reds->lookaheads[i],
1349 reds->lookaheads[j]);
1350 if (!bitset_empty_p (lookaheads))
1351 for (state_item_number c2 = state_item_map[sn]; c2 < state_item_map[sn + 1]; ++c2)
1352 if (!SI_DISABLED (c2)
1353 && item_rule (state_items[c2].item) == r2)
1355 counterexample_report_reduce_reduce (c1, c2, lookaheads, out, prefix);
1356 break;
1360 bitset_free (lookaheads);