glr2.cc: require C++11
[bison.git] / src / counterexample.c
blobf6f85fb1ec118ea1312440027b534cbb6851f93a
1 /* Conflict counterexample generation
3 Copyright (C) 2020-2021 Free Software Foundation, Inc.
5 This file is part of Bison, the GNU Compiler Compiler.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <https://www.gnu.org/licenses/>. */
20 #include <config.h>
22 #include "counterexample.h"
24 #include "system.h"
26 #include <errno.h>
27 #include <gl_linked_list.h>
28 #include <gl_rbtreehash_list.h>
29 #include <hash.h>
30 #include <mbswidth.h>
31 #include <stdlib.h>
32 #include <textstyle.h>
33 #include <time.h>
35 #include "closure.h"
36 #include "complain.h"
37 #include "derivation.h"
38 #include "getargs.h"
39 #include "gram.h"
40 #include "lalr.h"
41 #include "lssi.h"
42 #include "nullable.h"
43 #include "parse-simulation.h"
46 #define TIME_LIMIT_ENFORCED true
47 /** If set to false, only consider the states on the shortest
48 * lookahead-sensitive path when constructing a unifying counterexample. */
49 #define EXTENDED_SEARCH false
51 /* costs for making various steps in a search */
52 #define PRODUCTION_COST 50
53 #define REDUCE_COST 1
54 #define SHIFT_COST 1
55 #define UNSHIFT_COST 1
56 #define EXTENDED_COST 10000
58 /** The time limit before printing an assurance message to the user to
59 * indicate that the search is still running. */
60 #define ASSURANCE_LIMIT 2.0f
62 /* The time limit before giving up looking for unifying counterexample. */
63 static float time_limit = 5.0f;
65 #define CUMULATIVE_TIME_LIMIT 120.0f
67 // This is the fastest way to get the tail node from the gl_list API.
68 static gl_list_node_t
69 list_get_end (gl_list_t list)
71 gl_list_node_t sentinel = gl_list_add_last (list, NULL);
72 gl_list_node_t res = gl_list_previous_node (list, sentinel);
73 gl_list_remove_node (list, sentinel);
74 return res;
77 typedef struct
79 derivation *d1;
80 derivation *d2;
81 bool shift_reduce;
82 bool unifying;
83 bool timeout;
84 } counterexample;
86 static counterexample *
87 new_counterexample (derivation *d1, derivation *d2,
88 bool shift_reduce,
89 bool u, bool t)
91 counterexample *res = xmalloc (sizeof *res);
92 res->shift_reduce = shift_reduce;
93 if (shift_reduce)
95 // Display the shift first.
96 res->d1 = d2;
97 res->d2 = d1;
99 else
101 res->d1 = d1;
102 res->d2 = d2;
104 res->unifying = u;
105 res->timeout = t;
106 return res;
109 static void
110 free_counterexample (counterexample *cex)
112 derivation_free (cex->d1);
113 derivation_free (cex->d2);
114 free (cex);
117 static void
118 counterexample_print (const counterexample *cex, FILE *out, const char *prefix)
120 const bool flat = getenv ("YYFLAT");
121 const char *example1_label
122 = cex->unifying ? _("Example") : _("First example");
123 const char *example2_label
124 = cex->unifying ? _("Example") : _("Second example");
125 const char *deriv1_label
126 = cex->shift_reduce ? _("Shift derivation") : _("First reduce derivation");
127 const char *deriv2_label
128 = cex->shift_reduce ? _("Reduce derivation") : _("Second reduce derivation");
129 const int width =
130 max_int (max_int (mbswidth (example1_label, 0), mbswidth (example2_label, 0)),
131 max_int (mbswidth (deriv1_label, 0), mbswidth (deriv2_label, 0)));
132 if (flat)
133 fprintf (out, " %s%s%*s ", prefix,
134 example1_label, width - mbswidth (example1_label, 0), "");
135 else
136 fprintf (out, " %s%s: ", prefix, example1_label);
137 derivation_print_leaves (cex->d1, out);
138 if (flat)
139 fprintf (out, " %s%s%*s ", prefix,
140 deriv1_label, width - mbswidth (deriv1_label, 0), "");
141 else
142 fprintf (out, " %s%s", prefix, deriv1_label);
143 derivation_print (cex->d1, out, prefix);
145 // If we output to the terminal (via stderr) and we have color
146 // support, display unifying examples a second time, as color allows
147 // to see the differences.
148 if (!cex->unifying || is_styled (stderr))
150 if (flat)
151 fprintf (out, " %s%s%*s ", prefix,
152 example2_label, width - mbswidth (example2_label, 0), "");
153 else
154 fprintf (out, " %s%s: ", prefix, example2_label);
155 derivation_print_leaves (cex->d2, out);
157 if (flat)
158 fprintf (out, " %s%s%*s ", prefix,
159 deriv2_label, width - mbswidth (deriv2_label, 0), "");
160 else
161 fprintf (out, " %s%s", prefix, deriv2_label);
162 derivation_print (cex->d2, out, prefix);
164 if (out != stderr)
165 putc ('\n', out);
170 * NON UNIFYING COUNTER EXAMPLES
174 // Search node for BFS on state items
175 struct si_bfs_node;
176 typedef struct si_bfs_node
178 state_item_number si;
179 struct si_bfs_node *parent;
180 int reference_count;
181 } si_bfs_node;
183 static si_bfs_node *
184 si_bfs_new (state_item_number si, si_bfs_node *parent)
186 si_bfs_node *res = xcalloc (1, sizeof *res);
187 res->si = si;
188 res->parent = parent;
189 res->reference_count = 1;
190 if (parent)
191 ++parent->reference_count;
192 return res;
195 static bool
196 si_bfs_contains (const si_bfs_node *n, state_item_number sin)
198 for (const si_bfs_node *search = n; search != NULL; search = search->parent)
199 if (search->si == sin)
200 return true;
201 return false;
204 static void
205 si_bfs_free (si_bfs_node *n)
207 if (n == NULL)
208 return;
209 --n->reference_count;
210 if (n->reference_count == 0)
212 si_bfs_free (n->parent);
213 free (n);
217 typedef gl_list_t si_bfs_node_list;
220 * start is a state_item such that conflict_sym is an element of FIRSTS of the
221 * nonterminal after the dot in start. Because of this, we should be able to
222 * find a production item starting with conflict_sym by only searching productions
223 * of the nonterminal and shifting over nullable nonterminals
225 * this returns the derivation of the productions that lead to conflict_sym
227 static inline derivation_list
228 expand_to_conflict (state_item_number start, symbol_number conflict_sym)
230 si_bfs_node *init = si_bfs_new (start, NULL);
232 si_bfs_node_list queue
233 = gl_list_create (GL_LINKED_LIST, NULL, NULL,
234 (gl_listelement_dispose_fn) si_bfs_free,
235 true, 1, (const void **) &init);
236 si_bfs_node *node = NULL;
237 // breadth-first search for a path of productions to the conflict symbol
238 while (gl_list_size (queue) > 0)
240 node = (si_bfs_node *) gl_list_get_at (queue, 0);
241 state_item *silast = &state_items[node->si];
242 symbol_number sym = item_number_as_symbol_number (*silast->item);
243 if (sym == conflict_sym)
244 break;
245 if (ISVAR (sym))
247 // add each production to the search
248 bitset_iterator biter;
249 state_item_number sin;
250 bitset sib = silast->prods;
251 BITSET_FOR_EACH (biter, sib, sin, 0)
253 // ignore productions already in the path
254 if (si_bfs_contains (node, sin))
255 continue;
256 si_bfs_node *next = si_bfs_new (sin, node);
257 gl_list_add_last (queue, next);
259 // for nullable nonterminals, add its goto to the search
260 if (nullable[sym - ntokens])
262 si_bfs_node *next = si_bfs_new (silast->trans, node);
263 gl_list_add_last (queue, next);
266 gl_list_remove_at (queue, 0);
268 if (gl_list_size (queue) == 0)
270 gl_list_free (queue);
271 fputs ("Error expanding derivation\n", stderr);
272 abort ();
275 derivation *dinit = derivation_new_leaf (conflict_sym);
276 derivation_list result = derivation_list_new ();
277 derivation_list_append (result, dinit);
278 // iterate backwards through the generated path to create a derivation
279 // of the conflict symbol containing derivations of each production step.
281 for (si_bfs_node *n = node; n != NULL; n = n->parent)
283 state_item *si = &state_items[n->si];
284 item_number *pos = si->item;
285 if (SI_PRODUCTION (si))
287 item_number *i = NULL;
288 for (i = pos + 1; !item_number_is_rule_number (*i); ++i)
289 derivation_list_append (result, derivation_new_leaf (*i));
290 symbol_number lhs =
291 rules[item_number_as_rule_number (*i)].lhs->number;
292 derivation *deriv = derivation_new (lhs, result,
293 state_item_rule (si));
294 result = derivation_list_new ();
295 derivation_list_append (result, deriv);
297 else
299 symbol_number sym = item_number_as_symbol_number (*(pos - 1));
300 derivation *deriv = derivation_new_leaf (sym);
301 derivation_list_prepend (result, deriv);
304 gl_list_free (queue);
305 derivation_free ((derivation*)gl_list_get_at (result, 0));
306 gl_list_remove_at (result, 0);
307 return result;
311 * Complete derivations for any pending productions in the given
312 * sequence of state-items. For example, the input could be a path
313 * of states that would give us the following input:
314 * Stmt ::= [lval ::= [VAR] '=' e ::=[ e::=['0'] '+' •
315 * So to complete the derivation of Stmt, we need an output like:
316 * Stmt ::= [lval ::= [VAR] '=' e ::=[ e::=['0'] '+' • e ] ';' ]
318 static derivation *
319 complete_diverging_example (symbol_number conflict_sym,
320 state_item_list path, derivation_list derivs)
322 // The idea is to transfer each pending symbol on the productions
323 // associated with the given StateItems to the resulting derivation.
324 derivation_list result = derivation_list_new ();
325 bool lookahead_required = false;
326 if (!derivs)
328 derivs = derivation_list_new ();
329 gl_list_add_last (result, derivation_dot ());
330 lookahead_required = true;
333 gl_list_node_t deriv = list_get_end (derivs);
335 // We go backwards through the path to create the derivation tree bottom-up.
336 // Effectively this loops through each production once, and generates a
337 // derivation of the left hand side by appending all of the rhs symbols.
338 // this becomes the derivation of the nonterminal after the dot in the
339 // next production, and all of the other symbols of the rule are added as normal.
340 for (gl_list_node_t state_node = list_get_end (path);
341 state_node != NULL;
342 state_node = gl_list_previous_node (path, state_node))
344 state_item *si = (state_item *) gl_list_node_value (path, state_node);
345 item_number *item = si->item;
346 item_number pos = *item;
347 // symbols after dot
348 if (gl_list_size (result) == 1 && !item_number_is_rule_number (pos)
349 && gl_list_get_at (result, 0) == derivation_dot ())
351 derivation_list_append (result,
352 derivation_new_leaf (item_number_as_symbol_number (pos)));
353 lookahead_required = false;
355 item_number *i = item;
356 // go through each symbol after the dot in the current rule, and
357 // add each symbol to its derivation.
358 for (state_item_number nsi = si - state_items;
359 !item_number_is_rule_number (*i);
360 ++i, nsi = state_items[nsi].trans)
362 // if the item is a reduction, we could skip to the wrong rule
363 // by starting at i + 1, so this continue is necessary
364 if (i == item)
365 continue;
366 symbol_number sym = item_number_as_symbol_number (*i);
367 if (!lookahead_required || sym == conflict_sym)
369 derivation_list_append (result, derivation_new_leaf (sym));
370 lookahead_required = false;
371 continue;
373 // Since PATH is a path to the conflict state-item,
374 // for a reduce conflict item, we will want to have a derivation
375 // that shows the conflict symbol from its lookahead set being used.
377 // Since reductions have the dot at the end of the item,
378 // this loop will be first executed on the last item in the path
379 // that's not a reduction. When that happens,
380 // the symbol after the dot should be a nonterminal,
381 // and we can look through successive nullable nonterminals
382 // for one with the conflict symbol in its first set.
383 if (bitset_test (FIRSTS (sym), conflict_sym))
385 lookahead_required = false;
386 derivation_list next_derivs =
387 expand_to_conflict (nsi, conflict_sym);
388 derivation *d = NULL;
389 for (gl_list_iterator_t it = gl_list_iterator (next_derivs);
390 derivation_list_next (&it, &d);)
391 derivation_list_append (result, d);
392 i += gl_list_size (next_derivs) - 1;
393 derivation_list_free (next_derivs);
395 else if (nullable[sym - ntokens])
397 derivation *d = derivation_new_leaf (sym);
398 derivation_list_append (result, d);
400 else
402 // We found a path to the conflict item, and despite it
403 // having the conflict symbol in its lookahead, no example
404 // containing the symbol after the conflict item
405 // can be found.
406 derivation_list_append (result, derivation_new_leaf (1));
407 lookahead_required = false;
410 const rule *r = &rules[item_number_as_rule_number (*i)];
411 // add derivations for symbols before dot
412 for (i = item - 1; !item_number_is_rule_number (*i) && i >= ritem; i--)
414 gl_list_node_t p = gl_list_previous_node (path, state_node);
415 if (p)
416 state_node = p;
417 if (deriv)
419 const void *tmp_deriv = gl_list_node_value (derivs, deriv);
420 deriv = gl_list_previous_node (derivs, deriv);
421 derivation_list_prepend (result, (derivation*)tmp_deriv);
423 else
424 derivation_list_prepend (result, derivation_new_leaf (*i));
426 // completing the derivation
427 derivation *new_deriv = derivation_new (r->lhs->number, result, r);
428 result = derivation_list_new ();
429 derivation_list_append (result, new_deriv);
431 derivation *res = (derivation *) gl_list_get_at (result, 0);
432 derivation_retain (res);
433 derivation_list_free (result);
434 derivation_list_free (derivs);
435 return res;
438 /* Iterate backwards through the shifts of the path in the reduce
439 conflict, and find a path of shifts from the shift conflict that
440 goes through the same states. */
441 static state_item_list
442 nonunifying_shift_path (state_item_list reduce_path, state_item *shift_conflict)
444 gl_list_node_t tmp = gl_list_add_last (reduce_path, NULL);
445 gl_list_node_t next_node = gl_list_previous_node (reduce_path, tmp);
446 gl_list_node_t node = gl_list_previous_node (reduce_path, next_node);
447 gl_list_remove_node (reduce_path, tmp);
448 state_item *si = shift_conflict;
449 state_item_list result =
450 gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
451 // FIXME: bool paths_merged;
452 for (; node != NULL; next_node = node,
453 node = gl_list_previous_node (reduce_path, node))
455 state_item *refsi =
456 (state_item *) gl_list_node_value (reduce_path, node);
457 state_item *nextrefsi =
458 (state_item *) gl_list_node_value (reduce_path, next_node);
459 if (nextrefsi == si)
461 gl_list_add_first (result, refsi);
462 si = refsi;
463 continue;
465 // skip reduction items
466 if (nextrefsi->item != refsi->item + 1 && refsi->item != ritem)
467 continue;
469 // bfs to find a shift to the right state
470 si_bfs_node *init = si_bfs_new (si - state_items, NULL);
471 si_bfs_node_list queue
472 = gl_list_create (GL_LINKED_LIST, NULL, NULL,
473 (gl_listelement_dispose_fn) si_bfs_free,
474 true, 1, (const void **) &init);
475 si_bfs_node *sis = NULL;
476 state_item *prevsi = NULL;
477 while (gl_list_size (queue) > 0)
479 sis = (si_bfs_node *) gl_list_get_at (queue, 0);
480 // if we end up in the start state, the shift couldn't be found.
481 if (sis->si == 0)
482 break;
484 state_item *search_si = &state_items[sis->si];
485 // if the current state-item is a production item,
486 // its reverse production items get added to the queue.
487 // Otherwise, look for a reverse transition to the target state.
488 bitset rsi = search_si->revs;
489 bitset_iterator biter;
490 state_item_number sin;
491 BITSET_FOR_EACH (biter, rsi, sin, 0)
493 prevsi = &state_items[sin];
494 if (SI_TRANSITION (search_si))
496 if (prevsi->state == refsi->state)
497 goto search_end;
499 else if (!si_bfs_contains (sis, sin))
501 si_bfs_node *prevsis = si_bfs_new (sin, sis);
502 gl_list_add_last (queue, prevsis);
505 gl_list_remove_at (queue, 0);
507 search_end:
508 // prepend path to shift we found
509 if (sis)
511 gl_list_node_t ln = gl_list_add_first (result, &state_items[sis->si]);
512 for (si_bfs_node *n = sis->parent; n; n = n->parent)
513 ln = gl_list_add_after (result, ln, &state_items[n->si]);
516 si = prevsi;
517 gl_list_free (queue);
519 if (trace_flag & trace_cex)
521 fputs ("SHIFT ITEM PATH:\n", stderr);
522 state_item *sip = NULL;
523 for (gl_list_iterator_t it = gl_list_iterator (result);
524 state_item_list_next (&it, &sip);
526 state_item_print (sip, stderr, "");
528 return result;
533 * Construct a nonunifying counterexample from the shortest
534 * lookahead-sensitive path.
536 static counterexample *
537 example_from_path (bool shift_reduce,
538 state_item_number itm2,
539 state_item_list shortest_path, symbol_number next_sym)
541 derivation *deriv1 =
542 complete_diverging_example (next_sym, shortest_path, NULL);
543 state_item_list path_2
544 = shift_reduce
545 ? nonunifying_shift_path (shortest_path, &state_items [itm2])
546 : shortest_path_from_start (itm2, next_sym);
547 derivation *deriv2 = complete_diverging_example (next_sym, path_2, NULL);
548 gl_list_free (path_2);
549 return new_counterexample (deriv1, deriv2, shift_reduce, false, true);
554 * UNIFYING COUNTER EXAMPLES
558 /* A search state keeps track of two parser simulations,
559 * one starting at each conflict. Complexity is a metric
560 * which sums different parser actions with varying weights.
562 typedef struct
564 parse_state *states[2];
565 int complexity;
566 } search_state;
568 static search_state *
569 initial_search_state (state_item *conflict1, state_item *conflict2)
571 search_state *res = xmalloc (sizeof *res);
572 res->states[0] = new_parse_state (conflict1);
573 res->states[1] = new_parse_state (conflict2);
574 parse_state_retain (res->states[0]);
575 parse_state_retain (res->states[1]);
576 res->complexity = 0;
577 return res;
580 static search_state *
581 new_search_state (parse_state *ps1, parse_state *ps2, int complexity)
583 search_state *res = xmalloc (sizeof *res);
584 res->states[0] = ps1;
585 res->states[1] = ps2;
586 parse_state_retain (res->states[0]);
587 parse_state_retain (res->states[1]);
588 res->complexity = complexity;
589 return res;
592 static search_state *
593 copy_search_state (search_state *parent)
595 search_state *res = xmalloc (sizeof *res);
596 *res = *parent;
597 parse_state_retain (res->states[0]);
598 parse_state_retain (res->states[1]);
599 return res;
602 static void
603 search_state_free_children (search_state *ss)
605 free_parse_state (ss->states[0]);
606 free_parse_state (ss->states[1]);
609 static void
610 search_state_free (search_state *ss)
612 if (ss == NULL)
613 return;
614 search_state_free_children (ss);
615 free (ss);
618 /* For debugging traces. */
619 static void
620 search_state_print (search_state *ss)
622 fputs ("CONFLICT 1 ", stderr);
623 print_parse_state (ss->states[0]);
624 fputs ("CONFLICT 2 ", stderr);
625 print_parse_state (ss->states[1]);
626 putc ('\n', stderr);
629 typedef gl_list_t search_state_list;
631 static inline bool
632 search_state_list_next (gl_list_iterator_t *it, search_state **ss)
634 const void *p = NULL;
635 bool res = gl_list_iterator_next (it, &p, NULL);
636 if (res)
637 *ss = (search_state*) p;
638 else
639 gl_list_iterator_free (it);
640 return res;
644 * When a search state is copied, this is used to
645 * directly set one of the parse states
647 static inline void
648 ss_set_parse_state (search_state *ss, int idx, parse_state *ps)
650 free_parse_state (ss->states[idx]);
651 ss->states[idx] = ps;
652 parse_state_retain (ps);
656 * Construct a nonunifying example from a search state
657 * which has its parse states unified at the beginning
658 * but not the end of the example.
660 static counterexample *
661 complete_diverging_examples (search_state *ss,
662 symbol_number next_sym,
663 bool shift_reduce)
665 derivation *new_derivs[2];
666 for (int i = 0; i < 2; ++i)
668 state_item_list sitems;
669 derivation_list derivs;
670 parse_state_lists (ss->states[i], &sitems, &derivs);
671 new_derivs[i] = complete_diverging_example (next_sym, sitems, derivs);
672 gl_list_free (sitems);
674 return new_counterexample (new_derivs[0], new_derivs[1],
675 shift_reduce, false, true);
679 * Search states are stored in bundles with those that
680 * share the same complexity. This is so the priority
681 * queue takes less overhead.
683 typedef struct
685 search_state_list states;
686 int complexity;
687 } search_state_bundle;
689 static void
690 ssb_free (search_state_bundle *ssb)
692 gl_list_free (ssb->states);
693 free (ssb);
696 static size_t
697 ssb_hasher (search_state_bundle *ssb)
699 return ssb->complexity;
702 static int
703 ssb_comp (const search_state_bundle *s1, const search_state_bundle *s2)
705 return s1->complexity - s2->complexity;
708 static bool
709 ssb_equals (const search_state_bundle *s1, const search_state_bundle *s2)
711 return s1->complexity == s2->complexity;
714 typedef gl_list_t ssb_list;
716 static size_t
717 visited_hasher (const search_state *ss, size_t max)
719 return (parse_state_hasher (ss->states[0], max)
720 + parse_state_hasher (ss->states[1], max)) % max;
723 static bool
724 visited_comparator (const search_state *ss1, const search_state *ss2)
726 return parse_state_comparator (ss1->states[0], ss2->states[0])
727 && parse_state_comparator (ss1->states[1], ss2->states[1]);
730 /* Priority queue for search states with minimal complexity. */
731 static ssb_list ssb_queue;
732 static Hash_table *visited;
733 /* The set of parser states on the shortest lookahead-sensitive path. */
734 static bitset scp_set = NULL;
735 /* The set of parser states used for the conflict reduction rule. */
736 static bitset rpp_set = NULL;
738 static void
739 ssb_append (search_state *ss)
741 if (hash_lookup (visited, ss))
743 search_state_free (ss);
744 return;
746 hash_xinsert (visited, ss);
747 // if states are only referenced by the visited set,
748 // their contents should be freed as we only need
749 // the metadata necessary to compute a hash.
750 parse_state_free_contents_early (ss->states[0]);
751 parse_state_free_contents_early (ss->states[1]);
752 parse_state_retain (ss->states[0]);
753 parse_state_retain (ss->states[1]);
754 search_state_bundle *ssb = xmalloc (sizeof *ssb);
755 ssb->complexity = ss->complexity;
756 gl_list_node_t n = gl_list_search (ssb_queue, ssb);
757 if (!n)
759 ssb->states =
760 gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
761 (gl_listelement_dispose_fn)search_state_free_children,
762 true);
763 gl_sortedlist_add (ssb_queue, (gl_listelement_compar_fn) ssb_comp, ssb);
765 else
767 free (ssb);
768 ssb = (search_state_bundle *) gl_list_node_value (ssb_queue, n);
770 gl_list_add_last (ssb->states, ss);
774 * The following functions perform various actions on parse states
775 * and assign complexities to the newly generated search states.
777 static void
778 production_step (search_state *ss, int parser_state)
780 const state_item *other_si = parse_state_tail (ss->states[1 - parser_state]);
781 symbol_number other_sym = item_number_as_symbol_number (*other_si->item);
782 parse_state_list prods =
783 simulate_production (ss->states[parser_state], other_sym);
784 int complexity = ss->complexity + PRODUCTION_COST;
786 parse_state *ps = NULL;
787 for (gl_list_iterator_t it = gl_list_iterator (prods);
788 parse_state_list_next (&it, &ps);
791 search_state *copy = copy_search_state (ss);
792 ss_set_parse_state (copy, parser_state, ps);
793 copy->complexity = complexity;
794 ssb_append (copy);
796 gl_list_free (prods);
799 static inline int
800 reduction_cost (const parse_state *ps)
802 int shifts;
803 int productions;
804 parse_state_completed_steps (ps, &shifts, &productions);
805 return SHIFT_COST * shifts + PRODUCTION_COST * productions;
808 static search_state_list
809 reduction_step (search_state *ss, const item_number *conflict_item,
810 int parser_state, int rule_len)
812 (void) conflict_item; // FIXME: Unused
813 search_state_list result =
814 gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, 1);
816 parse_state *ps = ss->states[parser_state];
817 const state_item *si = parse_state_tail (ps);
818 bitset symbol_set = si->lookahead;
819 parse_state *other = ss->states[1 - parser_state];
820 const state_item *other_si = parse_state_tail (other);
821 // if the other state can transition on a symbol,
822 // the reduction needs to have that symbol in its lookahead
823 if (item_number_is_symbol_number (*other_si->item))
825 symbol_number other_sym =
826 item_number_as_symbol_number (*other_si->item);
827 if (!intersect_symbol (other_sym, symbol_set))
828 return result;
829 symbol_set = bitset_create (nsyms, BITSET_FIXED);
830 bitset_set (symbol_set, other_sym);
833 // FIXME: search_state *new_root = copy_search_state (ss);
834 parse_state_list reduced =
835 simulate_reduction (ps, rule_len, symbol_set);
836 parse_state *reduced_ps = NULL;
837 for (gl_list_iterator_t it = gl_list_iterator (reduced);
838 parse_state_list_next (&it, &reduced_ps);
841 search_state *copy = copy_search_state (ss);
842 ss_set_parse_state (copy, parser_state, reduced_ps);
843 int r_cost = reduction_cost (reduced_ps);
844 copy->complexity += r_cost + PRODUCTION_COST + 2 * SHIFT_COST;
845 gl_list_add_last (result, copy);
847 gl_list_free (reduced);
848 if (symbol_set != si->lookahead)
849 bitset_free (symbol_set);
850 return result;
854 * Attempt to prepend the given symbol to this search state, respecting
855 * the given subsequent next symbol on each path. If a reverse transition
856 * cannot be made on both states, possible reverse productions are prepended
858 static void
859 search_state_prepend (search_state *ss, symbol_number sym, bitset guide)
861 (void) sym; // FIXME: Unused.
862 const state_item *si1src = parse_state_head (ss->states[0]);
863 const state_item *si2src = parse_state_head (ss->states[1]);
865 bool prod1 = SI_PRODUCTION (si1src);
866 // If one can make a reverse transition and the other can't, only apply
867 // the reverse productions that the other state can make in an attempt to
868 // make progress.
869 if (prod1 != SI_PRODUCTION (si2src))
871 int prod_state = prod1 ? 0 : 1;
872 parse_state_list prev = parser_prepend (ss->states[prod_state]);
873 parse_state *ps = NULL;
874 for (gl_list_iterator_t iter = gl_list_iterator (prev);
875 parse_state_list_next (&iter, &ps);
878 const state_item *psi = parse_state_head (ps);
879 bool guided = bitset_test (guide, psi->state->number);
880 if (!guided && !EXTENDED_SEARCH)
881 continue;
883 search_state *copy = copy_search_state (ss);
884 ss_set_parse_state (copy, prod_state, ps);
885 copy->complexity += PRODUCTION_COST;
886 if (!guided)
887 copy->complexity += EXTENDED_COST;
888 ssb_append (copy);
890 gl_list_free (prev);
891 return;
893 // The parse state heads are either both production items or both
894 // transition items. So all prepend options will either be
895 // reverse transitions or reverse productions
896 int complexity_cost = prod1 ? PRODUCTION_COST : UNSHIFT_COST;
897 complexity_cost *= 2;
899 parse_state_list prev1 = parser_prepend (ss->states[0]);
900 parse_state_list prev2 = parser_prepend (ss->states[1]);
902 // loop through each pair of possible prepend states and append search
903 // states for each pair where the parser states correspond to the same
904 // parsed input.
905 parse_state *ps1 = NULL;
906 for (gl_list_iterator_t iter1 = gl_list_iterator (prev1);
907 parse_state_list_next (&iter1, &ps1);
910 const state_item *psi1 = parse_state_head (ps1);
911 bool guided1 = bitset_test (guide, psi1->state->number);
912 if (!guided1 && !EXTENDED_SEARCH)
913 continue;
915 parse_state *ps2 = NULL;
916 for (gl_list_iterator_t iter2 = gl_list_iterator (prev2);
917 parse_state_list_next (&iter2, &ps2);
920 const state_item *psi2 = parse_state_head (ps2);
922 bool guided2 = bitset_test (guide, psi2->state->number);
923 if (!guided2 && !EXTENDED_SEARCH)
924 continue;
925 // Only consider prepend state items that share the same state.
926 if (psi1->state != psi2->state)
927 continue;
929 int complexity = ss->complexity;
930 if (prod1)
931 complexity += PRODUCTION_COST * 2;
932 else
933 complexity += UNSHIFT_COST * 2;
934 // penalty for not being along the guide path
935 if (!guided1 || !guided2)
936 complexity += EXTENDED_COST;
937 ssb_append (new_search_state (ps1, ps2, complexity));
940 gl_list_free (prev1);
941 gl_list_free (prev2);
945 * Determine if the productions associated with the given parser items have
946 * the same prefix up to the dot.
948 static bool
949 have_common_prefix (const item_number *itm1, const item_number *itm2)
951 int i = 0;
952 for (; !item_number_is_rule_number (itm1[i]); ++i)
953 if (itm1[i] != itm2[i])
954 return false;
955 return item_number_is_rule_number (itm2[i]);
959 * The start and end locations of an item in ritem.
961 static const item_number *
962 item_rule_start (const item_number *item)
964 const item_number *res = NULL;
965 for (res = item;
966 ritem < res && item_number_is_symbol_number (*(res - 1));
967 --res)
968 continue;
969 return res;
972 static const item_number *
973 item_rule_end (const item_number *item)
975 const item_number *res = NULL;
976 for (res = item; item_number_is_symbol_number (*res); ++res)
977 continue;
978 return res;
982 * Perform the appropriate possible parser actions
983 * on a search state and add the results to the
984 * search state priority queue.
986 static inline void
987 generate_next_states (search_state *ss, state_item *conflict1,
988 state_item *conflict2)
990 // Compute the successor configurations.
991 parse_state *ps1 = ss->states[0];
992 parse_state *ps2 = ss->states[1];
993 const state_item *si1 = parse_state_tail (ps1);
994 const state_item *si2 = parse_state_tail (ps2);
995 bool si1reduce = item_number_is_rule_number (*si1->item);
996 bool si2reduce = item_number_is_rule_number (*si2->item);
997 if (!si1reduce && !si2reduce)
999 // Transition if both paths end at the same symbol
1000 if (*si1->item == *si2->item)
1002 int complexity = ss->complexity + 2 * SHIFT_COST;
1003 parse_state_list trans1 = simulate_transition (ps1);
1004 parse_state_list trans2 = simulate_transition (ps2);
1005 parse_state *tps1 = NULL;
1006 parse_state *tps2 = NULL;
1007 for (gl_list_iterator_t it1 = gl_list_iterator (trans1);
1008 parse_state_list_next (&it1, &tps1);
1010 for (gl_list_iterator_t it2 = gl_list_iterator (trans2);
1011 parse_state_list_next (&it2, &tps2);
1013 ssb_append (new_search_state (tps1, tps2, complexity));
1014 gl_list_free (trans1);
1015 gl_list_free (trans2);
1018 // Take production steps if possible.
1019 production_step (ss, 0);
1020 production_step (ss, 1);
1022 // One of the states requires a reduction
1023 else
1025 const item_number *rhs1 = item_rule_start (si1->item);
1026 const item_number *rhe1 = item_rule_end (si1->item);
1027 int len1 = rhe1 - rhs1;
1028 int size1 = parse_state_length (ps1);
1029 bool ready1 = si1reduce && len1 < size1;
1031 const item_number *rhs2 = item_rule_start (si2->item);
1032 const item_number *rhe2 = item_rule_end (si2->item);
1033 int len2 = rhe2 - rhs2;
1034 int size2 = parse_state_length (ps2);
1035 bool ready2 = si2reduce && len2 < size2;
1036 // If there is a path ready for reduction without being
1037 // prepended further, reduce.
1038 if (ready1 && ready2)
1040 search_state_list reduced1 = reduction_step (ss, conflict1->item, 0, len1);
1041 gl_list_add_last (reduced1, ss);
1042 search_state *red1 = NULL;
1043 for (gl_list_iterator_t iter = gl_list_iterator (reduced1);
1044 search_state_list_next (&iter, &red1);
1047 search_state_list reduced2 =
1048 reduction_step (red1, conflict2->item, 1, len2);
1049 search_state *red2 = NULL;
1050 for (gl_list_iterator_t iter2 = gl_list_iterator (reduced2);
1051 search_state_list_next (&iter2, &red2);
1053 ssb_append (red2);
1054 // Avoid duplicates.
1055 if (red1 != ss)
1056 ssb_append (red1);
1057 gl_list_free (reduced2);
1059 gl_list_free (reduced1);
1061 else if (ready1)
1063 search_state_list reduced1 = reduction_step (ss, conflict1->item, 0, len1);
1064 search_state *red1 = NULL;
1065 for (gl_list_iterator_t iter = gl_list_iterator (reduced1);
1066 search_state_list_next (&iter, &red1);
1068 ssb_append (red1);
1069 gl_list_free (reduced1);
1071 else if (ready2)
1073 search_state_list reduced2 = reduction_step (ss, conflict2->item, 1, len2);
1074 search_state *red2 = NULL;
1075 for (gl_list_iterator_t iter2 = gl_list_iterator (reduced2);
1076 search_state_list_next (&iter2, &red2);
1078 ssb_append (red2);
1079 gl_list_free (reduced2);
1081 /* Both states end with a reduction, yet they don't have enough symbols
1082 * to reduce. This means symbols are missing from the beginning of the
1083 * rule, so we must prepend */
1084 else
1086 const symbol_number sym
1087 = si1reduce && !ready1
1088 ? *(rhe1 - size1)
1089 : *(rhe2 - size2);
1090 search_state_prepend (ss, sym,
1091 parse_state_depth (ss->states[0]) >= 0
1092 ? rpp_set : scp_set);
1098 * Perform the actual counterexample search,
1099 * keeps track of what stage of the search algorithm
1100 * we are at and gives the appropriate counterexample
1101 * type based off of time constraints.
1103 static counterexample *
1104 unifying_example (state_item_number itm1,
1105 state_item_number itm2,
1106 bool shift_reduce,
1107 state_item_list reduce_path, symbol_number next_sym)
1109 state_item *conflict1 = &state_items[itm1];
1110 state_item *conflict2 = &state_items[itm2];
1111 search_state *initial = initial_search_state (conflict1, conflict2);
1112 ssb_queue = gl_list_create_empty (GL_RBTREEHASH_LIST,
1113 (gl_listelement_equals_fn) ssb_equals,
1114 (gl_listelement_hashcode_fn) ssb_hasher,
1115 (gl_listelement_dispose_fn) ssb_free,
1116 false);
1117 visited =
1118 hash_initialize (32, NULL, (Hash_hasher) visited_hasher,
1119 (Hash_comparator) visited_comparator,
1120 (Hash_data_freer) search_state_free);
1121 ssb_append (initial);
1122 time_t start = time (NULL);
1123 bool assurance_printed = false;
1124 search_state *stage3result = NULL;
1125 counterexample *cex = NULL;
1126 while (gl_list_size (ssb_queue) > 0)
1128 const search_state_bundle *ssb = gl_list_get_at (ssb_queue, 0);
1130 search_state *ss = NULL;
1131 for (gl_list_iterator_t it = gl_list_iterator (ssb->states);
1132 search_state_list_next (&it, &ss);
1135 if (trace_flag & trace_cex)
1136 search_state_print (ss);
1137 // Stage 1/2 completing the rules containing the conflicts
1138 parse_state *ps1 = ss->states[0];
1139 parse_state *ps2 = ss->states[1];
1140 if (parse_state_depth (ps1) < 0 && parse_state_depth (ps2) < 0)
1142 // Stage 3: reduce and shift conflict items completed.
1143 const state_item *si1src = parse_state_head (ps1);
1144 const state_item *si2src = parse_state_head (ps2);
1145 if (item_rule (si1src->item)->lhs == item_rule (si2src->item)->lhs
1146 && have_common_prefix (si1src->item, si2src->item))
1148 // Stage 4: both paths share a prefix
1149 derivation *d1 = parse_state_derivation (ps1);
1150 derivation *d2 = parse_state_derivation (ps2);
1151 if (parse_state_derivation_completed (ps1)
1152 && parse_state_derivation_completed (ps2))
1154 // Once we have two derivations for the same symbol,
1155 // we've found a unifying counterexample.
1156 cex = new_counterexample (d1, d2, shift_reduce, true, false);
1157 derivation_retain (d1);
1158 derivation_retain (d2);
1159 goto cex_search_end;
1161 if (!stage3result)
1162 stage3result = copy_search_state (ss);
1165 if (TIME_LIMIT_ENFORCED)
1167 float time_passed = difftime (time (NULL), start);
1168 if (!assurance_printed && time_passed > ASSURANCE_LIMIT
1169 && stage3result)
1171 fputs ("Productions leading up to the conflict state found. "
1172 "Still finding a possible unifying counterexample...",
1173 stderr);
1174 assurance_printed = true;
1176 if (time_passed > time_limit)
1178 fprintf (stderr, "time limit exceeded: %f\n", time_passed);
1179 goto cex_search_end;
1182 generate_next_states (ss, conflict1, conflict2);
1184 gl_sortedlist_remove (ssb_queue,
1185 (gl_listelement_compar_fn) ssb_comp, ssb);
1187 cex_search_end:;
1188 if (!cex)
1190 // No unifying counterexamples
1191 // If a search state from Stage 3 is available, use it
1192 // to construct a more compact nonunifying counterexample.
1193 if (stage3result)
1194 cex = complete_diverging_examples (stage3result, next_sym, shift_reduce);
1195 // Otherwise, construct a nonunifying counterexample that
1196 // begins from the start state using the shortest
1197 // lookahead-sensitive path to the reduce item.
1198 else
1199 cex = example_from_path (shift_reduce, itm2, reduce_path, next_sym);
1201 gl_list_free (ssb_queue);
1202 hash_free (visited);
1203 if (stage3result)
1204 search_state_free (stage3result);
1205 return cex;
1208 static time_t cumulative_time;
1210 void
1211 counterexample_init (void)
1213 /* Recognize $TIME_LIMIT. Not a public feature, just to help
1214 debugging. If we need something public, a %define/-D/-F variable
1215 would be more appropriate. */
1217 const char *cp = getenv ("TIME_LIMIT");
1218 if (cp)
1220 char *end = NULL;
1221 float v = strtof (cp, &end);
1222 if (*end == '\0' && errno == 0)
1223 time_limit = v;
1226 time (&cumulative_time);
1227 scp_set = bitset_create (nstates, BITSET_FIXED);
1228 rpp_set = bitset_create (nstates, BITSET_FIXED);
1229 state_items_init ();
1233 void
1234 counterexample_free (void)
1236 if (scp_set)
1238 bitset_free (scp_set);
1239 bitset_free (rpp_set);
1240 state_items_free ();
1245 * Report a counterexample for conflict on symbol next_sym
1246 * between the given state-items
1248 static void
1249 counterexample_report (state_item_number itm1, state_item_number itm2,
1250 symbol_number next_sym, bool shift_reduce,
1251 FILE *out, const char *prefix)
1253 // Compute the shortest lookahead-sensitive path and associated sets of
1254 // parser states.
1255 state_item_list shortest_path = shortest_path_from_start (itm1, next_sym);
1256 bool reduce_prod_reached = false;
1257 const rule *reduce_rule = item_rule (state_items[itm1].item);
1259 bitset_zero (scp_set);
1260 bitset_zero (rpp_set);
1262 state_item *si = NULL;
1263 for (gl_list_iterator_t it = gl_list_iterator (shortest_path);
1264 state_item_list_next (&it, &si);
1267 bitset_set (scp_set, si->state->number);
1268 reduce_prod_reached = reduce_prod_reached
1269 || item_rule (si->item) == reduce_rule;
1270 if (reduce_prod_reached)
1271 bitset_set (rpp_set, si->state->number);
1273 time_t t = time (NULL);
1274 counterexample *cex
1275 = difftime (t, cumulative_time) < CUMULATIVE_TIME_LIMIT
1276 ? unifying_example (itm1, itm2, shift_reduce, shortest_path, next_sym)
1277 : example_from_path (shift_reduce, itm2, shortest_path, next_sym);
1279 gl_list_free (shortest_path);
1280 counterexample_print (cex, out, prefix);
1281 free_counterexample (cex);
1285 // ITM1 denotes a shift, ITM2 a reduce.
1286 static void
1287 counterexample_report_shift_reduce (state_item_number itm1, state_item_number itm2,
1288 symbol_number next_sym,
1289 FILE *out, const char *prefix)
1291 if (out == stderr)
1292 complain (NULL, Wcounterexamples,
1293 _("shift/reduce conflict on token %s"), symbols[next_sym]->tag);
1294 else
1296 fputs (prefix, out);
1297 fprintf (out, _("shift/reduce conflict on token %s"), symbols[next_sym]->tag);
1298 fprintf (out, "%s\n", _(":"));
1300 // In the report, print the items.
1301 if (out != stderr || trace_flag & trace_cex)
1303 state_item_print (&state_items[itm1], out, prefix);
1304 state_item_print (&state_items[itm2], out, prefix);
1306 counterexample_report (itm1, itm2, next_sym, true, out, prefix);
1309 static void
1310 counterexample_report_reduce_reduce (state_item_number itm1, state_item_number itm2,
1311 bitset conflict_syms,
1312 FILE *out, const char *prefix)
1315 struct obstack obstack;
1316 obstack_init (&obstack);
1317 bitset_iterator biter;
1318 state_item_number sym;
1319 const char *sep = "";
1320 BITSET_FOR_EACH (biter, conflict_syms, sym, 0)
1322 obstack_printf (&obstack, "%s%s", sep, symbols[sym]->tag);
1323 sep = ", ";
1325 char *tokens = obstack_finish0 (&obstack);
1326 if (out == stderr)
1327 complain (NULL, Wcounterexamples,
1328 ngettext ("reduce/reduce conflict on token %s",
1329 "reduce/reduce conflict on tokens %s",
1330 bitset_count (conflict_syms)),
1331 tokens);
1332 else
1334 fputs (prefix, out);
1335 fprintf (out,
1336 ngettext ("reduce/reduce conflict on token %s",
1337 "reduce/reduce conflict on tokens %s",
1338 bitset_count (conflict_syms)),
1339 tokens);
1340 fprintf (out, "%s\n", _(":"));
1342 obstack_free (&obstack, NULL);
1344 // In the report, print the items.
1345 if (out != stderr || trace_flag & trace_cex)
1347 state_item_print (&state_items[itm1], out, prefix);
1348 state_item_print (&state_items[itm2], out, prefix);
1350 counterexample_report (itm1, itm2, bitset_first (conflict_syms),
1351 false, out, prefix);
1354 static state_item_number
1355 find_state_item_number (const rule *r, state_number sn)
1357 for (state_item_number i = state_item_map[sn]; i < state_item_map[sn + 1]; ++i)
1358 if (!SI_DISABLED (i)
1359 && item_number_as_rule_number (*state_items[i].item) == r->number)
1360 return i;
1361 abort ();
1364 void
1365 counterexample_report_state (const state *s, FILE *out, const char *prefix)
1367 const state_number sn = s->number;
1368 const reductions *reds = s->reductions;
1369 bitset lookaheads = bitset_create (ntokens, BITSET_FIXED);
1370 for (int i = 0; i < reds->num; ++i)
1372 const rule *r1 = reds->rules[i];
1373 const state_item_number c1 = find_state_item_number (r1, sn);
1374 for (state_item_number c2 = state_item_map[sn]; c2 < state_item_map[sn + 1]; ++c2)
1375 if (!SI_DISABLED (c2))
1377 item_number conf = *state_items[c2].item;
1378 if (item_number_is_symbol_number (conf)
1379 && bitset_test (reds->lookaheads[i], conf))
1380 counterexample_report_shift_reduce (c1, c2, conf, out, prefix);
1382 for (int j = i+1; j < reds->num; ++j)
1384 const rule *r2 = reds->rules[j];
1385 // Conflicts: common lookaheads.
1386 bitset_intersection (lookaheads,
1387 reds->lookaheads[i],
1388 reds->lookaheads[j]);
1389 if (!bitset_empty_p (lookaheads))
1390 for (state_item_number c2 = state_item_map[sn]; c2 < state_item_map[sn + 1]; ++c2)
1391 if (!SI_DISABLED (c2)
1392 && item_rule (state_items[c2].item) == r2)
1394 counterexample_report_reduce_reduce (c1, c2, lookaheads, out, prefix);
1395 break;
1399 bitset_free (lookaheads);