1 /* Conflict counterexample generation
3 Copyright (C) 2020-2021 Free Software Foundation, Inc.
5 This file is part of Bison, the GNU Compiler Compiler.
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
22 #include "counterexample.h"
27 #include <gl_linked_list.h>
28 #include <gl_rbtreehash_list.h>
32 #include <textstyle.h>
37 #include "derivation.h"
43 #include "parse-simulation.h"
46 #define TIME_LIMIT_ENFORCED true
47 /** If set to false, only consider the states on the shortest
48 * lookahead-sensitive path when constructing a unifying counterexample. */
49 #define EXTENDED_SEARCH false
51 /* costs for making various steps in a search */
52 #define PRODUCTION_COST 50
55 #define UNSHIFT_COST 1
56 #define EXTENDED_COST 10000
58 /** The time limit before printing an assurance message to the user to
59 * indicate that the search is still running. */
60 #define ASSURANCE_LIMIT 2.0f
62 /* The time limit before giving up looking for unifying counterexample. */
63 static float time_limit
= 5.0f
;
65 #define CUMULATIVE_TIME_LIMIT 120.0f
67 // This is the fastest way to get the tail node from the gl_list API.
69 list_get_end (gl_list_t list
)
71 gl_list_node_t sentinel
= gl_list_add_last (list
, NULL
);
72 gl_list_node_t res
= gl_list_previous_node (list
, sentinel
);
73 gl_list_remove_node (list
, sentinel
);
86 static counterexample
*
87 new_counterexample (derivation
*d1
, derivation
*d2
,
91 counterexample
*res
= xmalloc (sizeof *res
);
92 res
->shift_reduce
= shift_reduce
;
95 // Display the shift first.
110 free_counterexample (counterexample
*cex
)
112 derivation_free (cex
->d1
);
113 derivation_free (cex
->d2
);
118 counterexample_print (const counterexample
*cex
, FILE *out
, const char *prefix
)
120 const bool flat
= getenv ("YYFLAT");
121 const char *example1_label
122 = cex
->unifying
? _("Example") : _("First example");
123 const char *example2_label
124 = cex
->unifying
? _("Example") : _("Second example");
125 const char *deriv1_label
126 = cex
->shift_reduce
? _("Shift derivation") : _("First reduce derivation");
127 const char *deriv2_label
128 = cex
->shift_reduce
? _("Reduce derivation") : _("Second reduce derivation");
130 max_int (max_int (mbswidth (example1_label
, 0), mbswidth (example2_label
, 0)),
131 max_int (mbswidth (deriv1_label
, 0), mbswidth (deriv2_label
, 0)));
133 fprintf (out
, " %s%s%*s ", prefix
,
134 example1_label
, width
- mbswidth (example1_label
, 0), "");
136 fprintf (out
, " %s%s: ", prefix
, example1_label
);
137 derivation_print_leaves (cex
->d1
, out
);
139 fprintf (out
, " %s%s%*s ", prefix
,
140 deriv1_label
, width
- mbswidth (deriv1_label
, 0), "");
142 fprintf (out
, " %s%s", prefix
, deriv1_label
);
143 derivation_print (cex
->d1
, out
, prefix
);
145 // If we output to the terminal (via stderr) and we have color
146 // support, display unifying examples a second time, as color allows
147 // to see the differences.
148 if (!cex
->unifying
|| is_styled (stderr
))
151 fprintf (out
, " %s%s%*s ", prefix
,
152 example2_label
, width
- mbswidth (example2_label
, 0), "");
154 fprintf (out
, " %s%s: ", prefix
, example2_label
);
155 derivation_print_leaves (cex
->d2
, out
);
158 fprintf (out
, " %s%s%*s ", prefix
,
159 deriv2_label
, width
- mbswidth (deriv2_label
, 0), "");
161 fprintf (out
, " %s%s", prefix
, deriv2_label
);
162 derivation_print (cex
->d2
, out
, prefix
);
170 * NON UNIFYING COUNTER EXAMPLES
174 // Search node for BFS on state items
176 typedef struct si_bfs_node
178 state_item_number si
;
179 struct si_bfs_node
*parent
;
184 si_bfs_new (state_item_number si
, si_bfs_node
*parent
)
186 si_bfs_node
*res
= xcalloc (1, sizeof *res
);
188 res
->parent
= parent
;
189 res
->reference_count
= 1;
191 ++parent
->reference_count
;
196 si_bfs_contains (const si_bfs_node
*n
, state_item_number sin
)
198 for (const si_bfs_node
*search
= n
; search
!= NULL
; search
= search
->parent
)
199 if (search
->si
== sin
)
205 si_bfs_free (si_bfs_node
*n
)
209 --n
->reference_count
;
210 if (n
->reference_count
== 0)
212 si_bfs_free (n
->parent
);
217 typedef gl_list_t si_bfs_node_list
;
220 * start is a state_item such that conflict_sym is an element of FIRSTS of the
221 * nonterminal after the dot in start. Because of this, we should be able to
222 * find a production item starting with conflict_sym by only searching productions
223 * of the nonterminal and shifting over nullable nonterminals
225 * this returns the derivation of the productions that lead to conflict_sym
227 static inline derivation_list
228 expand_to_conflict (state_item_number start
, symbol_number conflict_sym
)
230 si_bfs_node
*init
= si_bfs_new (start
, NULL
);
232 si_bfs_node_list queue
233 = gl_list_create (GL_LINKED_LIST
, NULL
, NULL
,
234 (gl_listelement_dispose_fn
) si_bfs_free
,
235 true, 1, (const void **) &init
);
236 si_bfs_node
*node
= NULL
;
237 // breadth-first search for a path of productions to the conflict symbol
238 while (gl_list_size (queue
) > 0)
240 node
= (si_bfs_node
*) gl_list_get_at (queue
, 0);
241 state_item
*silast
= &state_items
[node
->si
];
242 symbol_number sym
= item_number_as_symbol_number (*silast
->item
);
243 if (sym
== conflict_sym
)
247 // add each production to the search
248 bitset_iterator biter
;
249 state_item_number sin
;
250 bitset sib
= silast
->prods
;
251 BITSET_FOR_EACH (biter
, sib
, sin
, 0)
253 // ignore productions already in the path
254 if (si_bfs_contains (node
, sin
))
256 si_bfs_node
*next
= si_bfs_new (sin
, node
);
257 gl_list_add_last (queue
, next
);
259 // for nullable nonterminals, add its goto to the search
260 if (nullable
[sym
- ntokens
])
262 si_bfs_node
*next
= si_bfs_new (silast
->trans
, node
);
263 gl_list_add_last (queue
, next
);
266 gl_list_remove_at (queue
, 0);
268 if (gl_list_size (queue
) == 0)
270 gl_list_free (queue
);
271 fputs ("Error expanding derivation\n", stderr
);
275 derivation
*dinit
= derivation_new_leaf (conflict_sym
);
276 derivation_list result
= derivation_list_new ();
277 derivation_list_append (result
, dinit
);
278 // iterate backwards through the generated path to create a derivation
279 // of the conflict symbol containing derivations of each production step.
281 for (si_bfs_node
*n
= node
; n
!= NULL
; n
= n
->parent
)
283 state_item
*si
= &state_items
[n
->si
];
284 item_number
*pos
= si
->item
;
285 if (SI_PRODUCTION (si
))
287 item_number
*i
= NULL
;
288 for (i
= pos
+ 1; !item_number_is_rule_number (*i
); ++i
)
289 derivation_list_append (result
, derivation_new_leaf (*i
));
291 rules
[item_number_as_rule_number (*i
)].lhs
->number
;
292 derivation
*deriv
= derivation_new (lhs
, result
,
293 state_item_rule (si
));
294 result
= derivation_list_new ();
295 derivation_list_append (result
, deriv
);
299 symbol_number sym
= item_number_as_symbol_number (*(pos
- 1));
300 derivation
*deriv
= derivation_new_leaf (sym
);
301 derivation_list_prepend (result
, deriv
);
304 gl_list_free (queue
);
305 derivation_free ((derivation
*)gl_list_get_at (result
, 0));
306 gl_list_remove_at (result
, 0);
311 * Complete derivations for any pending productions in the given
312 * sequence of state-items. For example, the input could be a path
313 * of states that would give us the following input:
314 * Stmt ::= [lval ::= [VAR] '=' e ::=[ e::=['0'] '+' •
315 * So to complete the derivation of Stmt, we need an output like:
316 * Stmt ::= [lval ::= [VAR] '=' e ::=[ e::=['0'] '+' • e ] ';' ]
319 complete_diverging_example (symbol_number conflict_sym
,
320 state_item_list path
, derivation_list derivs
)
322 // The idea is to transfer each pending symbol on the productions
323 // associated with the given StateItems to the resulting derivation.
324 derivation_list result
= derivation_list_new ();
325 bool lookahead_required
= false;
328 derivs
= derivation_list_new ();
329 gl_list_add_last (result
, derivation_dot ());
330 lookahead_required
= true;
333 gl_list_node_t deriv
= list_get_end (derivs
);
335 // We go backwards through the path to create the derivation tree bottom-up.
336 // Effectively this loops through each production once, and generates a
337 // derivation of the left hand side by appending all of the rhs symbols.
338 // this becomes the derivation of the nonterminal after the dot in the
339 // next production, and all of the other symbols of the rule are added as normal.
340 for (gl_list_node_t state_node
= list_get_end (path
);
342 state_node
= gl_list_previous_node (path
, state_node
))
344 state_item
*si
= (state_item
*) gl_list_node_value (path
, state_node
);
345 item_number
*item
= si
->item
;
346 item_number pos
= *item
;
348 if (gl_list_size (result
) == 1 && !item_number_is_rule_number (pos
)
349 && gl_list_get_at (result
, 0) == derivation_dot ())
351 derivation_list_append (result
,
352 derivation_new_leaf (item_number_as_symbol_number (pos
)));
353 lookahead_required
= false;
355 item_number
*i
= item
;
356 // go through each symbol after the dot in the current rule, and
357 // add each symbol to its derivation.
358 for (state_item_number nsi
= si
- state_items
;
359 !item_number_is_rule_number (*i
);
360 ++i
, nsi
= state_items
[nsi
].trans
)
362 // if the item is a reduction, we could skip to the wrong rule
363 // by starting at i + 1, so this continue is necessary
366 symbol_number sym
= item_number_as_symbol_number (*i
);
367 if (!lookahead_required
|| sym
== conflict_sym
)
369 derivation_list_append (result
, derivation_new_leaf (sym
));
370 lookahead_required
= false;
373 // Since PATH is a path to the conflict state-item,
374 // for a reduce conflict item, we will want to have a derivation
375 // that shows the conflict symbol from its lookahead set being used.
377 // Since reductions have the dot at the end of the item,
378 // this loop will be first executed on the last item in the path
379 // that's not a reduction. When that happens,
380 // the symbol after the dot should be a nonterminal,
381 // and we can look through successive nullable nonterminals
382 // for one with the conflict symbol in its first set.
383 if (bitset_test (FIRSTS (sym
), conflict_sym
))
385 lookahead_required
= false;
386 derivation_list next_derivs
=
387 expand_to_conflict (nsi
, conflict_sym
);
388 derivation
*d
= NULL
;
389 for (gl_list_iterator_t it
= gl_list_iterator (next_derivs
);
390 derivation_list_next (&it
, &d
);)
391 derivation_list_append (result
, d
);
392 i
+= gl_list_size (next_derivs
) - 1;
393 derivation_list_free (next_derivs
);
395 else if (nullable
[sym
- ntokens
])
397 derivation
*d
= derivation_new_leaf (sym
);
398 derivation_list_append (result
, d
);
402 // We found a path to the conflict item, and despite it
403 // having the conflict symbol in its lookahead, no example
404 // containing the symbol after the conflict item
406 derivation_list_append (result
, derivation_new_leaf (1));
407 lookahead_required
= false;
410 const rule
*r
= &rules
[item_number_as_rule_number (*i
)];
411 // add derivations for symbols before dot
412 for (i
= item
- 1; !item_number_is_rule_number (*i
) && i
>= ritem
; i
--)
414 gl_list_node_t p
= gl_list_previous_node (path
, state_node
);
419 const void *tmp_deriv
= gl_list_node_value (derivs
, deriv
);
420 deriv
= gl_list_previous_node (derivs
, deriv
);
421 derivation_list_prepend (result
, (derivation
*)tmp_deriv
);
424 derivation_list_prepend (result
, derivation_new_leaf (*i
));
426 // completing the derivation
427 derivation
*new_deriv
= derivation_new (r
->lhs
->number
, result
, r
);
428 result
= derivation_list_new ();
429 derivation_list_append (result
, new_deriv
);
431 derivation
*res
= (derivation
*) gl_list_get_at (result
, 0);
432 derivation_retain (res
);
433 derivation_list_free (result
);
434 derivation_list_free (derivs
);
438 /* Iterate backwards through the shifts of the path in the reduce
439 conflict, and find a path of shifts from the shift conflict that
440 goes through the same states. */
441 static state_item_list
442 nonunifying_shift_path (state_item_list reduce_path
, state_item
*shift_conflict
)
444 gl_list_node_t tmp
= gl_list_add_last (reduce_path
, NULL
);
445 gl_list_node_t next_node
= gl_list_previous_node (reduce_path
, tmp
);
446 gl_list_node_t node
= gl_list_previous_node (reduce_path
, next_node
);
447 gl_list_remove_node (reduce_path
, tmp
);
448 state_item
*si
= shift_conflict
;
449 state_item_list result
=
450 gl_list_create_empty (GL_LINKED_LIST
, NULL
, NULL
, NULL
, true);
451 // FIXME: bool paths_merged;
452 for (; node
!= NULL
; next_node
= node
,
453 node
= gl_list_previous_node (reduce_path
, node
))
456 (state_item
*) gl_list_node_value (reduce_path
, node
);
457 state_item
*nextrefsi
=
458 (state_item
*) gl_list_node_value (reduce_path
, next_node
);
461 gl_list_add_first (result
, refsi
);
465 // skip reduction items
466 if (nextrefsi
->item
!= refsi
->item
+ 1 && refsi
->item
!= ritem
)
469 // bfs to find a shift to the right state
470 si_bfs_node
*init
= si_bfs_new (si
- state_items
, NULL
);
471 si_bfs_node_list queue
472 = gl_list_create (GL_LINKED_LIST
, NULL
, NULL
,
473 (gl_listelement_dispose_fn
) si_bfs_free
,
474 true, 1, (const void **) &init
);
475 si_bfs_node
*sis
= NULL
;
476 state_item
*prevsi
= NULL
;
477 while (gl_list_size (queue
) > 0)
479 sis
= (si_bfs_node
*) gl_list_get_at (queue
, 0);
480 // if we end up in the start state, the shift couldn't be found.
484 state_item
*search_si
= &state_items
[sis
->si
];
485 // if the current state-item is a production item,
486 // its reverse production items get added to the queue.
487 // Otherwise, look for a reverse transition to the target state.
488 bitset rsi
= search_si
->revs
;
489 bitset_iterator biter
;
490 state_item_number sin
;
491 BITSET_FOR_EACH (biter
, rsi
, sin
, 0)
493 prevsi
= &state_items
[sin
];
494 if (SI_TRANSITION (search_si
))
496 if (prevsi
->state
== refsi
->state
)
499 else if (!si_bfs_contains (sis
, sin
))
501 si_bfs_node
*prevsis
= si_bfs_new (sin
, sis
);
502 gl_list_add_last (queue
, prevsis
);
505 gl_list_remove_at (queue
, 0);
508 // prepend path to shift we found
511 gl_list_node_t ln
= gl_list_add_first (result
, &state_items
[sis
->si
]);
512 for (si_bfs_node
*n
= sis
->parent
; n
; n
= n
->parent
)
513 ln
= gl_list_add_after (result
, ln
, &state_items
[n
->si
]);
517 gl_list_free (queue
);
519 if (trace_flag
& trace_cex
)
521 fputs ("SHIFT ITEM PATH:\n", stderr
);
522 state_item
*sip
= NULL
;
523 for (gl_list_iterator_t it
= gl_list_iterator (result
);
524 state_item_list_next (&it
, &sip
);
526 state_item_print (sip
, stderr
, "");
533 * Construct a nonunifying counterexample from the shortest
534 * lookahead-sensitive path.
536 static counterexample
*
537 example_from_path (bool shift_reduce
,
538 state_item_number itm2
,
539 state_item_list shortest_path
, symbol_number next_sym
)
542 complete_diverging_example (next_sym
, shortest_path
, NULL
);
543 state_item_list path_2
545 ? nonunifying_shift_path (shortest_path
, &state_items
[itm2
])
546 : shortest_path_from_start (itm2
, next_sym
);
547 derivation
*deriv2
= complete_diverging_example (next_sym
, path_2
, NULL
);
548 gl_list_free (path_2
);
549 return new_counterexample (deriv1
, deriv2
, shift_reduce
, false, true);
554 * UNIFYING COUNTER EXAMPLES
558 /* A search state keeps track of two parser simulations,
559 * one starting at each conflict. Complexity is a metric
560 * which sums different parser actions with varying weights.
564 parse_state
*states
[2];
568 static search_state
*
569 initial_search_state (state_item
*conflict1
, state_item
*conflict2
)
571 search_state
*res
= xmalloc (sizeof *res
);
572 res
->states
[0] = new_parse_state (conflict1
);
573 res
->states
[1] = new_parse_state (conflict2
);
574 parse_state_retain (res
->states
[0]);
575 parse_state_retain (res
->states
[1]);
580 static search_state
*
581 new_search_state (parse_state
*ps1
, parse_state
*ps2
, int complexity
)
583 search_state
*res
= xmalloc (sizeof *res
);
584 res
->states
[0] = ps1
;
585 res
->states
[1] = ps2
;
586 parse_state_retain (res
->states
[0]);
587 parse_state_retain (res
->states
[1]);
588 res
->complexity
= complexity
;
592 static search_state
*
593 copy_search_state (search_state
*parent
)
595 search_state
*res
= xmalloc (sizeof *res
);
597 parse_state_retain (res
->states
[0]);
598 parse_state_retain (res
->states
[1]);
603 search_state_free_children (search_state
*ss
)
605 free_parse_state (ss
->states
[0]);
606 free_parse_state (ss
->states
[1]);
610 search_state_free (search_state
*ss
)
614 search_state_free_children (ss
);
618 /* For debugging traces. */
620 search_state_print (search_state
*ss
)
622 fputs ("CONFLICT 1 ", stderr
);
623 print_parse_state (ss
->states
[0]);
624 fputs ("CONFLICT 2 ", stderr
);
625 print_parse_state (ss
->states
[1]);
629 typedef gl_list_t search_state_list
;
632 search_state_list_next (gl_list_iterator_t
*it
, search_state
**ss
)
634 const void *p
= NULL
;
635 bool res
= gl_list_iterator_next (it
, &p
, NULL
);
637 *ss
= (search_state
*) p
;
639 gl_list_iterator_free (it
);
644 * When a search state is copied, this is used to
645 * directly set one of the parse states
648 ss_set_parse_state (search_state
*ss
, int idx
, parse_state
*ps
)
650 free_parse_state (ss
->states
[idx
]);
651 ss
->states
[idx
] = ps
;
652 parse_state_retain (ps
);
656 * Construct a nonunifying example from a search state
657 * which has its parse states unified at the beginning
658 * but not the end of the example.
660 static counterexample
*
661 complete_diverging_examples (search_state
*ss
,
662 symbol_number next_sym
,
665 derivation
*new_derivs
[2];
666 for (int i
= 0; i
< 2; ++i
)
668 state_item_list sitems
;
669 derivation_list derivs
;
670 parse_state_lists (ss
->states
[i
], &sitems
, &derivs
);
671 new_derivs
[i
] = complete_diverging_example (next_sym
, sitems
, derivs
);
672 gl_list_free (sitems
);
674 return new_counterexample (new_derivs
[0], new_derivs
[1],
675 shift_reduce
, false, true);
679 * Search states are stored in bundles with those that
680 * share the same complexity. This is so the priority
681 * queue takes less overhead.
685 search_state_list states
;
687 } search_state_bundle
;
690 ssb_free (search_state_bundle
*ssb
)
692 gl_list_free (ssb
->states
);
697 ssb_hasher (search_state_bundle
*ssb
)
699 return ssb
->complexity
;
703 ssb_comp (const search_state_bundle
*s1
, const search_state_bundle
*s2
)
705 return s1
->complexity
- s2
->complexity
;
709 ssb_equals (const search_state_bundle
*s1
, const search_state_bundle
*s2
)
711 return s1
->complexity
== s2
->complexity
;
714 typedef gl_list_t ssb_list
;
717 visited_hasher (const search_state
*ss
, size_t max
)
719 return (parse_state_hasher (ss
->states
[0], max
)
720 + parse_state_hasher (ss
->states
[1], max
)) % max
;
724 visited_comparator (const search_state
*ss1
, const search_state
*ss2
)
726 return parse_state_comparator (ss1
->states
[0], ss2
->states
[0])
727 && parse_state_comparator (ss1
->states
[1], ss2
->states
[1]);
730 /* Priority queue for search states with minimal complexity. */
731 static ssb_list ssb_queue
;
732 static Hash_table
*visited
;
733 /* The set of parser states on the shortest lookahead-sensitive path. */
734 static bitset scp_set
= NULL
;
735 /* The set of parser states used for the conflict reduction rule. */
736 static bitset rpp_set
= NULL
;
739 ssb_append (search_state
*ss
)
741 if (hash_lookup (visited
, ss
))
743 search_state_free (ss
);
746 hash_xinsert (visited
, ss
);
747 // if states are only referenced by the visited set,
748 // their contents should be freed as we only need
749 // the metadata necessary to compute a hash.
750 parse_state_free_contents_early (ss
->states
[0]);
751 parse_state_free_contents_early (ss
->states
[1]);
752 parse_state_retain (ss
->states
[0]);
753 parse_state_retain (ss
->states
[1]);
754 search_state_bundle
*ssb
= xmalloc (sizeof *ssb
);
755 ssb
->complexity
= ss
->complexity
;
756 gl_list_node_t n
= gl_list_search (ssb_queue
, ssb
);
760 gl_list_create_empty (GL_LINKED_LIST
, NULL
, NULL
,
761 (gl_listelement_dispose_fn
)search_state_free_children
,
763 gl_sortedlist_add (ssb_queue
, (gl_listelement_compar_fn
) ssb_comp
, ssb
);
768 ssb
= (search_state_bundle
*) gl_list_node_value (ssb_queue
, n
);
770 gl_list_add_last (ssb
->states
, ss
);
774 * The following functions perform various actions on parse states
775 * and assign complexities to the newly generated search states.
778 production_step (search_state
*ss
, int parser_state
)
780 const state_item
*other_si
= parse_state_tail (ss
->states
[1 - parser_state
]);
781 symbol_number other_sym
= item_number_as_symbol_number (*other_si
->item
);
782 parse_state_list prods
=
783 simulate_production (ss
->states
[parser_state
], other_sym
);
784 int complexity
= ss
->complexity
+ PRODUCTION_COST
;
786 parse_state
*ps
= NULL
;
787 for (gl_list_iterator_t it
= gl_list_iterator (prods
);
788 parse_state_list_next (&it
, &ps
);
791 search_state
*copy
= copy_search_state (ss
);
792 ss_set_parse_state (copy
, parser_state
, ps
);
793 copy
->complexity
= complexity
;
796 gl_list_free (prods
);
800 reduction_cost (const parse_state
*ps
)
804 parse_state_completed_steps (ps
, &shifts
, &productions
);
805 return SHIFT_COST
* shifts
+ PRODUCTION_COST
* productions
;
808 static search_state_list
809 reduction_step (search_state
*ss
, const item_number
*conflict_item
,
810 int parser_state
, int rule_len
)
812 (void) conflict_item
; // FIXME: Unused
813 search_state_list result
=
814 gl_list_create_empty (GL_LINKED_LIST
, NULL
, NULL
, NULL
, 1);
816 parse_state
*ps
= ss
->states
[parser_state
];
817 const state_item
*si
= parse_state_tail (ps
);
818 bitset symbol_set
= si
->lookahead
;
819 parse_state
*other
= ss
->states
[1 - parser_state
];
820 const state_item
*other_si
= parse_state_tail (other
);
821 // if the other state can transition on a symbol,
822 // the reduction needs to have that symbol in its lookahead
823 if (item_number_is_symbol_number (*other_si
->item
))
825 symbol_number other_sym
=
826 item_number_as_symbol_number (*other_si
->item
);
827 if (!intersect_symbol (other_sym
, symbol_set
))
829 symbol_set
= bitset_create (nsyms
, BITSET_FIXED
);
830 bitset_set (symbol_set
, other_sym
);
833 // FIXME: search_state *new_root = copy_search_state (ss);
834 parse_state_list reduced
=
835 simulate_reduction (ps
, rule_len
, symbol_set
);
836 parse_state
*reduced_ps
= NULL
;
837 for (gl_list_iterator_t it
= gl_list_iterator (reduced
);
838 parse_state_list_next (&it
, &reduced_ps
);
841 search_state
*copy
= copy_search_state (ss
);
842 ss_set_parse_state (copy
, parser_state
, reduced_ps
);
843 int r_cost
= reduction_cost (reduced_ps
);
844 copy
->complexity
+= r_cost
+ PRODUCTION_COST
+ 2 * SHIFT_COST
;
845 gl_list_add_last (result
, copy
);
847 gl_list_free (reduced
);
848 if (symbol_set
!= si
->lookahead
)
849 bitset_free (symbol_set
);
854 * Attempt to prepend the given symbol to this search state, respecting
855 * the given subsequent next symbol on each path. If a reverse transition
856 * cannot be made on both states, possible reverse productions are prepended
859 search_state_prepend (search_state
*ss
, symbol_number sym
, bitset guide
)
861 (void) sym
; // FIXME: Unused.
862 const state_item
*si1src
= parse_state_head (ss
->states
[0]);
863 const state_item
*si2src
= parse_state_head (ss
->states
[1]);
865 bool prod1
= SI_PRODUCTION (si1src
);
866 // If one can make a reverse transition and the other can't, only apply
867 // the reverse productions that the other state can make in an attempt to
869 if (prod1
!= SI_PRODUCTION (si2src
))
871 int prod_state
= prod1
? 0 : 1;
872 parse_state_list prev
= parser_prepend (ss
->states
[prod_state
]);
873 parse_state
*ps
= NULL
;
874 for (gl_list_iterator_t iter
= gl_list_iterator (prev
);
875 parse_state_list_next (&iter
, &ps
);
878 const state_item
*psi
= parse_state_head (ps
);
879 bool guided
= bitset_test (guide
, psi
->state
->number
);
880 if (!guided
&& !EXTENDED_SEARCH
)
883 search_state
*copy
= copy_search_state (ss
);
884 ss_set_parse_state (copy
, prod_state
, ps
);
885 copy
->complexity
+= PRODUCTION_COST
;
887 copy
->complexity
+= EXTENDED_COST
;
893 // The parse state heads are either both production items or both
894 // transition items. So all prepend options will either be
895 // reverse transitions or reverse productions
896 int complexity_cost
= prod1
? PRODUCTION_COST
: UNSHIFT_COST
;
897 complexity_cost
*= 2;
899 parse_state_list prev1
= parser_prepend (ss
->states
[0]);
900 parse_state_list prev2
= parser_prepend (ss
->states
[1]);
902 // loop through each pair of possible prepend states and append search
903 // states for each pair where the parser states correspond to the same
905 parse_state
*ps1
= NULL
;
906 for (gl_list_iterator_t iter1
= gl_list_iterator (prev1
);
907 parse_state_list_next (&iter1
, &ps1
);
910 const state_item
*psi1
= parse_state_head (ps1
);
911 bool guided1
= bitset_test (guide
, psi1
->state
->number
);
912 if (!guided1
&& !EXTENDED_SEARCH
)
915 parse_state
*ps2
= NULL
;
916 for (gl_list_iterator_t iter2
= gl_list_iterator (prev2
);
917 parse_state_list_next (&iter2
, &ps2
);
920 const state_item
*psi2
= parse_state_head (ps2
);
922 bool guided2
= bitset_test (guide
, psi2
->state
->number
);
923 if (!guided2
&& !EXTENDED_SEARCH
)
925 // Only consider prepend state items that share the same state.
926 if (psi1
->state
!= psi2
->state
)
929 int complexity
= ss
->complexity
;
931 complexity
+= PRODUCTION_COST
* 2;
933 complexity
+= UNSHIFT_COST
* 2;
934 // penalty for not being along the guide path
935 if (!guided1
|| !guided2
)
936 complexity
+= EXTENDED_COST
;
937 ssb_append (new_search_state (ps1
, ps2
, complexity
));
940 gl_list_free (prev1
);
941 gl_list_free (prev2
);
945 * Determine if the productions associated with the given parser items have
946 * the same prefix up to the dot.
949 have_common_prefix (const item_number
*itm1
, const item_number
*itm2
)
952 for (; !item_number_is_rule_number (itm1
[i
]); ++i
)
953 if (itm1
[i
] != itm2
[i
])
955 return item_number_is_rule_number (itm2
[i
]);
959 * The start and end locations of an item in ritem.
961 static const item_number
*
962 item_rule_start (const item_number
*item
)
964 const item_number
*res
= NULL
;
966 ritem
< res
&& item_number_is_symbol_number (*(res
- 1));
972 static const item_number
*
973 item_rule_end (const item_number
*item
)
975 const item_number
*res
= NULL
;
976 for (res
= item
; item_number_is_symbol_number (*res
); ++res
)
982 * Perform the appropriate possible parser actions
983 * on a search state and add the results to the
984 * search state priority queue.
987 generate_next_states (search_state
*ss
, state_item
*conflict1
,
988 state_item
*conflict2
)
990 // Compute the successor configurations.
991 parse_state
*ps1
= ss
->states
[0];
992 parse_state
*ps2
= ss
->states
[1];
993 const state_item
*si1
= parse_state_tail (ps1
);
994 const state_item
*si2
= parse_state_tail (ps2
);
995 bool si1reduce
= item_number_is_rule_number (*si1
->item
);
996 bool si2reduce
= item_number_is_rule_number (*si2
->item
);
997 if (!si1reduce
&& !si2reduce
)
999 // Transition if both paths end at the same symbol
1000 if (*si1
->item
== *si2
->item
)
1002 int complexity
= ss
->complexity
+ 2 * SHIFT_COST
;
1003 parse_state_list trans1
= simulate_transition (ps1
);
1004 parse_state_list trans2
= simulate_transition (ps2
);
1005 parse_state
*tps1
= NULL
;
1006 parse_state
*tps2
= NULL
;
1007 for (gl_list_iterator_t it1
= gl_list_iterator (trans1
);
1008 parse_state_list_next (&it1
, &tps1
);
1010 for (gl_list_iterator_t it2
= gl_list_iterator (trans2
);
1011 parse_state_list_next (&it2
, &tps2
);
1013 ssb_append (new_search_state (tps1
, tps2
, complexity
));
1014 gl_list_free (trans1
);
1015 gl_list_free (trans2
);
1018 // Take production steps if possible.
1019 production_step (ss
, 0);
1020 production_step (ss
, 1);
1022 // One of the states requires a reduction
1025 const item_number
*rhs1
= item_rule_start (si1
->item
);
1026 const item_number
*rhe1
= item_rule_end (si1
->item
);
1027 int len1
= rhe1
- rhs1
;
1028 int size1
= parse_state_length (ps1
);
1029 bool ready1
= si1reduce
&& len1
< size1
;
1031 const item_number
*rhs2
= item_rule_start (si2
->item
);
1032 const item_number
*rhe2
= item_rule_end (si2
->item
);
1033 int len2
= rhe2
- rhs2
;
1034 int size2
= parse_state_length (ps2
);
1035 bool ready2
= si2reduce
&& len2
< size2
;
1036 // If there is a path ready for reduction without being
1037 // prepended further, reduce.
1038 if (ready1
&& ready2
)
1040 search_state_list reduced1
= reduction_step (ss
, conflict1
->item
, 0, len1
);
1041 gl_list_add_last (reduced1
, ss
);
1042 search_state
*red1
= NULL
;
1043 for (gl_list_iterator_t iter
= gl_list_iterator (reduced1
);
1044 search_state_list_next (&iter
, &red1
);
1047 search_state_list reduced2
=
1048 reduction_step (red1
, conflict2
->item
, 1, len2
);
1049 search_state
*red2
= NULL
;
1050 for (gl_list_iterator_t iter2
= gl_list_iterator (reduced2
);
1051 search_state_list_next (&iter2
, &red2
);
1054 // Avoid duplicates.
1057 gl_list_free (reduced2
);
1059 gl_list_free (reduced1
);
1063 search_state_list reduced1
= reduction_step (ss
, conflict1
->item
, 0, len1
);
1064 search_state
*red1
= NULL
;
1065 for (gl_list_iterator_t iter
= gl_list_iterator (reduced1
);
1066 search_state_list_next (&iter
, &red1
);
1069 gl_list_free (reduced1
);
1073 search_state_list reduced2
= reduction_step (ss
, conflict2
->item
, 1, len2
);
1074 search_state
*red2
= NULL
;
1075 for (gl_list_iterator_t iter2
= gl_list_iterator (reduced2
);
1076 search_state_list_next (&iter2
, &red2
);
1079 gl_list_free (reduced2
);
1081 /* Both states end with a reduction, yet they don't have enough symbols
1082 * to reduce. This means symbols are missing from the beginning of the
1083 * rule, so we must prepend */
1086 const symbol_number sym
1087 = si1reduce
&& !ready1
1090 search_state_prepend (ss
, sym
,
1091 parse_state_depth (ss
->states
[0]) >= 0
1092 ? rpp_set
: scp_set
);
1098 * Perform the actual counterexample search,
1099 * keeps track of what stage of the search algorithm
1100 * we are at and gives the appropriate counterexample
1101 * type based off of time constraints.
1103 static counterexample
*
1104 unifying_example (state_item_number itm1
,
1105 state_item_number itm2
,
1107 state_item_list reduce_path
, symbol_number next_sym
)
1109 state_item
*conflict1
= &state_items
[itm1
];
1110 state_item
*conflict2
= &state_items
[itm2
];
1111 search_state
*initial
= initial_search_state (conflict1
, conflict2
);
1112 ssb_queue
= gl_list_create_empty (GL_RBTREEHASH_LIST
,
1113 (gl_listelement_equals_fn
) ssb_equals
,
1114 (gl_listelement_hashcode_fn
) ssb_hasher
,
1115 (gl_listelement_dispose_fn
) ssb_free
,
1118 hash_initialize (32, NULL
, (Hash_hasher
) visited_hasher
,
1119 (Hash_comparator
) visited_comparator
,
1120 (Hash_data_freer
) search_state_free
);
1121 ssb_append (initial
);
1122 time_t start
= time (NULL
);
1123 bool assurance_printed
= false;
1124 search_state
*stage3result
= NULL
;
1125 counterexample
*cex
= NULL
;
1126 while (gl_list_size (ssb_queue
) > 0)
1128 const search_state_bundle
*ssb
= gl_list_get_at (ssb_queue
, 0);
1130 search_state
*ss
= NULL
;
1131 for (gl_list_iterator_t it
= gl_list_iterator (ssb
->states
);
1132 search_state_list_next (&it
, &ss
);
1135 if (trace_flag
& trace_cex
)
1136 search_state_print (ss
);
1137 // Stage 1/2 completing the rules containing the conflicts
1138 parse_state
*ps1
= ss
->states
[0];
1139 parse_state
*ps2
= ss
->states
[1];
1140 if (parse_state_depth (ps1
) < 0 && parse_state_depth (ps2
) < 0)
1142 // Stage 3: reduce and shift conflict items completed.
1143 const state_item
*si1src
= parse_state_head (ps1
);
1144 const state_item
*si2src
= parse_state_head (ps2
);
1145 if (item_rule (si1src
->item
)->lhs
== item_rule (si2src
->item
)->lhs
1146 && have_common_prefix (si1src
->item
, si2src
->item
))
1148 // Stage 4: both paths share a prefix
1149 derivation
*d1
= parse_state_derivation (ps1
);
1150 derivation
*d2
= parse_state_derivation (ps2
);
1151 if (parse_state_derivation_completed (ps1
)
1152 && parse_state_derivation_completed (ps2
))
1154 // Once we have two derivations for the same symbol,
1155 // we've found a unifying counterexample.
1156 cex
= new_counterexample (d1
, d2
, shift_reduce
, true, false);
1157 derivation_retain (d1
);
1158 derivation_retain (d2
);
1159 goto cex_search_end
;
1162 stage3result
= copy_search_state (ss
);
1165 if (TIME_LIMIT_ENFORCED
)
1167 float time_passed
= difftime (time (NULL
), start
);
1168 if (!assurance_printed
&& time_passed
> ASSURANCE_LIMIT
1171 fputs ("Productions leading up to the conflict state found. "
1172 "Still finding a possible unifying counterexample...",
1174 assurance_printed
= true;
1176 if (time_passed
> time_limit
)
1178 fprintf (stderr
, "time limit exceeded: %f\n", time_passed
);
1179 goto cex_search_end
;
1182 generate_next_states (ss
, conflict1
, conflict2
);
1184 gl_sortedlist_remove (ssb_queue
,
1185 (gl_listelement_compar_fn
) ssb_comp
, ssb
);
1190 // No unifying counterexamples
1191 // If a search state from Stage 3 is available, use it
1192 // to construct a more compact nonunifying counterexample.
1194 cex
= complete_diverging_examples (stage3result
, next_sym
, shift_reduce
);
1195 // Otherwise, construct a nonunifying counterexample that
1196 // begins from the start state using the shortest
1197 // lookahead-sensitive path to the reduce item.
1199 cex
= example_from_path (shift_reduce
, itm2
, reduce_path
, next_sym
);
1201 gl_list_free (ssb_queue
);
1202 hash_free (visited
);
1204 search_state_free (stage3result
);
1208 static time_t cumulative_time
;
1211 counterexample_init (void)
1213 /* Recognize $TIME_LIMIT. Not a public feature, just to help
1214 debugging. If we need something public, a %define/-D/-F variable
1215 would be more appropriate. */
1217 const char *cp
= getenv ("TIME_LIMIT");
1221 float v
= strtof (cp
, &end
);
1222 if (*end
== '\0' && errno
== 0)
1226 time (&cumulative_time
);
1227 scp_set
= bitset_create (nstates
, BITSET_FIXED
);
1228 rpp_set
= bitset_create (nstates
, BITSET_FIXED
);
1229 state_items_init ();
1234 counterexample_free (void)
1238 bitset_free (scp_set
);
1239 bitset_free (rpp_set
);
1240 state_items_free ();
1245 * Report a counterexample for conflict on symbol next_sym
1246 * between the given state-items
1249 counterexample_report (state_item_number itm1
, state_item_number itm2
,
1250 symbol_number next_sym
, bool shift_reduce
,
1251 FILE *out
, const char *prefix
)
1253 // Compute the shortest lookahead-sensitive path and associated sets of
1255 state_item_list shortest_path
= shortest_path_from_start (itm1
, next_sym
);
1256 bool reduce_prod_reached
= false;
1257 const rule
*reduce_rule
= item_rule (state_items
[itm1
].item
);
1259 bitset_zero (scp_set
);
1260 bitset_zero (rpp_set
);
1262 state_item
*si
= NULL
;
1263 for (gl_list_iterator_t it
= gl_list_iterator (shortest_path
);
1264 state_item_list_next (&it
, &si
);
1267 bitset_set (scp_set
, si
->state
->number
);
1268 reduce_prod_reached
= reduce_prod_reached
1269 || item_rule (si
->item
) == reduce_rule
;
1270 if (reduce_prod_reached
)
1271 bitset_set (rpp_set
, si
->state
->number
);
1273 time_t t
= time (NULL
);
1275 = difftime (t
, cumulative_time
) < CUMULATIVE_TIME_LIMIT
1276 ? unifying_example (itm1
, itm2
, shift_reduce
, shortest_path
, next_sym
)
1277 : example_from_path (shift_reduce
, itm2
, shortest_path
, next_sym
);
1279 gl_list_free (shortest_path
);
1280 counterexample_print (cex
, out
, prefix
);
1281 free_counterexample (cex
);
1285 // ITM1 denotes a shift, ITM2 a reduce.
1287 counterexample_report_shift_reduce (state_item_number itm1
, state_item_number itm2
,
1288 symbol_number next_sym
,
1289 FILE *out
, const char *prefix
)
1292 complain (NULL
, Wcounterexamples
,
1293 _("shift/reduce conflict on token %s"), symbols
[next_sym
]->tag
);
1296 fputs (prefix
, out
);
1297 fprintf (out
, _("shift/reduce conflict on token %s"), symbols
[next_sym
]->tag
);
1298 fprintf (out
, "%s\n", _(":"));
1300 // In the report, print the items.
1301 if (out
!= stderr
|| trace_flag
& trace_cex
)
1303 state_item_print (&state_items
[itm1
], out
, prefix
);
1304 state_item_print (&state_items
[itm2
], out
, prefix
);
1306 counterexample_report (itm1
, itm2
, next_sym
, true, out
, prefix
);
1310 counterexample_report_reduce_reduce (state_item_number itm1
, state_item_number itm2
,
1311 bitset conflict_syms
,
1312 FILE *out
, const char *prefix
)
1315 struct obstack obstack
;
1316 obstack_init (&obstack
);
1317 bitset_iterator biter
;
1318 state_item_number sym
;
1319 const char *sep
= "";
1320 BITSET_FOR_EACH (biter
, conflict_syms
, sym
, 0)
1322 obstack_printf (&obstack
, "%s%s", sep
, symbols
[sym
]->tag
);
1325 char *tokens
= obstack_finish0 (&obstack
);
1327 complain (NULL
, Wcounterexamples
,
1328 ngettext ("reduce/reduce conflict on token %s",
1329 "reduce/reduce conflict on tokens %s",
1330 bitset_count (conflict_syms
)),
1334 fputs (prefix
, out
);
1336 ngettext ("reduce/reduce conflict on token %s",
1337 "reduce/reduce conflict on tokens %s",
1338 bitset_count (conflict_syms
)),
1340 fprintf (out
, "%s\n", _(":"));
1342 obstack_free (&obstack
, NULL
);
1344 // In the report, print the items.
1345 if (out
!= stderr
|| trace_flag
& trace_cex
)
1347 state_item_print (&state_items
[itm1
], out
, prefix
);
1348 state_item_print (&state_items
[itm2
], out
, prefix
);
1350 counterexample_report (itm1
, itm2
, bitset_first (conflict_syms
),
1351 false, out
, prefix
);
1354 static state_item_number
1355 find_state_item_number (const rule
*r
, state_number sn
)
1357 for (state_item_number i
= state_item_map
[sn
]; i
< state_item_map
[sn
+ 1]; ++i
)
1358 if (!SI_DISABLED (i
)
1359 && item_number_as_rule_number (*state_items
[i
].item
) == r
->number
)
1365 counterexample_report_state (const state
*s
, FILE *out
, const char *prefix
)
1367 const state_number sn
= s
->number
;
1368 const reductions
*reds
= s
->reductions
;
1369 bitset lookaheads
= bitset_create (ntokens
, BITSET_FIXED
);
1370 for (int i
= 0; i
< reds
->num
; ++i
)
1372 const rule
*r1
= reds
->rules
[i
];
1373 const state_item_number c1
= find_state_item_number (r1
, sn
);
1374 for (state_item_number c2
= state_item_map
[sn
]; c2
< state_item_map
[sn
+ 1]; ++c2
)
1375 if (!SI_DISABLED (c2
))
1377 item_number conf
= *state_items
[c2
].item
;
1378 if (item_number_is_symbol_number (conf
)
1379 && bitset_test (reds
->lookaheads
[i
], conf
))
1380 counterexample_report_shift_reduce (c1
, c2
, conf
, out
, prefix
);
1382 for (int j
= i
+1; j
< reds
->num
; ++j
)
1384 const rule
*r2
= reds
->rules
[j
];
1385 // Conflicts: common lookaheads.
1386 bitset_intersection (lookaheads
,
1387 reds
->lookaheads
[i
],
1388 reds
->lookaheads
[j
]);
1389 if (!bitset_empty_p (lookaheads
))
1390 for (state_item_number c2
= state_item_map
[sn
]; c2
< state_item_map
[sn
+ 1]; ++c2
)
1391 if (!SI_DISABLED (c2
)
1392 && item_rule (state_items
[c2
].item
) == r2
)
1394 counterexample_report_reduce_reduce (c1
, c2
, lookaheads
, out
, prefix
);
1399 bitset_free (lookaheads
);