1 /* Output the generated parsing program for Bison.
3 Copyright (C) 1984, 1986, 1989, 1992, 2000-2006, 2009-2015, 2018-2021
4 Free Software Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
28 #include "conflicts.h"
33 #include "muscle-tab.h"
38 /* Several tables are indexed both by state and nonterminal numbers.
39 We call such an index a 'vector'; i.e., a vector is either a state
40 or a nonterminal number.
42 Of course vector_number_t ought to be wide enough to contain
43 state_number and symbol_number. */
44 typedef int vector_number
;
46 #if 0 /* Not currently used. */
47 static inline vector_number
48 state_number_to_vector_number (state_number s
)
54 static inline vector_number
55 symbol_number_to_vector_number (symbol_number sym
)
57 return state_number_as_int (nstates
) + sym
- ntokens
;
63 /* FROMS and TOS are indexed by vector_number.
65 If VECTOR is a nonterminal, (FROMS[VECTOR], TOS[VECTOR]) form an
66 array of state numbers of the non defaulted GOTO on VECTOR.
68 If VECTOR is a state, TOS[VECTOR] is the array of actions to do on
69 the (array of) symbols FROMS[VECTOR].
71 In both cases, TALLY[VECTOR] is the size of the arrays
72 FROMS[VECTOR], TOS[VECTOR]; and WIDTH[VECTOR] =
73 (FROMS[VECTOR][SIZE] - FROMS[VECTOR][0] + 1) where SIZE =
76 FROMS therefore contains symbol_number and action_number,
77 TOS state_number and action_number,
79 WIDTH differences of FROMS.
81 Let base_number be the type of FROMS, TOS, and WIDTH. */
82 #define BASE_MAXIMUM INT_MAX
83 #define BASE_MINIMUM INT_MIN
85 static base_number
**froms
;
86 static base_number
**tos
;
87 static int **conflict_tos
;
89 static base_number
*width
;
92 /* For a given state, N = ACTROW[SYMBOL]:
94 If N = 0, stands for 'run the default action'.
95 If N = MIN, stands for 'raise a syntax error'.
96 If N > 0, stands for 'shift SYMBOL and go to n'.
97 If N < 0, stands for 'reduce -N'. */
98 typedef int action_number
;
99 #define ACTION_NUMBER_MINIMUM INT_MIN
101 static action_number
*actrow
;
103 /* FROMS and TOS are reordered to be compressed. ORDER[VECTOR] is the
104 new vector number of VECTOR. We skip 'empty' vectors (i.e.,
105 TALLY[VECTOR] = 0), and call these 'entries'. */
106 static vector_number
*order
;
109 base_number
*base
= NULL
;
110 /* A distinguished value of BASE, negative infinite. During the
111 computation equals to BASE_MINIMUM, later mapped to BASE_NINF to
112 keep parser tables small. */
113 base_number base_ninf
= 0;
115 /* Bitset representing an integer set in the range
116 POS_SET_OFFSET..(POS_SET_OFFSET + SIZE). POS_SET_OFFSET is
118 static bitset pos_set
= NULL
;
119 /* The integer denoted by bitno 0 in pos_set. */
120 static int pos_set_base
= 0;
122 static int *conflrow
;
125 int conflict_list_cnt
;
126 static int conflict_list_free
;
128 /* TABLE_SIZE is the allocated size of both TABLE and CHECK. We start
129 with more or less the original hard-coded value (which was
131 static int table_size
= 32768;
134 /* The value used in TABLE to denote explicit syntax errors
135 (%nonassoc), a negative infinite. First defaults to ACTION_NUMBER_MINIMUM,
136 but in order to keep small tables, renumbered as TABLE_ERROR, which
137 is the smallest (non error) value minus 1. */
138 base_number table_ninf
= 0;
142 state_number
*yydefgoto
;
143 rule_number
*yydefact
;
154 fprintf (stderr
, "pos_set (%ld, %d) =", bitset_size (pos_set
), pos_set_base
);
155 bitset_iterator biter
;
157 BITSET_FOR_EACH (biter
, pos_set
, i
, 0)
158 fprintf (stderr
, " %d", i
+ pos_set_base
);
164 /* The size and base of POS_SET are not known, we need to be able to
165 move the base farther "on the left", and grow "on the right".
167 It would be nice to be able to predict the base accurately, but it
168 seems difficult (-nstates seems to work most of the time, except
169 when there are useless tokens).
171 FIXME: The current approach is correct, but with poor performances.
172 Bitsets need to support 'assign' and 'shift'. And instead of
173 extending POS_SET just for the out-of-range new values, we need
174 something like doubling the size.
178 pos_set_set (int pos
)
180 int bitno
= pos
- pos_set_base
;
183 const int delta
= pos_set_base
- pos
;
184 const int old_size
= bitset_size (pos_set
);
185 const int new_size
= old_size
+ delta
;
186 bitset_resize (pos_set
, new_size
);
187 // Shift all the bits by DELTA.
188 // FIXME: add bitset_assign, and bitset_shift?
189 for (int i
= new_size
- 1; delta
<= i
; --i
)
190 if (bitset_test (pos_set
, i
))
191 bitset_set (pos_set
, i
+ delta
);
193 bitset_reset (pos_set
, i
+ delta
);
197 else if (bitset_size (pos_set
) <= bitno
)
198 bitset_resize (pos_set
, bitno
+ 1);
199 bitset_set (pos_set
, bitno
);
203 pos_set_test (int pos
)
205 const int bitno
= pos
- pos_set_base
;
206 return bitset_test (pos_set
, bitno
);
210 /*-------------------------------------------------------------------.
211 | If TABLE, CONFLICT_TABLE, and CHECK are too small to be addressed |
212 | at DESIRED, grow them. TABLE[DESIRED] can be used, so the desired |
213 | size is at least DESIRED + 1. |
214 `-------------------------------------------------------------------*/
217 table_grow (int desired
)
219 int old_size
= table_size
;
221 while (table_size
<= desired
)
224 if (trace_flag
& trace_resource
)
225 fprintf (stderr
, "growing tables from %d to %d\n",
226 old_size
, table_size
);
228 table
= xnrealloc (table
, table_size
, sizeof *table
);
229 memset (table
+ old_size
, 0,
230 sizeof *table
* (table_size
- old_size
));
232 conflict_table
= xnrealloc (conflict_table
, table_size
,
233 sizeof *conflict_table
);
234 memset (conflict_table
+ old_size
, 0,
235 sizeof *conflict_table
* (table_size
- old_size
));
237 check
= xnrealloc (check
, table_size
, sizeof *check
);
238 for (int i
= old_size
; i
< table_size
; ++i
)
245 /*-------------------------------------------------------------------.
246 | For GLR parsers, for each conflicted token in S, as indicated |
247 | by non-zero entries in CONFLROW, create a list of possible |
248 | reductions that are alternatives to the shift or reduction |
249 | currently recorded for that token in S. Store the alternative |
250 | reductions followed by a 0 in CONFLICT_LIST, updating |
251 | CONFLICT_LIST_CNT, and storing an index to the start of the list |
252 | back into CONFLROW. |
253 `-------------------------------------------------------------------*/
256 conflict_row (state
*s
)
258 if (!nondeterministic_parser
)
261 const reductions
*reds
= s
->reductions
;
262 for (state_number j
= 0; j
< ntokens
; j
+= 1)
265 conflrow
[j
] = conflict_list_cnt
;
267 /* Find all reductions for token J, and record all that do not
269 for (int i
= 0; i
< reds
->num
; i
+= 1)
270 if (bitset_test (reds
->lookaheads
[i
], j
)
272 != rule_number_as_item_number (reds
->rules
[i
]->number
)))
274 aver (0 < conflict_list_free
);
275 conflict_list
[conflict_list_cnt
] = reds
->rules
[i
]->number
+ 1;
276 conflict_list_cnt
+= 1;
277 conflict_list_free
-= 1;
280 /* Leave a 0 at the end. */
281 aver (0 < conflict_list_free
);
282 conflict_list
[conflict_list_cnt
] = 0;
283 conflict_list_cnt
+= 1;
284 conflict_list_free
-= 1;
289 /*------------------------------------------------------------------.
290 | Decide what to do for each type of token if seen as the |
291 | lookahead in specified state. The value returned is used as the |
292 | default action (yydefact) for the state. In addition, ACTROW is |
293 | filled with what to do for each kind of token, index by symbol |
294 | number, with zero meaning do the default action. The value |
295 | ACTION_NUMBER_MINIMUM, a very negative number, means this |
296 | situation is an error. The parser recognizes this value |
299 | This is where conflicts are resolved. The loop over lookahead |
300 | rules considered lower-numbered rules last, and the last rule |
301 | considered that likes a token gets to handle it. |
303 | For GLR parsers, also sets CONFLROW[SYM] to an index into |
304 | CONFLICT_LIST iff there is an unresolved conflict (s/r or r/r) |
305 | with symbol SYM. The default reduction is not used for a symbol |
306 | that has any such conflicts. |
307 `------------------------------------------------------------------*/
310 action_row (state
*s
)
312 for (state_number i
= 0; i
< ntokens
; i
++)
313 actrow
[i
] = conflrow
[i
] = 0;
315 reductions
*reds
= s
->reductions
;
316 bool conflicted
= false;
317 if (reds
->lookaheads
)
318 /* loop over all the rules available here which require
319 lookahead (in reverse order to give precedence to the first
321 for (int i
= reds
->num
- 1; 0 <= i
; --i
)
322 /* and find each token which the rule finds acceptable
325 bitset_iterator biter
;
327 BITSET_FOR_EACH (biter
, reds
->lookaheads
[i
], j
, 0)
329 /* and record this rule as the rule to use if that
336 actrow
[j
] = rule_number_as_item_number (reds
->rules
[i
]->number
);
340 /* Now see which tokens are allowed for shifts in this state. For
341 them, record the shift as the thing to do. So shift is preferred
343 transitions
*trans
= s
->transitions
;
344 /* Set to nonzero to inhibit having any default reduction. */
345 bool nodefault
= false;
348 FOR_EACH_SHIFT (trans
, i
)
350 symbol_number sym
= TRANSITION_SYMBOL (trans
, i
);
351 state
*shift_state
= trans
->states
[i
];
353 if (actrow
[sym
] != 0)
358 actrow
[sym
] = state_number_as_int (shift_state
->number
);
360 /* Do not use any default reduction if there is a shift for
362 if (sym
== errtoken
->content
->number
)
367 /* See which tokens are an explicit error in this state (due to
368 %nonassoc). For them, record ACTION_NUMBER_MINIMUM as the
370 errs
*errp
= s
->errs
;
371 for (int i
= 0; i
< errp
->num
; i
++)
373 symbol
*sym
= errp
->symbols
[i
];
374 actrow
[sym
->content
->number
] = ACTION_NUMBER_MINIMUM
;
377 /* Turn off default reductions where requested by the user. See
378 state_lookaheads_count in lalr.c to understand when states are
379 labeled as consistent. */
381 char *default_reductions
=
382 muscle_percent_define_get ("lr.default-reduction");
383 if (STRNEQ (default_reductions
, "most") && !s
->consistent
)
385 free (default_reductions
);
388 /* Now find the most common reduction and make it the default action
390 rule
*default_reduction
= NULL
;
391 if (reds
->num
>= 1 && !nodefault
)
394 default_reduction
= reds
->rules
[0];
398 for (int i
= 0; i
< reds
->num
; i
++)
401 rule
*r
= reds
->rules
[i
];
402 for (symbol_number j
= 0; j
< ntokens
; j
++)
403 if (actrow
[j
] == rule_number_as_item_number (r
->number
))
409 default_reduction
= r
;
413 /* GLR parsers need space for conflict lists, so we can't
414 default conflicted entries. For non-conflicted entries
415 or as long as we are not building a GLR parser,
416 actions that match the default are replaced with zero,
417 which means "use the default". */
420 for (symbol_number j
= 0; j
< ntokens
; j
++)
422 == rule_number_as_item_number (default_reduction
->number
)
423 && ! (nondeterministic_parser
&& conflrow
[j
]))
428 /* If have no default reduction, the default is an error.
429 So replace any action which says "error" with "use default". */
431 if (!default_reduction
)
432 for (symbol_number i
= 0; i
< ntokens
; i
++)
433 if (actrow
[i
] == ACTION_NUMBER_MINIMUM
)
439 return default_reduction
;
443 /*----------------------------------------.
444 | Set FROMS, TOS, TALLY and WIDTH for S. |
445 `----------------------------------------*/
448 save_row (state_number s
)
450 /* Number of non default actions in S. */
452 for (symbol_number i
= 0; i
< ntokens
; i
++)
458 /* Allocate non defaulted actions. */
459 base_number
*sp1
= froms
[s
] = xnmalloc (count
, sizeof *sp1
);
460 base_number
*sp2
= tos
[s
] = xnmalloc (count
, sizeof *sp2
);
461 int *sp3
= conflict_tos
[s
] =
462 nondeterministic_parser
? xnmalloc (count
, sizeof *sp3
) : NULL
;
464 /* Store non defaulted actions. */
465 for (symbol_number i
= 0; i
< ntokens
; i
++)
470 if (nondeterministic_parser
)
471 *sp3
++ = conflrow
[i
];
475 width
[s
] = sp1
[-1] - froms
[s
][0] + 1;
480 /*------------------------------------------------------------------.
481 | Figure out the actions for the specified state, indexed by |
482 | lookahead token kind. |
484 | The YYDEFACT table is output now. The detailed info is saved for |
485 | putting into YYTABLE later. |
486 `------------------------------------------------------------------*/
491 int nconflict
= nondeterministic_parser
? conflicts_total_count () : 0;
493 yydefact
= xnmalloc (nstates
, sizeof *yydefact
);
495 actrow
= xnmalloc (ntokens
, sizeof *actrow
);
496 conflrow
= xnmalloc (ntokens
, sizeof *conflrow
);
498 conflict_list
= xnmalloc (1 + 2 * nconflict
, sizeof *conflict_list
);
499 conflict_list_free
= 2 * nconflict
;
500 conflict_list_cnt
= 1;
502 /* Find the rules which are reduced. */
503 if (!nondeterministic_parser
)
504 for (rule_number r
= 0; r
< nrules
; ++r
)
505 rules
[r
].useful
= false;
507 for (state_number i
= 0; i
< nstates
; ++i
)
509 rule
*default_reduction
= action_row (states
[i
]);
510 yydefact
[i
] = default_reduction
? default_reduction
->number
+ 1 : 0;
513 /* Now that the parser was computed, we can find which rules are
514 really reduced, and which are not because of SR or RR
516 if (!nondeterministic_parser
)
518 for (symbol_number j
= 0; j
< ntokens
; ++j
)
519 if (actrow
[j
] < 0 && actrow
[j
] != ACTION_NUMBER_MINIMUM
)
520 rules
[item_number_as_rule_number (actrow
[j
])].useful
= true;
522 rules
[yydefact
[i
] - 1].useful
= true;
530 /*------------------------------------------------------------------.
531 | Compute FROMS[VECTOR], TOS[VECTOR], TALLY[VECTOR], WIDTH[VECTOR], |
532 | i.e., the information related to non defaulted GOTO on the nterm |
535 | DEFAULT_STATE is the principal destination on SYM, i.e., the |
536 | default GOTO destination on SYM. |
537 `------------------------------------------------------------------*/
540 save_column (symbol_number sym
, state_number default_state
)
542 const goto_number begin
= goto_map
[sym
- ntokens
];
543 const goto_number end
= goto_map
[sym
- ntokens
+ 1];
545 /* Number of non default GOTO. */
547 for (goto_number i
= begin
; i
< end
; i
++)
548 if (to_state
[i
] != default_state
)
553 /* Allocate room for non defaulted gotos. */
554 vector_number symno
= symbol_number_to_vector_number (sym
);
555 base_number
*sp1
= froms
[symno
] = xnmalloc (count
, sizeof *sp1
);
556 base_number
*sp2
= tos
[symno
] = xnmalloc (count
, sizeof *sp2
);
558 /* Store the state numbers of the non defaulted gotos. */
559 for (goto_number i
= begin
; i
< end
; i
++)
560 if (to_state
[i
] != default_state
)
562 *sp1
++ = from_state
[i
];
563 *sp2
++ = to_state
[i
];
566 tally
[symno
] = count
;
567 width
[symno
] = sp1
[-1] - froms
[symno
][0] + 1;
572 /*----------------------------------------------------------------.
573 | The default state for SYM: the state which is 'the' most common |
574 | GOTO destination on SYM (an nterm). |
575 `----------------------------------------------------------------*/
578 default_goto (symbol_number sym
, size_t state_count
[])
580 const goto_number begin
= goto_map
[sym
- ntokens
];
581 const goto_number end
= goto_map
[sym
- ntokens
+ 1];
583 /* In the case this symbol is never reduced to ($accept), use state
584 0. We used to use -1, but as a result the yydefgoto table must
585 be signed, which (1) might trigger compiler warnings when storing
586 a value from yydefgoto into a state number (nonnegative), and (2)
587 wastes bits which might result in using a int16 where a uint8
589 state_number res
= 0;
593 for (state_number s
= 0; s
< nstates
; s
++)
596 for (goto_number i
= begin
; i
< end
; i
++)
597 state_count
[to_state
[i
]]++;
600 for (state_number s
= 0; s
< nstates
; s
++)
601 if (max
< state_count
[s
])
603 max
= state_count
[s
];
611 /*-------------------------------------------------------------------.
612 | Figure out what to do after reducing with each rule, depending on |
613 | the saved state from before the beginning of parsing the data that |
614 | matched this rule. |
616 | The YYDEFGOTO table is output now. The detailed info is saved for |
617 | putting into YYTABLE later. |
618 `-------------------------------------------------------------------*/
623 size_t *state_count
= xnmalloc (nstates
, sizeof *state_count
);
624 yydefgoto
= xnmalloc (nnterms
, sizeof *yydefgoto
);
626 /* For a given nterm I, STATE_COUNT[S] is the number of times there
627 is a GOTO to S on I. */
628 for (symbol_number i
= ntokens
; i
< nsyms
; ++i
)
630 state_number default_state
= default_goto (i
, state_count
);
631 save_column (i
, default_state
);
632 yydefgoto
[i
- ntokens
] = default_state
;
638 /*------------------------------------------------------------------.
639 | Compute ORDER, a reordering of vectors, in order to decide how to |
640 | pack the actions and gotos information into yytable. |
641 `------------------------------------------------------------------*/
648 for (int i
= 0; i
< nvectors
; i
++)
651 const size_t t
= tally
[i
];
652 const int w
= width
[i
];
653 int j
= nentries
- 1;
655 while (0 <= j
&& width
[order
[j
]] < w
)
658 while (0 <= j
&& width
[order
[j
]] == w
&& tally
[order
[j
]] < t
)
661 for (int k
= nentries
- 1; k
> j
; k
--)
662 order
[k
+ 1] = order
[k
];
670 /* If VECTOR is a state whose actions (reflected by FROMS, TOS, TALLY
671 and WIDTH of VECTOR) are common to a previous state, return this
674 In any other case, return -1. */
677 matching_state (vector_number vector
)
679 vector_number i
= order
[vector
];
680 /* If VECTOR is a nterm, return -1. */
686 /* If VECTOR has GLR conflicts, return -1 */
687 if (conflict_tos
[i
] != NULL
)
688 for (int j
= 0; j
< t
; j
+= 1)
689 if (conflict_tos
[i
][j
] != 0)
692 for (int prev
= vector
- 1; 0 <= prev
; prev
--)
694 vector_number j
= order
[prev
];
695 /* Given how ORDER was computed, if the WIDTH or TALLY is
696 different, there cannot be a matching state. */
697 if (width
[j
] != w
|| tally
[j
] != t
)
702 for (int k
= 0; match
&& k
< t
; k
++)
703 if (tos
[j
][k
] != tos
[i
][k
]
704 || froms
[j
][k
] != froms
[i
][k
]
705 || (conflict_tos
[j
] != NULL
&& conflict_tos
[j
][k
] != 0))
717 pack_vector (vector_number vector
)
719 vector_number i
= order
[vector
];
721 base_number
*from
= froms
[i
];
722 base_number
*to
= tos
[i
];
723 int *conflict_to
= conflict_tos
[i
];
727 for (base_number res
= lowzero
- from
[0]; ; res
++)
730 aver (res
< table_size
);
732 for (int k
= 0; ok
&& k
< t
; k
++)
734 int loc
= res
+ state_number_as_int (from
[k
]);
735 if (table_size
<= loc
)
742 if (ok
&& pos_set_test (res
))
748 int loc
PACIFY_CC (= -1);
749 for (int k
= 0; k
< t
; k
++)
751 loc
= res
+ state_number_as_int (from
[k
]);
753 if (nondeterministic_parser
&& conflict_to
!= NULL
)
754 conflict_table
[loc
] = conflict_to
[k
];
755 check
[loc
] = from
[k
];
758 while (table
[lowzero
] != 0)
764 aver (BASE_MINIMUM
<= res
&& res
<= BASE_MAXIMUM
);
771 /*-------------------------------------------------------------.
772 | Remap the negative infinite in TAB from NINF to the greatest |
773 | possible smallest value. Return it. |
775 | In most case this allows us to use shorts instead of ints in |
777 `-------------------------------------------------------------*/
780 table_ninf_remap (base_number tab
[], int size
, base_number ninf
)
784 for (int i
= 0; i
< size
; i
++)
785 if (tab
[i
] < res
&& tab
[i
] != ninf
)
790 for (int i
= 0; i
< size
; i
++)
800 base
= xnmalloc (nvectors
, sizeof *base
);
801 pos_set
= bitset_create (table_size
+ nstates
, BITSET_FRUGAL
);
802 pos_set_base
= -nstates
;
803 table
= xcalloc (table_size
, sizeof *table
);
804 conflict_table
= xcalloc (table_size
, sizeof *conflict_table
);
805 check
= xnmalloc (table_size
, sizeof *check
);
810 for (int i
= 0; i
< nvectors
; i
++)
811 base
[i
] = BASE_MINIMUM
;
813 for (int i
= 0; i
< table_size
; i
++)
816 for (int i
= 0; i
< nentries
; i
++)
818 state_number s
= matching_state (i
);
822 /* A new set of state actions, or a nonterminal. */
823 place
= pack_vector (i
);
825 /* Action of I were already coded for S. */
829 base
[order
[i
]] = place
;
832 /* Use the greatest possible negative infinites. */
833 base_ninf
= table_ninf_remap (base
, nvectors
, BASE_MINIMUM
);
834 table_ninf
= table_ninf_remap (table
, high
+ 1, ACTION_NUMBER_MINIMUM
);
836 bitset_free (pos_set
);
841 /*-----------------------------------------------------------------.
842 | Compute and output yydefact, yydefgoto, yypact, yypgoto, yytable |
844 `-----------------------------------------------------------------*/
847 tables_generate (void)
849 /* This is a poor way to make sure the sizes are properly
850 correlated. In particular the signedness is not taken into
851 account. But it's not useless. */
852 verify (sizeof nstates
<= sizeof nvectors
);
853 verify (sizeof nnterms
<= sizeof nvectors
);
855 nvectors
= state_number_as_int (nstates
) + nnterms
;
857 froms
= xcalloc (nvectors
, sizeof *froms
);
858 tos
= xcalloc (nvectors
, sizeof *tos
);
859 conflict_tos
= xcalloc (nvectors
, sizeof *conflict_tos
);
860 tally
= xcalloc (nvectors
, sizeof *tally
);
861 width
= xnmalloc (nvectors
, sizeof *width
);
870 order
= xcalloc (nvectors
, sizeof *order
);
878 for (int i
= 0; i
< nvectors
; i
++)
882 free (conflict_tos
[i
]);
891 /*-------------------------.
892 | Free the parser tables. |
893 `-------------------------*/
899 free (conflict_table
);
900 free (conflict_list
);