1 /* Output the generated parsing program for Bison.
3 Copyright (C) 1984, 1986, 1989, 1992, 2000-2006, 2009-2015, 2018-2020
4 Free Software Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
28 #include "conflicts.h"
33 #include "muscle-tab.h"
38 /* Several tables are indexed both by state and nonterminal numbers.
39 We call such an index a 'vector'; i.e., a vector is either a state
40 or a nonterminal number.
42 Of course vector_number_t ought to be wide enough to contain
43 state_number and symbol_number. */
44 typedef int vector_number
;
46 #if 0 /* Not currently used. */
47 static inline vector_number
48 state_number_to_vector_number (state_number s
)
54 static inline vector_number
55 symbol_number_to_vector_number (symbol_number sym
)
57 return state_number_as_int (nstates
) + sym
- ntokens
;
63 /* FROMS and TOS are indexed by vector_number.
65 If VECTOR is a nonterminal, (FROMS[VECTOR], TOS[VECTOR]) form an
66 array of state numbers of the non defaulted GOTO on VECTOR.
68 If VECTOR is a state, TOS[VECTOR] is the array of actions to do on
69 the (array of) symbols FROMS[VECTOR].
71 In both cases, TALLY[VECTOR] is the size of the arrays
72 FROMS[VECTOR], TOS[VECTOR]; and WIDTH[VECTOR] =
73 (FROMS[VECTOR][SIZE] - FROMS[VECTOR][0] + 1) where SIZE =
76 FROMS therefore contains symbol_number and action_number,
77 TOS state_number and action_number,
79 WIDTH differences of FROMS.
81 Let base_number be the type of FROMS, TOS, and WIDTH. */
82 #define BASE_MAXIMUM INT_MAX
83 #define BASE_MINIMUM INT_MIN
85 static base_number
**froms
;
86 static base_number
**tos
;
87 static int **conflict_tos
;
89 static base_number
*width
;
92 /* For a given state, N = ACTROW[SYMBOL]:
94 If N = 0, stands for 'run the default action'.
95 If N = MIN, stands for 'raise a syntax error'.
96 If N > 0, stands for 'shift SYMBOL and go to n'.
97 If N < 0, stands for 'reduce -N'. */
98 typedef int action_number
;
99 #define ACTION_NUMBER_MINIMUM INT_MIN
101 static action_number
*actrow
;
103 /* FROMS and TOS are reordered to be compressed. ORDER[VECTOR] is the
104 new vector number of VECTOR. We skip 'empty' vectors (i.e.,
105 TALLY[VECTOR] = 0), and call these 'entries'. */
106 static vector_number
*order
;
109 base_number
*base
= NULL
;
110 /* A distinguished value of BASE, negative infinite. During the
111 computation equals to BASE_MINIMUM, later mapped to BASE_NINF to
112 keep parser tables small. */
113 base_number base_ninf
= 0;
114 /* Bitset representing an integer set in the range
115 -nstates..table_size (as an upper bound) */
116 static bitset pos_set
= NULL
;
118 static int *conflrow
;
121 int conflict_list_cnt
;
122 static int conflict_list_free
;
124 /* TABLE_SIZE is the allocated size of both TABLE and CHECK. We start
125 with more or less the original hard-coded value (which was
127 static int table_size
= 32768;
130 /* The value used in TABLE to denote explicit syntax errors
131 (%nonassoc), a negative infinite. First defaults to ACTION_NUMBER_MINIMUM,
132 but in order to keep small tables, renumbered as TABLE_ERROR, which
133 is the smallest (non error) value minus 1. */
134 base_number table_ninf
= 0;
138 state_number
*yydefgoto
;
139 rule_number
*yydefact
;
141 /*-------------------------------------------------------------------.
142 | If TABLE, CONFLICT_TABLE, and CHECK are too small to be addressed |
143 | at DESIRED, grow them. TABLE[DESIRED] can be used, so the desired |
144 | size is at least DESIRED + 1. |
145 `-------------------------------------------------------------------*/
148 table_grow (int desired
)
150 int old_size
= table_size
;
152 while (table_size
<= desired
)
155 if (trace_flag
& trace_resource
)
156 fprintf (stderr
, "growing tables from %d to %d\n",
157 old_size
, table_size
);
159 table
= xnrealloc (table
, table_size
, sizeof *table
);
160 memset (table
+ old_size
, 0,
161 sizeof *table
* (table_size
- old_size
));
163 conflict_table
= xnrealloc (conflict_table
, table_size
,
164 sizeof *conflict_table
);
165 memset (conflict_table
+ old_size
, 0,
166 sizeof *conflict_table
* (table_size
- old_size
));
168 check
= xnrealloc (check
, table_size
, sizeof *check
);
169 for (int i
= old_size
; i
< table_size
; ++i
)
172 bitset_resize (pos_set
, table_size
+ nstates
);
178 /*-------------------------------------------------------------------.
179 | For GLR parsers, for each conflicted token in S, as indicated |
180 | by non-zero entries in CONFLROW, create a list of possible |
181 | reductions that are alternatives to the shift or reduction |
182 | currently recorded for that token in S. Store the alternative |
183 | reductions followed by a 0 in CONFLICT_LIST, updating |
184 | CONFLICT_LIST_CNT, and storing an index to the start of the list |
185 | back into CONFLROW. |
186 `-------------------------------------------------------------------*/
189 conflict_row (state
*s
)
191 if (!nondeterministic_parser
)
194 const reductions
*reds
= s
->reductions
;
195 for (state_number j
= 0; j
< ntokens
; j
+= 1)
198 conflrow
[j
] = conflict_list_cnt
;
200 /* Find all reductions for token J, and record all that do not
202 for (int i
= 0; i
< reds
->num
; i
+= 1)
203 if (bitset_test (reds
->lookaheads
[i
], j
)
205 != rule_number_as_item_number (reds
->rules
[i
]->number
)))
207 aver (0 < conflict_list_free
);
208 conflict_list
[conflict_list_cnt
] = reds
->rules
[i
]->number
+ 1;
209 conflict_list_cnt
+= 1;
210 conflict_list_free
-= 1;
213 /* Leave a 0 at the end. */
214 aver (0 < conflict_list_free
);
215 conflict_list
[conflict_list_cnt
] = 0;
216 conflict_list_cnt
+= 1;
217 conflict_list_free
-= 1;
222 /*------------------------------------------------------------------.
223 | Decide what to do for each type of token if seen as the |
224 | lookahead in specified state. The value returned is used as the |
225 | default action (yydefact) for the state. In addition, ACTROW is |
226 | filled with what to do for each kind of token, index by symbol |
227 | number, with zero meaning do the default action. The value |
228 | ACTION_NUMBER_MINIMUM, a very negative number, means this |
229 | situation is an error. The parser recognizes this value |
232 | This is where conflicts are resolved. The loop over lookahead |
233 | rules considered lower-numbered rules last, and the last rule |
234 | considered that likes a token gets to handle it. |
236 | For GLR parsers, also sets CONFLROW[SYM] to an index into |
237 | CONFLICT_LIST iff there is an unresolved conflict (s/r or r/r) |
238 | with symbol SYM. The default reduction is not used for a symbol |
239 | that has any such conflicts. |
240 `------------------------------------------------------------------*/
243 action_row (state
*s
)
245 for (state_number i
= 0; i
< ntokens
; i
++)
246 actrow
[i
] = conflrow
[i
] = 0;
248 reductions
*reds
= s
->reductions
;
249 bool conflicted
= false;
250 if (reds
->lookaheads
)
251 /* loop over all the rules available here which require
252 lookahead (in reverse order to give precedence to the first
254 for (int i
= reds
->num
- 1; 0 <= i
; --i
)
255 /* and find each token which the rule finds acceptable
258 bitset_iterator biter
;
260 BITSET_FOR_EACH (biter
, reds
->lookaheads
[i
], j
, 0)
262 /* and record this rule as the rule to use if that
269 actrow
[j
] = rule_number_as_item_number (reds
->rules
[i
]->number
);
273 /* Now see which tokens are allowed for shifts in this state. For
274 them, record the shift as the thing to do. So shift is preferred
276 transitions
*trans
= s
->transitions
;
277 /* Set to nonzero to inhibit having any default reduction. */
278 bool nodefault
= false;
281 FOR_EACH_SHIFT (trans
, i
)
283 symbol_number sym
= TRANSITION_SYMBOL (trans
, i
);
284 state
*shift_state
= trans
->states
[i
];
286 if (actrow
[sym
] != 0)
291 actrow
[sym
] = state_number_as_int (shift_state
->number
);
293 /* Do not use any default reduction if there is a shift for
295 if (sym
== errtoken
->content
->number
)
300 /* See which tokens are an explicit error in this state (due to
301 %nonassoc). For them, record ACTION_NUMBER_MINIMUM as the
303 errs
*errp
= s
->errs
;
304 for (int i
= 0; i
< errp
->num
; i
++)
306 symbol
*sym
= errp
->symbols
[i
];
307 actrow
[sym
->content
->number
] = ACTION_NUMBER_MINIMUM
;
310 /* Turn off default reductions where requested by the user. See
311 state_lookaheads_count in lalr.c to understand when states are
312 labeled as consistent. */
314 char *default_reductions
=
315 muscle_percent_define_get ("lr.default-reduction");
316 if (STRNEQ (default_reductions
, "most") && !s
->consistent
)
318 free (default_reductions
);
321 /* Now find the most common reduction and make it the default action
323 rule
*default_reduction
= NULL
;
324 if (reds
->num
>= 1 && !nodefault
)
327 default_reduction
= reds
->rules
[0];
331 for (int i
= 0; i
< reds
->num
; i
++)
334 rule
*r
= reds
->rules
[i
];
335 for (symbol_number j
= 0; j
< ntokens
; j
++)
336 if (actrow
[j
] == rule_number_as_item_number (r
->number
))
342 default_reduction
= r
;
346 /* GLR parsers need space for conflict lists, so we can't
347 default conflicted entries. For non-conflicted entries
348 or as long as we are not building a GLR parser,
349 actions that match the default are replaced with zero,
350 which means "use the default". */
353 for (symbol_number j
= 0; j
< ntokens
; j
++)
355 == rule_number_as_item_number (default_reduction
->number
)
356 && ! (nondeterministic_parser
&& conflrow
[j
]))
361 /* If have no default reduction, the default is an error.
362 So replace any action which says "error" with "use default". */
364 if (!default_reduction
)
365 for (symbol_number i
= 0; i
< ntokens
; i
++)
366 if (actrow
[i
] == ACTION_NUMBER_MINIMUM
)
372 return default_reduction
;
376 /*----------------------------------------.
377 | Set FROMS, TOS, TALLY and WIDTH for S. |
378 `----------------------------------------*/
381 save_row (state_number s
)
383 /* Number of non default actions in S. */
385 for (symbol_number i
= 0; i
< ntokens
; i
++)
391 /* Allocate non defaulted actions. */
392 base_number
*sp1
= froms
[s
] = xnmalloc (count
, sizeof *sp1
);
393 base_number
*sp2
= tos
[s
] = xnmalloc (count
, sizeof *sp2
);
394 int *sp3
= conflict_tos
[s
] =
395 nondeterministic_parser
? xnmalloc (count
, sizeof *sp3
) : NULL
;
397 /* Store non defaulted actions. */
398 for (symbol_number i
= 0; i
< ntokens
; i
++)
403 if (nondeterministic_parser
)
404 *sp3
++ = conflrow
[i
];
408 width
[s
] = sp1
[-1] - froms
[s
][0] + 1;
413 /*------------------------------------------------------------------.
414 | Figure out the actions for the specified state, indexed by |
415 | lookahead token kind. |
417 | The YYDEFACT table is output now. The detailed info is saved for |
418 | putting into YYTABLE later. |
419 `------------------------------------------------------------------*/
424 int nconflict
= nondeterministic_parser
? conflicts_total_count () : 0;
426 yydefact
= xnmalloc (nstates
, sizeof *yydefact
);
428 actrow
= xnmalloc (ntokens
, sizeof *actrow
);
429 conflrow
= xnmalloc (ntokens
, sizeof *conflrow
);
431 conflict_list
= xnmalloc (1 + 2 * nconflict
, sizeof *conflict_list
);
432 conflict_list_free
= 2 * nconflict
;
433 conflict_list_cnt
= 1;
435 /* Find the rules which are reduced. */
436 if (!nondeterministic_parser
)
437 for (rule_number r
= 0; r
< nrules
; ++r
)
438 rules
[r
].useful
= false;
440 for (state_number i
= 0; i
< nstates
; ++i
)
442 rule
*default_reduction
= action_row (states
[i
]);
443 yydefact
[i
] = default_reduction
? default_reduction
->number
+ 1 : 0;
446 /* Now that the parser was computed, we can find which rules are
447 really reduced, and which are not because of SR or RR
449 if (!nondeterministic_parser
)
451 for (symbol_number j
= 0; j
< ntokens
; ++j
)
452 if (actrow
[j
] < 0 && actrow
[j
] != ACTION_NUMBER_MINIMUM
)
453 rules
[item_number_as_rule_number (actrow
[j
])].useful
= true;
455 rules
[yydefact
[i
] - 1].useful
= true;
463 /*------------------------------------------------------------------.
464 | Compute FROMS[VECTOR], TOS[VECTOR], TALLY[VECTOR], WIDTH[VECTOR], |
465 | i.e., the information related to non defaulted GOTO on the nterm |
468 | DEFAULT_STATE is the principal destination on SYM, i.e., the |
469 | default GOTO destination on SYM. |
470 `------------------------------------------------------------------*/
473 save_column (symbol_number sym
, state_number default_state
)
475 const goto_number begin
= goto_map
[sym
- ntokens
];
476 const goto_number end
= goto_map
[sym
- ntokens
+ 1];
478 /* Number of non default GOTO. */
480 for (goto_number i
= begin
; i
< end
; i
++)
481 if (to_state
[i
] != default_state
)
486 /* Allocate room for non defaulted gotos. */
487 vector_number symno
= symbol_number_to_vector_number (sym
);
488 base_number
*sp1
= froms
[symno
] = xnmalloc (count
, sizeof *sp1
);
489 base_number
*sp2
= tos
[symno
] = xnmalloc (count
, sizeof *sp2
);
491 /* Store the state numbers of the non defaulted gotos. */
492 for (goto_number i
= begin
; i
< end
; i
++)
493 if (to_state
[i
] != default_state
)
495 *sp1
++ = from_state
[i
];
496 *sp2
++ = to_state
[i
];
499 tally
[symno
] = count
;
500 width
[symno
] = sp1
[-1] - froms
[symno
][0] + 1;
505 /*----------------------------------------------------------------.
506 | The default state for SYM: the state which is 'the' most common |
507 | GOTO destination on SYM (an nterm). |
508 `----------------------------------------------------------------*/
511 default_goto (symbol_number sym
, size_t state_count
[])
513 const goto_number begin
= goto_map
[sym
- ntokens
];
514 const goto_number end
= goto_map
[sym
- ntokens
+ 1];
516 /* In the case this symbol is never reduced to ($accept), use state
517 0. We used to use -1, but as a result the yydefgoto table must
518 be signed, which (1) might trigger compiler warnings when storing
519 a value from yydefgoto into a state number (nonnegative), and (2)
520 wastes bits which might result in using a int16 where a uint8
522 state_number res
= 0;
526 for (state_number s
= 0; s
< nstates
; s
++)
529 for (goto_number i
= begin
; i
< end
; i
++)
530 state_count
[to_state
[i
]]++;
533 for (state_number s
= 0; s
< nstates
; s
++)
534 if (max
< state_count
[s
])
536 max
= state_count
[s
];
544 /*-------------------------------------------------------------------.
545 | Figure out what to do after reducing with each rule, depending on |
546 | the saved state from before the beginning of parsing the data that |
547 | matched this rule. |
549 | The YYDEFGOTO table is output now. The detailed info is saved for |
550 | putting into YYTABLE later. |
551 `-------------------------------------------------------------------*/
556 size_t *state_count
= xnmalloc (nstates
, sizeof *state_count
);
557 yydefgoto
= xnmalloc (nnterms
, sizeof *yydefgoto
);
559 /* For a given nterm I, STATE_COUNT[S] is the number of times there
560 is a GOTO to S on I. */
561 for (symbol_number i
= ntokens
; i
< nsyms
; ++i
)
563 state_number default_state
= default_goto (i
, state_count
);
564 save_column (i
, default_state
);
565 yydefgoto
[i
- ntokens
] = default_state
;
571 /*------------------------------------------------------------------.
572 | Compute ORDER, a reordering of vectors, in order to decide how to |
573 | pack the actions and gotos information into yytable. |
574 `------------------------------------------------------------------*/
581 for (int i
= 0; i
< nvectors
; i
++)
584 const size_t t
= tally
[i
];
585 const int w
= width
[i
];
586 int j
= nentries
- 1;
588 while (0 <= j
&& width
[order
[j
]] < w
)
591 while (0 <= j
&& width
[order
[j
]] == w
&& tally
[order
[j
]] < t
)
594 for (int k
= nentries
- 1; k
> j
; k
--)
595 order
[k
+ 1] = order
[k
];
603 /* If VECTOR is a state whose actions (reflected by FROMS, TOS, TALLY
604 and WIDTH of VECTOR) are common to a previous state, return this
607 In any other case, return -1. */
610 matching_state (vector_number vector
)
612 vector_number i
= order
[vector
];
613 /* If VECTOR is a nterm, return -1. */
619 /* If VECTOR has GLR conflicts, return -1 */
620 if (conflict_tos
[i
] != NULL
)
621 for (int j
= 0; j
< t
; j
+= 1)
622 if (conflict_tos
[i
][j
] != 0)
625 for (int prev
= vector
- 1; 0 <= prev
; prev
--)
627 vector_number j
= order
[prev
];
628 /* Given how ORDER was computed, if the WIDTH or TALLY is
629 different, there cannot be a matching state. */
630 if (width
[j
] != w
|| tally
[j
] != t
)
635 for (int k
= 0; match
&& k
< t
; k
++)
636 if (tos
[j
][k
] != tos
[i
][k
]
637 || froms
[j
][k
] != froms
[i
][k
]
638 || (conflict_tos
[j
] != NULL
&& conflict_tos
[j
][k
] != 0))
650 pack_vector (vector_number vector
)
652 vector_number i
= order
[vector
];
654 base_number
*from
= froms
[i
];
655 base_number
*to
= tos
[i
];
656 int *conflict_to
= conflict_tos
[i
];
660 for (base_number res
= lowzero
- from
[0]; ; res
++)
663 aver (res
< table_size
);
665 for (int k
= 0; ok
&& k
< t
; k
++)
667 int loc
= res
+ state_number_as_int (from
[k
]);
668 if (table_size
<= loc
)
675 if (ok
&& bitset_test (pos_set
, nstates
+ res
))
681 int loc
PACIFY_CC (= -1);
682 for (int k
= 0; k
< t
; k
++)
684 loc
= res
+ state_number_as_int (from
[k
]);
686 if (nondeterministic_parser
&& conflict_to
!= NULL
)
687 conflict_table
[loc
] = conflict_to
[k
];
688 check
[loc
] = from
[k
];
691 while (table
[lowzero
] != 0)
697 aver (BASE_MINIMUM
<= res
&& res
<= BASE_MAXIMUM
);
704 /*-------------------------------------------------------------.
705 | Remap the negative infinite in TAB from NINF to the greatest |
706 | possible smallest value. Return it. |
708 | In most case this allows us to use shorts instead of ints in |
710 `-------------------------------------------------------------*/
713 table_ninf_remap (base_number tab
[], int size
, base_number ninf
)
717 for (int i
= 0; i
< size
; i
++)
718 if (tab
[i
] < res
&& tab
[i
] != ninf
)
723 for (int i
= 0; i
< size
; i
++)
733 base
= xnmalloc (nvectors
, sizeof *base
);
734 pos_set
= bitset_create (table_size
+ nstates
, BITSET_FRUGAL
);
735 table
= xcalloc (table_size
, sizeof *table
);
736 conflict_table
= xcalloc (table_size
, sizeof *conflict_table
);
737 check
= xnmalloc (table_size
, sizeof *check
);
742 for (int i
= 0; i
< nvectors
; i
++)
743 base
[i
] = BASE_MINIMUM
;
745 for (int i
= 0; i
< table_size
; i
++)
748 for (int i
= 0; i
< nentries
; i
++)
750 state_number s
= matching_state (i
);
754 /* A new set of state actions, or a nonterminal. */
755 place
= pack_vector (i
);
757 /* Action of I were already coded for S. */
760 /* Store PLACE into POS_SET. PLACE might not belong to the set
761 of possible values for instance with useless tokens. It
762 would be more satisfying to eliminate the need for this
764 if (0 <= nstates
+ place
)
765 bitset_set (pos_set
, nstates
+ place
);
766 base
[order
[i
]] = place
;
769 /* Use the greatest possible negative infinites. */
770 base_ninf
= table_ninf_remap (base
, nvectors
, BASE_MINIMUM
);
771 table_ninf
= table_ninf_remap (table
, high
+ 1, ACTION_NUMBER_MINIMUM
);
773 bitset_free (pos_set
);
778 /*-----------------------------------------------------------------.
779 | Compute and output yydefact, yydefgoto, yypact, yypgoto, yytable |
781 `-----------------------------------------------------------------*/
784 tables_generate (void)
786 /* This is a poor way to make sure the sizes are properly
787 correlated. In particular the signedness is not taken into
788 account. But it's not useless. */
789 verify (sizeof nstates
<= sizeof nvectors
);
790 verify (sizeof nnterms
<= sizeof nvectors
);
792 nvectors
= state_number_as_int (nstates
) + nnterms
;
794 froms
= xcalloc (nvectors
, sizeof *froms
);
795 tos
= xcalloc (nvectors
, sizeof *tos
);
796 conflict_tos
= xcalloc (nvectors
, sizeof *conflict_tos
);
797 tally
= xcalloc (nvectors
, sizeof *tally
);
798 width
= xnmalloc (nvectors
, sizeof *width
);
807 order
= xcalloc (nvectors
, sizeof *order
);
815 for (int i
= 0; i
< nvectors
; i
++)
819 free (conflict_tos
[i
]);
828 /*-------------------------.
829 | Free the parser tables. |
830 `-------------------------*/
836 free (conflict_table
);
837 free (conflict_list
);