1 /* Data definitions for internal representation of Bison's input.
3 Copyright (C) 1984, 1986, 1989, 1992, 2001-2007, 2009-2015, 2018-2020
4 Free Software Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 /* Representation of the grammar rules:
26 NTOKENS is the number of tokens, and NVARS is the number of
27 variables (nonterminals). NSYMS is the total number, ntokens +
30 Each symbol (either token or variable) receives a symbol number.
31 Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1
32 are for variables. Symbol number zero is the end-of-input token.
33 This token is counted in ntokens. The true number of token values
34 assigned is NTOKENS reduced by one for each alias declaration.
36 The rules receive rule numbers 1 to NRULES in the order they are
37 written. More precisely Bison augments the grammar with the
38 initial rule, '$accept: START-SYMBOL $end', which is numbered 1,
39 all the user rules are 2, 3 etc. Each time a rule number is
40 presented to the user, we subtract 1, so *displayed* rule numbers
43 Internally, we cannot use the number 0 for a rule because for
44 instance RITEM stores both symbol (the RHS) and rule numbers: the
45 symbols are integers >= 0, and rule numbers are stored negative.
46 Therefore 0 cannot be used, since it would be both the rule number
47 0, and the token $end.
49 Actions are accessed via the rule number.
51 The rules themselves are described by several arrays: amongst which
54 RULES is an array of rules, whose members are:
56 RULES[R].lhs -- the symbol of the left hand side of rule R.
58 RULES[R].rhs -- the beginning of the portion of RITEM for rule R.
60 RULES[R].prec -- the symbol providing the precedence level of R.
62 RULES[R].precsym -- the symbol attached (via %prec) to give its
63 precedence to R. Of course, if set, it is equal to 'prec', but we
64 need to distinguish one from the other when reducing: a symbol used
65 in a %prec is not useless.
67 RULES[R].assoc -- the associativity of R.
69 RULES[R].dprec -- the dynamic precedence level of R (for GLR
72 RULES[R].merger -- index of merging function for R (for GLR
75 RULES[R].line -- the line where R was defined.
77 RULES[R].useful -- whether the rule is used. False if thrown away
80 The right hand side is stored as symbol numbers in a portion of
83 The length of the portion is one greater than the number of symbols
84 in the rule's right hand side. The last element in the portion
85 contains -R, which identifies it as the end of a portion and says
88 The portions of RITEM come in order of increasing rule number.
89 NRITEMS is the total length of RITEM. Each element of RITEM is
90 called an "item" and its index in RITEM is an item number.
92 Item numbers are used in the finite state machine to represent
93 places that parsing can get to.
95 SYMBOLS[I]->prec records the precedence level of each symbol.
97 Precedence levels are assigned in increasing order starting with 1
98 so that numerically higher precedence values mean tighter binding
99 as they ought to. Zero as a symbol or rule's precedence means none
102 Associativities are recorded similarly in SYMBOLS[I]->assoc. */
106 # include <unicodeio.h>
108 # include "location.h"
111 # define ISTOKEN(i) ((i) < ntokens)
112 # define ISVAR(i) ((i) >= ntokens)
118 /* Elements of ritem. */
119 typedef int item_number
;
120 # define ITEM_NUMBER_MAX INT_MAX
121 extern item_number
*ritem
;
124 /* Indices into ritem. */
125 typedef unsigned int item_index
;
127 /* There is weird relationship between OT1H item_number and OTOH
128 symbol_number and rule_number: we store the latter in
129 item_number. symbol_number values are stored as-is, while
130 the negation of (rule_number + 1) is stored.
132 Therefore, a symbol_number must be a valid item_number, and we
133 sometimes have to perform the converse transformation. */
135 static inline item_number
136 symbol_number_as_item_number (symbol_number sym
)
141 static inline symbol_number
142 item_number_as_symbol_number (item_number i
)
148 item_number_is_symbol_number (item_number i
)
154 typedef int rule_number
;
155 # define RULE_NUMBER_MAX INT_MAX
157 static inline item_number
158 rule_number_as_item_number (rule_number r
)
163 static inline rule_number
164 item_number_as_rule_number (item_number i
)
170 item_number_is_rule_number (item_number i
)
182 /* The number of the rule in the source. It is usually the index in
183 RULES too, except if there are useless rules. */
186 /* The index in RULES. Usually the rule number in the source,
187 except if some rules are useless. */
193 /* This symbol provides both the associativity, and the precedence. */
199 /* This symbol was attached to the rule via %prec. */
200 sym_content
*precsym
;
202 /* Location of the rhs. */
207 /* Counts of the numbers of expected conflicts for this rule, or -1 if none
209 int expected_sr_conflicts
;
210 int expected_rr_conflicts
;
216 /* The used rules (size NRULES). */
218 extern rule_number nrules
;
220 /* Fallback in case we can't print "•". */
222 print_dot_fallback (unsigned int code _GL_UNUSED
,
223 const char *msg _GL_UNUSED
,
226 FILE *out
= (FILE *) callback_arg
;
231 /* Print "•", the symbol used to represent a point in an item (aka, a
234 print_dot (FILE *out
)
236 unicode_to_mb (0x2022, fwrite_success_callback
, print_dot_fallback
, out
);
239 /* Get the rule associated to this item. ITEM points inside RITEM. */
240 static inline rule
const *
241 item_rule (item_number
const *item
)
243 item_number
const *sp
= item
;
244 while (!item_number_is_rule_number (*sp
))
246 rule_number r
= item_number_as_rule_number (*sp
);
250 /* Pretty-print this ITEM (as in the report). ITEM points inside
251 RITEM. PREVIOUS_RULE is used to see if the lhs is common, in which
252 case LHS is factored. Passing NULL is fine. */
253 void item_print (item_number
*item
, rule
const *previous_rule
,
256 /* A function that selects a rule. */
257 typedef bool (*rule_filter
) (rule
const *);
259 /* Whether the rule has a 'number' smaller than NRULES. That is, it
260 is useful in the grammar. */
261 bool rule_useful_in_grammar_p (rule
const *r
);
263 /* Whether the rule has a 'number' higher than NRULES. That is, it is
264 useless in the grammar. */
265 bool rule_useless_in_grammar_p (rule
const *r
);
267 /* Whether the rule is not flagged as useful but is useful in the
268 grammar. In other words, it was discarded because of conflicts. */
269 bool rule_useless_in_parser_p (rule
const *r
);
271 /* Whether the rule has a single RHS, and no user action. */
272 bool rule_useless_chain_p (rule
const *r
);
274 /* Print this rule's number and lhs on OUT. If a PREVIOUS_LHS was
275 already displayed (by a previous call for another rule), avoid
276 useless repetitions. */
277 void rule_lhs_print (rule
const *r
, sym_content
const *previous_lhs
,
279 void rule_lhs_print_xml (rule
const *r
, FILE *out
, int level
);
281 /* The length of the RHS. */
282 size_t rule_rhs_length (rule
const *r
);
284 /* Print this rule's RHS on OUT. */
285 void rule_rhs_print (rule
const *r
, FILE *out
);
287 /* Print this rule on OUT. If a PREVIOUS_RULE was already displayed,
288 avoid useless repetitions of their LHS. */
289 void rule_print (rule
const *r
, rule
const *prev_rule
, FILE *out
);
293 /* Table of the symbols, indexed by the symbol number. */
294 extern symbol
**symbols
;
296 /* TOKEN_TRANSLATION -- a table indexed by a token number as returned
297 by the user's yylex routine, it yields the internal token number
298 used by the parser and throughout bison. */
299 extern symbol_number
*token_translations
;
304 /* Dump RITEM for traces. */
305 void ritem_print (FILE *out
);
307 /* The size of the longest rule RHS. */
308 size_t ritem_longest_rhs (void);
310 /* Print the grammar's rules that match FILTER on OUT under TITLE. */
311 void grammar_rules_partial_print (FILE *out
, const char *title
,
314 /* Print the grammar's useful rules on OUT. */
315 void grammar_rules_print (FILE *out
);
316 /* Print all of the grammar's rules with a "usefulness" attribute. */
317 void grammar_rules_print_xml (FILE *out
, int level
);
319 /* Dump the grammar. */
320 void grammar_dump (FILE *out
, const char *title
);
322 /* Report on STDERR the rules that are not flagged USEFUL, using the
323 MESSAGE (which can be 'rule useless in grammar' when invoked after grammar
324 reduction, or 'rule useless in parser due to conflicts' after conflicts
325 were taken into account). */
326 void grammar_rules_useless_report (const char *message
);
328 /* Free the packed grammar. */
329 void grammar_free (void);
331 /* The version %required by the grammar file, as an int (100 * major +
332 minor). 0 if unspecified. */
333 extern int required_version
;
335 #endif /* !GRAM_H_ */