style: factor the access to a rule from its items
[bison.git] / src / gram.h
blob356066f109ceda4f9f1a2b95845f20bc6a91f413
1 /* Data definitions for internal representation of Bison's input.
3 Copyright (C) 1984, 1986, 1989, 1992, 2001-2007, 2009-2015, 2018-2020
4 Free Software Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
21 #ifndef GRAM_H_
22 # define GRAM_H_
24 /* Representation of the grammar rules:
26 NTOKENS is the number of tokens, and NVARS is the number of
27 variables (nonterminals). NSYMS is the total number, ntokens +
28 nvars.
30 Each symbol (either token or variable) receives a symbol number.
31 Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1
32 are for variables. Symbol number zero is the end-of-input token.
33 This token is counted in ntokens. The true number of token values
34 assigned is NTOKENS reduced by one for each alias declaration.
36 The rules receive rule numbers 1 to NRULES in the order they are
37 written. More precisely Bison augments the grammar with the
38 initial rule, '$accept: START-SYMBOL $end', which is numbered 1,
39 all the user rules are 2, 3 etc. Each time a rule number is
40 presented to the user, we subtract 1, so *displayed* rule numbers
41 are 0, 1, 2...
43 Internally, we cannot use the number 0 for a rule because for
44 instance RITEM stores both symbol (the RHS) and rule numbers: the
45 symbols are integers >= 0, and rule numbers are stored negative.
46 Therefore 0 cannot be used, since it would be both the rule number
47 0, and the token $end.
49 Actions are accessed via the rule number.
51 The rules themselves are described by several arrays: amongst which
52 RITEM, and RULES.
54 RULES is an array of rules, whose members are:
56 RULES[R].lhs -- the symbol of the left hand side of rule R.
58 RULES[R].rhs -- the beginning of the portion of RITEM for rule R.
60 RULES[R].prec -- the symbol providing the precedence level of R.
62 RULES[R].precsym -- the symbol attached (via %prec) to give its
63 precedence to R. Of course, if set, it is equal to 'prec', but we
64 need to distinguish one from the other when reducing: a symbol used
65 in a %prec is not useless.
67 RULES[R].assoc -- the associativity of R.
69 RULES[R].dprec -- the dynamic precedence level of R (for GLR
70 parsing).
72 RULES[R].merger -- index of merging function for R (for GLR
73 parsing).
75 RULES[R].line -- the line where R was defined.
77 RULES[R].useful -- whether the rule is used. False if thrown away
78 by reduce().
80 The right hand side is stored as symbol numbers in a portion of
81 RITEM.
83 The length of the portion is one greater than the number of symbols
84 in the rule's right hand side. The last element in the portion
85 contains -R, which identifies it as the end of a portion and says
86 which rule it is for.
88 The portions of RITEM come in order of increasing rule number.
89 NRITEMS is the total length of RITEM. Each element of RITEM is
90 called an "item" and its index in RITEM is an item number.
92 Item numbers are used in the finite state machine to represent
93 places that parsing can get to.
95 SYMBOLS[I]->prec records the precedence level of each symbol.
97 Precedence levels are assigned in increasing order starting with 1
98 so that numerically higher precedence values mean tighter binding
99 as they ought to. Zero as a symbol or rule's precedence means none
100 is assigned.
102 Associativities are recorded similarly in SYMBOLS[I]->assoc. */
104 # include "system.h"
106 # include <unicodeio.h>
108 # include "location.h"
109 # include "symtab.h"
111 # define ISTOKEN(i) ((i) < ntokens)
112 # define ISVAR(i) ((i) >= ntokens)
114 extern int nsyms;
115 extern int ntokens;
116 extern int nvars;
118 /* Elements of ritem. */
119 typedef int item_number;
120 # define ITEM_NUMBER_MAX INT_MAX
121 extern item_number *ritem;
122 extern int nritems;
124 /* Indices into ritem. */
125 typedef unsigned int item_index;
127 /* There is weird relationship between OT1H item_number and OTOH
128 symbol_number and rule_number: we store the latter in
129 item_number. symbol_number values are stored as-is, while
130 the negation of (rule_number + 1) is stored.
132 Therefore, a symbol_number must be a valid item_number, and we
133 sometimes have to perform the converse transformation. */
135 static inline item_number
136 symbol_number_as_item_number (symbol_number sym)
138 return sym;
141 static inline symbol_number
142 item_number_as_symbol_number (item_number i)
144 return i;
147 static inline bool
148 item_number_is_symbol_number (item_number i)
150 return i >= 0;
153 /* Rule numbers. */
154 typedef int rule_number;
155 # define RULE_NUMBER_MAX INT_MAX
157 static inline item_number
158 rule_number_as_item_number (rule_number r)
160 return -1 - r;
163 static inline rule_number
164 item_number_as_rule_number (item_number i)
166 return -1 - i;
169 static inline bool
170 item_number_is_rule_number (item_number i)
172 return i < 0;
176 /*--------.
177 | Rules. |
178 `--------*/
180 typedef struct
182 /* The number of the rule in the source. It is usually the index in
183 RULES too, except if there are useless rules. */
184 rule_number code;
186 /* The index in RULES. Usually the rule number in the source,
187 except if some rules are useless. */
188 rule_number number;
190 sym_content *lhs;
191 item_number *rhs;
193 /* This symbol provides both the associativity, and the precedence. */
194 sym_content *prec;
196 int dprec;
197 int merger;
199 /* This symbol was attached to the rule via %prec. */
200 sym_content *precsym;
202 /* Location of the rhs. */
203 location location;
204 bool useful;
205 bool is_predicate;
207 /* Counts of the numbers of expected conflicts for this rule, or -1 if none
208 given. */
209 int expected_sr_conflicts;
210 int expected_rr_conflicts;
212 const char *action;
213 location action_loc;
214 } rule;
216 /* The used rules (size NRULES). */
217 extern rule *rules;
218 extern rule_number nrules;
220 /* Fallback in case we can't print "•". */
221 static inline long
222 print_dot_fallback (unsigned int code _GL_UNUSED,
223 const char *msg _GL_UNUSED,
224 void *callback_arg)
226 FILE *out = (FILE *) callback_arg;
227 putc ('.', out);
228 return -1;
231 /* Print "•", the symbol used to represent a point in an item (aka, a
232 dotted rule). */
233 static inline void
234 print_dot (FILE *out)
236 unicode_to_mb (0x2022, fwrite_success_callback, print_dot_fallback, out);
239 /* Get the rule associated to this item. ITEM points inside RITEM. */
240 static inline rule const *
241 item_rule (item_number const *item)
243 item_number const *sp = item;
244 while (!item_number_is_rule_number (*sp))
245 ++sp;
246 rule_number r = item_number_as_rule_number (*sp);
247 return &rules[r];
250 /* Pretty-print this ITEM (as in the report). ITEM points inside
251 RITEM. PREVIOUS_RULE is used to see if the lhs is common, in which
252 case LHS is factored. Passing NULL is fine. */
253 void item_print (item_number *item, rule const *previous_rule,
254 FILE *out);
256 /* A function that selects a rule. */
257 typedef bool (*rule_filter) (rule const *);
259 /* Whether the rule has a 'number' smaller than NRULES. That is, it
260 is useful in the grammar. */
261 bool rule_useful_in_grammar_p (rule const *r);
263 /* Whether the rule has a 'number' higher than NRULES. That is, it is
264 useless in the grammar. */
265 bool rule_useless_in_grammar_p (rule const *r);
267 /* Whether the rule is not flagged as useful but is useful in the
268 grammar. In other words, it was discarded because of conflicts. */
269 bool rule_useless_in_parser_p (rule const *r);
271 /* Whether the rule has a single RHS, and no user action. */
272 bool rule_useless_chain_p (rule const *r);
274 /* Print this rule's number and lhs on OUT. If a PREVIOUS_LHS was
275 already displayed (by a previous call for another rule), avoid
276 useless repetitions. */
277 void rule_lhs_print (rule const *r, sym_content const *previous_lhs,
278 FILE *out);
279 void rule_lhs_print_xml (rule const *r, FILE *out, int level);
281 /* The length of the RHS. */
282 size_t rule_rhs_length (rule const *r);
284 /* Print this rule's RHS on OUT. */
285 void rule_rhs_print (rule const *r, FILE *out);
287 /* Print this rule on OUT. If a PREVIOUS_RULE was already displayed,
288 avoid useless repetitions of their LHS. */
289 void rule_print (rule const *r, rule const *prev_rule, FILE *out);
293 /* Table of the symbols, indexed by the symbol number. */
294 extern symbol **symbols;
296 /* TOKEN_TRANSLATION -- a table indexed by a token number as returned
297 by the user's yylex routine, it yields the internal token number
298 used by the parser and throughout bison. */
299 extern symbol_number *token_translations;
300 extern int max_code;
304 /* Dump RITEM for traces. */
305 void ritem_print (FILE *out);
307 /* The size of the longest rule RHS. */
308 size_t ritem_longest_rhs (void);
310 /* Print the grammar's rules that match FILTER on OUT under TITLE. */
311 void grammar_rules_partial_print (FILE *out, const char *title,
312 rule_filter filter);
314 /* Print the grammar's useful rules on OUT. */
315 void grammar_rules_print (FILE *out);
316 /* Print all of the grammar's rules with a "usefulness" attribute. */
317 void grammar_rules_print_xml (FILE *out, int level);
319 /* Dump the grammar. */
320 void grammar_dump (FILE *out, const char *title);
322 /* Report on STDERR the rules that are not flagged USEFUL, using the
323 MESSAGE (which can be 'rule useless in grammar' when invoked after grammar
324 reduction, or 'rule useless in parser due to conflicts' after conflicts
325 were taken into account). */
326 void grammar_rules_useless_report (const char *message);
328 /* Free the packed grammar. */
329 void grammar_free (void);
331 /* The version %required by the grammar file, as an int (100 * major +
332 minor). 0 if unspecified. */
333 extern int required_version;
335 #endif /* !GRAM_H_ */