src/gram.h

   1 /* Data definitions for internal representation of Bison's input.
   2
   3    Copyright (C) 1984, 1986, 1989, 1992, 2001-2007, 2009-2015, 2018-2021
   4    Free Software Foundation, Inc.
   5
   6    This file is part of Bison, the GNU Compiler Compiler.
   7
   8    This program is free software: you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation, either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  20
  21 #ifndef GRAM_H_
  22 # define GRAM_H_
  23
  24 /* Representation of the grammar rules:
  25
  26    NTOKENS is the number of tokens, and NNTERMS is the number of
  27    nonterminals (aka variables).  NSYMS is the total number, NTOKENS +
  28    NNTERMS.
  29
  30    Each symbol (either token or nterm) receives a symbol number.
  31    Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1
  32    are for nterms.  Symbol number zero is the end-of-input token.
  33    This token is counted in ntokens.  The true number of token values
  34    assigned is NTOKENS reduced by one for each alias declaration.
  35
  36    The rules receive rule numbers 1 to NRULES in the order they are
  37    written.  More precisely Bison augments the grammar with the
  38    initial rule, '$accept: START-SYMBOL $end', which is numbered 1,
  39    all the user rules are 2, 3 etc.  Each time a rule number is
  40    presented to the user, we subtract 1, so *displayed* rule numbers
  41    are 0, 1, 2...
  42
  43    Internally, we cannot use the number 0 for a rule because for
  44    instance RITEM stores both symbols (the RHS) and rule numbers: the
  45    symbols are integers >= 0, and rule numbers are stored negative.
  46    Therefore 0 cannot be used, since it would be both the rule number
  47    0, and the token $end.
  48
  49    Actions are accessed via the rule number.
  50
  51    The rules themselves are described by several arrays: amongst which
  52    RITEM, and RULES.
  53
  54    RULES is an array of rules, whose members are:
  55
  56    RULES[R].lhs -- the symbol of the left hand side of rule R.
  57
  58    RULES[R].rhs -- the beginning of the portion of RITEM for rule R.
  59
  60    RULES[R].prec -- the symbol providing the precedence level of R.
  61
  62    RULES[R].precsym -- the symbol attached (via %prec) to give its
  63    precedence to R.  Of course, if set, it is equal to 'prec', but we
  64    need to distinguish one from the other when reducing: a symbol used
  65    in a %prec is not useless.
  66
  67    RULES[R].assoc -- the associativity of R.
  68
  69    RULES[R].dprec -- the dynamic precedence level of R (for GLR
  70    parsing).
  71
  72    RULES[R].merger -- index of merging function for R (for GLR
  73    parsing).
  74
  75    RULES[R].line -- the line where R was defined.
  76
  77    RULES[R].useful -- whether the rule is used.  False if thrown away
  78    by reduce().
  79
  80    The right hand side of rules is stored as symbol numbers in a
  81    portion of RITEM.
  82
  83    The length of the portion is one greater than the number of symbols
  84    in the rule's right hand side.  The last element in the portion
  85    contains -R, which identifies it as the end of a portion and says
  86    which rule it is for.
  87
  88    The portions of RITEM come in order of increasing rule number.
  89    NRITEMS is the total length of RITEM.  Each element of RITEM is
  90    called an "item" of type item_number and its index in RITEM is an
  91    item_index.
  92
  93    Item numbers are used in the finite state machine to represent
  94    places that parsing can get to.
  95
  96    SYMBOLS[I]->prec records the precedence level of each symbol.
  97
  98    Precedence levels are assigned in increasing order starting with 1
  99    so that numerically higher precedence values mean tighter binding
 100    as they ought to.  Zero as a symbol or rule's precedence means none
 101    is assigned.
 102
 103    Associativities are recorded similarly in SYMBOLS[I]->assoc.  */
 104
 105 # include "system.h"
 106
 107 # include "location.h"
 108 # include "symtab.h"
 109
 110 # define ISTOKEN(i)     ((i) < ntokens)
 111 # define ISVAR(i)       ((i) >= ntokens)
 112
 113 extern int nsyms;
 114 extern int ntokens;
 115 extern int nnterms;
 116
 117 /* Elements of ritem. */
 118 typedef int item_number;
 119 # define ITEM_NUMBER_MAX INT_MAX
 120 extern item_number *ritem;
 121 extern int nritems;
 122
 123 /* Indices into ritem. */
 124 typedef unsigned int item_index;
 125
/* There is a weird relationship between, on the one hand, item_number
   and, on the other hand, symbol_number and rule_number: we store the
   latter two in item_number.  symbol_number values are stored as-is,
   while the negation of (rule_number + 1) is stored.

   Therefore, a symbol_number must be a valid item_number, and we
   sometimes have to perform the converse transformation.  */
 133
 134 static inline item_number
 135 symbol_number_as_item_number (symbol_number sym)
 136 {
 137   return sym;
 138 }
 139
 140 static inline symbol_number
 141 item_number_as_symbol_number (item_number i)
 142 {
 143   return i;
 144 }
 145
 146 static inline bool
 147 item_number_is_symbol_number (item_number i)
 148 {
 149   return i >= 0;
 150 }
 151
 152 /* Rule numbers.  */
 153 typedef int rule_number;
 154 # define RULE_NUMBER_MAX INT_MAX
 155
 156 static inline item_number
 157 rule_number_as_item_number (rule_number r)
 158 {
 159   return -1 - r;
 160 }
 161
 162 static inline rule_number
 163 item_number_as_rule_number (item_number i)
 164 {
 165   return -1 - i;
 166 }
 167
 168 static inline bool
 169 item_number_is_rule_number (item_number i)
 170 {
 171   return i < 0;
 172 }
 173
 174
 175 /*--------.
 176 | Rules.  |
 177 `--------*/
 178
 179 typedef struct
 180 {
 181   /* The number of the rule in the source.  It is usually the index in
 182      RULES too, except if there are useless rules.  */
 183   rule_number code;
 184
 185   /* The index in RULES.  Usually the rule number in the source,
 186      except if some rules are useless.  */
 187   rule_number number;
 188
 189   sym_content *lhs;
 190   item_number *rhs;
 191
 192   /* This symbol provides both the associativity, and the precedence. */
 193   sym_content *prec;
 194
 195   int dprec;
 196   int merger;
 197
 198   /* This symbol was attached to the rule via %prec. */
 199   sym_content *precsym;
 200
 201   /* Location of the rhs.  */
 202   location location;
 203   bool useful;
 204   bool is_predicate;
 205
 206   /* Counts of the numbers of expected conflicts for this rule, or -1 if none
 207      given. */
 208   int expected_sr_conflicts;
 209   int expected_rr_conflicts;
 210
 211   const char *action;
 212   location action_loc;
 213 } rule;
 214
 215 /* The used rules (size NRULES).  */
 216 extern rule *rules;
 217 extern rule_number nrules;
 218
 219 /* Get the rule associated to this item.  ITEM points inside RITEM.  */
 220 static inline rule const *
 221 item_rule (item_number const *item)
 222 {
 223   item_number const *sp = item;
 224   while (!item_number_is_rule_number (*sp))
 225     ++sp;
 226   rule_number r = item_number_as_rule_number (*sp);
 227   return &rules[r];
 228 }
 229
 230 /* Pretty-print this ITEM (as in the report).  ITEM points inside
 231    RITEM.  PREVIOUS_RULE is used to see if the lhs is common, in which
 232    case LHS is factored.  Passing NULL is fine.  */
 233 void item_print (item_number *item, rule const *previous_rule,
 234                  FILE *out);
 235
 236 /*--------.
 237 | Rules.  |
 238 `--------*/
 239
 240 /* A function that selects a rule.  */
 241 typedef bool (*rule_filter) (rule const *);
 242
 243 /* Whether is an accepting rule (i.e., its reduction terminates
 244    parsing with success). */
 245 static inline bool
 246 rule_is_initial (rule const *r)
 247 {
 248   /* In the case of multistart, we need to check whether the LHS is
 249      $accept.  In the case of "unistart", it would suffice to
 250      check whether this is rule number 0.  */
 251   return r->lhs == acceptsymbol->content;
 252 }
 253
 254 /* Whether the rule has a 'number' smaller than NRULES.  That is, it
 255    is useful in the grammar.  */
 256 bool rule_useful_in_grammar_p (rule const *r);
 257
 258 /* Whether the rule has a 'number' higher than NRULES.  That is, it is
 259    useless in the grammar.  */
 260 bool rule_useless_in_grammar_p (rule const *r);
 261
 262 /* Whether the rule is not flagged as useful but is useful in the
 263    grammar.  In other words, it was discarded because of conflicts.  */
 264 bool rule_useless_in_parser_p (rule const *r);
 265
 266 /* Whether the rule has a single RHS, and no user action. */
 267 bool rule_useless_chain_p (rule const *r);
 268
 269 /* Print this rule's number and lhs on OUT.  If a PREVIOUS_LHS was
 270    already displayed (by a previous call for another rule), avoid
 271    useless repetitions.  */
 272 void rule_lhs_print (rule const *r, sym_content const *previous_lhs,
 273                      FILE *out);
 274 void rule_lhs_print_xml (rule const *r, FILE *out, int level);
 275
 276 /* The length of the RHS.  */
 277 size_t rule_rhs_length (rule const *r);
 278
 279 /* Print this rule's RHS on OUT.  */
 280 void rule_rhs_print (rule const *r, FILE *out);
 281
 282 /* Print this rule on OUT.  If a PREVIOUS_RULE was already displayed,
 283    avoid useless repetitions of their LHS. */
 284 void rule_print (rule const *r, rule const *prev_rule, FILE *out);
 285
 286
 287
 288 /* Table of the symbols, indexed by the symbol number. */
 289 extern symbol **symbols;
 290
 291 /* TOKEN_TRANSLATION -- a table indexed by a token number as returned
 292    by the user's yylex routine, it yields the internal token number
 293    used by the parser and throughout bison.  */
 294 extern symbol_number *token_translations;
 295 extern int max_code;
 296
 297
 298
 299 /* Dump RITEM for traces. */
 300 void ritem_print (FILE *out);
 301
 302 /* The size of the longest rule RHS.  */
 303 size_t ritem_longest_rhs (void);
 304
 305 /* Print the grammar's rules that match FILTER on OUT under TITLE.  */
 306 void grammar_rules_partial_print (FILE *out, const char *title,
 307                                   rule_filter filter);
 308
 309 /* Print the grammar's useful rules on OUT.  */
 310 void grammar_rules_print (FILE *out);
 311 /* Print all of the grammar's rules with a "usefulness" attribute.  */
 312 void grammar_rules_print_xml (FILE *out, int level);
 313
 314 /* Dump the grammar. */
 315 void grammar_dump (FILE *out, const char *title);
 316
 317 /* Report on STDERR the rules that are not flagged USEFUL, using the
 318    MESSAGE (which can be 'rule useless in grammar' when invoked after grammar
 319    reduction, or 'rule useless in parser due to conflicts' after conflicts
 320    were taken into account).  */
 321 void grammar_rules_useless_report (const char *message);
 322
 323 /* Free the packed grammar. */
 324 void grammar_free (void);
 325
 326 /* The version %required by the grammar file, as an int (100 * major +
 327    minor).  0 if unspecified.  */
 328 extern int required_version;
 329
 330 #endif /* !GRAM_H_ */