src/gram.h

   1 /* Data definitions for internal representation of Bison's input.
   2
   3    Copyright (C) 1984, 1986, 1989, 1992, 2001-2007, 2009-2015, 2018-2020
   4    Free Software Foundation, Inc.
   5
   6    This file is part of Bison, the GNU Compiler Compiler.
   7
   8    This program is free software: you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation, either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 #ifndef GRAM_H_
  22 # define GRAM_H_
  23
  24 /* Representation of the grammar rules:
  25
  26    NTOKENS is the number of tokens, and NVARS is the number of
  27    variables (nonterminals).  NSYMS is the total number, ntokens +
  28    nvars.
  29
  30    Each symbol (either token or variable) receives a symbol number.
  31    Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1
  32    are for variables.  Symbol number zero is the end-of-input token.
  33    This token is counted in ntokens.  The true number of token values
  34    assigned is NTOKENS reduced by one for each alias declaration.
  35
  36    The rules receive rule numbers 1 to NRULES in the order they are
  37    written.  More precisely Bison augments the grammar with the
  38    initial rule, '$accept: START-SYMBOL $end', which is numbered 1,
  39    all the user rules are 2, 3 etc.  Each time a rule number is
  40    presented to the user, we subtract 1, so *displayed* rule numbers
  41    are 0, 1, 2...
  42
  43    Internally, we cannot use the number 0 for a rule because for
  44    instance RITEM stores both symbol (the RHS) and rule numbers: the
  45    symbols are integers >= 0, and rule numbers are stored negative.
  46    Therefore 0 cannot be used, since it would be both the rule number
  47    0, and the token $end.
  48
  49    Actions are accessed via the rule number.
  50
  51    The rules themselves are described by several arrays: amongst which
  52    RITEM, and RULES.
  53
  54    RULES is an array of rules, whose members are:
  55
  56    RULES[R].lhs -- the symbol of the left hand side of rule R.
  57
  58    RULES[R].rhs -- the beginning of the portion of RITEM for rule R.
  59
  60    RULES[R].prec -- the symbol providing the precedence level of R.
  61
  62    RULES[R].precsym -- the symbol attached (via %prec) to give its
  63    precedence to R.  Of course, if set, it is equal to 'prec', but we
  64    need to distinguish one from the other when reducing: a symbol used
  65    in a %prec is not useless.
  66
  67    RULES[R].assoc -- the associativity of R.
  68
  69    RULES[R].dprec -- the dynamic precedence level of R (for GLR
  70    parsing).
  71
  72    RULES[R].merger -- index of merging function for R (for GLR
  73    parsing).
  74
  75    RULES[R].line -- the line where R was defined.
  76
  77    RULES[R].useful -- whether the rule is used.  False if thrown away
  78    by reduce().
  79
  80    The right hand side is stored as symbol numbers in a portion of
  81    RITEM.
  82
  83    The length of the portion is one greater than the number of symbols
  84    in the rule's right hand side.  The last element in the portion
  85    contains -R, which identifies it as the end of a portion and says
  86    which rule it is for.
  87
  88    The portions of RITEM come in order of increasing rule number.
  89    NRITEMS is the total length of RITEM.  Each element of RITEM is
  90    called an "item" and its index in RITEM is an item number.
  91
  92    Item numbers are used in the finite state machine to represent
  93    places that parsing can get to.
  94
  95    SYMBOLS[I]->prec records the precedence level of each symbol.
  96
  97    Precedence levels are assigned in increasing order starting with 1
  98    so that numerically higher precedence values mean tighter binding
  99    as they ought to.  Zero as a symbol or rule's precedence means none
 100    is assigned.
 101
 102    Associativities are recorded similarly in SYMBOLS[I]->assoc.  */
 103
 104 # include "system.h"
 105
 106 # include <unicodeio.h>
 107
 108 # include "location.h"
 109 # include "symtab.h"
 110
 111 # define ISTOKEN(i)     ((i) < ntokens)
 112 # define ISVAR(i)       ((i) >= ntokens)
 113
 114 extern int nsyms;
 115 extern int ntokens;
 116 extern int nvars;
 117
 118 /* Elements of ritem. */
 119 typedef int item_number;
 120 # define ITEM_NUMBER_MAX INT_MAX
 121 extern item_number *ritem;
 122 extern int nritems;
 123
 124 /* Indices into ritem. */
 125 typedef unsigned int item_index;
 126
 127 /* There is weird relationship between OT1H item_number and OTOH
 128    symbol_number and rule_number: we store the latter in
 129    item_number.  symbol_number values are stored as-is, while
 130    the negation of (rule_number + 1) is stored.
 131
 132    Therefore, a symbol_number must be a valid item_number, and we
 133    sometimes have to perform the converse transformation.  */
 134
 135 static inline item_number
 136 symbol_number_as_item_number (symbol_number sym)
 137 {
 138   return sym;
 139 }
 140
 141 static inline symbol_number
 142 item_number_as_symbol_number (item_number i)
 143 {
 144   return i;
 145 }
 146
 147 static inline bool
 148 item_number_is_symbol_number (item_number i)
 149 {
 150   return i >= 0;
 151 }
 152
 153 /* Rule numbers.  */
 154 typedef int rule_number;
 155 # define RULE_NUMBER_MAX INT_MAX
 156
 157 static inline item_number
 158 rule_number_as_item_number (rule_number r)
 159 {
 160   return -1 - r;
 161 }
 162
 163 static inline rule_number
 164 item_number_as_rule_number (item_number i)
 165 {
 166   return -1 - i;
 167 }
 168
 169 static inline bool
 170 item_number_is_rule_number (item_number i)
 171 {
 172   return i < 0;
 173 }
 174
 175
 176 /*--------.
 177 | Rules.  |
 178 `--------*/
 179
 180 typedef struct
 181 {
 182   /* The number of the rule in the source.  It is usually the index in
 183      RULES too, except if there are useless rules.  */
 184   rule_number code;
 185
 186   /* The index in RULES.  Usually the rule number in the source,
 187      except if some rules are useless.  */
 188   rule_number number;
 189
 190   sym_content *lhs;
 191   item_number *rhs;
 192
 193   /* This symbol provides both the associativity, and the precedence. */
 194   sym_content *prec;
 195
 196   int dprec;
 197   int merger;
 198
 199   /* This symbol was attached to the rule via %prec. */
 200   sym_content *precsym;
 201
 202   /* Location of the rhs.  */
 203   location location;
 204   bool useful;
 205   bool is_predicate;
 206
 207   /* Counts of the numbers of expected conflicts for this rule, or -1 if none
 208      given. */
 209   int expected_sr_conflicts;
 210   int expected_rr_conflicts;
 211
 212   const char *action;
 213   location action_loc;
 214 } rule;
 215
 216 /* The used rules (size NRULES).  */
 217 extern rule *rules;
 218 extern rule_number nrules;
 219
 220 /* Fallback in case we can't print "•".  */
 221 static inline long
 222 print_dot_fallback (unsigned int code _GL_UNUSED,
 223                     const char *msg _GL_UNUSED,
 224                     void *callback_arg)
 225 {
 226   FILE *out = (FILE *) callback_arg;
 227   putc ('.', out);
 228   return -1;
 229 }
 230
 231 /* Print "•", the symbol used to represent a point in an item (aka, a
 232    dotted rule).  */
 233 static inline void
 234 print_dot (FILE *out)
 235 {
 236   unicode_to_mb (0x2022, fwrite_success_callback, print_dot_fallback, out);
 237 }
 238
 239 /* Get the rule associated to this item.  ITEM points inside RITEM.  */
 240 static inline rule const *
 241 item_rule (item_number const *item)
 242 {
 243   item_number const *sp = item;
 244   while (!item_number_is_rule_number (*sp))
 245     ++sp;
 246   rule_number r = item_number_as_rule_number (*sp);
 247   return &rules[r];
 248 }
 249
 250 /* Pretty-print this ITEM (as in the report).  ITEM points inside
 251    RITEM.  PREVIOUS_RULE is used to see if the lhs is common, in which
 252    case LHS is factored.  Passing NULL is fine.  */
 253 void item_print (item_number *item, rule const *previous_rule,
 254                  FILE *out);
 255
 256 /* A function that selects a rule.  */
 257 typedef bool (*rule_filter) (rule const *);
 258
 259 /* Whether the rule has a 'number' smaller than NRULES.  That is, it
 260    is useful in the grammar.  */
 261 bool rule_useful_in_grammar_p (rule const *r);
 262
 263 /* Whether the rule has a 'number' higher than NRULES.  That is, it is
 264    useless in the grammar.  */
 265 bool rule_useless_in_grammar_p (rule const *r);
 266
 267 /* Whether the rule is not flagged as useful but is useful in the
 268    grammar.  In other words, it was discarded because of conflicts.  */
 269 bool rule_useless_in_parser_p (rule const *r);
 270
 271 /* Whether the rule has a single RHS, and no user action. */
 272 bool rule_useless_chain_p (rule const *r);
 273
 274 /* Print this rule's number and lhs on OUT.  If a PREVIOUS_LHS was
 275    already displayed (by a previous call for another rule), avoid
 276    useless repetitions.  */
 277 void rule_lhs_print (rule const *r, sym_content const *previous_lhs,
 278                      FILE *out);
 279 void rule_lhs_print_xml (rule const *r, FILE *out, int level);
 280
 281 /* The length of the RHS.  */
 282 size_t rule_rhs_length (rule const *r);
 283
 284 /* Print this rule's RHS on OUT.  */
 285 void rule_rhs_print (rule const *r, FILE *out);
 286
 287 /* Print this rule on OUT.  If a PREVIOUS_RULE was already displayed,
 288    avoid useless repetitions of their LHS. */
 289 void rule_print (rule const *r, rule const *prev_rule, FILE *out);
 290
 291
 292
 293 /* Table of the symbols, indexed by the symbol number. */
 294 extern symbol **symbols;
 295
 296 /* TOKEN_TRANSLATION -- a table indexed by a token number as returned
 297    by the user's yylex routine, it yields the internal token number
 298    used by the parser and throughout bison.  */
 299 extern symbol_number *token_translations;
 300 extern int max_code;
 301
 302
 303
 304 /* Dump RITEM for traces. */
 305 void ritem_print (FILE *out);
 306
 307 /* The size of the longest rule RHS.  */
 308 size_t ritem_longest_rhs (void);
 309
 310 /* Print the grammar's rules that match FILTER on OUT under TITLE.  */
 311 void grammar_rules_partial_print (FILE *out, const char *title,
 312                                   rule_filter filter);
 313
 314 /* Print the grammar's useful rules on OUT.  */
 315 void grammar_rules_print (FILE *out);
 316 /* Print all of the grammar's rules with a "usefulness" attribute.  */
 317 void grammar_rules_print_xml (FILE *out, int level);
 318
 319 /* Dump the grammar. */
 320 void grammar_dump (FILE *out, const char *title);
 321
 322 /* Report on STDERR the rules that are not flagged USEFUL, using the
 323    MESSAGE (which can be 'rule useless in grammar' when invoked after grammar
 324    reduction, or 'rule useless in parser due to conflicts' after conflicts
 325    were taken into account).  */
 326 void grammar_rules_useless_report (const char *message);
 327
 328 /* Free the packed grammar. */
 329 void grammar_free (void);
 330
 331 /* The version %required by the grammar file, as an int (100 * major +
 332    minor).  0 if unspecified.  */
 333 extern int required_version;
 334
 335 #endif /* !GRAM_H_ */