src/reduce.c

   1 /* Grammar reduction for Bison.
   2
   3    Copyright (C) 1988-1989, 2000-2003, 2005-2015, 2018-2021 Free
   4    Software Foundation, Inc.
   5
   6    This file is part of Bison, the GNU Compiler Compiler.
   7
   8    This program is free software: you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation, either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  20
  21
  22 /* Reduce the grammar: Find and eliminate unreachable terminals,
  23    nonterminals, and productions.  David S. Bakin.  */
  24
  25 /* Don't eliminate unreachable terminals: They may be used by the
  26    user's parser.  */
  27
  28 #include <config.h>
  29 #include "system.h"
  30
  31 #include <bitset.h>
  32
  33 #include "complain.h"
  34 #include "files.h"
  35 #include "getargs.h"
  36 #include "gram.h"
  37 #include "print-xml.h"
  38 #include "reader.h"
  39 #include "reduce.h"
  40 #include "symtab.h"
  41
  42 /* Set of nonterminals whose language is not empty.  */
  43 static bitset N;
  44
  45 /* Set of rules that have no useless nonterminals in their RHS.  */
  46 static bitset P;
  47
  48 /* Set of accessible symbols.  */
  49 static bitset V;
  50
  51 /* Set of symbols used to define rule precedence (so they are
  52    'useless', but no warning should be issued).  */
  53 static bitset V1;
  54
  55 int nuseless_productions;
  56 int nuseless_nonterminals;
  57 \f
  58 #define bitset_swap(Lhs, Rhs)                   \
  59   do {                                          \
  60     bitset lhs__ = Lhs;                         \
  61     Lhs = Rhs;                                  \
  62     Rhs = lhs__;                                \
  63   } while (0)
  64
  65 /*-------------------------------------------------------------------.
  66 | Another way to do this would be with a set for each production and |
  67 | then do subset tests against N0, but even for the C grammar the    |
  68 | whole reducing process takes only 2 seconds on my 8Mhz AT.         |
  69 `-------------------------------------------------------------------*/
  70
  71 static bool
  72 useful_production (rule_number r, bitset N0)
  73 {
  74   /* A production is useful if all of the nonterminals in its appear
  75      in the set of useful nonterminals.  */
  76
  77   for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
  78     if (ISVAR (*rhsp) && !bitset_test (N0, *rhsp - ntokens))
  79       return false;
  80   return true;
  81 }
  82
  83
  84 /*-----------------------------------------------------------------.
  85 | Compute N, the set of nonterminals whose language is not empty.  |
  86 |                                                                  |
  87 | Remember that rules are 1-origin, symbols are 0-origin.          |
  88 `-----------------------------------------------------------------*/
  89
  90 static void
  91 useless_nonterminals (void)
  92 {
  93   /* N is set as built.  Np is set being built this iteration. P is
  94      set of all productions which have a RHS all in N.  */
  95
  96   bitset Np = bitset_create (nnterms, BITSET_FIXED);
  97
  98   /* The set being computed is a set of nonterminals which can derive
  99      the empty string or strings consisting of all terminals. At each
 100      iteration a nonterminal is added to the set if there is a
 101      production with that nonterminal as its LHS for which all the
 102      nonterminals in its RHS are already in the set.  Iterate until
 103      the set being computed remains unchanged.  Any nonterminals not
 104      in the set at that point are useless in that they will never be
 105      used in deriving a sentence of the language.
 106
 107      This iteration doesn't use any special traversal over the
 108      productions.  A set is kept of all productions for which all the
 109      nonterminals in the RHS are in useful.  Only productions not in
 110      this set are scanned on each iteration.  At the end, this set is
 111      saved to be used when finding useful productions: only
 112      productions in this set will appear in the final grammar.  */
 113
 114   while (1)
 115     {
 116       bitset_copy (Np, N);
 117       for (rule_number r = 0; r < nrules; ++r)
 118         if (!bitset_test (P, r)
 119             && useful_production (r, N))
 120           {
 121             bitset_set (Np, rules[r].lhs->number - ntokens);
 122             bitset_set (P, r);
 123           }
 124       if (bitset_equal_p (N, Np))
 125         break;
 126       bitset_swap (N, Np);
 127     }
 128   bitset_free (N);
 129   N = Np;
 130 }
 131
 132
 133 static void
 134 inaccessable_symbols (void)
 135 {
 136   /* Find out which productions are reachable and which symbols are
 137      used.  Starting with an empty set of productions and a set of
 138      symbols which only has the start symbol in it, iterate over all
 139      productions until the set of productions remains unchanged for an
 140      iteration.  For each production which has a LHS in the set of
 141      reachable symbols, add the production to the set of reachable
 142      productions, and add all of the nonterminals in the RHS of the
 143      production to the set of reachable symbols.
 144
 145      Consider only the (partially) reduced grammar which has only
 146      nonterminals in N and productions in P.
 147
 148      The result is the set P of productions in the reduced grammar,
 149      and the set V of symbols in the reduced grammar.
 150
 151      Although this algorithm also computes the set of terminals which
 152      are reachable, no terminal will be deleted from the grammar. Some
 153      terminals might not be in the grammar but might be generated by
 154      semantic routines, and so the user might want them available with
 155      specified numbers.  (Is this true?)  However, the nonreachable
 156      terminals are printed (if running in verbose mode) so that the
 157      user can know.  */
 158
 159   bitset Vp = bitset_create (nsyms, BITSET_FIXED);
 160   bitset Pp = bitset_create (nrules, BITSET_FIXED);
 161
 162   /* If the start symbol isn't useful, then nothing will be useful. */
 163   if (bitset_test (N, acceptsymbol->content->number - ntokens))
 164     {
 165       bitset_set (V, acceptsymbol->content->number);
 166
 167       while (1)
 168         {
 169           bitset_copy (Vp, V);
 170           for (rule_number r = 0; r < nrules; ++r)
 171             if (!bitset_test (Pp, r)
 172                 && bitset_test (P, r)
 173                 && bitset_test (V, rules[r].lhs->number))
 174               {
 175                 for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
 176                   if (ISTOKEN (*rhsp) || bitset_test (N, *rhsp - ntokens))
 177                     bitset_set (Vp, *rhsp);
 178                 bitset_set (Pp, r);
 179               }
 180           if (bitset_equal_p (V, Vp))
 181             break;
 182           bitset_swap (V, Vp);
 183         }
 184     }
 185
 186   bitset_free (V);
 187   V = Vp;
 188
 189   /* These tokens (numbered 0, 1, and 2) are internal to Bison.
 190      Consider them useful. */
 191   bitset_set (V, eoftoken->content->number);   /* end-of-input token */
 192   bitset_set (V, errtoken->content->number);   /* error token */
 193   bitset_set (V, undeftoken->content->number); /* some undefined token */
 194
 195   bitset_free (P);
 196   P = Pp;
 197
 198   int nuseful_productions = bitset_count (P);
 199   nuseless_productions = nrules - nuseful_productions;
 200
 201   int nuseful_nonterminals = 0;
 202   for (symbol_number i = ntokens; i < nsyms; ++i)
 203     nuseful_nonterminals += bitset_test (V, i);
 204   nuseless_nonterminals = nnterms - nuseful_nonterminals;
 205
 206   /* A token that was used in %prec should not be warned about.  */
 207   for (rule_number r = 0; r < nrules; ++r)
 208     if (rules[r].precsym != 0)
 209       bitset_set (V1, rules[r].precsym->number);
 210 }
 211
 212
 213 /*-------------------------------------------------------------------.
 214 | Put the useless productions at the end of RULES, and adjust NRULES |
 215 | accordingly.                                                       |
 216 `-------------------------------------------------------------------*/
 217
 218 static void
 219 reduce_grammar_tables (void)
 220 {
 221   /* Report and flag useless productions.  */
 222   {
 223     for (rule_number r = 0; r < nrules; ++r)
 224       rules[r].useful = bitset_test (P, r);
 225     grammar_rules_useless_report (_("rule useless in grammar"));
 226   }
 227
 228   /* Map the nonterminals to their new index: useful first, useless
 229      afterwards.  Kept for later report.  */
 230   {
 231     int useful = 0;
 232     int useless = nrules - nuseless_productions;
 233     rule *rules_sorted = xnmalloc (nrules, sizeof *rules_sorted);
 234     for (rule_number r = 0; r < nrules; ++r)
 235       rules_sorted[rules[r].useful ? useful++ : useless++] = rules[r];
 236     free (rules);
 237     rules = rules_sorted;
 238
 239     /* Renumber the rules markers in RITEMS.  */
 240     for (rule_number r = 0; r < nrules; ++r)
 241       {
 242         item_number *rhsp = rules[r].rhs;
 243         for (/* Nothing. */; 0 <= *rhsp; ++rhsp)
 244           continue;
 245         *rhsp = rule_number_as_item_number (r);
 246         rules[r].number = r;
 247       }
 248     nrules -= nuseless_productions;
 249   }
 250
 251   /* Adjust NRITEMS.  */
 252   for (rule_number r = nrules; r < nrules + nuseless_productions; ++r)
 253     nritems -= rule_rhs_length (&rules[r]) + 1;
 254 }
 255
 256
 257 /*------------------------------.
 258 | Remove useless nonterminals.  |
 259 `------------------------------*/
 260
 261 symbol_number *nterm_map = NULL;
 262
 263 static void
 264 nonterminals_reduce (void)
 265 {
 266   nterm_map = xnmalloc (nnterms, sizeof *nterm_map);
 267   /* Map the nonterminals to their new index: useful first, useless
 268      afterwards.  Kept for later report.  */
 269   {
 270     symbol_number n = ntokens;
 271     for (symbol_number i = ntokens; i < nsyms; ++i)
 272       if (bitset_test (V, i))
 273         nterm_map[i - ntokens] = n++;
 274     for (symbol_number i = ntokens; i < nsyms; ++i)
 275       if (!bitset_test (V, i))
 276         {
 277           nterm_map[i - ntokens] = n++;
 278           if (symbols[i]->content->status != used
 279               && symbols[i] != acceptsymbol)
 280             complain (&symbols[i]->location, Wother,
 281                       _("nonterminal useless in grammar: %s"),
 282                       symbols[i]->tag);
 283         }
 284   }
 285
 286   /* Shuffle elements of tables indexed by symbol number.  */
 287   {
 288     symbol **symbols_sorted = xnmalloc (nnterms, sizeof *symbols_sorted);
 289     for (symbol_number i = ntokens; i < nsyms; ++i)
 290       symbols[i]->content->number = nterm_map[i - ntokens];
 291     for (symbol_number i = ntokens; i < nsyms; ++i)
 292       symbols_sorted[nterm_map[i - ntokens] - ntokens] = symbols[i];
 293     for (symbol_number i = ntokens; i < nsyms; ++i)
 294       symbols[i] = symbols_sorted[i - ntokens];
 295     free (symbols_sorted);
 296   }
 297
 298   /* Update nonterminal numbers in the RHS of the rules.  LHS are
 299      pointers to the symbol structure, they don't need renumbering. */
 300   {
 301     for (rule_number r = 0; r < nrules; ++r)
 302       for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
 303         if (ISVAR (*rhsp))
 304           *rhsp = symbol_number_as_item_number (nterm_map[*rhsp - ntokens]);
 305     acceptsymbol->content->number = nterm_map[acceptsymbol->content->number - ntokens];
 306   }
 307
 308   nsyms -= nuseless_nonterminals;
 309   nnterms -= nuseless_nonterminals;
 310 }
 311
 312
 313 /*------------------------------------------------------------------.
 314 | Output the detailed results of the reductions.  For FILE.output.  |
 315 `------------------------------------------------------------------*/
 316
 317 void
 318 reduce_output (FILE *out)
 319 {
 320   if (nuseless_nonterminals)
 321     {
 322       fprintf (out, "%s\n\n", _("Nonterminals useless in grammar"));
 323       for (int i = 0; i < nuseless_nonterminals; ++i)
 324         fprintf (out, "    %s\n", symbols[nsyms + i]->tag);
 325       fputs ("\n\n", out);
 326     }
 327
 328   {
 329     bool b = false;
 330     for (int i = 0; i < ntokens; ++i)
 331       if (reduce_token_unused_in_grammar (i))
 332         {
 333           if (!b)
 334             fprintf (out, "%s\n\n", _("Terminals unused in grammar"));
 335           b = true;
 336           fprintf (out, "    %s\n", symbols[i]->tag);
 337         }
 338     if (b)
 339       fputs ("\n\n", out);
 340   }
 341
 342   if (nuseless_productions)
 343     grammar_rules_partial_print (out, _("Rules useless in grammar"),
 344                                  rule_useless_in_grammar_p);
 345 }
 346 \f
 347
 348 /*-------------------------------.
 349 | Report the results to STDERR.  |
 350 `-------------------------------*/
 351
 352 static void
 353 reduce_print (void)
 354 {
 355   if (nuseless_nonterminals)
 356     complain (NULL, Wother, ngettext ("%d nonterminal useless in grammar",
 357                                       "%d nonterminals useless in grammar",
 358                                       nuseless_nonterminals),
 359               nuseless_nonterminals);
 360   if (nuseless_productions)
 361     complain (NULL, Wother, ngettext ("%d rule useless in grammar",
 362                                       "%d rules useless in grammar",
 363                                       nuseless_productions),
 364               nuseless_productions);
 365 }
 366 \f
 367 void
 368 reduce_grammar (void)
 369 {
 370   /* Allocate the global sets used to compute the reduced grammar */
 371
 372   N = bitset_create (nnterms, BITSET_FIXED);
 373   P =  bitset_create (nrules, BITSET_FIXED);
 374   V = bitset_create (nsyms, BITSET_FIXED);
 375   V1 = bitset_create (nsyms, BITSET_FIXED);
 376
 377   useless_nonterminals ();
 378   inaccessable_symbols ();
 379
 380   /* Did we reduce something? */
 381   if (nuseless_nonterminals || nuseless_productions)
 382     {
 383       reduce_print ();
 384
 385       // Check that start symbols have non-empty languages.
 386       bool failure = false;
 387       for (symbol_list *list = start_symbols; list; list = list->next)
 388         if (!bitset_test (N, list->content.sym->content->number - ntokens))
 389           {
 390             failure = true;
 391             complain (&list->sym_loc, complaint,
 392                       _("start symbol %s does not derive any sentence"),
 393                       list->content.sym->tag);
 394           }
 395       if (failure)
 396         exit (EXIT_FAILURE);
 397
 398       /* First reduce the nonterminals, as they renumber themselves in the
 399          whole grammar.  If you change the order, nonterms would be
 400          renumbered only in the reduced grammar.  */
 401       if (nuseless_nonterminals)
 402         nonterminals_reduce ();
 403       if (nuseless_productions)
 404         reduce_grammar_tables ();
 405     }
 406
 407   if (trace_flag & trace_grammar)
 408     {
 409       grammar_dump (stderr, "Reduced Grammar");
 410
 411       fprintf (stderr, "reduced %s defines %d terminals, %d nonterminals"
 412                ", and %d productions.\n",
 413                grammar_file, ntokens, nnterms, nrules);
 414     }
 415 }
 416
 417 bool
 418 reduce_token_unused_in_grammar (symbol_number i)
 419 {
 420   aver (i < ntokens);
 421   return !bitset_test (V, i) && !bitset_test (V1, i);
 422 }
 423
 424 bool
 425 reduce_nonterminal_useless_in_grammar (const sym_content *sym)
 426 {
 427   symbol_number n = sym->number;
 428   aver (ntokens <= n && n < nsyms + nuseless_nonterminals);
 429   return nsyms <= n;
 430 }
 431
 432 /*-----------------------------------------------------------.
 433 | Free the global sets used to compute the reduced grammar.  |
 434 `-----------------------------------------------------------*/
 435
 436 void
 437 reduce_free (void)
 438 {
 439   bitset_free (N);
 440   bitset_free (V);
 441   bitset_free (V1);
 442   bitset_free (P);
 443   free (nterm_map);
 444   nterm_map = NULL;
 445 }