src/reader.c

   1 /* Input parser for Bison
   2
   3    Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000-2003, 2005-2007,
   4    2009-2015, 2018-2021 Free Software Foundation, Inc.
   5
   6    This file is part of Bison, the GNU Compiler Compiler.
   7
   8    This program is free software: you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation, either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  20
  21 #include <config.h>
  22 #include "system.h"
  23
  24 #include <quote.h>
  25 #include <vasnprintf.h>
  26
  27 #include "complain.h"
  28 #include "conflicts.h"
  29 #include "files.h"
  30 #include "fixits.h"
  31 #include "getargs.h"
  32 #include "gram.h"
  33 #include "muscle-tab.h"
  34 #include "reader.h"
  35 #include "symlist.h"
  36 #include "symtab.h"
  37 #include "scan-gram.h"
  38 #include "scan-code.h"
  39
  40 static void prepare_percent_define_front_end_variables (void);
  41 static void check_and_convert_grammar (void);
  42
  43 static symbol_list *grammar = NULL;    /* All the rules read so far.  */
  44 symbol_list *start_symbols = NULL;     /* Symbols given to %start.  */
  45 merger_list *merge_functions = NULL;   /* %merge functions, by index.  */
  46
  47 /* Was %union seen?  */
  48 bool union_seen = false;
  49
  50 /* Should rules have a default precedence (that of their last token)?  */
  51 bool default_prec = true;
  52 \f
  53
  54 void
  55 grammar_start_symbols_add (symbol_list *syms)
  56 {
  57   /* Walk SYMS node by node.  A node whose symbol is already in
  58      START_SYMBOLS is reported and freed; any other node is linked at
  59      the tail of START_SYMBOLS.  */
  60   symbol_list *tail = symbol_list_last (start_symbols);
  61   symbol_list *node = syms;
  62   while (node && node->content.sym)
  63     {
  64       symbol_list *previous
  65         = symbol_list_find_symbol (start_symbols, node->content.sym);
  66       /* Detach NODE from SYMS so it can be freed or appended alone.  */
  67       symbol_list *rest = node->next;
  68       node->next = NULL;
  69       if (previous)
  70         {
  71           /* Already a start symbol: complain and drop this node.  */
  72           duplicate_directive ("%start", previous->sym_loc, node->sym_loc);
  73           symbol_list_free (node);
  74         }
  75       else if (tail)
  76         {
  77           tail->next = node;
  78           tail = node;
  79         }
  80       else
  81         {
  82           /* START_SYMBOLS was empty: NODE starts the list.  */
  83           start_symbols = node;
  84           tail = node;
  85         }
  86       node = rest;
  87     }
  88 }
  89
  90 \f
  91
  92 /*------------------------------------------------------------------------.
  93 | Return the merger index for a merging function named NAME.  Records the |
  94 | function, if new, in MERGE_FUNCTIONS.                                   |
  95 `------------------------------------------------------------------------*/
  96
  97 static int
  98 get_merge_function (uniqstr name)
  99 {
 100   if (! glr_parser)
 101     return 0;
 102
 103   /* LINK designates the pointer slot under test: MERGE_FUNCTIONS itself,
 104      then the NEXT member of each record in turn.  */
 105   merger_list **link = &merge_functions;
 106   int index = 1;
 107
 108   for (; *link; link = &(*link)->next, index += 1)
 109     if (UNIQSTR_EQ (name, (*link)->name))
 110       return index;
 111
 112   /* NAME is new: append a record.  After all symbol type declarations
 113      have been parsed, packgram invokes record_merge_function_type to
 114      set its type.  */
 115   merger_list *fresh = xmalloc (sizeof *fresh);
 116   fresh->name = uniqstr_new (name);
 117   fresh->sym = NULL;
 118   fresh->next = NULL;
 119   *link = fresh;
 120
 121   return index;
 122 }
 123
 124 /*-------------------------------------------------------------------.
 125 | For the existing merging function with index MERGER, record that   |
 126 | the result type is that of SYM, as required by the lhs (i.e., SYM) |
 127 | of the rule whose %merge declaration is at DECLARATION_LOC.        |
 128 `-------------------------------------------------------------------*/
 129
 130 static void
 131 record_merge_function_type (int merger, symbol *sym, location declaration_loc)
 132 {
 133   /* Only positive indexes designate a merging function.  */
 134   if (merger <= 0)
 135     return;
 136
 137   /* An untyped SYM is handled as if its type were the empty string.  */
 138   uniqstr type = sym->content->type_name;
 139   if (!type)
 140     type = uniqstr_new ("");
 141
 142   /* Step to the MERGER-th record; indexes start at 1.  */
 143   merger_list *record = merge_functions;
 144   int position = 1;
 145   for (; record != NULL && position != merger; ++position)
 146     record = record->next;
 147   aver (record != NULL && position == merger);
 148   if (!(record->sym && record->sym->content->type_name))
 149     {
 150       /* No typed symbol recorded yet: keep SYM and its declaration.  */
 151       record->sym = sym;
 152       record->type_declaration_loc = declaration_loc;
 153     }
 154   else if (!UNIQSTR_EQ (record->sym->content->type_name, type))
 155     {
 156       complain (&declaration_loc, complaint,
 157                 _("result type clash on merge function %s: "
 158                   "<%s> != <%s>"),
 159                 quote (record->name), type,
 160                 record->sym->content->type_name);
 161       subcomplain (&record->type_declaration_loc, complaint,
 162                    _("previous declaration"));
 163     }
 164 }
 165
 166 /*--------------------------------------.
 167 | Free all merge-function definitions.  |
 168 `--------------------------------------*/
 169
 170 void
 171 free_merger_functions (void)
 172 {
 173   /* Release every record through MERGE_FUNCTIONS itself, so that it
 174      ends up NULL instead of pointing at freed memory.  */
 175   for (merger_list *next; merge_functions; merge_functions = next)
 176     {
 177       next = merge_functions->next;
 178       free (merge_functions);
 179     }
 180 }
 181
 182 \f
 183 /*-------------------------------------------------------------------.
 184 | Parse the input grammar into a single symbol_list structure.  Each |
 185 | rule is represented by a sequence of symbols: the left hand side   |
 186 | followed by the contents of the right hand side, followed by a     |
 187 | null pointer instead of a symbol to terminate the rule.  The next  |
 188 | symbol is the lhs of the following rule.                           |
 189 |                                                                    |
 190 | All actions are copied out, labelled by the rule number they apply |
 191 | to.                                                                |
 192 `-------------------------------------------------------------------*/
 193
 194 /* The last node appended to GRAMMAR so far; NULL while it is empty.  */
 195 static symbol_list *grammar_end = NULL;
 196
 197 /* Append a node for SYM, located at LOC, at the end of GRAMMAR, and
 198    return that node.  A null SYM marks the end of a rule.  */
 199 static symbol_list *
 200 grammar_symbol_append (symbol *sym, location loc)
 201 {
 202   symbol_list *node = symbol_list_sym_new (sym, loc);
 203
 204   /* Hook NODE after the current tail, or make it the head of an
 205      empty grammar; either way it becomes the new tail.  */
 206   symbol_list **slot = grammar_end ? &grammar_end->next : &grammar;
 207   *slot = node;
 208   grammar_end = node;
 209
 210   /* Only actual symbols are items: the end-of-rule marker is not
 211      counted.  */
 212   if (sym)
 213     nritems += 1;
 214
 215   return node;
 216 }
 217
 218 static void
 219 assign_named_ref (symbol_list *p, named_ref *name)
 220 {
 221   symbol *sym = p->content.sym;
 222   if (name->id != sym->tag)
 223     {
 224       /* A name that differs from the symbol's own tag is kept on P.  */
 225       p->named_ref = name;
 226       return;
 227     }
 228   /* NAME merely repeats the symbol's tag: warn and discard it.  */
 229   complain (&name->loc, Wother,
 230             _("duplicated symbol name for %s ignored"),
 231             quote (sym->tag));
 232   named_ref_free (name);
 233 }
 233
 234
 235 /* The rule currently being defined, and the previous rule.
 236    CURRENT_RULE points to the LHS node of the current rule, while
 237    PREVIOUS_RULE_END points to the null-symbol node ending the rule before.  */
 238 static symbol_list *current_rule = NULL;
 239 static symbol_list *previous_rule_end = NULL;
 240
 241
 242 /*----------------------------------------------.
 243 | Create a new rule for LHS in the GRAMMAR.     |
 244 `----------------------------------------------*/
 245
 246 void
 247 grammar_current_rule_begin (symbol *lhs, location loc,
 248                             named_ref *lhs_name)
 249 {
 250   nrules += 1;
 251   /* Before LHS is appended, the tail of GRAMMAR ends the previous rule.  */
 252   previous_rule_end = grammar_end;
 253   current_rule = grammar_symbol_append (lhs, loc);
 254   if (lhs_name)
 255     assign_named_ref (current_rule, named_ref_copy (lhs_name));
 256
 257   /* A token cannot own a rule; an unclassified LHS becomes an nterm.  */
 258   if (lhs->content->class == token_sym)
 259     complain (&loc, complaint, _("rule given for %s, which is a token"),
 260               lhs->tag);
 261   else if (lhs->content->class == unknown_sym
 262            || lhs->content->class == pct_type_sym)
 263     symbol_class_set (lhs, nterm_sym, empty_loc, false);
 264 }
 265
 266
 267 /*----------------------------------------------------------------------.
 268 | A symbol should be used if either:                                    |
 269 |   1. It has a destructor.                                             |
 270 |   2. The symbol is a midrule symbol (i.e., the generated LHS          |
 271 |      replacing a midrule action) that was assigned to or used, as in  |
 272 |      "exp: { $$ = 1; } { $$ = $1; }".                                 |
 273 `----------------------------------------------------------------------*/
 274
 275 static bool
 276 symbol_should_be_used (symbol_list const *s, bool *midrule_warning)
 277 {
 278   /* 1. The symbol has a destructor.  */
 279   if (symbol_code_props_get (s->content.sym, destructor)->code)
 280     return true;
 281   /* 2. The value of a midrule is used, seen from either side of it.  */
 282   bool used = s->midrule && s->midrule->action_props.is_value_used;
 283   if (!used && s->midrule_parent_rule)
 284     used = (symbol_list_n_get (s->midrule_parent_rule,
 285                                s->midrule_parent_rhs_index)
 286             ->action_props.is_value_used);
 287   if (used)
 288     *midrule_warning = true;
 289   return used;
 290 }
 291
 292 /*-----------------------------------------------------------------.
 293 | Check that the rule R is properly defined.  For instance, there  |
 294 | should be no type clash on the default action.  Possibly install |
 295 | the default action.                                              |
 296 `-----------------------------------------------------------------*/
 297
 298 static void
 299 grammar_rule_check_and_complete (symbol_list *r)
 300 {
 301   const symbol *lhs = r->content.sym;
 302   const symbol *first_rhs = r->next->content.sym;  /* Null if R is empty.  */
 303
 304   /* Type check.
 305
 306      If there is an action, then there is nothing we can do: the user
 307      is allowed to shoot herself in the foot.
 308
 309      Don't worry about the default action if $$ is untyped, since $$'s
 310      value can't be used.  */
 311   if (!r->action_props.code && lhs->content->type_name)
 312     {
 313       /* If $$ is being set in default way, report if any type mismatch.  */
 314       if (first_rhs)
 315         {
 316           char const *lhs_type = lhs->content->type_name;
 317           char const *rhs_type =
 318             first_rhs->content->type_name ? first_rhs->content->type_name : "";
 319           if (!UNIQSTR_EQ (lhs_type, rhs_type))
 320             complain (&r->rhs_loc, Wother,
 321                       _("type clash on default action: <%s> != <%s>"),
 322                       lhs_type, rhs_type);
 323           else
 324             {
 325               /* Install the default action only for C++.  */
 326               const bool is_cxx =
 327                 STREQ (language->language, "c++")
 328                 || (skeleton && (STREQ (skeleton, "glr.cc")
 329                                  || STREQ (skeleton, "glr2.cc")
 330                                  || STREQ (skeleton, "lalr1.cc")));
 331               if (is_cxx)
 332                 {
 333                   code_props_rule_action_init (&r->action_props, "{ $$ = $1; }",
 334                                                r->rhs_loc, r,
 335                                                /* name */ NULL,
 336                                                /* type */ NULL,
 337                                                /* is_predicate */ false);
 338                   code_props_translate_code (&r->action_props);
 339                 }
 340             }
 341         }
 342       /* Warn if there is no default for $$ but we need one.  */
 343       else
 344         complain (&r->rhs_loc, Wother,
 345                   _("empty rule for typed nonterminal, and no action"));
 346     }
 347
 348   /* For each start symbol, build the action of its start rule.  Use
 349      the same obstack as the one used by scan-code, which is in charge
 350      of actions. */
 351   const bool multistart = start_symbols && start_symbols->next;
 352   if (multistart && lhs == acceptsymbol)
 353     {
 354       const symbol *start = r->next->next->content.sym; /* Second rhs symbol.  */
 355       if (start->content->type_name)
 356         obstack_printf (obstack_for_actions,
 357                         "{ ]b4_accept""([%s%d])[; }",
 358                         start->content->class == nterm_sym ? "orig " : "",
 359                         start->content->number);
 360       else
 361         obstack_printf (obstack_for_actions,
 362                         "{ ]b4_accept[; }");
 363       code_props_rule_action_init (&r->action_props,
 364                                    obstack_finish0 (obstack_for_actions),
 365                                    r->rhs_loc, r,
 366                                    /* name */ NULL,
 367                                    /* type */ NULL,
 368                                    /* is_predicate */ false);
 369     }
 370
 371
 372   /* Check that symbol values that should be used are in fact used.
 373      Don't check the generated start rules.  It has no action, so some
 374      rhs symbols may appear unused, but the parsing algorithm ensures
 375      that %destructor's are invoked appropriately.  */
 376   if (lhs != acceptsymbol)
 377     {
 378       int n = 0;  /* 0 designates the lhs ($$), N > 0 the N-th rhs symbol.  */
 379       for (symbol_list const *l = r; l && l->content.sym; l = l->next, ++n)
 380         {
 381           bool midrule_warning = false;
 382           if (!l->action_props.is_value_used
 383               && symbol_should_be_used (l, &midrule_warning)
 384               /* The default action, $$ = $1, 'uses' both.  */
 385               && (r->action_props.code || (n != 0 && n != 1)))
 386             {
 387               warnings warn_flag = midrule_warning ? Wmidrule_values : Wother;
 388               if (n)
 389                 complain (&l->sym_loc, warn_flag, _("unused value: $%d"), n);
 390               else
 391                 complain (&l->rhs_loc, warn_flag, _("unset value: $$"));
 392             }
 393         }
 394     }
 395
 396   /* Check that %empty => empty rule.  */
 397   if (r->percent_empty_loc.start.file
 398       && r->next && r->next->content.sym)
 399     {
 400       complain (&r->percent_empty_loc, complaint,
 401                 _("%%empty on non-empty rule"));
 402       fixits_register (&r->percent_empty_loc, "");
 403     }
 404
 405   /* Check that empty rule => %empty.  */
 406   if (!(r->next && r->next->content.sym)
 407       && !r->midrule_parent_rule
 408       && !r->percent_empty_loc.start.file
 409       && warning_is_enabled (Wempty_rule))
 410     {
 411       complain (&r->rhs_loc, Wempty_rule, _("empty rule without %%empty"));
 412       if (feature_flag & feature_caret)
 413         location_caret_suggestion (r->rhs_loc, "%empty", stderr);
 414       location loc = r->rhs_loc;
 415       loc.end = loc.start;  /* Empty span at the start of the rhs.  */
 416       fixits_register (&loc, " %empty ");
 417     }
 418
 419   /* See comments in grammar_current_rule_prec_set for how POSIX
 420      mandates this complaint.  It's only for identifiers, so skip
 421      it for char literals and strings, which are always tokens.  */
 422   if (r->ruleprec
 423       && r->ruleprec->tag[0] != '\'' && r->ruleprec->tag[0] != '"'
 424       && r->ruleprec->content->status != declared
 425       && !r->ruleprec->content->prec)
 426     complain (&r->rhs_loc, Wother,
 427               _("token for %%prec is not defined: %s"), r->ruleprec->tag);
 428
 429   /* Check that the (main) action was not typed.  */
 430   if (r->action_props.type)
 431     complain (&r->rhs_loc, Wother,
 432               _("only midrule actions can be typed: %s"), r->action_props.type);
 433 }
 434
 435
 436 /*-------------------------------------.
 437 | End the currently being grown rule.  |
 438 `-------------------------------------*/
 439
 440 void
 441 grammar_current_rule_end (location loc)
 442 {
 443   /* Put an empty link in the list to mark the end of this rule.  */
 444   grammar_symbol_append (NULL, grammar_end->rhs_loc);
 445   current_rule->rhs_loc = loc;   /* LOC becomes the location of the rule.  */
 446 }
 447
 448
 449 /*-------------------------------------------------------------------.
 450 | The previous action turns out to be a midrule action.  Attach it   |
 451 | to the current rule, i.e., create a dummy symbol, attach this      |
 452 | midrule action to it, and append this dummy nonterminal to the     |
 453 | current rule.                                                      |
 454 `-------------------------------------------------------------------*/
 455
 456 void
 457 grammar_midrule_action (void)
 458 {
 459   /* Since the action was written out with this rule's number, we must
 460      give the new rule this number by inserting the new rule before
 461      it.  */
 462
 463   /* Make a DUMMY nonterminal, whose location is that of the midrule
 464      action.  Create the MIDRULE.  */
 465   location dummy_loc = current_rule->action_props.location;
 466   symbol *dummy = dummy_symbol_get (dummy_loc);
 467   symbol_type_set (dummy,
 468                    current_rule->action_props.type, current_rule->action_props.location);
 469   symbol_list *midrule = symbol_list_sym_new (dummy, dummy_loc);
 470
 471   /* Remember the named_ref of the previous action.  */
 472   named_ref *action_name = current_rule->action_props.named_ref;
 473
 474   /* Make a new rule, whose body is empty, before the current one, so
 475      that the action just read can belong to it.  */
 476   ++nrules;
 477   ++nritems;   /* Presumably the item that ends the new rule.  */
 478   /* Attach its location and actions to that of the DUMMY.  */
 479   midrule->rhs_loc = dummy_loc;
 480   code_props_rule_action_init (&midrule->action_props,
 481                                current_rule->action_props.code,
 482                                current_rule->action_props.location,
 483                                midrule,
 484                                /* name_ref */ NULL,
 485                                /* type */ NULL,
 486                                current_rule->action_props.is_predicate);
 487   code_props_none_init (&current_rule->action_props);
 488
 489   midrule->expected_sr_conflicts = current_rule->expected_sr_conflicts;
 490   midrule->expected_rr_conflicts = current_rule->expected_rr_conflicts;
 491   current_rule->expected_sr_conflicts = -1;   /* Moved to MIDRULE.  */
 492   current_rule->expected_rr_conflicts = -1;   /* Likewise.  */
 493
 494   if (previous_rule_end)
 495     previous_rule_end->next = midrule;
 496   else
 497     grammar = midrule;
 498
 499   /* End the dummy's rule.  */
 500   midrule->next = symbol_list_sym_new (NULL, dummy_loc);
 501   midrule->next->next = current_rule;
 502
 503   previous_rule_end = midrule->next;
 504
 505   /* Insert the dummy nonterminal replacing the midrule action into
 506      the current rule.  Bind it to its dedicated rule.  */
 507   grammar_current_rule_symbol_append (dummy, dummy_loc,
 508                                       action_name);
 509   grammar_end->midrule = midrule;
 510   midrule->midrule_parent_rule = current_rule;
 511   midrule->midrule_parent_rhs_index = symbol_list_length (current_rule->next);
 512 }
 513
 514 /* Set the precedence symbol of the current rule to PRECSYM. */
 515
 516 void
 517 grammar_current_rule_prec_set (symbol *precsym, location loc)
 518 {
 519   /* According to POSIX, an identifier that is neither on the LHS of
 520      a rule, nor defined by %token, nor given a precedence by a
 521      directive, is a nonterminal.  That is ignored here: only a token
 522      may follow %prec in POSIX, and treating PRECSYM as a token right
 523      away yields more logical error messages.  The presumed intent of
 524      POSIX, i.e., a diagnostic for any identifier used after %prec
 525      without being defined separately as a token, is nevertheless
 526      obeyed later by grammar_rule_check_and_complete.  */
 527   symbol_class_set (precsym, token_sym, loc, false);
 528
 529   /* A rule whose %prec is already set keeps it.  */
 530   if (!current_rule->ruleprec)
 531     current_rule->ruleprec = precsym;
 532   else
 533     duplicate_rule_directive ("%prec",
 534                               current_rule->ruleprec->location, loc);
 535 }
 536
 537 /* Set %empty for the current rule. */
 538
 539 void
 540 grammar_current_rule_empty_set (location loc)
 541 {
 542   /* Using %empty turns -Wempty-rule on, unless -Wno-empty-rule (or
 543      -Wempty-rule itself) was given.  */
 544   if (warning_is_unset (Wempty_rule))
 545     warning_argmatch ("empty-rule", 0, 0);
 546   location *seen = &current_rule->percent_empty_loc;
 547   if (!seen->start.file)
 548     *seen = loc;
 549   else
 550     duplicate_rule_directive ("%empty", *seen, loc);
 551 }
 552
 553 /* Attach dynamic precedence DPREC to the current rule. */
 554
 555 void
 556 grammar_current_rule_dprec_set (int dprec, location loc)
 557 {
 558   if (! glr_parser)
 559     complain (&loc, Wother, _("%s affects only GLR parsers"), "%dprec");
 560
 561   /* Keep DPREC only if it is positive and the rule has none yet.  */
 562   if (dprec <= 0)
 563     complain (&loc, complaint,
 564               _("%s must be followed by positive number"), "%dprec");
 565   else if (current_rule->dprec == 0)
 566     {
 567       current_rule->dprec = dprec;
 568       current_rule->dprec_loc = loc;
 569     }
 570   else
 571     duplicate_rule_directive ("%dprec", current_rule->dprec_loc, loc);
 572 }
 573
 574 /* Attach the merge function NAME, declared at LOC, to the current
 575    rule.  */
 576
 577 void
 578 grammar_current_rule_merge_set (uniqstr name, location loc)
 579 {
 580   if (! glr_parser)
 581     complain (&loc, Wother, _("%s affects only GLR parsers"), "%merge");
 582   /* A rule whose merger is already set gets a duplicate complaint.  */
 583   if (current_rule->merger == 0)
 584     {
 585       current_rule->merger = get_merge_function (name);
 586       current_rule->merger_declaration_loc = loc;
 587     }
 588   else
 589     duplicate_rule_directive ("%merge",
 590                               current_rule->merger_declaration_loc, loc);
 591 }
 592
 593 /* Attach SYM to the current rule.  If needed, move the previous
 594    action as a midrule action.  */
 595
 596 void
 597 grammar_current_rule_symbol_append (symbol *sym, location loc, named_ref *name)
 598 {
 599   /* A pending action precedes SYM, hence is a midrule action.  */
 600   if (current_rule->action_props.code)
 601     grammar_midrule_action ();
 602   symbol_list *node = grammar_symbol_append (sym, loc);
 603   if (name)
 604     assign_named_ref (node, name);
 605   if (sym->content->status == used || sym->content->status == undeclared)
 606     sym->content->status = needed;
 607 }
 608
 609 void
 610 grammar_current_rule_action_append (const char *action, location loc,
 611                                     named_ref *name, uniqstr type)
 612 {
 613   /* An action read earlier now turns out to be a midrule action.  */
 614   if (current_rule->action_props.code)
 615     grammar_midrule_action ();
 616   if (type)
 617     complain (&loc, Wyacc,
 618               _("POSIX Yacc does not support typed midrule actions"));
 619   /* Only record ACTION: packgram invokes code_props_translate_code
 620      once all the symbol declarations have been parsed.  */
 621   code_props_rule_action_init (&current_rule->action_props, action, loc,
 622                                current_rule, name, type,
 623                                /* is_predicate */ false);
 624 }
 625
 626 void
 627 grammar_current_rule_predicate_append (const char *pred, location loc)
 628 {
 629   /* As for actions, a pending action becomes a midrule action.  */
 630   if (current_rule->action_props.code)
 631     grammar_midrule_action ();
 632   code_props_rule_action_init (&current_rule->action_props, pred, loc,
 633                                current_rule, /* name */ NULL, /* type */ NULL,
 634                                /* is_predicate */ true);
 635 }
 636
 637 /* Set the expected number of shift/reduce (reduce/reduce) conflicts
 638  * for the current rule.  If a midrule is encountered later, the count
 639  * is transferred to it and reset in the current rule to -1. */
 640
 641 void
 642 grammar_current_rule_expect_sr (int count, location loc)
 643 {
 644   current_rule->expected_sr_conflicts = count;
 645   (void) loc;   /* Unused: no diagnostic is issued here.  */
 646 }
 647
 648 void
 649 grammar_current_rule_expect_rr (int count, location loc)
 650 {
 651   /* Ignored, with a warning, unless the parser is GLR.  */
 652   if (glr_parser)
 653     current_rule->expected_rr_conflicts = count;
 654   else
 655     complain (&loc, Wother, _("%s affects only GLR parsers"), "%expect-rr");
 656 }
 657
 658 \f
 659 /*---------------------------------------------.
 660 | Build RULES and RITEM from what was parsed.  |
 661 `---------------------------------------------*/
 662
 663 static void
 664 packgram (void)
 665 {
 666   int itemno = 0;
 667   ritem = xnmalloc (nritems + 1, sizeof *ritem);   /* + 1: the sentinel.  */
 668   /* This sentinel is used by build_relations() in lalr.c.  */
 669   *ritem++ = 0;
 670
 671   rule_number ruleno = 0;
 672   rules = xnmalloc (nrules, sizeof *rules);
 673
 674   for (symbol_list *p = grammar; p; p = p->next)
 675     {
 676       symbol_list *lhs = p;
 677       record_merge_function_type (lhs->merger, lhs->content.sym,
 678                                   lhs->merger_declaration_loc);
 679       /* If the midrule's $$ is set or its $n is used, remove the '$' from the
 680          symbol name so that it's a user-defined symbol so that the default
 681          %destructor and %printer apply.  */
 682       if (lhs->midrule_parent_rule /* i.e., symbol_is_dummy (lhs->content.sym).  */
 683           && (lhs->action_props.is_value_used
 684               || (symbol_list_n_get (lhs->midrule_parent_rule,
 685                                      lhs->midrule_parent_rhs_index)
 686                   ->action_props.is_value_used)))
 687         lhs->content.sym->tag += 1;
 688
 689       grammar_rule_check_and_complete (lhs);
 690
 691       rules[ruleno].code = ruleno;
 692       rules[ruleno].number = ruleno;
 693       rules[ruleno].lhs = lhs->content.sym->content;
 694       rules[ruleno].rhs = ritem + itemno;
 695       rules[ruleno].prec = NULL;
 696       rules[ruleno].dprec = lhs->dprec;
 697       rules[ruleno].merger = lhs->merger;
 698       rules[ruleno].precsym = NULL;
 699       rules[ruleno].location = lhs->rhs_loc;
 700       rules[ruleno].useful = true;
 701       rules[ruleno].action = lhs->action_props.code;
 702       rules[ruleno].action_loc = lhs->action_props.location;
 703       rules[ruleno].is_predicate = lhs->action_props.is_predicate;
 704       rules[ruleno].expected_sr_conflicts = lhs->expected_sr_conflicts;
 705       rules[ruleno].expected_rr_conflicts = lhs->expected_rr_conflicts;
 706
 707       /* Traverse the rhs.  This leaves P on the rule's end-of-rule node.  */
 708       {
 709         size_t rule_length = 0;
 710         for (p = lhs->next; p->content.sym; p = p->next)
 711           {
 712             ++rule_length;
 713
 714             /* Don't allow rule_length == INT_MAX, since that might
 715                cause confusion with strtol if INT_MAX == LONG_MAX.  */
 716             if (rule_length == INT_MAX)
 717               complain (&rules[ruleno].location, fatal, _("rule is too long"));
 718
 719             /* item_number = symbol_number.
 720                But the former needs to contain more: negative rule numbers. */
 721             ritem[itemno++] =
 722               symbol_number_as_item_number (p->content.sym->content->number);
 723             /* A rule gets by default the precedence and associativity
 724                of its last token.  */
 725             if (p->content.sym->content->class == token_sym && default_prec)
 726               rules[ruleno].prec = p->content.sym->content;
 727           }
 728       }
 729
 730       /* If this rule has a %prec,
 731          the specified symbol's precedence replaces the default.  */
 732       if (lhs->ruleprec)
 733         {
 734           rules[ruleno].precsym = lhs->ruleprec->content;
 735           rules[ruleno].prec = lhs->ruleprec->content;
 736         }
 737
 738       /* An item ends by the rule number (negated).  */
 739       ritem[itemno++] = rule_number_as_item_number (ruleno);
 740       aver (itemno < ITEM_NUMBER_MAX);
 741       ++ruleno;
 742       aver (ruleno < RULE_NUMBER_MAX);
 743     }
 744
 745   aver (itemno == nritems);   /* Every counted item has been stored.  */
 746
 747   if (trace_flag & trace_sets)
 748     ritem_print (stderr);
 749 }
 750
 751
 752 /*--------------------------------------------------------------.
 753 | Read in the grammar specification and record it in the format |
 754 | described in gram.h.                                          |
 755 `--------------------------------------------------------------*/
 756
 757 void
 758 reader (const char *gram)
 759 {
 760   /* Set up the symbol and semantic type tables, and the built-ins.  */
 761   symbols_new ();
 762   /* Run the grammar scanner and parser on GRAM.  */
 763   gram_scanner_open (gram);
 764   parser_init ();
 765   gram_parse ();
 766   gram_scanner_close ();
 767   prepare_percent_define_front_end_variables ();
 768
 769   /* Convert the grammar only if no complaint was issued so far.  */
 770   if (status_complaint <= complaint_status)
 771     return;
 772   check_and_convert_grammar ();
 773 }
 774
 775 static void
 776 prepare_percent_define_front_end_variables (void)
 777 {
 778   /* Set the defaults of the %define front-end variables.  */
 779   muscle_percent_define_default ("lr.keep-unreachable-state", "false");
 780
 781   /* IELR would be a better default for lr.type, but LALR is
 782      historically the default.  */
 783   muscle_percent_define_default ("lr.type", "lalr");
 784
 785   /* The default of lr.default-reduction depends on lr.type.  */
 786   char *lr_type = muscle_percent_define_get ("lr.type");
 787   muscle_percent_define_default ("lr.default-reduction",
 788                                  STRNEQ (lr_type, "canonical-lr")
 789                                  ? "most" : "accepting");
 790   free (lr_type);
 791
 792   muscle_percent_define_default ("tool.xsltproc", "xsltproc");
 793
 794   /* Check the %define front-end variables.  In the table, each
 795      variable name is followed by its accepted values and a NULL.  */
 796   static char const * const accepted[] =
 797     {
 798      "lr.type", "lr""(0)", "lalr", "ielr", "canonical-lr", NULL,
 799      "lr.default-reduction", "most", "consistent", "accepting", NULL,
 800      NULL
 801     };
 802   muscle_percent_define_check_values (accepted);
 803 }
 804
 805 /* Return the LHS of the first rule that is not a dummy (midrule) rule.  */
 806 static symbol *
 807 find_start_symbol (void)
 808 {
 809   symbol_list *lhs = grammar;
 810   while (symbol_is_dummy (lhs->content.sym))
 811     {
 812       for (lhs = lhs->next; lhs->content.sym; lhs = lhs->next)
 813         continue;                /* Past the dummy LHS and its RHS.  */
 814       lhs = lhs->next;           /* Past the end-of-rule node.  */
 815     }
 816   return lhs->content.sym;
 817 }
 818
 819
 820 /* Insert an initial rule, whose location is that of the first rule
 821    (not that of the start symbol):
 822
 823    $accept: SWITCHING_TOKEN START $end.  */
 824 static void
 825 create_start_rule (symbol *swtok, symbol *start)
 826 {
 827   symbol_list *initial_rule = symbol_list_sym_new (acceptsymbol, empty_loc);
 828   initial_rule->rhs_loc = grammar->rhs_loc;
 829   symbol_list *p = initial_rule;
 830   if (swtok)
 831     {
 832       // Cannot create the action now, as the symbols have not yet
 833       // been assigned their number (by symbol_pack), which we need to
 834       // know the type name.  So the action is created in
 835       // grammar_rule_check_and_complete, which is run after
 836       // symbol_pack.
 837       p->next = symbol_list_sym_new (swtok, empty_loc);
 838       p = p->next;
 839     }
 840   p->next = symbol_list_sym_new (start, empty_loc);
 841   p = p->next;
 842   p->next = symbol_list_sym_new (eoftoken, empty_loc);
 843   p = p->next;
 844   p->next = symbol_list_sym_new (NULL, empty_loc);
 845   p = p->next;
 846   p->next = grammar;
 847   nrules += 1;
 848   nritems += 3 + !!swtok;
 849   grammar = initial_rule;
 850 }
 851
 852 /* Fetch (or create) a token "YY_PARSE_foo" for start symbol "foo".
 853
 854    We don't use the simple "YY_FOO" because (i) we might get clashes
 855    with some of our symbols (e.g., cast => YY_CAST), and (ii) upcasing
 856    introduces possible clashes between terminal FOO and nonterminal
 857    foo.  */
 858 symbol *
 859 switching_token (const symbol *start)
 860 {
 861   char buf[100];
 862   size_t len = sizeof buf;
 863   char *name = asnprintf (buf, &len, "YY_PARSE_%s", symbol_id_get (start));
 864   if (!name)
 865     xalloc_die ();
 866   // Setting the location ensures deterministic symbol numbers.
 867   symbol *res = symbol_get (name, start->location);
 868   if (name != buf)
 869     free (name);
 870   symbol_class_set (res, token_sym, start->location, false);
 871   return res;
 872 }
 873
 874 /* Create the start rules in reverse order, since they are inserted at
 875    the top of the grammar.  That way the rules follow the order of
 876    declaration to %start.  */
 877
 878 static void
 879 create_multiple_start_rules (symbol_list *start_syms)
 880 {
 881   if (start_syms)
 882     {
 883       create_multiple_start_rules (start_syms->next);
 884       assert (start_syms->content_type == SYMLIST_SYMBOL);
 885       symbol *start = start_syms->content.sym;
 886       symbol *swtok = switching_token (start);
 887       create_start_rule (swtok, start);
 888     }
 889 }
 890
 891 /* For each start symbol "foo", create the rule "$accept: YY_FOO
 892    foo $end". */
 893 static void
 894 create_start_rules (void)
 895 {
 896   if (!start_symbols)
 897     {
 898       symbol *start = find_start_symbol ();
 899       start_symbols = symbol_list_sym_new (start, start->location);
 900     }
 901
 902   const bool several = start_symbols->next;
 903   if (several)
 904     create_multiple_start_rules (start_symbols);
 905   else
 906     {
 907       symbol *start = start_symbols->content.sym;
 908       create_start_rule (NULL, start);
 909     }
 910 }
 911
 912 static void
 913 check_start_symbols (void)
 914 {
 915   const bool multistart = start_symbols && start_symbols->next;
 916   // Sanity checks on the start symbols.
 917   for (symbol_list *list = start_symbols; list; list = list->next)
 918     {
 919       const symbol *start = list->content.sym;
 920       if (start->content->class == unknown_sym)
 921         {
 922           complain (&start->location, complaint,
 923                     _("the start symbol %s is undefined"),
 924                     start->tag);
 925           // I claim this situation is unreachable.  This is caught
 926           // before, and we get "symbol 'foo' is used, but is not
 927           // defined as a token and has no rules".
 928           abort ();
 929         }
 930       // If your only start symbol is a token, you're weird.
 931       if (!multistart && start->content->class == token_sym)
 932         complain (&start->location, complaint,
 933                   _("the start symbol %s is a token"),
 934                   start->tag);
 935     }
 936   if (complaint_status == status_complaint)
 937     exit (EXIT_FAILURE);
 938 }
 939
 940
 941 /*-------------------------------------------------------------.
 942 | Check the grammar that has just been read, and convert it to |
 943 | internal form.                                               |
 944 `-------------------------------------------------------------*/
 945
 946 static void
 947 check_and_convert_grammar (void)
 948 {
 949   /* Grammar has been read.  Do some checking.  */
 950   if (nrules == 0)
 951     complain (NULL, fatal, _("no rules in the input grammar"));
 952
 953   /* If the user did not define her EOFTOKEN, do it now. */
 954   if (!eoftoken)
 955     {
 956       eoftoken = symbol_get ("YYEOF", empty_loc);
 957       eoftoken->content->class = token_sym;
 958       eoftoken->content->number = 0;
 959       /* Value specified by POSIX.  */
 960       eoftoken->content->code = 0;
 961       {
 962         symbol *alias = symbol_get ("$end", empty_loc);
 963         symbol_class_set (alias, token_sym, empty_loc, false);
 964         symbol_make_alias (eoftoken, alias, empty_loc);
 965       }
 966     }
 967
 968   /* Insert the initial rule(s).  */
 969   create_start_rules ();
 970
 971   /* Report any undefined symbols and consider them nonterminals.  */
 972   symbols_check_defined ();
 973
 974   if (SYMBOL_NUMBER_MAXIMUM - nnterms < ntokens)
 975     complain (NULL, fatal, "too many symbols in input grammar (limit is %d)",
 976               SYMBOL_NUMBER_MAXIMUM);
 977
 978   nsyms = ntokens + nnterms;
 979
 980   /* Assign the symbols their symbol numbers.  */
 981   symbols_pack ();
 982
 983   check_start_symbols ();
 984
 985   /* Scan rule actions after invoking symbol_check_alias_consistency
 986      (in symbols_pack above) so that token types are set correctly
 987      before the rule action type checking.
 988
 989      Before invoking grammar_rule_check_and_complete (in packgram
 990      below) on any rule, make sure all actions have already been
 991      scanned in order to set 'used' flags.  Otherwise, checking that a
 992      midrule's $$ should be set will not always work properly because
 993      the check must forward-reference the midrule's parent rule.  For
 994      the same reason, all the 'used' flags must be set before checking
 995      whether to remove '$' from any midrule symbol name (also in
 996      packgram).  */
 997   for (symbol_list *sym = grammar; sym; sym = sym->next)
 998     code_props_translate_code (&sym->action_props);
 999
1000   /* Convert the grammar into the format described in gram.h.  */
1001   packgram ();
1002
1003   /* The grammar as a symbol_list is no longer needed. */
1004   symbol_list_free (grammar);
1005 }