src/parse-gram.y

   1 /* Bison Grammar Parser                             -*- C -*-
   2
   3    Copyright (C) 2002-2015, 2018-2020 Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    This program is free software: you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation, either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 %code requires
  21 {
  22   #include "symlist.h"
  23   #include "symtab.h"
  24 }
  25
/* Prototypes published through "%code provides".  Only the
   declarations appear here; the definitions are not in this part of
   the file.  */
%code provides
{
  /* Initialize unquote.  */
  void parser_init (void);
  /* Deallocate storage for unquote.  */
  void parser_free (void);
}
  33
  34 %code top
  35 {
  36   /* On column 0 to please syntax-check.  */
  37 #include <config.h>
  38 }
  39
/* Private prologue of the parser: includes, file-scope state shared by
   the semantic actions, and prototypes of the static helpers used by
   the rules.  */
%code
{
  #include "system.h"

  #include <c-ctype.h>
  #include <errno.h>
  #include <intprops.h>
  #include <quotearg.h>
  #include <vasnprintf.h>
  #include <xmemdup0.h>

  #include "complain.h"
  #include "conflicts.h"
  #include "files.h"
  #include "getargs.h"
  #include "gram.h"
  #include "named-ref.h"
  #include "reader.h"
  #include "scan-code.h"
  #include "scan-gram.h"

  /* Pretend to be at least that version, to check features published
     in that version while developing it.  */
  static const char* api_version = "3.7";

  /* Precedence level given to the symbols of a precedence declaration;
     incremented once for each such declaration.  */
  static int current_prec = 0;
  /* Location, named reference and symbol of the left-hand side that
     the "rhs" rule passes to grammar_current_rule_begin.  */
  static location current_lhs_loc;
  static named_ref *current_lhs_named_ref;
  static symbol *current_lhs_symbol;
  /* Class applied by token_decl to the symbols it declares.  Set by
     %token and %nterm, and reset to unknown_sym afterwards.  */
  static symbol_class current_class = unknown_sym;

  /** Set the new current left-hand side symbol, possibly common
   * to several right-hand side parts of rule.
   */
  static void current_lhs (symbol *sym, location loc, named_ref *ref);

  /* Compute the location of a reduced rule with lloc_default.  */
  #define YYLLOC_DEFAULT(Current, Rhs, N)         \
    (Current) = lloc_default (Rhs, N)
  static YYLTYPE lloc_default (YYLTYPE const *, int);

  /* Print locations in traces with location_print.  */
  #define YY_LOCATION_PRINT(File, Loc)            \
    location_print (Loc, File)

  /* Strip initial '{' and final '}' (must be first and last characters).
     Return the result.  */
  static char *strip_braces (char *code);

  /* Convert CODE by calling code_props_plain_init if PLAIN, otherwise
     code_props_symbol_action_init.  Calls
     gram_scanner_last_string_free to release the latest string from
     the scanner (should be CODE). */
  static char const *translate_code (char *code, location loc, bool plain);

  /* Convert CODE by calling code_props_plain_init after having
     stripped the first and last characters (expected to be '{', and
     '}').  Calls gram_scanner_last_string_free to release the latest
     string from the scanner (should be CODE). */
  static char const *translate_code_braceless (char *code, location loc);

  /* Handle a %defines directive.  */
  static void handle_defines (char const *value);

  /* Handle a %error-verbose directive.  */
  static void handle_error_verbose (location const *loc, char const *directive);

  /* Handle a %file-prefix directive.  */
  static void handle_file_prefix (location const *loc,
                                  location const *dir_loc,
                                  char const *directive, char const *value);

  /* Handle a %language directive.  */
  static void handle_language (location const *loc, char const *lang);

  /* Handle a %name-prefix directive.  */
  static void handle_name_prefix (location const *loc,
                                  char const *directive, char const *value);

  /* Handle a %pure-parser directive.  */
  static void handle_pure_parser (location const *loc, char const *directive);

  /* Handle a %require directive.  */
  static void handle_require (location const *loc, char const *version);

  /* Handle a %skeleton directive.  */
  static void handle_skeleton (location const *loc, char const *skel);

  /* Handle a %yacc directive.  */
  static void handle_yacc (location const *loc);

  /* Implementation of yyerror.  */
  static void gram_error (location const *, char const *);

  /* A string that describes a char (e.g., 'a' -> "'a'").  */
  static char const *char_name (char);

  /* Add style to semantic values in traces.  */
  static void tron (FILE *yyo);
  static void troff (FILE *yyo);

  /* Interpret a quoted string (such as `"Hello, \"World\"\n\""`).
     Manages the memory of the result.  */
  static char *unquote (const char *str);

  /* Discard the latest unquoted string.  */
  static void unquote_free (char *last_string);
}
 146
/* Settings of the parser generated from this grammar.  */
%define api.header.include {"parse-gram.h"}
/* External names of the parser use the gram_ prefix (e.g., gram_error).  */
%define api.prefix {gram_}
%define api.pure full
%define api.token.raw
/* Semantic value types are spelled as <type> tags in %type/%token.  */
%define api.value.type union
%define locations
/* Syntax errors are reported by yyreport_syntax_error.  */
%define parse.error custom
%define parse.lac full
%define parse.trace
%defines
/* This grammar is expected to have no conflicts.  */
%expect 0
%verbose
 159
%initial-action
{
  /* Bison's grammar can have initial empty locations, hence a default
     location is needed.  Both boundaries of the initial location are
     set the same way, in grammar_file.  */
  boundary_set (&@$.start, grammar_file, 1, 1, 1);
  boundary_set (&@$.end, grammar_file, 1, 1, 1);
}
 167
/* Tokens of the grammar.  Each one is given a string alias, which is
   the spelling used in the rules below for the directives and the
   punctuation.  Aliases written _("...") are the translatable form
   (see the TSTRING alternative of the "alias" rule).  */
%token
  STRING              _("string")
  TSTRING             _("translatable string")

  PERCENT_TOKEN       "%token"
  PERCENT_NTERM       "%nterm"

  PERCENT_TYPE        "%type"
  PERCENT_DESTRUCTOR  "%destructor"
  PERCENT_PRINTER     "%printer"

  /* Precedence declarations.  */
  PERCENT_LEFT        "%left"
  PERCENT_RIGHT       "%right"
  PERCENT_NONASSOC    "%nonassoc"
  PERCENT_PRECEDENCE  "%precedence"

  /* Rule modifiers, used inside right-hand sides.  */
  PERCENT_PREC        "%prec"
  PERCENT_DPREC       "%dprec"
  PERCENT_MERGE       "%merge"

  /* Other directives.  */
  PERCENT_CODE            "%code"
  PERCENT_DEFAULT_PREC    "%default-prec"
  PERCENT_DEFINE          "%define"
  PERCENT_DEFINES         "%defines"
  PERCENT_ERROR_VERBOSE   "%error-verbose"
  PERCENT_EXPECT          "%expect"
  PERCENT_EXPECT_RR       "%expect-rr"
  PERCENT_FLAG            "%<flag>"
  PERCENT_FILE_PREFIX     "%file-prefix"
  PERCENT_GLR_PARSER      "%glr-parser"
  PERCENT_INITIAL_ACTION  "%initial-action"
  PERCENT_LANGUAGE        "%language"
  PERCENT_NAME_PREFIX     "%name-prefix"
  PERCENT_NO_DEFAULT_PREC "%no-default-prec"
  PERCENT_NO_LINES        "%no-lines"
  PERCENT_NONDETERMINISTIC_PARSER
                          "%nondeterministic-parser"
  PERCENT_OUTPUT          "%output"
  PERCENT_PURE_PARSER     "%pure-parser"
  PERCENT_REQUIRE         "%require"
  PERCENT_SKELETON        "%skeleton"
  PERCENT_START           "%start"
  PERCENT_TOKEN_TABLE     "%token-table"
  PERCENT_VERBOSE         "%verbose"
  PERCENT_YACC            "%yacc"

  /* Code fragments, identifiers, literals and punctuation.  */
  BRACED_CODE       "{...}"
  BRACED_PREDICATE  "%?{...}"
  BRACKETED_ID      _("[identifier]")
  CHAR              _("character literal")
  COLON             ":"
  EPILOGUE          _("epilogue")
  EQUAL             "="
  ID                _("identifier")
  ID_COLON          _("identifier:")
  PERCENT_PERCENT   "%%"
  PIPE              "|"
  PROLOGUE          "%{...%}"
  SEMICOLON         ";"
  TAG               _("<tag>")
  TAG_ANY           "<*>"
  TAG_NONE          "<>"
 230
 /* Experimental feature, don't rely on it.  Wrap the printing of each
    semantic value between tron and troff, which add style to the
    values in traces.  */
%code pre-printer  {tron (yyo);}
%code post-printer {troff (yyo);}

/* Semantic value types, and how each one is printed in traces.  A
   %printer attached to a symbol overrides the one of its type.  */
%type <unsigned char> CHAR
%printer { fputs (char_name ($$), yyo); } <unsigned char>

%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING TSTRING
%printer { fputs ($$, yyo); } <char*>

%type <uniqstr>
  BRACKETED_ID ID ID_COLON
  PERCENT_ERROR_VERBOSE PERCENT_FILE_PREFIX PERCENT_FLAG PERCENT_NAME_PREFIX
  PERCENT_PURE_PARSER
  TAG tag tag.opt variable
%printer { fputs ($$, yyo); } <uniqstr>
%printer { fprintf (yyo, "[%s]", $$); } BRACKETED_ID
%printer { fprintf (yyo, "%s:", $$); } ID_COLON
%printer { fprintf (yyo, "%%%s", $$); } PERCENT_FLAG
%printer { fprintf (yyo, "<%s>", $$); } TAG tag

%token <int> INT _("integer literal")
%printer { fprintf (yyo, "%d", $$); } <int>

%type <symbol*> id id_colon string_as_id symbol token_decl token_decl_for_prec
/* A symbol value may be null, hence the "<NULL>" fallback.  */
%printer { fprintf (yyo, "%s", $$ ? $$->tag : "<NULL>"); } <symbol*>
%printer { fprintf (yyo, "%s:", $$->tag); } id_colon

%type <assoc> precedence_declarator

/* Symbol lists discarded by the parser are released with
   symbol_list_free.  */
%destructor { symbol_list_free ($$); } <symbol_list*>
%printer { symbol_list_syms_print ($$, yyo); } <symbol_list*>

%type <named_ref*> named_ref.opt
 265
/*---------.
| %param.  |
`---------*/
%code requires
{
  /* Where a formal argument goes: a bit set, so that param_both is
     the union of param_lex and param_parse.  */
  typedef enum
  {
    param_none   = 0,
    param_lex    = 1 << 0,
    param_parse  = 1 << 1,
    param_both   = param_lex | param_parse
  } param_type;
};
%code
{
  /** Add a lex-param and/or a parse-param.
   *
   * \param type  where to push this formal argument.
   * \param decl  the formal argument.  Destroyed.
   * \param loc   the location in the source.
   */
  static void add_param (param_type type, char *decl, location loc);
  /* The kind of the %param directive being parsed; param_none outside
     of such a directive.  */
  static param_type current_param = param_none;
};
%token <param_type> PERCENT_PARAM "%param";
/* Print the directive that corresponds to the value.  param_none is
   not expected here: it triggers aver (false).  */
%printer
{
  switch ($$)
    {
#define CASE(In, Out)                                           \
      case param_ ## In: fputs ("%" #Out, yyo); break
      CASE (lex,   lex-param);
      CASE (parse, parse-param);
      CASE (both,  param);
#undef CASE
      case param_none: aver (false); break;
    }
} <param_type>;
 304
 305
 306                      /*==========\
 307                      | Grammar.  |
 308                      \==========*/
 309 %%
 310
/* A grammar file: declarations, "%%", the rules, and an optional
   epilogue.  */
input:
  prologue_declarations "%%" grammar epilogue.opt
;


        /*------------------------------------.
        | Declarations: before the first %%.  |
        `------------------------------------*/

/* A possibly empty sequence of declarations.  */
prologue_declarations:
  %empty
| prologue_declarations prologue_declaration
;
 324
/* One declaration of the first section: either a grammar_declaration
   (which is also accepted in the rules section), or one of the
   directives below.  */
prologue_declaration:
  grammar_declaration
| "%{...%}"
    {
      /* Prologues that follow a %union go to "post_prologue", the
         others to "pre_prologue".  */
      muscle_code_grow (union_seen ? "post_prologue" : "pre_prologue",
                        translate_code ($1, @1, true), @1);
      code_scanner_last_string_free ();
    }
| "%<flag>"
    {
      muscle_percent_define_ensure ($1, @1, true);
    }
| "%define" variable value
    {
      muscle_percent_define_insert ($2, @$, $3.kind, $3.chars,
                                    MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE);
    }
| "%defines"                       { defines_flag = true; }
| "%defines" STRING                { handle_defines ($2); }
| "%error-verbose"                 { handle_error_verbose (&@$, $1); }
| "%expect" INT                    { expected_sr_conflicts = $2; }
| "%expect-rr" INT                 { expected_rr_conflicts = $2; }
| "%file-prefix" STRING            { handle_file_prefix (&@$, &@1, $1, $2); }
| "%glr-parser"
    {
      nondeterministic_parser = true;
      glr_parser = true;
    }
| "%initial-action" "{...}"
    {
      muscle_code_grow ("initial_action", translate_code ($2, @2, false), @2);
      code_scanner_last_string_free ();
    }
| "%language" STRING            { handle_language (&@1, $2); }
| "%name-prefix" STRING         { handle_name_prefix (&@$, $1, $2); }
| "%no-lines"                   { no_lines_flag = true; }
| "%nondeterministic-parser"    { nondeterministic_parser = true; }
| "%output" STRING              { spec_outfile = unquote ($2); gram_scanner_last_string_free (); }
  /* current_param tells add_param (called from "params") which kind
     of parameter is being declared; it is reset once the list ends.  */
| "%param" { current_param = $1; } params { current_param = param_none; }
| "%pure-parser"                { handle_pure_parser (&@$, $1); }
| "%require" STRING             { handle_require (&@2, $2); }
| "%skeleton" STRING            { handle_skeleton (&@2, $2); }
| "%token-table"                { token_table_flag = true; }
| "%verbose"                    { report_flag |= report_states; }
| "%yacc"                       { handle_yacc (&@$); }
  /* Error recovery: skip to the next ";", and forget the symbol class
     of the declaration that was being parsed.  */
| error ";"                     { current_class = unknown_sym; yyerrok; }
| /*FIXME: Err?  What is this horror doing here? */ ";"
;
 373
/* A non empty list of braced parameter declarations, each one
   registered with add_param for the kind stored in current_param.  */
params:
   params "{...}"  { add_param (current_param, $2, @2); }
| "{...}"          { add_param (current_param, $1, @1); }
;
 378
 379
 380 /*----------------------.
 381 | grammar_declaration.  |
 382 `----------------------*/
 383
/* Declarations that are accepted both before the first "%%" and, as
   an extension, in the rules section (see
   rules_or_grammar_declaration).  */
grammar_declaration:
  symbol_declaration
| "%start" symbol
    {
      grammar_start_symbol_set ($2, @2);
    }
| code_props_type "{...}" generic_symlist
    {
      /* %destructor or %printer: translate the code once, attach it
         to every item of the list, then release the list.  */
      code_props code;
      code_props_symbol_action_init (&code, $2, @2);
      code_props_translate_code (&code);
      {
        for (symbol_list *list = $3; list; list = list->next)
          symbol_list_code_props_set (list, $1, &code);
        symbol_list_free ($3);
      }
    }
| "%default-prec"
    {
      default_prec = true;
    }
| "%no-default-prec"
    {
      default_prec = false;
    }
| "%code" "{...}"
    {
      /* Do not invoke muscle_percent_code_grow here since it invokes
         muscle_user_name_list_grow.  */
      muscle_code_grow ("percent_code()",
                        translate_code_braceless ($2, @2), @2);
      code_scanner_last_string_free ();
    }
| "%code" ID "{...}"
    {
      /* Qualified %code: $2 is the qualifier.  */
      muscle_percent_code_grow ($2, @2, translate_code_braceless ($3, @3), @3);
      code_scanner_last_string_free ();
    }
;
 423
/* The kind of per-symbol code introduced by the directive.  */
%type <code_props_type> code_props_type;
%printer { fprintf (yyo, "%s", code_props_type_string ($$)); } <code_props_type>;
code_props_type:
  "%destructor"  { $$ = destructor; }
| "%printer"     { $$ = printer; }
;
 430
 431 /*---------.
 432 | %union.  |
 433 `---------*/
 434
%token PERCENT_UNION "%union";

/* The optional name of the union, recorded as the %define variable
   api.value.union.name.  */
union_name:
  %empty {}
| ID     { muscle_percent_define_insert ("api.value.union.name",
                                         @1, muscle_keyword, $1,
                                         MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE); }
;

/* "%union" is one more alternative of grammar_declaration.  */
grammar_declaration:
  "%union" union_name "{...}"
    {
      /* union_seen selects where later "%{...%}" prologues are
         stored (see prologue_declaration).  */
      union_seen = true;
      muscle_code_grow ("union_members", translate_code_braceless ($3, @3), @3);
      code_scanner_last_string_free ();
    }
;
 452
 453
%type <symbol_list*> nterm_decls symbol_decls symbol_decl.1
      token_decls token_decls_for_prec
      token_decl.1 token_decl_for_prec.1;
/* Declarations of symbols.  The symbols are processed by the nested
   rules while the list is built; here the list is only released,
   except for precedence declarations, which need it to set the
   precedence of each symbol.  */
symbol_declaration:
  "%nterm" { current_class = nterm_sym; } nterm_decls[syms]
    {
      current_class = unknown_sym;
      symbol_list_free ($syms);
    }
| "%token" { current_class = token_sym; } token_decls[syms]
    {
      current_class = unknown_sym;
      symbol_list_free ($syms);
    }
| "%type" symbol_decls[syms]
    {
      symbol_list_free ($syms);
    }
| precedence_declarator token_decls_for_prec[syms]
    {
      /* Each precedence declaration opens a new precedence level,
         shared by all the symbols it lists.  */
      ++current_prec;
      for (symbol_list *list = $syms; list; list = list->next)
        symbol_precedence_set (list->content.sym, current_prec, $1, @1);
      symbol_list_free ($syms);
    }
;
 480
/* The associativity that corresponds to the directive.  */
precedence_declarator:
  "%left"       { $$ = left_assoc; }
| "%right"      { $$ = right_assoc; }
| "%nonassoc"   { $$ = non_assoc; }
| "%precedence" { $$ = precedence_assoc; }
;

/* An optional <tag>; NULL when absent.  */
tag.opt:
  %empty { $$ = NULL; }
| TAG    { $$ = $1; }
;

/* A non empty list of symbols and tags (for %destructor and
   %printer).  */
%type <symbol_list*> generic_symlist generic_symlist_item;
generic_symlist:
  generic_symlist_item
| generic_symlist generic_symlist_item   { $$ = symbol_list_append ($1, $2); }
;

generic_symlist_item:
  symbol    { $$ = symbol_list_sym_new ($1, @1); }
| tag       { $$ = symbol_list_type_new ($1, @1); }
;

/* A tag, including the special ones: "<*>" is stored as "*", and
   "<>" as the empty string.  */
tag:
  TAG
| "<*>" { $$ = uniqstr_new ("*"); }
| "<>"  { $$ = uniqstr_new (""); }
;
 509
 510 /*-----------------------.
 511 | nterm_decls (%nterm).  |
 512 `-----------------------*/
 513
// A non empty list of possibly tagged symbols for %nterm.
//
// Can easily be defined like symbol_decls but restricted to ID, but
// using token_decls allows to reduce the number of rules, and also to
// make nicer error messages on "%nterm 'a'" or '%nterm FOO "foo"'.
nterm_decls:
  token_decls
;
 522
 523 /*-----------------------------------.
 524 | token_decls (%token, and %nterm).  |
 525 `-----------------------------------*/
 526
// A non empty list of possibly tagged symbols for %token or %nterm.
//
// A TAG applies to the symbols that follow it, up to the next TAG.
token_decls:
  token_decl.1[syms]
    {
      $$ = $syms;
    }
| TAG token_decl.1[syms]
    {
      $$ = symbol_list_type_set ($syms, $TAG, @TAG);
    }
| token_decls TAG token_decl.1[syms]
    {
      $$ = symbol_list_append ($1, symbol_list_type_set ($syms, $TAG, @TAG));
    }
;

// One or more symbol declarations for %token or %nterm.
token_decl.1:
  token_decl                { $$ = symbol_list_sym_new ($1, @1); }
| token_decl.1 token_decl   { $$ = symbol_list_append ($1, symbol_list_sym_new ($2, @2)); }

// One symbol declaration for %token or %nterm.
//
// The class comes from current_class, set by the enclosing directive.
// The code is set only when a number was given (int.opt is -1
// otherwise), and the alias only when one was given (NULL otherwise).
token_decl:
  id int.opt[num] alias
    {
      $$ = $id;
      symbol_class_set ($id, current_class, @id, true);
      if (0 <= $num)
        symbol_code_set ($id, $num, @num);
      if ($alias)
        symbol_make_alias ($id, $alias, @alias);
    }
;
 560
// An optional integer; -1 when absent.
%type <int> int.opt;
int.opt:
  %empty  { $$ = -1; }
| INT
;

// An optional alias for a token: a string, or a translatable string
// (in which case the symbol is flagged as translatable).  NULL when
// absent.
%type <symbol*> alias;
alias:
  %empty         { $$ = NULL; }
| string_as_id   { $$ = $1; }
| TSTRING
    {
      $$ = symbol_get ($1, @1);
      symbol_class_set ($$, token_sym, @1, false);
      $$->translatable = true;
    }
;
 578
 579
 580 /*-------------------------------------.
 581 | token_decls_for_prec (%left, etc.).  |
 582 `-------------------------------------*/
 583
// A non empty list of possibly tagged tokens for precedence declaration.
//
// Similar to %token (token_decls), but in '%left FOO 1 "foo"', it treats
// FOO and "foo" as two different symbols instead of aliasing them.
token_decls_for_prec:
  token_decl_for_prec.1[syms]
    {
      $$ = $syms;
    }
| TAG token_decl_for_prec.1[syms]
    {
      $$ = symbol_list_type_set ($syms, $TAG, @TAG);
    }
| token_decls_for_prec TAG token_decl_for_prec.1[syms]
    {
      $$ = symbol_list_append ($1, symbol_list_type_set ($syms, $TAG, @TAG));
    }
;

// One or more token declarations for precedence declaration.
token_decl_for_prec.1:
  token_decl_for_prec
    { $$ = symbol_list_sym_new ($1, @1); }
| token_decl_for_prec.1 token_decl_for_prec
    { $$ = symbol_list_append ($1, symbol_list_sym_new ($2, @2)); }

// One token declaration for precedence declaration.
//
// The symbol is always made a token here, whatever current_class is.
// Its code is set only when a number was given (int.opt is -1
// otherwise).
token_decl_for_prec:
  id int.opt[num]
    {
      $$ = $id;
      symbol_class_set ($id, token_sym, @id, false);
      if (0 <= $num)
        symbol_code_set ($id, $num, @num);
    }
| string_as_id
;
 621
 622
 623 /*-----------------------------------.
 624 | symbol_decls (argument of %type).  |
 625 `-----------------------------------*/
 626
// A non empty list of typed symbols (for %type).
//
// A TAG applies to the symbols that follow it, up to the next TAG.
symbol_decls:
  symbol_decl.1[syms]
    {
      $$ = $syms;
    }
| TAG symbol_decl.1[syms]
    {
      $$ = symbol_list_type_set ($syms, $TAG, @TAG);
    }
| symbol_decls TAG symbol_decl.1[syms]
    {
      $$ = symbol_list_append ($1, symbol_list_type_set ($syms, $TAG, @TAG));
    }
;

// One or more symbol declarations (for %type).  Each symbol is given
// the class pct_type_sym before it is added to the list.
symbol_decl.1:
  symbol
    {
      symbol_class_set ($symbol, pct_type_sym, @symbol, false);
      $$ = symbol_list_sym_new ($symbol, @symbol);
    }
  | symbol_decl.1 symbol
    {
      symbol_class_set ($symbol, pct_type_sym, @symbol, false);
      $$ = symbol_list_append ($1, symbol_list_sym_new ($symbol, @symbol));
    }
;
 656
 657         /*------------------------------------------.
 658         | The grammar section: between the two %%.  |
 659         `------------------------------------------*/
 660
/* A non empty sequence of rules and declarations.  */
grammar:
  rules_or_grammar_declaration
| grammar rules_or_grammar_declaration
;

/* As a Bison extension, one can use the grammar declarations in the
   body of the grammar.  */
rules_or_grammar_declaration:
  rules
| grammar_declaration ";"
| error ";"
    {
      /* Error recovery: resume after the next ";".  */
      yyerrok;
    }
;

/* The rules of one left-hand side.  The left-hand side is installed
   by a midrule action, before the right-hand sides are parsed, so
   that "rhs" can use it.  */
rules:
  id_colon named_ref.opt { current_lhs ($1, @1, $2); } ":" rhses.1
    {
      /* Free the current lhs. */
      current_lhs (0, @1, 0);
    }
;

/* Right-hand sides separated by "|".  A ";" is accepted after any of
   them.  */
rhses.1:
  rhs                { grammar_current_rule_end (@rhs); }
| rhses.1 "|" rhs    { grammar_current_rule_end (@rhs); }
| rhses.1 ";"
;
 690
%token PERCENT_EMPTY "%empty";
/* One right-hand side, built from left to right.  The empty
   alternative starts the rule with the current left-hand side; each
   of the other alternatives appends one item or one modifier to the
   rule being built.  */
rhs:
  %empty
    { grammar_current_rule_begin (current_lhs_symbol, current_lhs_loc,
                                  current_lhs_named_ref); }
| rhs symbol named_ref.opt
    { grammar_current_rule_symbol_append ($2, @2, $3); }
  /* An action, possibly preceded by a tag and followed by a name.  */
| rhs tag.opt "{...}"[action] named_ref.opt[name]
    { grammar_current_rule_action_append ($action, @action, $name, $[tag.opt]); }
| rhs "%?{...}"
    { grammar_current_rule_predicate_append ($2, @2); }
| rhs "%empty"
    { grammar_current_rule_empty_set (@2); }
| rhs "%prec" symbol
    { grammar_current_rule_prec_set ($3, @3); }
| rhs "%dprec" INT
    { grammar_current_rule_dprec_set ($3, @3); }
| rhs "%merge" TAG
    { grammar_current_rule_merge_set ($3, @3); }
| rhs "%expect" INT
    { grammar_current_rule_expect_sr ($3, @3); }
| rhs "%expect-rr" INT
    { grammar_current_rule_expect_rr ($3, @3); }
;

/* An optional "[name]"; NULL when absent.  */
named_ref.opt:
  %empty         { $$ = NULL; }
| BRACKETED_ID   { $$ = named_ref_new ($1, @1); }
;
 720
 721
 722 /*---------------------.
 723 | variable and value.  |
 724 `---------------------*/
 725
/* The name of a %define variable.  */
variable:
  ID
;

/* Some content or empty by default. */
%code requires {
  #include "muscle-tab.h"
  /* The value of a %define variable: its text and its kind.  */
  typedef struct
  {
    char const *chars;
    muscle_kind kind;
  } value_type;
};
%type <value_type> value;
/* Print the value with the delimiters that match its kind.  */
%printer
{
  switch ($$.kind)
    {
    case muscle_code:    fprintf (yyo,  "{%s}",  $$.chars); break;
    case muscle_keyword: fprintf (yyo,   "%s",   $$.chars); break;
    case muscle_string:  fprintf (yyo, "\"%s\"", $$.chars); break;
    }
} <value_type>;

/* An absent value is the empty keyword.  Strings are unquoted, and
   braced code loses its braces.  */
value:
  %empty  { $$.kind = muscle_keyword; $$.chars = ""; }
| ID      { $$.kind = muscle_keyword; $$.chars = $1; }
| STRING  { $$.kind = muscle_string;  $$.chars = unquote ($1); gram_scanner_last_string_free ();}
| "{...}" { $$.kind = muscle_code;    $$.chars = strip_braces ($1); gram_scanner_last_string_free (); }
;
 756
 757
 758 /*--------------.
 759 | Identifiers.  |
 760 `--------------*/
 761
 762 /* Identifiers are returned as uniqstr values by the scanner.
 763    Depending on their use, we may need to make them genuine symbols.  */
 764
/* An identifier or a character literal, as a symbol.  */
id:
  ID
    { $$ = symbol_from_uniqstr ($1, @1); }
| CHAR
    {
      const char *var = "api.token.raw";
      /* Inside %nterm, a character literal is an error: complain and
         make the parser enter error recovery.  */
      if (current_class == nterm_sym)
        {
          complain (&@1, complaint,
                    _("character literals cannot be nonterminals"));
          YYERROR;
        }
      /* With api.token.raw, complain and point at the definition of
         the variable, but still create the symbol.  */
      if (muscle_percent_define_ifdef (var))
        {
          complain (&@1, complaint,
                    _("character literals cannot be used together"
                    " with %s"), var);
          location loc = muscle_percent_define_get_loc (var);
          subcomplain (&loc, complaint, _("definition of %s"), var);
        }
      /* The symbol is a token named after the literal, and the
         character itself is passed as its code.  */
      $$ = symbol_get (char_name ($1), @1);
      symbol_class_set ($$, token_sym, @1, false);
      symbol_code_set ($$, $1, @1);
    }
;

/* The identifier that starts a rule, as a symbol.  */
id_colon:
  ID_COLON { $$ = symbol_from_uniqstr ($1, @1); }
;
 794
 795
/* Any way to denote a symbol: identifier, character literal, or
   string.  */
symbol:
  id
| string_as_id
;

/* A string used as an ID.  The symbol it denotes is made a token.  */
string_as_id:
  STRING
    {
      $$ = symbol_get ($1, @1);
      symbol_class_set ($$, token_sym, @1, false);
    }
;

/* The optional epilogue, after a second "%%".  Its code is stored
   under "epilogue".  */
epilogue.opt:
  %empty
| "%%" EPILOGUE
    {
      muscle_code_grow ("epilogue", translate_code ($2, @2, true), @2);
      code_scanner_last_string_free ();
    }
;
 818
 819 %%
 820
 821 int
 822 yyreport_syntax_error (const yypcontext_t *ctx)
 823 {
 824   int res = 0;
 825   /* Arguments of format: reported tokens (one for the "unexpected",
 826      one per "expected"). */
 827   enum { ARGS_MAX = 5 };
 828   const char *argv[ARGS_MAX];
 829   int argc = 0;
 830   yysymbol_kind_t unexpected = yypcontext_token (ctx);
 831   if (unexpected != YYSYMBOL_YYEMPTY)
 832     {
 833       argv[argc++] = yysymbol_name (unexpected);
 834       yysymbol_kind_t expected[ARGS_MAX - 1];
 835       int nexpected = yypcontext_expected_tokens (ctx, expected, ARGS_MAX - 1);
 836       if (nexpected < 0)
 837         res = nexpected;
 838       else
 839         for (int i = 0; i < nexpected; ++i)
 840           argv[argc++] = yysymbol_name (expected[i]);
 841     }
 842   syntax_error (*yypcontext_location (ctx), argc, argv);
 843   return res;
 844 }
 845
 846
 847 /* Return the location of the left-hand side of a rule whose
 848    right-hand side is RHS[1] ... RHS[N].  Ignore empty nonterminals in
 849    the right-hand side, and return an empty location equal to the end
 850    boundary of RHS[0] if the right-hand side is empty.  */
 851
 852 static YYLTYPE
 853 lloc_default (YYLTYPE const *rhs, int n)
 854 {
 855   YYLTYPE loc;
 856
 857   /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
 858      The bug is fixed in 7.4.2m, but play it safe for now.  */
 859   loc.start = rhs[n].end;
 860   loc.end = rhs[n].end;
 861
 862   /* Ignore empty nonterminals the start of the right-hand side.
 863      Do not bother to ignore them at the end of the right-hand side,
 864      since empty nonterminals have the same end as their predecessors.  */
 865   for (int i = 1; i <= n; i++)
 866     if (! equal_boundaries (rhs[i].start, rhs[i].end))
 867       {
 868         loc.start = rhs[i].start;
 869         break;
 870       }
 871
 872   return loc;
 873 }
 874
 875 static
 876 char *strip_braces (char *code)
 877 {
 878   code[strlen (code) - 1] = 0;
 879   return code + 1;
 880 }
 881
 882 static
 883 char const *
 884 translate_code (char *code, location loc, bool plain)
 885 {
 886   code_props plain_code;
 887   if (plain)
 888     code_props_plain_init (&plain_code, code, loc);
 889   else
 890     code_props_symbol_action_init (&plain_code, code, loc);
 891   code_props_translate_code (&plain_code);
 892   gram_scanner_last_string_free ();
 893   return plain_code.code;
 894 }
 895
 896 static
 897 char const *
 898 translate_code_braceless (char *code, location loc)
 899 {
 900   return translate_code (strip_braces (code), loc, true);
 901 }
 902
 903 static void
 904 add_param (param_type type, char *decl, location loc)
 905 {
 906   static char const alphanum[26 + 26 + 1 + 10 + 1] =
 907     "abcdefghijklmnopqrstuvwxyz"
 908     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 909     "_"
 910     "0123456789";
 911
 912   char const *name_start = NULL;
 913   {
 914     char *p;
 915     /* Stop on last actual character.  */
 916     for (p = decl; p[1]; p++)
 917       if ((p == decl
 918            || ! memchr (alphanum, p[-1], sizeof alphanum - 1))
 919           && memchr (alphanum, p[0], sizeof alphanum - 10 - 1))
 920         name_start = p;
 921
 922     /* Strip the surrounding '{' and '}', and any blanks just inside
 923        the braces.  */
 924     --p;
 925     while (c_isspace ((unsigned char) *p))
 926       --p;
 927     p[1] = '\0';
 928     ++decl;
 929     while (c_isspace ((unsigned char) *decl))
 930       ++decl;
 931   }
 932
 933   if (! name_start)
 934     complain (&loc, complaint, _("missing identifier in parameter declaration"));
 935   else
 936     {
 937       char *name = xmemdup0 (name_start, strspn (name_start, alphanum));
 938       if (type & param_lex)
 939         muscle_pair_list_grow ("lex_param", decl, name);
 940       if (type & param_parse)
 941         muscle_pair_list_grow ("parse_param", decl, name);
 942       free (name);
 943     }
 944
 945   gram_scanner_last_string_free ();
 946 }
 947
 948
 949 static void
 950 handle_defines (char const *value)
 951 {
 952   defines_flag = true;
 953   char *file = unquote (value);
 954   spec_header_file = xstrdup (file);
 955   gram_scanner_last_string_free ();
 956   unquote_free (file);
 957 }
 958
 959
 960 static void
 961 handle_error_verbose (location const *loc, char const *directive)
 962 {
 963   bison_directive (loc, directive);
 964   muscle_percent_define_insert (directive, *loc, muscle_keyword, "",
 965                                 MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE);
 966 }
 967
 968
 969 static void
 970 handle_file_prefix (location const *loc,
 971                     location const *dir_loc,
 972                     char const *directive, char const *value_quoted)
 973 {
 974   char *value = unquote (value_quoted);
 975   bison_directive (loc, directive);
 976   bool warned = false;
 977
 978   if (location_empty (spec_file_prefix_loc))
 979     {
 980       spec_file_prefix_loc = *loc;
 981       spec_file_prefix = value;
 982     }
 983   else
 984     {
 985       duplicate_directive (directive, spec_file_prefix_loc, *loc);
 986       warned = true;
 987     }
 988
 989   if (!warned
 990       && STRNEQ (directive, "%file-prefix"))
 991     deprecated_directive (dir_loc, directive, "%file-prefix");
 992 }
 993
 994 static void
 995 handle_language (location const *loc, char const *lang)
 996 {
 997   language_argmatch (unquote (lang), grammar_prio, *loc);
 998 }
 999
1000
1001 static void
1002 handle_name_prefix (location const *loc,
1003                     char const *directive, char const *value_quoted)
1004 {
1005   char *value = unquote (value_quoted);
1006   bison_directive (loc, directive);
1007
1008   char buf1[1024];
1009   size_t len1 = sizeof (buf1);
1010   char *old = asnprintf (buf1, &len1, "%s\"%s\"", directive, value);
1011   if (!old)
1012     xalloc_die ();
1013
1014   if (location_empty (spec_name_prefix_loc))
1015     {
1016       spec_name_prefix = value;
1017       spec_name_prefix_loc = *loc;
1018
1019       char buf2[1024];
1020       size_t len2 = sizeof (buf2);
1021       char *new = asnprintf (buf2, &len2, "%%define api.prefix {%s}", value);
1022       if (!new)
1023         xalloc_die ();
1024       deprecated_directive (loc, old, new);
1025       if (new != buf2)
1026         free (new);
1027     }
1028   else
1029     duplicate_directive (old, spec_file_prefix_loc, *loc);
1030
1031   if (old != buf1)
1032     free (old);
1033 }
1034
1035
/* Handle DIRECTIVE found at LOC: report it through bison_directive,
   warn that it is deprecated in favor of '%define api.pure', and
   insert the %define variable "api.pure" (kind muscle_keyword, empty
   value), marked as coming from the grammar file.  */
static void
handle_pure_parser (location const *loc, char const *directive)
{
  bison_directive (loc, directive);
  deprecated_directive (loc, directive, "%define api.pure");
  muscle_percent_define_insert ("api.pure", *loc, muscle_keyword, "",
                                MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE);
}
1044
1045
1046 /* Convert VERSION into an int (MAJOR * 100 + MINOR).  Return -1 on
1047    errors.
1048
1049    Changes of behavior are only on minor version changes, so "3.0.5"
1050    is the same as "3.0": 300. */
1051 static int
1052 str_to_version (char const *version)
1053 {
1054   IGNORE_TYPE_LIMITS_BEGIN
1055   int res = 0;
1056   errno = 0;
1057   char *cp = NULL;
1058   long major = strtol (version, &cp, 10);
1059   if (errno || cp == version || *cp != '.' || major < 0
1060       || INT_MULTIPLY_WRAPV (major, 100, &res))
1061     return -1;
1062
1063   ++cp;
1064   char *cp1 = NULL;
1065   long minor = strtol (cp, &cp1, 10);
1066   if (errno || cp1 == cp || (*cp1 != '\0' && *cp1 != '.')
1067       || ! (0 <= minor && minor < 100)
1068       || INT_ADD_WRAPV (minor, res, &res))
1069     return -1;
1070
1071   IGNORE_TYPE_LIMITS_END
1072   return res;
1073 }
1074
1075
1076 static void
1077 handle_require (location const *loc, char const *version_quoted)
1078 {
1079   char *version = unquote (version_quoted);
1080   required_version = str_to_version (version);
1081   if (required_version == -1)
1082     {
1083       complain (loc, complaint, _("invalid version requirement: %s"),
1084                 version);
1085       required_version = 0;
1086     }
1087   else
1088     {
1089       const char* package_version =
1090         0 < strverscmp (api_version, PACKAGE_VERSION)
1091         ? api_version : PACKAGE_VERSION;
1092       if (0 < strverscmp (version, package_version))
1093         {
1094           complain (loc, complaint, _("require bison %s, but have %s"),
1095                     version, package_version);
1096           exit (EX_MISMATCH);
1097         }
1098     }
1099   unquote_free (version);
1100   gram_scanner_last_string_free ();
1101 }
1102
1103 static void
1104 handle_skeleton (location const *loc, char const *skel_quoted)
1105 {
1106   char *skel = unquote (skel_quoted);
1107   char const *skeleton_user = skel;
1108   if (strchr (skeleton_user, '/'))
1109     {
1110       size_t dir_length = strlen (grammar_file);
1111       while (dir_length && grammar_file[dir_length - 1] != '/')
1112         --dir_length;
1113       while (dir_length && grammar_file[dir_length - 1] == '/')
1114         --dir_length;
1115       char *skeleton_build =
1116         xmalloc (dir_length + 1 + strlen (skeleton_user) + 1);
1117       if (dir_length > 0)
1118         {
1119           memcpy (skeleton_build, grammar_file, dir_length);
1120           skeleton_build[dir_length++] = '/';
1121         }
1122       strcpy (skeleton_build + dir_length, skeleton_user);
1123       skeleton_user = uniqstr_new (skeleton_build);
1124       free (skeleton_build);
1125     }
1126   skeleton_arg (skeleton_user, grammar_prio, *loc);
1127 }
1128
1129
1130 static void
1131 handle_yacc (location const *loc)
1132 {
1133   const char *directive = "%yacc";
1134   bison_directive (loc, directive);
1135   if (location_empty (yacc_loc))
1136     yacc_loc = *loc;
1137   else
1138     duplicate_directive (directive, yacc_loc, *loc);
1139 }
1140
1141
/* Report the error message MSG at location LOC as a complaint.  MSG
   is passed as the argument of a "%s" format rather than as the
   format itself, so any '%' it contains is emitted verbatim.  */
static void
gram_error (location const *loc, char const *msg)
{
  complain (loc, complaint, "%s", msg);
}
1147
1148 static char const *
1149 char_name (char c)
1150 {
1151   if (c == '\'')
1152     return "'\\''";
1153   else
1154     {
1155       char buf[4];
1156       buf[0] = '\''; buf[1] = c; buf[2] = '\''; buf[3] = '\0';
1157       return quotearg_style (escape_quoting_style, buf);
1158     }
1159 }
1160
1161 static
1162 void
1163 current_lhs (symbol *sym, location loc, named_ref *ref)
1164 {
1165   current_lhs_symbol = sym;
1166   current_lhs_loc = loc;
1167   if (sym)
1168     symbol_location_as_lhs_set (sym, loc);
1169   /* In order to simplify memory management, named references for lhs
1170      are always assigned by deep copy into the current symbol_list
1171      node.  This is because a single named-ref in the grammar may
1172      result in several uses when the user factors lhs between several
1173      rules using "|".  Therefore free the parser's original copy.  */
1174   free (current_lhs_named_ref);
1175   current_lhs_named_ref = ref;
1176 }
1177
/* Open the "value" use class on the stream YYO, via begin_use_class.
   NOTE(review): presumably the counterpart of troff, used to style
   the values printed on YYO -- confirm against the callers.  */
static void tron (FILE *yyo)
{
  begin_use_class ("value", yyo);
}
1182
/* Close the "value" use class on the stream YYO, via end_use_class.
   NOTE(review): presumably the counterpart of tron -- confirm against
   the callers.  */
static void troff (FILE *yyo)
{
  end_use_class ("value", yyo);
}
1187
1188
1189 /*----------.
1190 | Unquote.  |
1191 `----------*/
1192
/* The obstack on which unquote builds its results.  It is set up by
   parser_init and released by parser_free.  */
struct obstack obstack_for_unquote;
1194
/* Initialize the storage used by unquote (obstack_for_unquote).  */
void
parser_init (void)
{
  obstack_init (&obstack_for_unquote);
}
1200
/* Deallocate the storage used by unquote: free obstack_for_unquote
   entirely (null object argument).  */
void
parser_free (void)
{
  obstack_free (&obstack_for_unquote, 0);
}
1206
/* Release LAST_STRING, a string previously returned by unquote, from
   obstack_for_unquote.  As with any obstack_free call, objects
   allocated on that obstack after LAST_STRING are released too.  */
static void
unquote_free (char *last_string)
{
  obstack_free (&obstack_for_unquote, last_string);
}
1212
1213 static char *
1214 unquote (const char *cp)
1215 {
1216 #define GROW(Char)                              \
1217   obstack_1grow (&obstack_for_unquote, Char);
1218   for (++cp; *cp && *cp != '"'; ++cp)
1219     switch (*cp)
1220       {
1221       case '"':
1222         break;
1223       case '\\':
1224         ++cp;
1225         switch (*cp)
1226           {
1227           case '0': case '1': case '2': case '3': case '4':
1228           case '5': case '6': case '7': case '8': case '9':
1229             {
1230               int c = cp[0] - '0';
1231               if (c_isdigit (cp[1]))
1232                 {
1233                   ++cp;
1234                   c = c * 8 + cp[0] - '0';
1235                 }
1236               if (c_isdigit (cp[1]))
1237                 {
1238                   ++cp;
1239                   c = c * 8 + cp[0] - '0';
1240                 }
1241               GROW (c);
1242             }
1243             break;
1244
1245           case 'a': GROW ('\a'); break;
1246           case 'b': GROW ('\b'); break;
1247           case 'f': GROW ('\f'); break;
1248           case 'n': GROW ('\n'); break;
1249           case 'r': GROW ('\r'); break;
1250           case 't': GROW ('\t'); break;
1251           case 'v': GROW ('\v'); break;
1252
1253           case 'x':
1254             {
1255               int c = 0;
1256               while (c_isxdigit (cp[1]))
1257                 {
1258                   ++cp;
1259                   c = (c * 16 + (c_isdigit (cp[0]) ? cp[0] - '0'
1260                                  : c_isupper (cp[0]) ? cp[0] - 'A'
1261                                  : cp[0] - '0'));
1262                 }
1263               GROW (c);
1264               break;
1265             }
1266           }
1267         break;
1268
1269       default:
1270         GROW (*cp);
1271         break;
1272       }
1273   assert (*cp == '"');
1274   ++cp;
1275   assert (*cp == '\0');
1276 #undef GROW
1277   return obstack_finish0 (&obstack_for_unquote);
1278 }