src/parse-gram.y

   1 /* Bison Grammar Parser                             -*- C -*-
   2
   3    Copyright (C) 2002-2015, 2018-2021 Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    This program is free software: you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation, either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  19
  20 %code requires
  21 {
  22   #include "symlist.h"
  23   #include "symtab.h"
  24 }
  25
  26 %code provides
  27 {
  28   /* Initialize unquote.  */
  29   void parser_init (void);
  30   /* Deallocate storage for unquote.  */
  31   void parser_free (void);
  32 }
  33
  34 %code top
  35 {
  36   /* On column 0 to please syntax-check.  */
  37 #include <config.h>
  38 }
  39
  40 %code
  41 {
  42   #include "system.h"
  43
  44   #include <c-ctype.h>
  45   #include <quotearg.h>
  46   #include <vasnprintf.h>
  47   #include <xmemdup0.h>
  48
  49   #include "complain.h"
  50   #include "conflicts.h"
  51   #include "files.h"
  52   #include "getargs.h"
  53   #include "gram.h"
  54   #include "named-ref.h"
  55   #include "reader.h"
  56   #include "scan-code.h"
  57   #include "scan-gram.h"
  58   #include "strversion.h"
  59
  /* Pretend to be at least that version, to check features published
     in that version while developing it.  */
  62   static const char* api_version = "3.7";
  63
  64   static int current_prec = 0;
  65   static location current_lhs_loc;
  66   static named_ref *current_lhs_named_ref;
  67   static symbol *current_lhs_symbol;
  68   static symbol_class current_class = unknown_sym;
  69
  70   /** Set the new current left-hand side symbol, possibly common
  71    * to several right-hand side parts of rule.
  72    */
  73   static void current_lhs (symbol *sym, location loc, named_ref *ref);
  74
  75   #define YYLLOC_DEFAULT(Current, Rhs, N)         \
  76     (Current) = lloc_default (Rhs, N)
  77   static YYLTYPE lloc_default (YYLTYPE const *, int);
  78
  79   #define YY_LOCATION_PRINT(File, Loc)            \
  80     location_print (Loc, File)
  81
  82   /* Strip initial '{' and final '}' (must be first and last characters).
  83      Return the result.  */
  84   static char *strip_braces (char *code);
  85
  86   /* Convert CODE by calling code_props_plain_init if PLAIN, otherwise
  87      code_props_symbol_action_init.  Calls
  88      gram_scanner_last_string_free to release the latest string from
  89      the scanner (should be CODE). */
  90   static char const *translate_code (char *code, location loc, bool plain);
  91
  92   /* Convert CODE by calling code_props_plain_init after having
  93      stripped the first and last characters (expected to be '{', and
  94      '}').  Calls gram_scanner_last_string_free to release the latest
  95      string from the scanner (should be CODE). */
  96   static char const *translate_code_braceless (char *code, location loc);
  97
  98   /* Handle a %defines directive.  */
  99   static void handle_defines (char const *value);
 100
 101   /* Handle a %error-verbose directive.  */
 102   static void handle_error_verbose (location const *loc, char const *directive);
 103
 104   /* Handle a %file-prefix directive.  */
 105   static void handle_file_prefix (location const *loc,
 106                                   location const *dir_loc,
 107                                   char const *directive, char const *value);
 108
 109   /* Handle a %language directive.  */
 110   static void handle_language (location const *loc, char const *lang);
 111
 112   /* Handle a %name-prefix directive.  */
 113   static void handle_name_prefix (location const *loc,
 114                                   char const *directive, char const *value);
 115
 116   /* Handle a %pure-parser directive.  */
 117   static void handle_pure_parser (location const *loc, char const *directive);
 118
 119   /* Handle a %require directive.  */
 120   static void handle_require (location const *loc, char const *version);
 121
 122   /* Handle a %skeleton directive.  */
 123   static void handle_skeleton (location const *loc, char const *skel);
 124
 125   /* Handle a %yacc directive.  */
 126   static void handle_yacc (location const *loc);
 127
 128   /* Implementation of yyerror.  */
 129   static void gram_error (location const *, char const *);
 130
 131   /* A string that describes a char (e.g., 'a' -> "'a'").  */
 132   static char const *char_name (char);
 133
 134   /* Add style to semantic values in traces.  */
 135   static void tron (FILE *yyo);
 136   static void troff (FILE *yyo);
 137
 138   /* Interpret a quoted string (such as `"Hello, \"World\"\n\""`).
 139      Manages the memory of the result.  */
 140   static char *unquote (const char *str);
 141
 142   /* Discard the latest unquoted string.  */
 143   static void unquote_free (char *last_string);
 144 }
 145
 146 %define api.header.include {"parse-gram.h"}
 147 %define api.prefix {gram_}
 148 %define api.pure full
 149 %define api.token.raw
 150 %define api.value.type union
 151 %define locations
 152 %define parse.error custom
 153 %define parse.lac full
 154 %define parse.trace
 155 %defines
 156 %expect 0
 157 %verbose
 158
 159 %initial-action
 160 {
  /* Bison's grammar can begin with empty locations, hence a default
     location is needed. */
 163   boundary_set (&@$.start, grammar_file, 1, 1, 1);
 164   boundary_set (&@$.end, grammar_file, 1, 1, 1);
 165 }
 166
 167 %token
 168   STRING              _("string")
 169   TSTRING             _("translatable string")
 170
 171   PERCENT_TOKEN       "%token"
 172   PERCENT_NTERM       "%nterm"
 173
 174   PERCENT_TYPE        "%type"
 175   PERCENT_DESTRUCTOR  "%destructor"
 176   PERCENT_PRINTER     "%printer"
 177
 178   PERCENT_LEFT        "%left"
 179   PERCENT_RIGHT       "%right"
 180   PERCENT_NONASSOC    "%nonassoc"
 181   PERCENT_PRECEDENCE  "%precedence"
 182
 183   PERCENT_PREC        "%prec"
 184   PERCENT_DPREC       "%dprec"
 185   PERCENT_MERGE       "%merge"
 186
 187   PERCENT_CODE            "%code"
 188   PERCENT_DEFAULT_PREC    "%default-prec"
 189   PERCENT_DEFINE          "%define"
 190   PERCENT_DEFINES         "%defines"
 191   PERCENT_ERROR_VERBOSE   "%error-verbose"
 192   PERCENT_EXPECT          "%expect"
 193   PERCENT_EXPECT_RR       "%expect-rr"
 194   PERCENT_FLAG            "%<flag>"
 195   PERCENT_FILE_PREFIX     "%file-prefix"
 196   PERCENT_GLR_PARSER      "%glr-parser"
 197   PERCENT_INITIAL_ACTION  "%initial-action"
 198   PERCENT_LANGUAGE        "%language"
 199   PERCENT_NAME_PREFIX     "%name-prefix"
 200   PERCENT_NO_DEFAULT_PREC "%no-default-prec"
 201   PERCENT_NO_LINES        "%no-lines"
 202   PERCENT_NONDETERMINISTIC_PARSER
 203                           "%nondeterministic-parser"
 204   PERCENT_OUTPUT          "%output"
 205   PERCENT_PURE_PARSER     "%pure-parser"
 206   PERCENT_REQUIRE         "%require"
 207   PERCENT_SKELETON        "%skeleton"
 208   PERCENT_START           "%start"
 209   PERCENT_TOKEN_TABLE     "%token-table"
 210   PERCENT_VERBOSE         "%verbose"
 211   PERCENT_YACC            "%yacc"
 212
 213   BRACED_CODE       "{...}"
 214   BRACED_PREDICATE  "%?{...}"
 215   BRACKETED_ID      _("[identifier]")
 216   CHAR_LITERAL      _("character literal")
 217   COLON             ":"
 218   EPILOGUE          _("epilogue")
 219   EQUAL             "="
 220   ID                _("identifier")
 221   ID_COLON          _("identifier:")
 222   PERCENT_PERCENT   "%%"
 223   PIPE              "|"
 224   PROLOGUE          "%{...%}"
 225   SEMICOLON         ";"
 226   TAG               _("<tag>")
 227   TAG_ANY           "<*>"
 228   TAG_NONE          "<>"
 229
 230  /* Experimental feature, don't rely on it.  */
 231 %code pre-printer  {tron (yyo);}
 232 %code post-printer {troff (yyo);}
 233
 234 %type <unsigned char> CHAR_LITERAL
 235 %printer { fputs (char_name ($$), yyo); } <unsigned char>
 236
 237 %type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING TSTRING
 238 %printer { fputs ($$, yyo); } <char*>
 239
 240 %type <uniqstr>
 241   BRACKETED_ID ID ID_COLON
 242   PERCENT_ERROR_VERBOSE PERCENT_FILE_PREFIX PERCENT_FLAG PERCENT_NAME_PREFIX
 243   PERCENT_PURE_PARSER
 244   TAG tag tag.opt variable
 245 %printer { fputs ($$, yyo); } <uniqstr>
 246 %printer { fprintf (yyo, "[%s]", $$); } BRACKETED_ID
 247 %printer { fprintf (yyo, "%s:", $$); } ID_COLON
 248 %printer { fprintf (yyo, "%%%s", $$); } PERCENT_FLAG
 249 %printer { fprintf (yyo, "<%s>", $$); } TAG tag
 250
 251 %token <int> INT_LITERAL _("integer literal")
 252 %printer { fprintf (yyo, "%d", $$); } <int>
 253
 254 %type <symbol*> id id_colon string_as_id symbol token_decl token_decl_for_prec
 255 %printer { fprintf (yyo, "%s", $$ ? $$->tag : "<NULL>"); } <symbol*>
 256 %printer { fprintf (yyo, "%s:", $$->tag); } id_colon
 257
 258 %type <assoc> precedence_declarator
 259
 260 %destructor { symbol_list_free ($$); } <symbol_list*>
 261 %printer { symbol_list_syms_print ($$, yyo); } <symbol_list*>
 262
 263 %type <named_ref*> named_ref.opt
 264
 265 /*---------.
 266 | %param.  |
 267 `---------*/
 268 %code requires
 269 {
 270   typedef enum
 271   {
 272     param_none   = 0,
 273     param_lex    = 1 << 0,
 274     param_parse  = 1 << 1,
 275     param_both   = param_lex | param_parse
 276   } param_type;
 277 };
 278 %code
 279 {
 280   /** Add a lex-param and/or a parse-param.
 281    *
 282    * \param type  where to push this formal argument.
 283    * \param decl  the formal argument.  Destroyed.
 284    * \param loc   the location in the source.
 285    */
 286   static void add_param (param_type type, char *decl, location loc);
 287   static param_type current_param = param_none;
 288 };
 289 %token <param_type> PERCENT_PARAM "%param";
 290 %printer
 291 {
 292   switch ($$)
 293     {
 294 #define CASE(In, Out)                                           \
 295       case param_ ## In: fputs ("%" #Out, yyo); break
 296       CASE (lex,   lex-param);
 297       CASE (parse, parse-param);
 298       CASE (both,  param);
 299 #undef CASE
 300       case param_none: aver (false); break;
 301     }
 302 } <param_type>;
 303
 304
 305                      /*==========\
 306                      | Grammar.  |
 307                      \==========*/
 308 %%
 309
 310 input:
 311   prologue_declarations "%%" grammar epilogue.opt
 312 ;
 313
 314
 315         /*------------------------------------.
 316         | Declarations: before the first %%.  |
 317         `------------------------------------*/
 318
 319 prologue_declarations:
 320   %empty
 321 | prologue_declarations prologue_declaration
 322 ;
 323
 324 prologue_declaration:
 325   grammar_declaration
 326 | "%{...%}"
 327     {
 328       muscle_code_grow (union_seen ? "post_prologue" : "pre_prologue",
 329                         translate_code ($1, @1, true), @1);
 330       code_scanner_last_string_free ();
 331     }
 332 | "%<flag>"
 333     {
 334       muscle_percent_define_ensure ($1, @1, true);
 335     }
 336 | "%define" variable value
 337     {
 338       muscle_percent_define_insert ($2, @$, $3.kind, $3.chars,
 339                                     MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE);
 340     }
 341 | "%defines"                       { defines_flag = true; }
 342 | "%defines" STRING                { handle_defines ($2); }
 343 | "%error-verbose"                 { handle_error_verbose (&@$, $1); }
 344 | "%expect" INT_LITERAL            { expected_sr_conflicts = $2; }
 345 | "%expect-rr" INT_LITERAL         { expected_rr_conflicts = $2; }
 346 | "%file-prefix" STRING            { handle_file_prefix (&@$, &@1, $1, $2); }
 347 | "%glr-parser"
 348     {
 349       nondeterministic_parser = true;
 350       glr_parser = true;
 351     }
 352 | "%initial-action" "{...}"
 353     {
 354       muscle_code_grow ("initial_action", translate_code ($2, @2, false), @2);
 355       code_scanner_last_string_free ();
 356     }
 357 | "%language" STRING            { handle_language (&@1, $2); }
 358 | "%name-prefix" STRING         { handle_name_prefix (&@$, $1, $2); }
 359 | "%no-lines"                   { no_lines_flag = true; }
 360 | "%nondeterministic-parser"    { nondeterministic_parser = true; }
 361 | "%output" STRING              { spec_outfile = unquote ($2); gram_scanner_last_string_free (); }
 362 | "%param" { current_param = $1; } params { current_param = param_none; }
 363 | "%pure-parser"                { handle_pure_parser (&@$, $1); }
 364 | "%require" STRING             { handle_require (&@2, $2); }
 365 | "%skeleton" STRING            { handle_skeleton (&@2, $2); }
 366 | "%token-table"                { token_table_flag = true; }
 367 | "%verbose"                    { report_flag |= report_states; }
 368 | "%yacc"                       { handle_yacc (&@$); }
 369 | error ";"                     { current_class = unknown_sym; yyerrok; }
 370 | /*FIXME: Err?  What is this horror doing here? */ ";"
 371 ;
 372
 373 params:
 374    params "{...}"  { add_param (current_param, $2, @2); }
 375 | "{...}"          { add_param (current_param, $1, @1); }
 376 ;
 377
 378
 379 /*----------------------.
 380 | grammar_declaration.  |
 381 `----------------------*/
 382
 383 grammar_declaration:
 384   symbol_declaration
 385 | "%start" symbol
 386     {
 387       grammar_start_symbol_set ($2, @2);
 388     }
 389 | code_props_type "{...}" generic_symlist
 390     {
 391       code_props code;
 392       code_props_symbol_action_init (&code, $2, @2);
 393       code_props_translate_code (&code);
 394       {
 395         for (symbol_list *list = $3; list; list = list->next)
 396           symbol_list_code_props_set (list, $1, &code);
 397         symbol_list_free ($3);
 398       }
 399     }
 400 | "%default-prec"
 401     {
 402       default_prec = true;
 403     }
 404 | "%no-default-prec"
 405     {
 406       default_prec = false;
 407     }
 408 | "%code" "{...}"
 409     {
 410       /* Do not invoke muscle_percent_code_grow here since it invokes
 411          muscle_user_name_list_grow.  */
 412       muscle_code_grow ("percent_code()",
 413                         translate_code_braceless ($2, @2), @2);
 414       code_scanner_last_string_free ();
 415     }
 416 | "%code" ID "{...}"
 417     {
 418       muscle_percent_code_grow ($2, @2, translate_code_braceless ($3, @3), @3);
 419       code_scanner_last_string_free ();
 420     }
 421 ;
 422
 423 %type <code_props_type> code_props_type;
 424 %printer { fprintf (yyo, "%s", code_props_type_string ($$)); } <code_props_type>;
 425 code_props_type:
 426   "%destructor"  { $$ = destructor; }
 427 | "%printer"     { $$ = printer; }
 428 ;
 429
 430 /*---------.
 431 | %union.  |
 432 `---------*/
 433
 434 %token PERCENT_UNION "%union";
 435
 436 union_name:
 437   %empty {}
 438 | ID     { muscle_percent_define_insert ("api.value.union.name",
 439                                          @1, muscle_keyword, $1,
 440                                          MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE); }
 441 ;
 442
 443 grammar_declaration:
 444   "%union" union_name "{...}"
 445     {
 446       union_seen = true;
 447       muscle_code_grow ("union_members", translate_code_braceless ($3, @3), @3);
 448       code_scanner_last_string_free ();
 449     }
 450 ;
 451
 452
 453 %type <symbol_list*> nterm_decls symbol_decls symbol_decl.1
 454       token_decls token_decls_for_prec
 455       token_decl.1 token_decl_for_prec.1;
 456 symbol_declaration:
 457   "%nterm" { current_class = nterm_sym; } nterm_decls[syms]
 458     {
 459       current_class = unknown_sym;
 460       symbol_list_free ($syms);
 461     }
 462 | "%token" { current_class = token_sym; } token_decls[syms]
 463     {
 464       current_class = unknown_sym;
 465       symbol_list_free ($syms);
 466     }
 467 | "%type" symbol_decls[syms]
 468     {
 469       symbol_list_free ($syms);
 470     }
 471 | precedence_declarator token_decls_for_prec[syms]
 472     {
 473       ++current_prec;
 474       for (symbol_list *list = $syms; list; list = list->next)
 475         symbol_precedence_set (list->content.sym, current_prec, $1, @1);
 476       symbol_list_free ($syms);
 477     }
 478 ;
 479
 480 precedence_declarator:
 481   "%left"       { $$ = left_assoc; }
 482 | "%right"      { $$ = right_assoc; }
 483 | "%nonassoc"   { $$ = non_assoc; }
 484 | "%precedence" { $$ = precedence_assoc; }
 485 ;
 486
 487 tag.opt:
 488   %empty { $$ = NULL; }
 489 | TAG    { $$ = $1; }
 490 ;
 491
 492 %type <symbol_list*> generic_symlist generic_symlist_item;
 493 generic_symlist:
 494   generic_symlist_item
 495 | generic_symlist generic_symlist_item   { $$ = symbol_list_append ($1, $2); }
 496 ;
 497
 498 generic_symlist_item:
 499   symbol    { $$ = symbol_list_sym_new ($1, @1); }
 500 | tag       { $$ = symbol_list_type_new ($1, @1); }
 501 ;
 502
 503 tag:
 504   TAG
 505 | "<*>" { $$ = uniqstr_new ("*"); }
 506 | "<>"  { $$ = uniqstr_new (""); }
 507 ;
 508
 509 /*-----------------------.
 510 | nterm_decls (%nterm).  |
 511 `-----------------------*/
 512
 513 // A non empty list of possibly tagged symbols for %nterm.
 514 //
 515 // Can easily be defined like symbol_decls but restricted to ID, but
 516 // using token_decls allows to reduce the number of rules, and also to
 517 // make nicer error messages on "%nterm 'a'" or '%nterm FOO "foo"'.
 518 nterm_decls:
 519   token_decls
 520 ;
 521
 522 /*-----------------------------------.
 523 | token_decls (%token, and %nterm).  |
 524 `-----------------------------------*/
 525
 526 // A non empty list of possibly tagged symbols for %token or %nterm.
 527 token_decls:
 528   token_decl.1[syms]
 529     {
 530       $$ = $syms;
 531     }
 532 | TAG token_decl.1[syms]
 533     {
 534       $$ = symbol_list_type_set ($syms, $TAG);
 535     }
 536 | token_decls TAG token_decl.1[syms]
 537     {
 538       $$ = symbol_list_append ($1, symbol_list_type_set ($syms, $TAG));
 539     }
 540 ;
 541
 542 // One or more symbol declarations for %token or %nterm.
 543 token_decl.1:
 544   token_decl                { $$ = symbol_list_sym_new ($1, @1); }
 545 | token_decl.1 token_decl   { $$ = symbol_list_append ($1, symbol_list_sym_new ($2, @2)); }
 546
 547 // One symbol declaration for %token or %nterm.
 548 token_decl:
 549   id int.opt[num] alias
 550     {
 551       $$ = $id;
 552       symbol_class_set ($id, current_class, @id, true);
 553       if (0 <= $num)
 554         symbol_code_set ($id, $num, @num);
 555       if ($alias)
 556         symbol_make_alias ($id, $alias, @alias);
 557     }
 558 ;
 559
 560 %type <int> int.opt;
 561 int.opt:
 562   %empty  { $$ = -1; }
 563 | INT_LITERAL
 564 ;
 565
 566 %type <symbol*> alias;
 567 alias:
 568   %empty         { $$ = NULL; }
 569 | string_as_id   { $$ = $1; }
 570 | TSTRING
 571     {
 572       $$ = symbol_get ($1, @1);
 573       symbol_class_set ($$, token_sym, @1, false);
 574       $$->translatable = true;
 575     }
 576 ;
 577
 578
 579 /*-------------------------------------.
 580 | token_decls_for_prec (%left, etc.).  |
 581 `-------------------------------------*/
 582
 583 // A non empty list of possibly tagged tokens for precedence declaration.
 584 //
 585 // Similar to %token (token_decls), but in '%left FOO 1 "foo"', it treats
 586 // FOO and "foo" as two different symbols instead of aliasing them.
 587 token_decls_for_prec:
 588   token_decl_for_prec.1[syms]
 589     {
 590       $$ = $syms;
 591     }
 592 | TAG token_decl_for_prec.1[syms]
 593     {
 594       $$ = symbol_list_type_set ($syms, $TAG);
 595     }
 596 | token_decls_for_prec TAG token_decl_for_prec.1[syms]
 597     {
 598       $$ = symbol_list_append ($1, symbol_list_type_set ($syms, $TAG));
 599     }
 600 ;
 601
 602 // One or more token declarations for precedence declaration.
 603 token_decl_for_prec.1:
 604   token_decl_for_prec
 605     { $$ = symbol_list_sym_new ($1, @1); }
 606 | token_decl_for_prec.1 token_decl_for_prec
 607     { $$ = symbol_list_append ($1, symbol_list_sym_new ($2, @2)); }
 608
 609 // One token declaration for precedence declaration.
 610 token_decl_for_prec:
 611   id int.opt[num]
 612     {
 613       $$ = $id;
 614       symbol_class_set ($id, token_sym, @id, false);
 615       if (0 <= $num)
 616         symbol_code_set ($id, $num, @num);
 617     }
 618 | string_as_id
 619 ;
 620
 621
 622 /*-----------------------------------.
 623 | symbol_decls (argument of %type).  |
 624 `-----------------------------------*/
 625
 626 // A non empty list of typed symbols (for %type).
 627 symbol_decls:
 628   symbol_decl.1[syms]
 629     {
 630       $$ = $syms;
 631     }
 632 | TAG symbol_decl.1[syms]
 633     {
 634       $$ = symbol_list_type_set ($syms, $TAG);
 635     }
 636 | symbol_decls TAG symbol_decl.1[syms]
 637     {
 638       $$ = symbol_list_append ($1, symbol_list_type_set ($syms, $TAG));
 639     }
 640 ;
 641
// One or more symbol declarations (for %type).
 643 symbol_decl.1:
 644   symbol
 645     {
 646       symbol_class_set ($symbol, pct_type_sym, @symbol, false);
 647       $$ = symbol_list_sym_new ($symbol, @symbol);
 648     }
 649   | symbol_decl.1 symbol
 650     {
 651       symbol_class_set ($symbol, pct_type_sym, @symbol, false);
 652       $$ = symbol_list_append ($1, symbol_list_sym_new ($symbol, @symbol));
 653     }
 654 ;
 655
 656         /*------------------------------------------.
 657         | The grammar section: between the two %%.  |
 658         `------------------------------------------*/
 659
 660 grammar:
 661   rules_or_grammar_declaration
 662 | grammar rules_or_grammar_declaration
 663 ;
 664
 665 /* As a Bison extension, one can use the grammar declarations in the
 666    body of the grammar.  */
 667 rules_or_grammar_declaration:
 668   rules
 669 | grammar_declaration ";"
 670 | error ";"
 671     {
 672       yyerrok;
 673     }
 674 ;
 675
 676 rules:
 677   id_colon named_ref.opt { current_lhs ($1, @1, $2); } ":" rhses.1
 678     {
 679       /* Free the current lhs. */
 680       current_lhs (0, @1, 0);
 681     }
 682 ;
 683
 684 rhses.1:
 685   rhs                { grammar_current_rule_end (@rhs); }
 686 | rhses.1 "|" rhs    { grammar_current_rule_end (@rhs); }
 687 | rhses.1 ";"
 688 ;
 689
 690 %token PERCENT_EMPTY "%empty";
 691 rhs:
 692   %empty
 693     { grammar_current_rule_begin (current_lhs_symbol, current_lhs_loc,
 694                                   current_lhs_named_ref); }
 695 | rhs symbol named_ref.opt
 696     { grammar_current_rule_symbol_append ($2, @2, $3); }
 697 | rhs tag.opt "{...}"[action] named_ref.opt[name]
 698     { grammar_current_rule_action_append ($action, @action, $name, $[tag.opt]); }
 699 | rhs "%?{...}"
 700     { grammar_current_rule_predicate_append ($2, @2); }
 701 | rhs "%empty"
 702     { grammar_current_rule_empty_set (@2); }
 703 | rhs "%prec" symbol
 704     { grammar_current_rule_prec_set ($3, @3); }
 705 | rhs "%dprec" INT_LITERAL
 706     { grammar_current_rule_dprec_set ($3, @3); }
 707 | rhs "%merge" TAG
 708     { grammar_current_rule_merge_set ($3, @3); }
 709 | rhs "%expect" INT_LITERAL
 710     { grammar_current_rule_expect_sr ($3, @3); }
 711 | rhs "%expect-rr" INT_LITERAL
 712     { grammar_current_rule_expect_rr ($3, @3); }
 713 ;
 714
 715 named_ref.opt:
 716   %empty         { $$ = NULL; }
 717 | BRACKETED_ID   { $$ = named_ref_new ($1, @1); }
 718 ;
 719
 720
 721 /*---------------------.
 722 | variable and value.  |
 723 `---------------------*/
 724
 725 variable:
 726   ID
 727 ;
 728
 729 /* Some content or empty by default. */
 730 %code requires {
 731   #include "muscle-tab.h"
 732   typedef struct
 733   {
 734     char const *chars;
 735     muscle_kind kind;
 736   } value_type;
 737 };
 738 %type <value_type> value;
 739 %printer
 740 {
 741   switch ($$.kind)
 742     {
 743     case muscle_code:    fprintf (yyo,  "{%s}",  $$.chars); break;
 744     case muscle_keyword: fprintf (yyo,   "%s",   $$.chars); break;
 745     case muscle_string:  fprintf (yyo, "\"%s\"", $$.chars); break;
 746     }
 747 } <value_type>;
 748
 749 value:
 750   %empty  { $$.kind = muscle_keyword; $$.chars = ""; }
 751 | ID      { $$.kind = muscle_keyword; $$.chars = $1; }
 752 | STRING  { $$.kind = muscle_string;  $$.chars = unquote ($1); gram_scanner_last_string_free ();}
 753 | "{...}" { $$.kind = muscle_code;    $$.chars = strip_braces ($1); gram_scanner_last_string_free (); }
 754 ;
 755
 756
 757 /*--------------.
 758 | Identifiers.  |
 759 `--------------*/
 760
 761 /* Identifiers are returned as uniqstr values by the scanner.
 762    Depending on their use, we may need to make them genuine symbols.  */
 763
 764 id:
 765   ID
 766     { $$ = symbol_from_uniqstr ($1, @1); }
 767 | CHAR_LITERAL
 768     {
 769       const char *var = "api.token.raw";
 770       if (current_class == nterm_sym)
 771         {
 772           complain (&@1, complaint,
 773                     _("character literals cannot be nonterminals"));
 774           YYERROR;
 775         }
 776       if (muscle_percent_define_ifdef (var))
 777         {
 778           complain (&@1, complaint,
 779                     _("character literals cannot be used together"
 780                     " with %s"), var);
 781           location loc = muscle_percent_define_get_loc (var);
 782           subcomplain (&loc, complaint, _("definition of %s"), var);
 783         }
 784       $$ = symbol_get (char_name ($1), @1);
 785       symbol_class_set ($$, token_sym, @1, false);
 786       symbol_code_set ($$, $1, @1);
 787     }
 788 ;
 789
 790 id_colon:
 791   ID_COLON { $$ = symbol_from_uniqstr ($1, @1); }
 792 ;
 793
 794
 795 symbol:
 796   id
 797 | string_as_id
 798 ;
 799
 800 /* A string used as an ID.  */
 801 string_as_id:
 802   STRING
 803     {
 804       $$ = symbol_get ($1, @1);
 805       symbol_class_set ($$, token_sym, @1, false);
 806     }
 807 ;
 808
 809 epilogue.opt:
 810   %empty
 811 | "%%" EPILOGUE
 812     {
 813       muscle_code_grow ("epilogue", translate_code ($2, @2, true), @2);
 814       code_scanner_last_string_free ();
 815     }
 816 ;
 817
 818 %%
 819
 820 int
 821 yyreport_syntax_error (const yypcontext_t *ctx)
 822 {
 823   int res = 0;
 824   /* Arguments of format: reported tokens (one for the "unexpected",
 825      one per "expected"). */
 826   enum { ARGS_MAX = 5 };
 827   const char *argv[ARGS_MAX];
 828   int argc = 0;
 829   yysymbol_kind_t unexpected = yypcontext_token (ctx);
 830   if (unexpected != YYSYMBOL_YYEMPTY)
 831     {
 832       argv[argc++] = yysymbol_name (unexpected);
 833       yysymbol_kind_t expected[ARGS_MAX - 1];
 834       int nexpected = yypcontext_expected_tokens (ctx, expected, ARGS_MAX - 1);
 835       if (nexpected < 0)
 836         res = nexpected;
 837       else
 838         for (int i = 0; i < nexpected; ++i)
 839           argv[argc++] = yysymbol_name (expected[i]);
 840     }
 841   syntax_error (*yypcontext_location (ctx), argc, argv);
 842   return res;
 843 }
 844
 845
 846 /* Return the location of the left-hand side of a rule whose
 847    right-hand side is RHS[1] ... RHS[N].  Ignore empty nonterminals in
 848    the right-hand side, and return an empty location equal to the end
 849    boundary of RHS[0] if the right-hand side is empty.  */
 850
 851 static YYLTYPE
 852 lloc_default (YYLTYPE const *rhs, int n)
 853 {
 854   YYLTYPE loc;
 855
 856   /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
 857      The bug is fixed in 7.4.2m, but play it safe for now.  */
 858   loc.start = rhs[n].end;
 859   loc.end = rhs[n].end;
 860
 861   /* Ignore empty nonterminals the start of the right-hand side.
 862      Do not bother to ignore them at the end of the right-hand side,
 863      since empty nonterminals have the same end as their predecessors.  */
 864   for (int i = 1; i <= n; i++)
 865     if (! equal_boundaries (rhs[i].start, rhs[i].end))
 866       {
 867         loc.start = rhs[i].start;
 868         break;
 869       }
 870
 871   return loc;
 872 }
 873
 874 static
 875 char *strip_braces (char *code)
 876 {
 877   code[strlen (code) - 1] = 0;
 878   return code + 1;
 879 }
 880
 881 static
 882 char const *
 883 translate_code (char *code, location loc, bool plain)
 884 {
 885   code_props plain_code;
 886   if (plain)
 887     code_props_plain_init (&plain_code, code, loc);
 888   else
 889     code_props_symbol_action_init (&plain_code, code, loc);
 890   code_props_translate_code (&plain_code);
 891   gram_scanner_last_string_free ();
 892   return plain_code.code;
 893 }
 894
 895 static
 896 char const *
 897 translate_code_braceless (char *code, location loc)
 898 {
 899   return translate_code (strip_braces (code), loc, true);
 900 }
 901
 902 static void
 903 add_param (param_type type, char *decl, location loc)
 904 {
 905   static char const alphanum[26 + 26 + 1 + 10 + 1] =
 906     "abcdefghijklmnopqrstuvwxyz"
 907     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 908     "_"
 909     "0123456789";
 910
 911   char const *name_start = NULL;
 912   {
 913     char *p;
 914     /* Stop on last actual character.  */
 915     for (p = decl; p[1]; p++)
 916       if ((p == decl
 917            || ! memchr (alphanum, p[-1], sizeof alphanum - 1))
 918           && memchr (alphanum, p[0], sizeof alphanum - 10 - 1))
 919         name_start = p;
 920
 921     /* Strip the surrounding '{' and '}', and any blanks just inside
 922        the braces.  */
 923     --p;
 924     while (c_isspace ((unsigned char) *p))
 925       --p;
 926     p[1] = '\0';
 927     ++decl;
 928     while (c_isspace ((unsigned char) *decl))
 929       ++decl;
 930   }
 931
 932   if (! name_start)
 933     complain (&loc, complaint, _("missing identifier in parameter declaration"));
 934   else
 935     {
 936       char *name = xmemdup0 (name_start, strspn (name_start, alphanum));
 937       if (type & param_lex)
 938         muscle_pair_list_grow ("lex_param", decl, name);
 939       if (type & param_parse)
 940         muscle_pair_list_grow ("parse_param", decl, name);
 941       free (name);
 942     }
 943
 944   gram_scanner_last_string_free ();
 945 }
 946
 947
 948 static void
 949 handle_defines (char const *value)
 950 {
 951   defines_flag = true;
 952   char *file = unquote (value);
 953   spec_header_file = xstrdup (file);
 954   gram_scanner_last_string_free ();
 955   unquote_free (file);
 956 }
 957
 958
 959 static void
 960 handle_error_verbose (location const *loc, char const *directive)
 961 {
 962   bison_directive (loc, directive);
 963   muscle_percent_define_insert (directive, *loc, muscle_keyword, "",
 964                                 MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE);
 965 }
 966
 967
 968 static void
 969 handle_file_prefix (location const *loc,
 970                     location const *dir_loc,
 971                     char const *directive, char const *value_quoted)
 972 {
 973   char *value = unquote (value_quoted);
 974   bison_directive (loc, directive);
 975   bool warned = false;
 976
 977   if (location_empty (spec_file_prefix_loc))
 978     {
 979       spec_file_prefix_loc = *loc;
 980       spec_file_prefix = value;
 981     }
 982   else
 983     {
 984       duplicate_directive (directive, spec_file_prefix_loc, *loc);
 985       warned = true;
 986     }
 987
 988   if (!warned
 989       && STRNEQ (directive, "%file-prefix"))
 990     deprecated_directive (dir_loc, directive, "%file-prefix");
 991 }
 992
 993 static void
 994 handle_language (location const *loc, char const *lang)
 995 {
 996   language_argmatch (unquote (lang), grammar_prio, *loc);
 997 }
 998
 999
1000 static void
1001 handle_name_prefix (location const *loc,
1002                     char const *directive, char const *value_quoted)
1003 {
1004   char *value = unquote (value_quoted);
1005   bison_directive (loc, directive);
1006
1007   char buf1[1024];
1008   size_t len1 = sizeof (buf1);
1009   char *old = asnprintf (buf1, &len1, "%s\"%s\"", directive, value);
1010   if (!old)
1011     xalloc_die ();
1012
1013   if (location_empty (spec_name_prefix_loc))
1014     {
1015       spec_name_prefix = value;
1016       spec_name_prefix_loc = *loc;
1017
1018       char buf2[1024];
1019       size_t len2 = sizeof (buf2);
1020       char *new = asnprintf (buf2, &len2, "%%define api.prefix {%s}", value);
1021       if (!new)
1022         xalloc_die ();
1023       deprecated_directive (loc, old, new);
1024       if (new != buf2)
1025         free (new);
1026     }
1027   else
1028     duplicate_directive (old, spec_file_prefix_loc, *loc);
1029
1030   if (old != buf1)
1031     free (old);
1032 }
1033
1034
1035 static void
1036 handle_pure_parser (location const *loc, char const *directive)
1037 {
1038   bison_directive (loc, directive);
1039   deprecated_directive (loc, directive, "%define api.pure");
1040   muscle_percent_define_insert ("api.pure", *loc, muscle_keyword, "",
1041                                 MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE);
1042 }
1043
1044
1045 static void
1046 handle_require (location const *loc, char const *version_quoted)
1047 {
1048   char *version = unquote (version_quoted);
1049   required_version = strversion_to_int (version);
1050   if (required_version == -1)
1051     {
1052       complain (loc, complaint, _("invalid version requirement: %s"),
1053                 version);
1054       required_version = 0;
1055     }
1056   else
1057     {
1058       const char* package_version =
1059         0 < strverscmp (api_version, PACKAGE_VERSION)
1060         ? api_version : PACKAGE_VERSION;
1061       if (0 < strverscmp (version, package_version))
1062         {
1063           complain (loc, complaint, _("require bison %s, but have %s"),
1064                     version, package_version);
1065           exit (EX_MISMATCH);
1066         }
1067     }
1068   unquote_free (version);
1069   gram_scanner_last_string_free ();
1070 }
1071
1072 static void
1073 handle_skeleton (location const *loc, char const *skel_quoted)
1074 {
1075   char *skel = unquote (skel_quoted);
1076   char const *skeleton_user = skel;
1077   if (strchr (skeleton_user, '/'))
1078     {
1079       size_t dir_length = strlen (grammar_file);
1080       while (dir_length && grammar_file[dir_length - 1] != '/')
1081         --dir_length;
1082       while (dir_length && grammar_file[dir_length - 1] == '/')
1083         --dir_length;
1084       char *skeleton_build =
1085         xmalloc (dir_length + 1 + strlen (skeleton_user) + 1);
1086       if (dir_length > 0)
1087         {
1088           memcpy (skeleton_build, grammar_file, dir_length);
1089           skeleton_build[dir_length++] = '/';
1090         }
1091       strcpy (skeleton_build + dir_length, skeleton_user);
1092       skeleton_user = uniqstr_new (skeleton_build);
1093       free (skeleton_build);
1094     }
1095   skeleton_arg (skeleton_user, grammar_prio, *loc);
1096 }
1097
1098
1099 static void
1100 handle_yacc (location const *loc)
1101 {
1102   const char *directive = "%yacc";
1103   bison_directive (loc, directive);
1104   if (location_empty (yacc_loc))
1105     yacc_loc = *loc;
1106   else
1107     duplicate_directive (directive, yacc_loc, *loc);
1108 }
1109
1110
/* Report the error message MSG at LOC as a complaint.  MSG goes
   through a "%s" format so that any '%' it contains is printed
   literally instead of being taken as a conversion.  */
static void
gram_error (location const *loc, char const *msg)
{
  complain (loc, complaint, "%s", msg);
}
1116
1117 static char const *
1118 char_name (char c)
1119 {
1120   if (c == '\'')
1121     return "'\\''";
1122   else
1123     {
1124       char buf[4];
1125       buf[0] = '\''; buf[1] = c; buf[2] = '\''; buf[3] = '\0';
1126       return quotearg_style (escape_quoting_style, buf);
1127     }
1128 }
1129
1130 static void
1131 current_lhs (symbol *sym, location loc, named_ref *ref)
1132 {
1133   current_lhs_symbol = sym;
1134   current_lhs_loc = loc;
1135   if (sym)
1136     symbol_location_as_lhs_set (sym, loc);
1137   /* In order to simplify memory management, named references for lhs
1138      are always assigned by deep copy into the current symbol_list
1139      node.  This is because a single named-ref in the grammar may
1140      result in several uses when the user factors lhs between several
1141      rules using "|".  Therefore free the parser's original copy.  */
1142   free (current_lhs_named_ref);
1143   current_lhs_named_ref = ref;
1144 }
1145
/* Open the "value" use-class on the stream YYO (see
   begin_use_class); troff closes it.  */
static void
tron (FILE *yyo)
{
  begin_use_class ("value", yyo);
}
1150
/* Close the "value" use-class on the stream YYO (see
   end_use_class); the counterpart of tron.  */
static void
troff (FILE *yyo)
{
  end_use_class ("value", yyo);
}
1155
1156
1157 /*----------.
1158 | Unquote.  |
1159 `----------*/
1160
/* The obstack on which unquote builds its results.  It is set up by
   parser_init, emptied by parser_free, and individual strings are
   given back with unquote_free.  */
struct obstack obstack_for_unquote;
1162
/* Initialize obstack_for_unquote, the storage used by unquote.  */
void
parser_init (void)
{
  obstack_init (&obstack_for_unquote);
}
1168
1169 void
1170 parser_free (void)
1171 {
1172   obstack_free (&obstack_for_unquote, 0);
1173 }
1174
/* Give LAST_STRING, a string returned by unquote, back to
   obstack_for_unquote via obstack_free.  */
static void
unquote_free (char *last_string)
{
  obstack_free (&obstack_for_unquote, last_string);
}
1180
1181 static char *
1182 unquote (const char *cp)
1183 {
1184 #define GROW(Char)                              \
1185   obstack_1grow (&obstack_for_unquote, Char);
1186   for (++cp; *cp && *cp != '"'; ++cp)
1187     switch (*cp)
1188       {
1189       case '"':
1190         break;
1191       case '\\':
1192         ++cp;
1193         switch (*cp)
1194           {
1195           case '0': case '1': case '2': case '3': case '4':
1196           case '5': case '6': case '7': case '8': case '9':
1197             {
1198               int c = cp[0] - '0';
1199               if (c_isdigit (cp[1]))
1200                 {
1201                   ++cp;
1202                   c = c * 8 + cp[0] - '0';
1203                 }
1204               if (c_isdigit (cp[1]))
1205                 {
1206                   ++cp;
1207                   c = c * 8 + cp[0] - '0';
1208                 }
1209               GROW (c);
1210             }
1211             break;
1212
1213           case 'a': GROW ('\a'); break;
1214           case 'b': GROW ('\b'); break;
1215           case 'f': GROW ('\f'); break;
1216           case 'n': GROW ('\n'); break;
1217           case 'r': GROW ('\r'); break;
1218           case 't': GROW ('\t'); break;
1219           case 'v': GROW ('\v'); break;
1220
1221           case 'x':
1222             {
1223               int c = 0;
1224               while (c_isxdigit (cp[1]))
1225                 {
1226                   ++cp;
1227                   c = (c * 16 + (c_isdigit (cp[0]) ? cp[0] - '0'
1228                                  : c_isupper (cp[0]) ? cp[0] - 'A'
1229                                  : cp[0] - '0'));
1230                 }
1231               GROW (c);
1232               break;
1233             }
1234           }
1235         break;
1236
1237       default:
1238         GROW (*cp);
1239         break;
1240       }
1241   assert (*cp == '"');
1242   ++cp;
1243   assert (*cp == '\0');
1244 #undef GROW
1245   return obstack_finish0 (&obstack_for_unquote);
1246 }