src/parse-gram.y

   1 /* Bison Grammar Parser                             -*- C -*-
   2
   3    Copyright (C) 2002-2015, 2018-2021 Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    This program is free software: you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation, either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  19
  20 %code requires
  21 {
  22   #include "symlist.h"
  23   #include "symtab.h"
  24 }
  25
  26 %code provides
  27 {
  28   /* Initialize unquote.  */
  29   void parser_init (void);
  30   /* Deallocate storage for unquote.  */
  31   void parser_free (void);
  32 }
  33
  34 %code top
  35 {
  36   /* On column 0 to please syntax-check.  */
  37 #include <config.h>
  38 }
  39
  40 %code
  41 {
  42   #include "system.h"
  43
  44   #include <c-ctype.h>
  45   #include <quotearg.h>
  46   #include <vasnprintf.h>
  47   #include <xmemdup0.h>
  48
  49   #include "complain.h"
  50   #include "conflicts.h"
  51   #include "files.h"
  52   #include "getargs.h"
  53   #include "gram.h"
  54   #include "named-ref.h"
  55   #include "reader.h"
  56   #include "scan-code.h"
  57   #include "scan-gram.h"
  58   #include "strversion.h"
  59
  60   /* Pretend to be at least that version, to check features published
  61      in that version while developing it.  */
  62   static const char* api_version = "3.8";
  63
  64   static int current_prec = 0;
  65   static location current_lhs_loc;
  66   static named_ref *current_lhs_named_ref;
  67   static symbol *current_lhs_symbol;
  68   static symbol_class current_class = unknown_sym;
  69
  70   /** Set the new current left-hand side symbol, possibly common
  71    * to several right-hand side parts of rule.
  72    */
  73   static void current_lhs (symbol *sym, location loc, named_ref *ref);
  74
  75   #define YYLLOC_DEFAULT(Current, Rhs, N)         \
  76     (Current) = lloc_default (Rhs, N)
  77   static YYLTYPE lloc_default (YYLTYPE const *, int);
  78
  79   #define YY_LOCATION_PRINT(File, Loc)            \
  80     location_print (Loc, File)
  81
  82   /* Strip initial '{' and final '}' (must be first and last characters).
  83      Return the result.  */
  84   static char *strip_braces (char *code);
  85
  86   /* Convert CODE by calling code_props_plain_init if PLAIN, otherwise
  87      code_props_symbol_action_init.  Calls
  88      gram_scanner_last_string_free to release the latest string from
  89      the scanner (should be CODE). */
  90   static char const *translate_code (char *code, location loc, bool plain);
  91
  92   /* Convert CODE by calling code_props_plain_init after having
  93      stripped the first and last characters (expected to be '{', and
  94      '}').  Calls gram_scanner_last_string_free to release the latest
  95      string from the scanner (should be CODE). */
  96   static char const *translate_code_braceless (char *code, location loc);
  97
  98   /* Handle a %header directive.  */
  99   static void handle_header (char const *value);
 100
 101   /* Handle a %error-verbose directive.  */
 102   static void handle_error_verbose (location const *loc, char const *directive);
 103
 104   /* Handle a %file-prefix directive.  */
 105   static void handle_file_prefix (location const *loc,
 106                                   location const *dir_loc,
 107                                   char const *directive, char const *value);
 108
 109   /* Handle a %language directive.  */
 110   static void handle_language (location const *loc, char const *lang);
 111
 112   /* Handle a %name-prefix directive.  */
 113   static void handle_name_prefix (location const *loc,
 114                                   char const *directive, char const *value);
 115
 116   /* Handle a %pure-parser directive.  */
 117   static void handle_pure_parser (location const *loc, char const *directive);
 118
 119   /* Handle a %require directive.  */
 120   static void handle_require (location const *loc, char const *version);
 121
 122   /* Handle a %skeleton directive.  */
 123   static void handle_skeleton (location const *loc, char const *skel);
 124
 125   /* Handle a %yacc directive.  */
 126   static void handle_yacc (location const *loc);
 127
 128   /* Implementation of yyerror.  */
 129   static void gram_error (location const *, char const *);
 130
 131   /* A string that describes a char (e.g., 'a' -> "'a'").  */
 132   static char const *char_name (char);
 133
 134   /* Add style to semantic values in traces.  */
 135   static void tron (FILE *yyo);
 136   static void troff (FILE *yyo);
 137
 138   /* Interpret a quoted string (such as `"Hello, \"World\"\n\""`).
 139      Manages the memory of the result.  */
 140   static char *unquote (const char *str);
 141
 142   /* Discard the latest unquoted string.  */
 143   static void unquote_free (char *last_string);
 144 }
 145
 146 %define api.header.include {"parse-gram.h"}
 147 %define api.prefix {gram_}
 148 %define api.pure full
 149 %define api.token.raw
 150 %define api.value.type union
 151 %define locations
 152 %define parse.error custom
 153 %define parse.lac full
 154 %define parse.trace
 155 %header
 156 %expect 0
 157 %verbose
 158
 159 %initial-action
 160 {
 161   /* Bison's grammar can have initial empty locations, hence a default
 162      location is needed.  Both boundaries receive the same values, so
      the start and the end of the initial location are equal.  */
 163   boundary_set (&@$.start, grammar_file, 1, 1, 1);
 164   boundary_set (&@$.end, grammar_file, 1, 1, 1);
 165 }
 166
 167 %token
 168   STRING              _("string")
 169   TSTRING             _("translatable string")
 170
 171   PERCENT_TOKEN       "%token"
 172   PERCENT_NTERM       "%nterm"
 173
 174   PERCENT_TYPE        "%type"
 175   PERCENT_DESTRUCTOR  "%destructor"
 176   PERCENT_PRINTER     "%printer"
 177
 178   PERCENT_LEFT        "%left"
 179   PERCENT_RIGHT       "%right"
 180   PERCENT_NONASSOC    "%nonassoc"
 181   PERCENT_PRECEDENCE  "%precedence"
 182
 183   PERCENT_PREC        "%prec"
 184   PERCENT_DPREC       "%dprec"
 185   PERCENT_MERGE       "%merge"
 186
 187   PERCENT_CODE            "%code"
 188   PERCENT_DEFAULT_PREC    "%default-prec"
 189   PERCENT_DEFINE          "%define"
 190   PERCENT_ERROR_VERBOSE   "%error-verbose"
 191   PERCENT_EXPECT          "%expect"
 192   PERCENT_EXPECT_RR       "%expect-rr"
 193   PERCENT_FILE_PREFIX     "%file-prefix"
 194   PERCENT_FLAG            "%<flag>"
 195   PERCENT_GLR_PARSER      "%glr-parser"
 196   PERCENT_HEADER          "%header"
 197   PERCENT_INITIAL_ACTION  "%initial-action"
 198   PERCENT_LANGUAGE        "%language"
 199   PERCENT_NAME_PREFIX     "%name-prefix"
 200   PERCENT_NO_DEFAULT_PREC "%no-default-prec"
 201   PERCENT_NO_LINES        "%no-lines"
 202   PERCENT_NONDETERMINISTIC_PARSER "%nondeterministic-parser"
 203   PERCENT_OUTPUT          "%output"
 204   PERCENT_PURE_PARSER     "%pure-parser"
 205   PERCENT_REQUIRE         "%require"
 206   PERCENT_SKELETON        "%skeleton"
 207   PERCENT_START           "%start"
 208   PERCENT_TOKEN_TABLE     "%token-table"
 209   PERCENT_VERBOSE         "%verbose"
 210   PERCENT_YACC            "%yacc"
 211
 212   BRACED_CODE       "{...}"
 213   BRACED_PREDICATE  "%?{...}"
 214   BRACKETED_ID      _("[identifier]")
 215   CHAR_LITERAL      _("character literal")
 216   COLON             ":"
 217   EPILOGUE          _("epilogue")
 218   EQUAL             "="
 219   ID                _("identifier")
 220   ID_COLON          _("identifier:")
 221   PERCENT_PERCENT   "%%"
 222   PIPE              "|"
 223   PROLOGUE          "%{...%}"
 224   SEMICOLON         ";"
 225   TAG               _("<tag>")
 226   TAG_ANY           "<*>"
 227   TAG_NONE          "<>"
 228
 229  /* Experimental feature, don't rely on it.  */
 230 %code pre-printer  {tron (yyo);}
 231 %code post-printer {troff (yyo);}
 232
 233 %type <unsigned char> CHAR_LITERAL
 234 %printer { fputs (char_name ($$), yyo); } <unsigned char>
 235
 236 %type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING TSTRING
 237 %printer { fputs ($$, yyo); } <char*>
 238
 239 %type <uniqstr>
 240   BRACKETED_ID ID ID_COLON
 241   PERCENT_ERROR_VERBOSE PERCENT_FILE_PREFIX PERCENT_FLAG PERCENT_NAME_PREFIX
 242   PERCENT_PURE_PARSER
 243   TAG tag tag.opt variable
 244 %printer { fputs ($$, yyo); } <uniqstr>
 245 %printer { fprintf (yyo, "[%s]", $$); } BRACKETED_ID
 246 %printer { fprintf (yyo, "%s:", $$); } ID_COLON
 247 %printer { fprintf (yyo, "%%%s", $$); } PERCENT_FLAG
 248 %printer { fprintf (yyo, "<%s>", $$); } TAG tag
 249
 250 %token <int> INT_LITERAL _("integer literal")
 251 %printer { fprintf (yyo, "%d", $$); } <int>
 252
 253 %type <symbol*> id id_colon string_as_id symbol token_decl token_decl_for_prec
 254 %printer { fprintf (yyo, "%s", $$ ? $$->tag : "<NULL>"); } <symbol*>
 255 %printer { fprintf (yyo, "%s:", $$->tag); } id_colon
 256
 257 %type <assoc> precedence_declarator
 258
 259 %destructor { symbol_list_free ($$); } <symbol_list*>
 260 %printer { symbol_list_syms_print ($$, yyo); } <symbol_list*>
 261
 262 %type <named_ref*> named_ref.opt
 263
 264 /*---------.
 265 | %param.  |
 266 `---------*/
 267 %code requires
 268 {
 269   typedef enum   /* Bit set selecting the parameter lists that add_param grows.  */
 270   {
 271     param_none   = 0,
 272     param_lex    = 1 << 0,   /* add_param grows "lex_param".  */
 273     param_parse  = 1 << 1,   /* add_param grows "parse_param".  */
 274     param_both   = param_lex | param_parse
 275   } param_type;
 276 };
 277 %code
 278 {
 279   /** Add a lex-param and/or a parse-param.
 280    *
 281    * \param type  where to push this formal argument.
 282    * \param decl  the formal argument.  Destroyed.
 283    * \param loc   the location in the source.
 284    */
 285   static void add_param (param_type type, char *decl, location loc);
 286   static param_type current_param = param_none;
 287 };
 288 %token <param_type> PERCENT_PARAM "%param";
 289 %printer
 290 {
 291   switch ($$)
 292     {
 293 #define CASE(In, Out)                                           \
 294       case param_ ## In: fputs ("%" #Out, yyo); break
 295       CASE (lex,   lex-param);
 296       CASE (parse, parse-param);
 297       CASE (both,  param);
 298 #undef CASE
 299       case param_none: aver (false); break;
 300     }
 301 } <param_type>;
 302
 303
 304                      /*==========\
 305                      | Grammar.  |
 306                      \==========*/
 307 %%
 308
 309 input:   /* A whole grammar file: declarations, separator, rules, optional epilogue.  */
 310   prologue_declarations "%%" grammar epilogue.opt
 311 ;
 312
 313
 314         /*------------------------------------.
 315         | Declarations: before the first %%.  |
 316         `------------------------------------*/
 317
 318 prologue_declarations:   /* Possibly empty, left-recursive list of declarations.  */
 319   %empty
 320 | prologue_declarations prologue_declaration
 321 ;
 322
 323 prologue_declaration:   /* One declaration of the first section; most alternatives delegate to a handle_* function or set a global flag.  */
 324   grammar_declaration
 325 | "%{...%}"   /* Prologue code, translated as plain code.  */
 326     {
 327       muscle_code_grow (union_seen ? "post_prologue" : "pre_prologue",   /* The target muscle depends on union_seen.  */
 328                         translate_code ($1, @1, true), @1);
 329       code_scanner_last_string_free ();
 330     }
 331 | "%<flag>"
 332     {
 333       muscle_percent_define_ensure ($1, @1, true);
 334     }
 335 | "%define" variable value
 336     {
 337       muscle_percent_define_insert ($2, @$, $3.kind, $3.chars,
 338                                     MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE);
 339     }
 340 | "%header" string.opt             { handle_header ($2); }
 341 | "%error-verbose"                 { handle_error_verbose (&@$, $1); }
 342 | "%expect" INT_LITERAL            { expected_sr_conflicts = $2; }
 343 | "%expect-rr" INT_LITERAL         { expected_rr_conflicts = $2; }
 344 | "%file-prefix" STRING            { handle_file_prefix (&@$, &@1, $1, $2); }
 345 | "%glr-parser"   /* Sets both flags, unlike "%nondeterministic-parser" below.  */
 346     {
 347       nondeterministic_parser = true;
 348       glr_parser = true;
 349     }
 350 | "%initial-action" "{...}"   /* Translated as a symbol action (PLAIN is false).  */
 351     {
 352       muscle_code_grow ("initial_action", translate_code ($2, @2, false), @2);
 353       code_scanner_last_string_free ();
 354     }
 355 | "%language" STRING            { handle_language (&@1, $2); }
 356 | "%name-prefix" STRING         { handle_name_prefix (&@$, $1, $2); }
 357 | "%no-lines"                   { no_lines_flag = true; }
 358 | "%nondeterministic-parser"    { nondeterministic_parser = true; }
 359 | "%output" STRING              { spec_outfile = unquote ($2); gram_scanner_last_string_free (); }
 360 | "%param" { current_param = $1; } params { current_param = param_none; }   /* The mid-rule action sets current_param, which the actions of "params" pass to add_param.  */
 361 | "%pure-parser"                { handle_pure_parser (&@$, $1); }
 362 | "%require" STRING             { handle_require (&@2, $2); }
 363 | "%skeleton" STRING            { handle_skeleton (&@2, $2); }
 364 | "%token-table"                { token_table_flag = true; }
 365 | "%verbose"                    { report_flag |= report_states; }
 366 | "%yacc"                       { handle_yacc (&@$); }
 367 | error ";"                     { current_class = unknown_sym; yyerrok; }   /* Error recovery up to a semicolon; also resets current_class.  */
 368 | /*FIXME: Err?  What is this horror doing here? */ ";"
 369 ;
 370
 371 params:   /* One or more braced parameter declarations, each passed to add_param with current_param.  */
 372    params "{...}"  { add_param (current_param, $2, @2); }
 373 | "{...}"          { add_param (current_param, $1, @1); }
 374 ;
 375
 376
 377 /*----------------------.
 378 | grammar_declaration.  |
 379 `----------------------*/
 380
 381 grammar_declaration:   /* Declarations that rules_or_grammar_declaration also accepts in the grammar section.  */
 382   symbol_declaration
 383 | "%start" symbols.1
 384     {
 385       grammar_start_symbols_add ($2);
 386     }
 387 | code_props_type "{...}" generic_symlist   /* %destructor or %printer (see code_props_type).  */
 388     {
 389       code_props code;
 390       code_props_symbol_action_init (&code, $2, @2);
 391       code_props_translate_code (&code);
 392       {
 393         for (symbol_list *list = $3; list; list = list->next)   /* Attach the same code to every item of the list.  */
 394           symbol_list_code_props_set (list, $1, &code);
 395         symbol_list_free ($3);
 396       }
 397     }
 398 | "%default-prec"
 399     {
 400       default_prec = true;
 401     }
 402 | "%no-default-prec"
 403     {
 404       default_prec = false;
 405     }
 406 | "%code" "{...}"   /* Unqualified %code.  */
 407     {
 408       /* Do not invoke muscle_percent_code_grow here since it invokes
 409          muscle_user_name_list_grow.  */
 410       muscle_code_grow ("percent_code()",
 411                         translate_code_braceless ($2, @2), @2);
 412       code_scanner_last_string_free ();
 413     }
 414 | "%code" ID "{...}"   /* Qualified %code: the ID is passed as the qualifier.  */
 415     {
 416       muscle_percent_code_grow ($2, @2, translate_code_braceless ($3, @3), @3);
 417       code_scanner_last_string_free ();
 418     }
 419 ;
 420
 421 %type <code_props_type> code_props_type;
 422 %printer { fprintf (yyo, "%s", code_props_type_string ($$)); } <code_props_type>;
 423 code_props_type:   /* Which kind of per-symbol code is being declared.  */
 424   "%destructor"  { $$ = destructor; }
 425 | "%printer"     { $$ = printer; }
 426 ;
 427
 428 /*---------.
 429 | %union.  |
 430 `---------*/
 431
 432 %token PERCENT_UNION "%union";
 433
 434 union_name:   /* Optional identifier after %union.  */
 435   %empty {}
 436 | ID     { muscle_percent_define_insert ("api.value.union.name",   /* The identifier becomes the keyword value of this variable.  */
 437                                          @1, muscle_keyword, $1,
 438                                          MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE); }
 439 ;
 440
 441 grammar_declaration:   /* Additional alternative of grammar_declaration: the %union directive.  */
 442   "%union" union_name "{...}"
 443     {
 444       union_seen = true;   /* Read back when a prologue is seen, see prologue_declaration.  */
 445       muscle_code_grow ("union_members", translate_code_braceless ($3, @3), @3);
 446       code_scanner_last_string_free ();
 447     }
 448 ;
 449
 450
 451 %type <symbol_list*> nterm_decls symbol_decls symbols.1
 452       token_decls token_decls_for_prec
 453       token_decl.1 token_decl_for_prec.1;
 454 symbol_declaration:   /* %nterm, %token, %type and the precedence directives.  */
 455   "%nterm" { current_class = nterm_sym; } nterm_decls[syms]   /* The mid-rule action sets current_class, which the rules parsing the list read.  */
 456     {
 457       current_class = unknown_sym;   /* Reset once the list is parsed.  */
 458       symbol_list_free ($syms);
 459     }
 460 | "%token" { current_class = token_sym; } token_decls[syms]
 461     {
 462       current_class = unknown_sym;
 463       symbol_list_free ($syms);
 464     }
 465 | "%type" { current_class = pct_type_sym; } symbol_decls[syms]
 466     {
 467       current_class = unknown_sym;
 468       symbol_list_free ($syms);
 469     }
 470 | precedence_declarator token_decls_for_prec[syms]
 471     {
 472       ++current_prec;   /* Each precedence declaration gets its own, increasing level.  */
 473       for (symbol_list *list = $syms; list; list = list->next)
 474         symbol_precedence_set (list->content.sym, current_prec, $1, @1);
 475       symbol_list_free ($syms);
 476     }
 477 ;
 478
 479 precedence_declarator:   /* Map each precedence directive to its associativity value.  */
 480   "%left"       { $$ = left_assoc; }
 481 | "%right"      { $$ = right_assoc; }
 482 | "%nonassoc"   { $$ = non_assoc; }
 483 | "%precedence" { $$ = precedence_assoc; }
 484 ;
 485
 486 %type <char*> string.opt;
 487 string.opt:   /* Optional string; NULL when absent.  */
 488   %empty  { $$ = NULL; }
 489 | STRING  { $$ = $1; }
 490 ;
 491
 492 tag.opt:   /* Optional tag; NULL when absent.  */
 493   %empty { $$ = NULL; }
 494 | TAG    { $$ = $1; }
 495 ;
 496
 497 %type <symbol_list*> generic_symlist generic_symlist_item;
 498 generic_symlist:   /* Nonempty list of symbols and tags, built by appending each new item.  */
 499   generic_symlist_item
 500 | generic_symlist generic_symlist_item   { $$ = symbol_list_append ($1, $2); }
 501 ;
 502
 503 generic_symlist_item:   /* A one-item list made from either a symbol or a tag.  */
 504   symbol    { $$ = symbol_list_sym_new ($1, @1); }
 505 | tag       { $$ = symbol_list_type_new ($1, @1); }
 506 ;
 507
 508 tag:   /* A tag; the two special tags are stored as a star and as the empty string.  */
 509   TAG
 510 | "<*>" { $$ = uniqstr_new ("*"); }
 511 | "<>"  { $$ = uniqstr_new (""); }
 512 ;
 513
 514 /*-----------------------.
 515 | nterm_decls (%nterm).  |
 516 `-----------------------*/
 517
 518 // A nonempty list of possibly tagged symbols for %nterm.
 519 //
 520 // Can easily be defined like symbol_decls but restricted to ID, but
 521 // using token_decls allows reducing the number of rules, and also
 522 // making nicer error messages on "%nterm 'a'" or '%nterm FOO "foo"'.
 523 nterm_decls:   /* Same syntax as token_decls; the value is passed through unchanged.  */
 524   token_decls
 525 ;
 526
 527 /*-----------------------------------.
 528 | token_decls (%token, and %nterm).  |
 529 `-----------------------------------*/
 530
 531 // A non empty list of possibly tagged symbols for %token or %nterm.
 532 token_decls:   /* Groups of declarations; a group may be preceded by the tag applied to it.  */
 533   token_decl.1[syms]
 534     {
 535       $$ = $syms;
 536     }
 537 | TAG token_decl.1[syms]
 538     {
 539       $$ = symbol_list_type_set ($syms, $TAG);
 540     }
 541 | token_decls TAG token_decl.1[syms]   /* The tag is applied to the new group only, which is then appended.  */
 542     {
 543       $$ = symbol_list_append ($1, symbol_list_type_set ($syms, $TAG));
 544     }
 545 ;
 546
 547 // One or more symbol declarations for %token or %nterm.
 548 token_decl.1:   /* Build the list by appending one new list node per declaration.  */
 549   token_decl                { $$ = symbol_list_sym_new ($1, @1); }
 550 | token_decl.1 token_decl   { $$ = symbol_list_append ($1, symbol_list_sym_new ($2, @2)); }
 551
 552 // One symbol declaration for %token or %nterm.
 553 token_decl:   /* A symbol, optionally followed by a number and by an alias.  */
 554   id int.opt[num] alias
 555     {
 556       $$ = $id;
 557       symbol_class_set ($id, current_class, @id, true);   /* current_class was set by the enclosing directive.  */
 558       if (0 <= $num)   /* int.opt yields -1 when no number was given.  */
 559         symbol_code_set ($id, $num, @num);
 560       if ($alias)   /* alias yields NULL when absent.  */
 561         symbol_make_alias ($id, $alias, @alias);
 562     }
 563 ;
 564
 565 %type <int> int.opt;
 566 int.opt:   /* Optional integer; -1 stands for "absent" (users test for a value of 0 or more).  */
 567   %empty  { $$ = -1; }
 568 | INT_LITERAL
 569 ;
 570
 571 %type <symbol*> alias;
 572 alias:   /* Optional string alias of a token; NULL when absent.  */
 573   %empty         { $$ = NULL; }
 574 | string_as_id   { $$ = $1; }
 575 | TSTRING   /* Like string_as_id, but the symbol is also flagged as translatable.  */
 576     {
 577       $$ = symbol_get ($1, @1);
 578       symbol_class_set ($$, token_sym, @1, false);
 579       $$->translatable = true;
 580     }
 581 ;
 582
 583
 584 /*-------------------------------------.
 585 | token_decls_for_prec (%left, etc.).  |
 586 `-------------------------------------*/
 587
 588 // A non empty list of possibly tagged tokens for precedence declaration.
 589 //
 590 // Similar to %token (token_decls), but in '%left FOO 1 "foo"', it treats
 591 // FOO and "foo" as two different symbols instead of aliasing them.
 592 token_decls_for_prec:   /* Same structure as token_decls, over token_decl_for_prec.1 groups.  */
 593   token_decl_for_prec.1[syms]
 594     {
 595       $$ = $syms;
 596     }
 597 | TAG token_decl_for_prec.1[syms]
 598     {
 599       $$ = symbol_list_type_set ($syms, $TAG);
 600     }
 601 | token_decls_for_prec TAG token_decl_for_prec.1[syms]   /* The tag is applied to the new group only, which is then appended.  */
 602     {
 603       $$ = symbol_list_append ($1, symbol_list_type_set ($syms, $TAG));
 604     }
 605 ;
 606
 607 // One or more token declarations for precedence declaration.
 608 token_decl_for_prec.1:   /* Build the list by appending one new list node per declaration.  */
 609   token_decl_for_prec
 610     { $$ = symbol_list_sym_new ($1, @1); }
 611 | token_decl_for_prec.1 token_decl_for_prec
 612     { $$ = symbol_list_append ($1, symbol_list_sym_new ($2, @2)); }
 613
 614 // One token declaration for precedence declaration.
 615 token_decl_for_prec:   /* A symbol with an optional number, or a string; no aliasing here.  */
 616   id int.opt[num]
 617     {
 618       $$ = $id;
 619       symbol_class_set ($id, token_sym, @id, false);   /* Always token_sym here, whatever current_class is.  */
 620       if (0 <= $num)   /* int.opt yields -1 when no number was given.  */
 621         symbol_code_set ($id, $num, @num);
 622     }
 623 | string_as_id
 624 ;
 625
 626
 627 /*-----------------------------------.
 628 | symbol_decls (argument of %type).  |
 629 `-----------------------------------*/
 630
 631 // A non empty list of typed symbols (for %type).
 632 symbol_decls:   /* Same structure as token_decls, over symbols.1 groups.  */
 633   symbols.1[syms]
 634     {
 635       $$ = $syms;
 636     }
 637 | TAG symbols.1[syms]
 638     {
 639       $$ = symbol_list_type_set ($syms, $TAG);
 640     }
 641 | symbol_decls TAG symbols.1[syms]   /* The tag is applied to the new group only, which is then appended.  */
 642     {
 643       $$ = symbol_list_append ($1, symbol_list_type_set ($syms, $TAG));
 644     }
 645 ;
 646
 647 // One or more symbols.
 648 symbols.1:   /* Nonempty list of symbols; also used by the %start alternative of grammar_declaration.  */
 649   symbol
 650     {
 651       if (current_class != unknown_sym)   /* Only set a class when an enclosing directive provided one.  */
 652         symbol_class_set ($symbol, current_class, @symbol, false);
 653       $$ = symbol_list_sym_new ($symbol, @symbol);
 654     }
 655   | symbols.1 symbol
 656     {
 657       if (current_class != unknown_sym)
 658         symbol_class_set ($symbol, current_class, @symbol, false);
 659       $$ = symbol_list_append ($1, symbol_list_sym_new ($symbol, @symbol));
 660     }
 661 ;
 662
 663         /*------------------------------------------.
 664         | The grammar section: between the two %%.  |
 665         `------------------------------------------*/
 666
 667 grammar:   /* Nonempty, left-recursive sequence of rules and declarations.  */
 668   rules_or_grammar_declaration
 669 | grammar rules_or_grammar_declaration
 670 ;
 671
 672 /* As a Bison extension, one can use the grammar declarations in the
 673    body of the grammar.  */
 674 rules_or_grammar_declaration:
 675   rules
 676 | grammar_declaration ";"   /* Here a declaration must be followed by a semicolon.  */
 677 | error ";"   /* Error recovery up to a semicolon.  */
 678     {
 679       yyerrok;
 680     }
 681 ;
 682
 683 rules:   /* All the right-hand sides sharing one left-hand side.  */
 684   id_colon named_ref.opt { current_lhs ($1, @1, $2); } ":" rhses.1   /* The mid-rule action records the left-hand side before any right-hand side is parsed.  */
 685     {
 686       /* Free the current lhs. */
 687       current_lhs (0, @1, 0);
 688     }
 689 ;
 690
 691 rhses.1:   /* Right-hand sides separated by "|"; each completed one ends the current rule.  */
 692   rhs                { grammar_current_rule_end (@rhs); }
 693 | rhses.1 "|" rhs    { grammar_current_rule_end (@rhs); }
 694 | rhses.1 ";"   /* Semicolons are accepted and have no action.  */
 695 ;
 696
 697 %token PERCENT_EMPTY "%empty";
 698 rhs:   /* One right-hand side, built left to right; each component updates the current rule.  */
 699   %empty   /* Reduced first: begins a new rule for the left-hand side recorded by current_lhs.  */
 700     { grammar_current_rule_begin (current_lhs_symbol, current_lhs_loc,
 701                                   current_lhs_named_ref); }
 702 | rhs symbol named_ref.opt
 703     { grammar_current_rule_symbol_append ($2, @2, $3); }
 704 | rhs tag.opt "{...}"[action] named_ref.opt[name]   /* An action, with an optional tag before it and an optional name after it.  */
 705     { grammar_current_rule_action_append ($action, @action, $name, $[tag.opt]); }
 706 | rhs "%?{...}"
 707     { grammar_current_rule_predicate_append ($2, @2); }
 708 | rhs "%empty"
 709     { grammar_current_rule_empty_set (@2); }
 710 | rhs "%prec" symbol
 711     { grammar_current_rule_prec_set ($3, @3); }
 712 | rhs "%dprec" INT_LITERAL
 713     { grammar_current_rule_dprec_set ($3, @3); }
 714 | rhs "%merge" TAG
 715     { grammar_current_rule_merge_set ($3, @3); }
 716 | rhs "%expect" INT_LITERAL
 717     { grammar_current_rule_expect_sr ($3, @3); }
 718 | rhs "%expect-rr" INT_LITERAL
 719     { grammar_current_rule_expect_rr ($3, @3); }
 720 ;
 721
 722 named_ref.opt:   /* Optional bracketed name; NULL when absent.  */
 723   %empty         { $$ = NULL; }
 724 | BRACKETED_ID   { $$ = named_ref_new ($1, @1); }
 725 ;
 726
 727
 728 /*---------------------.
 729 | variable and value.  |
 730 `---------------------*/
 731
 732 variable:   /* Name of a %define variable: a plain identifier.  */
 733   ID
 734 ;
 735
 736 /* Some content or empty by default. */
 737 %code requires {
 738   #include "muscle-tab.h"
 739   typedef struct   /* Semantic value of "value": the text of a %define value and its kind.  */
 740   {
 741     char const *chars;
 742     muscle_kind kind;
 743   } value_type;
 744 };
 745 %type <value_type> value;
 746 %printer   /* Print the text with the delimiters matching its kind.  */
 747 {
 748   switch ($$.kind)
 749     {
 750     case muscle_code:    fprintf (yyo,  "{%s}",  $$.chars); break;
 751     case muscle_keyword: fprintf (yyo,   "%s",   $$.chars); break;
 752     case muscle_string:  fprintf (yyo, "\"%s\"", $$.chars); break;
 753     }
 754 } <value_type>;
 755
 756 value:   /* Value of a %define variable.  */
 757   %empty  { $$.kind = muscle_keyword; $$.chars = ""; }   /* No value: an empty keyword.  */
 758 | ID      { $$.kind = muscle_keyword; $$.chars = $1; }
 759 | STRING  { $$.kind = muscle_string;  $$.chars = unquote ($1); gram_scanner_last_string_free ();}
 760 | "{...}" { $$.kind = muscle_code;    $$.chars = strip_braces ($1); gram_scanner_last_string_free (); }
 761 ;
 762
 763
 764 /*--------------.
 765 | Identifiers.  |
 766 `--------------*/
 767
 768 /* Identifiers are returned as uniqstr values by the scanner.
 769    Depending on their use, we may need to make them genuine symbols.  */
 770
 771 id:   /* An identifier or a character literal, turned into a symbol.  */
 772   ID
 773     { $$ = symbol_from_uniqstr ($1, @1); }
 774 | CHAR_LITERAL
 775     {
 776       const char *var = "api.token.raw";
 777       if (current_class == nterm_sym)   /* Inside a %nterm declaration, see symbol_declaration.  */
 778         {
 779           complain (&@1, complaint,
 780                     _("character literals cannot be nonterminals"));
 781           YYERROR;
 782         }
 783       if (muscle_percent_define_ifdef (var))   /* Diagnosed, but the symbol is still created below.  */
 784         {
 785           complain (&@1, complaint,
 786                     _("character literals cannot be used together"
 787                     " with %s"), var);
 788           location loc = muscle_percent_define_get_loc (var);
 789           subcomplain (&loc, complaint, _("definition of %s"), var);
 790         }
 791       $$ = symbol_get (char_name ($1), @1);
 792       symbol_class_set ($$, token_sym, @1, false);
 793       symbol_code_set ($$, $1, @1);   /* The code of the token is the value of the character.  */
 794     }
 795 ;
 796
 797 id_colon:   /* The identifier starting a rule (see "rules"), turned into a symbol.  */
 798   ID_COLON { $$ = symbol_from_uniqstr ($1, @1); }
 799 ;
 800
 801
 802 symbol:   /* Any symbol: identifier, character literal, or string.  */
 803   id
 804 | string_as_id
 805 ;
 806
 807 /* A string used as an ID.  */
 808 string_as_id:
 809   STRING
 810     {
 811       $$ = symbol_get ($1, @1);
 812       symbol_class_set ($$, token_sym, @1, false);   /* A string always denotes a token.  */
 813     }
 814 ;
 815
 816 epilogue.opt:   /* Optional second separator followed by the epilogue.  */
 817   %empty
 818 | "%%" EPILOGUE
 819     {
 820       muscle_code_grow ("epilogue", translate_code ($2, @2, true), @2);   /* Translated as plain code.  */
 821       code_scanner_last_string_free ();
 822     }
 823 ;
 824
 825 %%
 826
 827 int
 828 yyreport_syntax_error (const yypcontext_t *ctx)
 829 {
 830   int res = 0;
 831   /* Arguments of format: reported tokens (one for the "unexpected",
 832      one per "expected"). */
 833   enum { ARGS_MAX = 5 };
 834   const char *argv[ARGS_MAX];
 835   int argc = 0;
 836   yysymbol_kind_t unexpected = yypcontext_token (ctx);
 837   if (unexpected != YYSYMBOL_YYEMPTY)
 838     {
 839       argv[argc++] = yysymbol_name (unexpected);
 840       yysymbol_kind_t expected[ARGS_MAX - 1];
 841       int nexpected = yypcontext_expected_tokens (ctx, expected, ARGS_MAX - 1);
 842       if (nexpected < 0)
 843         res = nexpected;
 844       else
 845         for (int i = 0; i < nexpected; ++i)
 846           argv[argc++] = yysymbol_name (expected[i]);
 847     }
 848   syntax_error (*yypcontext_location (ctx), argc, argv);
 849   return res;
 850 }
 851
 852
 853 /* Return the location of the left-hand side of a rule whose
 854    right-hand side is RHS[1] ... RHS[N].  Ignore empty nonterminals in
 855    the right-hand side, and return an empty location equal to the end
 856    boundary of RHS[0] if the right-hand side is empty.  */
 857
 858 static YYLTYPE
 859 lloc_default (YYLTYPE const *rhs, int n)
 860 {
 861   YYLTYPE loc;
 862
 863   /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
 864      The bug is fixed in 7.4.2m, but play it safe for now.  */
 865   loc.start = rhs[n].end;
 866   loc.end = rhs[n].end;
 867
 868   /* Ignore empty nonterminals the start of the right-hand side.
 869      Do not bother to ignore them at the end of the right-hand side,
 870      since empty nonterminals have the same end as their predecessors.  */
 871   for (int i = 1; i <= n; i++)
 872     if (! equal_boundaries (rhs[i].start, rhs[i].end))
 873       {
 874         loc.start = rhs[i].start;
 875         break;
 876       }
 877
 878   return loc;
 879 }
 880
 881 static
 882 char *strip_braces (char *code)
 883 {
 884   code[strlen (code) - 1] = 0;
 885   return code + 1;
 886 }
 887
 888 static
 889 char const *
 890 translate_code (char *code, location loc, bool plain)
 891 {
 892   code_props plain_code;
 893   if (plain)
 894     code_props_plain_init (&plain_code, code, loc);
 895   else
 896     code_props_symbol_action_init (&plain_code, code, loc);
 897   code_props_translate_code (&plain_code);
 898   gram_scanner_last_string_free ();
 899   return plain_code.code;
 900 }
 901
 902 static
 903 char const *
 904 translate_code_braceless (char *code, location loc)
 905 {
 906   return translate_code (strip_braces (code), loc, true);
 907 }
 908
 909 static void
 910 add_param (param_type type, char *decl, location loc)
 911 {
 912   static char const alphanum[26 + 26 + 1 + 10 + 1] =
 913     "abcdefghijklmnopqrstuvwxyz"
 914     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
 915     "_"
 916     "0123456789";
 917
 918   char const *name_start = NULL;
 919   {
 920     char *p;
 921     /* Stop on last actual character.  */
 922     for (p = decl; p[1]; p++)
 923       if ((p == decl
 924            || ! memchr (alphanum, p[-1], sizeof alphanum - 1))
 925           && memchr (alphanum, p[0], sizeof alphanum - 10 - 1))
 926         name_start = p;
 927
 928     /* Strip the surrounding '{' and '}', and any blanks just inside
 929        the braces.  */
 930     --p;
 931     while (c_isspace ((unsigned char) *p))
 932       --p;
 933     p[1] = '\0';
 934     ++decl;
 935     while (c_isspace ((unsigned char) *decl))
 936       ++decl;
 937   }
 938
 939   if (! name_start)
 940     complain (&loc, complaint, _("missing identifier in parameter declaration"));
 941   else
 942     {
 943       char *name = xmemdup0 (name_start, strspn (name_start, alphanum));
 944       if (type & param_lex)
 945         muscle_pair_list_grow ("lex_param", decl, name);
 946       if (type & param_parse)
 947         muscle_pair_list_grow ("parse_param", decl, name);
 948       free (name);
 949     }
 950
 951   gram_scanner_last_string_free ();
 952 }
 953
 954
 955 static void
 956 handle_header (char const *value)
 957 {
 958   header_flag = true;
 959   if (value)
 960     {
 961       char *file = unquote (value);
 962       spec_header_file = xstrdup (file);
 963       gram_scanner_last_string_free ();
 964       unquote_free (file);
 965     }
 966 }
 967
 968
 969 static void
 970 handle_error_verbose (location const *loc, char const *directive)
 971 {
 972   bison_directive (loc, directive);
 973   muscle_percent_define_insert (directive, *loc, muscle_keyword, "",
 974                                 MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE);
 975 }
 976
 977
 978 static void
 979 handle_file_prefix (location const *loc,
 980                     location const *dir_loc,
 981                     char const *directive, char const *value_quoted)
 982 {
 983   char *value = unquote (value_quoted);
 984   bison_directive (loc, directive);
 985   bool warned = false;
 986
 987   if (location_empty (spec_file_prefix_loc))
 988     {
 989       spec_file_prefix_loc = *loc;
 990       spec_file_prefix = value;
 991     }
 992   else
 993     {
 994       duplicate_directive (directive, spec_file_prefix_loc, *loc);
 995       warned = true;
 996     }
 997
 998   if (!warned
 999       && STRNEQ (directive, "%file-prefix"))
1000     deprecated_directive (dir_loc, directive, "%file-prefix");
1001 }
1002
1003 static void
1004 handle_language (location const *loc, char const *lang)
1005 {
1006   language_argmatch (unquote (lang), grammar_prio, *loc);
1007 }
1008
1009
1010 static void
1011 handle_name_prefix (location const *loc,
1012                     char const *directive, char const *value_quoted)
1013 {
1014   char *value = unquote (value_quoted);
1015   bison_directive (loc, directive);
1016
1017   char buf1[1024];
1018   size_t len1 = sizeof (buf1);
1019   char *old = asnprintf (buf1, &len1, "%s\"%s\"", directive, value);
1020   if (!old)
1021     xalloc_die ();
1022
1023   if (location_empty (spec_name_prefix_loc))
1024     {
1025       spec_name_prefix = value;
1026       spec_name_prefix_loc = *loc;
1027
1028       char buf2[1024];
1029       size_t len2 = sizeof (buf2);
1030       char *new = asnprintf (buf2, &len2, "%%define api.prefix {%s}", value);
1031       if (!new)
1032         xalloc_die ();
1033       deprecated_directive (loc, old, new);
1034       if (new != buf2)
1035         free (new);
1036     }
1037   else
1038     duplicate_directive (old, spec_file_prefix_loc, *loc);
1039
1040   if (old != buf1)
1041     free (old);
1042 }
1043
1044
1045 static void
1046 handle_pure_parser (location const *loc, char const *directive)
1047 {
1048   bison_directive (loc, directive);
1049   deprecated_directive (loc, directive, "%define api.pure");
1050   muscle_percent_define_insert ("api.pure", *loc, muscle_keyword, "",
1051                                 MUSCLE_PERCENT_DEFINE_GRAMMAR_FILE);
1052 }
1053
1054
1055 static void
1056 handle_require (location const *loc, char const *version_quoted)
1057 {
1058   char *version = unquote (version_quoted);
1059   required_version = strversion_to_int (version);
1060   if (required_version == -1)
1061     {
1062       complain (loc, complaint, _("invalid version requirement: %s"),
1063                 version);
1064       required_version = 0;
1065     }
1066   else
1067     {
1068       const char* package_version =
1069         0 < strverscmp (api_version, PACKAGE_VERSION)
1070         ? api_version : PACKAGE_VERSION;
1071       if (0 < strverscmp (version, package_version))
1072         {
1073           complain (loc, complaint, _("require bison %s, but have %s"),
1074                     version, package_version);
1075           exit (EX_MISMATCH);
1076         }
1077     }
1078   unquote_free (version);
1079   gram_scanner_last_string_free ();
1080 }
1081
1082 static void
1083 handle_skeleton (location const *loc, char const *skel_quoted)
1084 {
1085   char *skel = unquote (skel_quoted);
1086   char const *skeleton_user = skel;
1087   if (strchr (skeleton_user, '/'))
1088     {
1089       size_t dir_length = strlen (grammar_file);
1090       while (dir_length && grammar_file[dir_length - 1] != '/')
1091         --dir_length;
1092       while (dir_length && grammar_file[dir_length - 1] == '/')
1093         --dir_length;
1094       char *skeleton_build =
1095         xmalloc (dir_length + 1 + strlen (skeleton_user) + 1);
1096       if (dir_length > 0)
1097         {
1098           memcpy (skeleton_build, grammar_file, dir_length);
1099           skeleton_build[dir_length++] = '/';
1100         }
1101       strcpy (skeleton_build + dir_length, skeleton_user);
1102       skeleton_user = uniqstr_new (skeleton_build);
1103       free (skeleton_build);
1104     }
1105   skeleton_arg (skeleton_user, grammar_prio, *loc);
1106 }
1107
1108
1109 static void
1110 handle_yacc (location const *loc)
1111 {
1112   const char *directive = "%yacc";
1113   bison_directive (loc, directive);
1114   if (location_empty (yacc_loc))
1115     yacc_loc = *loc;
1116   else
1117     duplicate_directive (directive, yacc_loc, *loc);
1118 }
1119
1120
1121 static void
1122 gram_error (location const *loc, char const *msg)
1123 {
1124   complain (loc, complaint, "%s", msg);
1125 }
1126
1127 static char const *
1128 char_name (char c)
1129 {
1130   if (c == '\'')
1131     return "'\\''";
1132   else
1133     {
1134       char buf[4];
1135       buf[0] = '\''; buf[1] = c; buf[2] = '\''; buf[3] = '\0';
1136       return quotearg_style (escape_quoting_style, buf);
1137     }
1138 }
1139
1140 static void
1141 current_lhs (symbol *sym, location loc, named_ref *ref)
1142 {
1143   current_lhs_symbol = sym;
1144   current_lhs_loc = loc;
1145   if (sym)
1146     symbol_location_as_lhs_set (sym, loc);
1147   /* In order to simplify memory management, named references for lhs
1148      are always assigned by deep copy into the current symbol_list
1149      node.  This is because a single named-ref in the grammar may
1150      result in several uses when the user factors lhs between several
1151      rules using "|".  Therefore free the parser's original copy.  */
1152   free (current_lhs_named_ref);
1153   current_lhs_named_ref = ref;
1154 }
1155
/* Call begin_use_class for the "value" class on stream YYO.  */
static void
tron (FILE *yyo)
{
  begin_use_class ("value", yyo);
}
1160
/* Call end_use_class for the "value" class on stream YYO.  */
static void
troff (FILE *yyo)
{
  end_use_class ("value", yyo);
}
1165
1166
1167 /*----------.
1168 | Unquote.  |
1169 `----------*/
1170
/* Obstack holding the strings built by unquote.  It is set up by
   parser_init and released by parser_free.  */
struct obstack obstack_for_unquote;
1172
1173 void
1174 parser_init (void)
1175 {
1176   obstack_init (&obstack_for_unquote);
1177 }
1178
1179 void
1180 parser_free (void)
1181 {
1182   obstack_free (&obstack_for_unquote, 0);
1183 }
1184
1185 static void
1186 unquote_free (char *last_string)
1187 {
1188   obstack_free (&obstack_for_unquote, last_string);
1189 }
1190
1191 static char *
1192 unquote (const char *cp)
1193 {
1194 #define GROW(Char)                              \
1195   obstack_1grow (&obstack_for_unquote, Char);
1196   for (++cp; *cp && *cp != '"'; ++cp)
1197     switch (*cp)
1198       {
1199       case '"':
1200         break;
1201       case '\\':
1202         ++cp;
1203         switch (*cp)
1204           {
1205           case '0': case '1': case '2': case '3': case '4':
1206           case '5': case '6': case '7': case '8': case '9':
1207             {
1208               int c = cp[0] - '0';
1209               if (c_isdigit (cp[1]))
1210                 {
1211                   ++cp;
1212                   c = c * 8 + cp[0] - '0';
1213                 }
1214               if (c_isdigit (cp[1]))
1215                 {
1216                   ++cp;
1217                   c = c * 8 + cp[0] - '0';
1218                 }
1219               GROW (c);
1220             }
1221             break;
1222
1223           case 'a': GROW ('\a'); break;
1224           case 'b': GROW ('\b'); break;
1225           case 'f': GROW ('\f'); break;
1226           case 'n': GROW ('\n'); break;
1227           case 'r': GROW ('\r'); break;
1228           case 't': GROW ('\t'); break;
1229           case 'v': GROW ('\v'); break;
1230
1231           case 'x':
1232             {
1233               int c = 0;
1234               while (c_isxdigit (cp[1]))
1235                 {
1236                   ++cp;
1237                   c = (c * 16 + (c_isdigit (cp[0]) ? cp[0] - '0'
1238                                  : c_isupper (cp[0]) ? cp[0] - 'A'
1239                                  : cp[0] - '0'));
1240                 }
1241               GROW (c);
1242               break;
1243             }
1244           }
1245         break;
1246
1247       default:
1248         GROW (*cp);
1249         break;
1250       }
1251   assert (*cp == '"');
1252   ++cp;
1253   assert (*cp == '\0');
1254 #undef GROW
1255   return obstack_finish0 (&obstack_for_unquote);
1256 }