contrib/flex/parse.y

   1 /* parse.y - parser for flex input */
   2
   3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
     /* Option tokens: OPTION_OP opens an option list (rule `options`); each
      * OPT_* is accepted by rule `option` in the form OPT_* '=' NAME.
      */
   4 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS OPT_HEADER OPT_EXTRA_TYPE
   5 %token OPT_TABLES
   6
     /* One token per character class expression (CCE_*) and per negated
      * expression (CCE_NEG_*).
      * NOTE(review): presumably consumed by the character-class rules later
      * in the grammar -- confirm.
      */
   7 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
   8 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
   9
  10 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
  11 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
  12
     /* Set operators between bracketed classes, used by rule `fullccl`;
      * %left makes both left-associative at the same precedence level.
      */
  13 %left CCL_OP_DIFF CCL_OP_UNION
  14
  15 /*
  16  * POSIX and AT&T lex place the
  17  * precedence of the repeat operator, {}, below that of concatenation.
  18  * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
  19  * Regular Expression (ERE) precedence that has the repeat operator
  20  * higher than concatenation.  This causes ab{3} to yield abbb.
  21  *
  22  * In order to support the POSIX and AT&T precedence and the flex
  23  * precedence we define two token sets for the begin and end tokens of
  24  * the repeat operator, '{' and '}'.  The lexical scanner chooses
  25  * which tokens to return based on whether posix_compat or lex_compat
  26  * are specified. Specifying either posix_compat or lex_compat will
  27  * cause flex to parse scanner files as per the AT&T and
  28  * POSIX-mandated behavior.
  29  */
  30
  31 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
  32
  33
  34 %{
  35 /*  Copyright (c) 1990 The Regents of the University of California. */
  36 /*  All rights reserved. */
  37
  38 /*  This code is derived from software contributed to Berkeley by */
  39 /*  Vern Paxson. */
  40
  41 /*  The United States Government has rights in this work pursuant */
  42 /*  to contract no. DE-AC03-76SF00098 between the United States */
  43 /*  Department of Energy and the University of California. */
  44
  45 /*  This file is part of flex. */
  46
  47 /*  Redistribution and use in source and binary forms, with or without */
  48 /*  modification, are permitted provided that the following conditions */
  49 /*  are met: */
  50
  51 /*  1. Redistributions of source code must retain the above copyright */
  52 /*     notice, this list of conditions and the following disclaimer. */
  53 /*  2. Redistributions in binary form must reproduce the above copyright */
  54 /*     notice, this list of conditions and the following disclaimer in the */
  55 /*     documentation and/or other materials provided with the distribution. */
  56
  57 /*  Neither the name of the University nor the names of its contributors */
  58 /*  may be used to endorse or promote products derived from this software */
  59 /*  without specific prior written permission. */
  60
  61 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
  62 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
  63 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
  64 /*  PURPOSE. */
  65
  66 #include "flexdef.h"
  67 #include "tables.h"
  68
  69 int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
  70 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
     /* Scratch state shared by the grammar actions below.  headcnt, trailcnt,
      * rulelen, trlcontxt, varlength and variable_trail_rule are reset by
      * `initforrule` before each rule; xcluflg is set by `startconddecl`;
      * i is the loop index the actions share.
      */
  71
     /* Stack of start-condition numbers in effect for the rule being parsed.
      * Allocated in `sect1end` with lastsc + 1 entries; the actions index it
      * from 1, and scon_stk_ptr is the index of the top entry (0 == empty).
      */
  72 int *scon_stk;
  73 int scon_stk_ptr;
  74
  75 static int madeany = false;  /* whether we've made the '.' character class */
     /* Class numbers created on first use by the '.' action in `singleton`:
      * ccldot is the negation of { '\n' }, cclany the negation of an empty
      * class.
      */
  76 static int ccldot, cclany;
  77 int previous_continued_action;  /* whether the previous rule's action was '|' */
  78
     /* Format a warning that takes two arguments into a MAXLINE-byte local
      * buffer (bounded by snprintf) and hand the result to warn().
      */
  79 #define format_warn3(fmt, a1, a2) \
  80         do{ \
  81         char fw3_msg[MAXLINE];\
  82         snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
  83         warn( fw3_msg );\
  84         }while(0)
  85
  86 /* Expand a POSIX character class expression: add to currccl every
      * character code c in [0, csize) for which both isascii(c) and func(c)
      * hold.  func is invoked as func(c) with an int argument.
      */
  87 #define CCL_EXPR(func) \
  88         do{ \
  89         int c; \
  90         for ( c = 0; c < csize; ++c ) \
  91                 if ( isascii(c) && func(c) ) \
  92                         ccladd( currccl, c ); \
  93         }while(0)
  94
  95 /* Negated class: add to currccl every character code c in [0, csize)
      * for which func(c) is false.
      * NOTE(review): unlike CCL_EXPR there is no isascii(c) guard here, so
      * codes outside the ASCII range are passed straight to func and are
      * added whenever it rejects them -- confirm this asymmetry is intended.
      */
  96 #define CCL_NEG_EXPR(func) \
  97         do{ \
  98         int c; \
  99         for ( c = 0; c < csize; ++c ) \
 100                 if ( !func(c) ) \
 101                         ccladd( currccl, c ); \
 102         }while(0)
 103
 104 /* While POSIX defines isblank(), it's not ANSI C.  This replacement
      * accepts only space and horizontal tab.  The argument may be evaluated
      * twice, so it must not have side effects.
      */
 105 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
 106
 107 /* On some over-ambitious machines, such as DEC Alpha's, the default
 108  * token type is "long" instead of "int"; this leads to problems with
 109  * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
 110  * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
 111  * following should ensure that the default token type is "int".
 112  */
 113 #define YYSTYPE int
 114
 115 %}
 116
 117 %%
 118 goal            :  initlex sect1 sect1end sect2 initforrule
                             /* Start symbol.  After both sections have been
                              * parsed (the trailing `initforrule` has already
                              * opened a fresh rule), append the default rule:
                              * a single state on the negation of a freshly
                              * initialised class, branched into scset[] for
                              * every start condition 1..lastsc.  Its action
                              * is ECHO, or a fatal "scanner jammed" error when
                              * spprdflt is set.
                              */
 119                         { /* add default rule */
 120                         int def_rule;
 121
 122                         pat = cclinit();
 123                         cclnegate( pat );
 124
 125                         def_rule = mkstate( -pat );
 126
 127                         /* Remember the number of the default rule so we
 128                          * don't generate "can't match" warnings for it.
 129                          */
 130                         default_rule = num_rules;
 131
 132                         finish_rule( def_rule, false, 0, 0, 0);
 133
 134                         for ( i = 1; i <= lastsc; ++i )
 135                                 scset[i] = mkbranch( scset[i], def_rule );
 136
 137                         if ( spprdflt )
 138                                 add_action(
 139                                 "YY_FATAL_ERROR( \"flex scanner jammed\" )" );
 140                         else
 141                                 add_action( "ECHO" );
 142
                             /* Terminate whichever action text was chosen. */
 143                         add_action( ";\n\tYY_BREAK\n" );
 144                         }
 145                 ;
 146
 147 initlex         :
                             /* Empty production reduced before section 1 is
                              * read.  Installs "INITIAL" through scinstal(),
                              * passing false for the flag that `namelist1`
                              * fills from xcluflg.
                              */
 148                         { /* initialize for processing rules */
 149
 150                         /* Create default DFA start condition. */
 151                         scinstal( "INITIAL", false );
 152                         }
 153                 ;
 154
 155 sect1           :  sect1 startconddecl namelist1
                     /* Section 1: a possibly empty, left-recursive sequence of
                      * start-condition declarations and option lists.  The
                      * `error` alternative reports a generic section-1 syntax
                      * error through synerr().
                      */
 156                 |  sect1 options
 157                 |
 158                 |  error
 159                         { synerr( _("unknown error processing section 1") ); }
 160                 ;
 161
 162 sect1end        :  SECTEND
                             /* End of section 1: run check_options(), then
                              * allocate the start-condition stack with
                              * lastsc + 1 entries (the actions index it from
                              * 1) and mark it empty.
                              */
 163                         {
 164                         check_options();
 165                         scon_stk = allocate_integer_array( lastsc + 1 );
 166                         scon_stk_ptr = 0;
 167                         }
 168                 ;
 169
 170 startconddecl   :  SCDECL
                             /* Record which declaration keyword was seen;
                              * `namelist1` passes xcluflg to scinstal() for
                              * every name that follows.
                              */
 171                         { xcluflg = false; }
 172
 173                 |  XSCDECL
 174                         { xcluflg = true; }
 175                 ;
 176
 177 namelist1       :  namelist1 NAME
                             /* One or more names following a start-condition
                              * declaration; each is installed with the xcluflg
                              * recorded by `startconddecl`.  nmstr presumably
                              * holds the text of the NAME just scanned (it is
                              * set outside this grammar) -- confirm.
                              */
 178                         { scinstal( nmstr, xcluflg ); }
 179
 180                 |  NAME
 181                         { scinstal( nmstr, xcluflg ); }
 182
 183                 |  error
 184                         { synerr( _("bad start condition list") ); }
 185                 ;
 186
 187 options         :  OPTION_OP optionlist
                     /* An option directive: OPTION_OP followed by zero or more
                      * `option` entries (optionlist may be empty).
                      */
 188                 ;
 189
 190 optionlist      :  optionlist option
 191                 |
 192                 ;
 193
 194 option          :  OPT_OUTFILE '=' NAME
                             /* Every alternative has the form OPT_* '=' NAME
                              * and stores copy_string( nmstr ) in the matching
                              * global.  OPT_OUTFILE additionally sets
                              * did_outfilename and OPT_TABLES additionally
                              * sets tablesext.
                              */
 195                         {
 196                         outfilename = copy_string( nmstr );
 197                         did_outfilename = 1;
 198                         }
 199                 |  OPT_EXTRA_TYPE '=' NAME
 200                         { extra_type = copy_string( nmstr ); }
 201                 |  OPT_PREFIX '=' NAME
 202                         { prefix = copy_string( nmstr ); }
 203                 |  OPT_YYCLASS '=' NAME
 204                         { yyclass = copy_string( nmstr ); }
 205                 |  OPT_HEADER '=' NAME
 206                         { headerfilename = copy_string( nmstr ); }
 207             |  OPT_TABLES '=' NAME
 208             { tablesext = true; tablesfilename = copy_string( nmstr ); }
 209                 ;
 210
 211 sect2           :  sect2 scon initforrule flexrule '\n'
                             /* Section 2: a possibly empty sequence of rules,
                              * each with an optional start-condition prefix,
                              * and of `scon { ... }` groups whose body is a
                              * nested sect2.  The value of `scon` ($2) is the
                              * stack depth from before the prefix pushed its
                              * conditions, so assigning it back to
                              * scon_stk_ptr pops exactly what the prefix
                              * added once the rule or group is finished.
                              */
 212                         { scon_stk_ptr = $2; }
 213                 |  sect2 scon '{' sect2 '}'
 214                         { scon_stk_ptr = $2; }
 215                 |
 216                 ;
 217
 218 initforrule     :
                             /* Empty production used as a hook in front of
                              * each rule (and once more at the end of `goal`).
                              * Clears the trailing-context flags and the
                              * length counters, resets current_state_type,
                              * snapshots continued_action into
                              * previous_continued_action, sets in_rule, and
                              * opens a new rule with new_rule().
                              */
 219                         {
 220                         /* Initialize for a parse of one rule. */
 221                         trlcontxt = variable_trail_rule = varlength = false;
 222                         trailcnt = headcnt = rulelen = 0;
 223                         current_state_type = STATE_NORMAL;
 224                         previous_continued_action = continued_action;
 225                         in_rule = true;
 226
 227                         new_rule();
 228                         }
 229                 ;
 230
 231 flexrule        :  '^' rule
                             /* One complete rule: a '^'-anchored pattern, a
                              * plain pattern, EOF_OP, or an error.
                              *
                              * Both pattern forms finish the rule with
                              * finish_rule() and then branch the pattern into
                              * a per-start-condition table: scbol[] for
                              * anchored patterns, scset[] for plain ones.
                              * When a start-condition prefix is active
                              * (scon_stk_ptr > 0) only the stacked conditions
                              * are extended; otherwise every condition whose
                              * scxclu[] entry is clear.
                              */
 232                         {
 233                         pat = $2;
 234                         finish_rule( pat, variable_trail_rule,
 235                                 headcnt, trailcnt , previous_continued_action);
 236
 237                         if ( scon_stk_ptr > 0 )
 238                                 {
 239                                 for ( i = 1; i <= scon_stk_ptr; ++i )
 240                                         scbol[scon_stk[i]] =
 241                                                 mkbranch( scbol[scon_stk[i]],
 242                                                                 pat );
 243                                 }
 244
 245                         else
 246                                 {
 247                                 /* Add to all non-exclusive start conditions,
 248                                  * including the default (0) start condition.
 249                                  */
 250
 251                                 for ( i = 1; i <= lastsc; ++i )
 252                                         if ( ! scxclu[i] )
 253                                                 scbol[i] = mkbranch( scbol[i],
 254                                                                         pat );
 255                                 }
 256
                             /* bol_needed is latched by the first anchored
                              * rule; the performance note is therefore issued
                              * at most once, and only when
                              * performance_report > 1.
                              */
 257                         if ( ! bol_needed )
 258                                 {
 259                                 bol_needed = true;
 260
 261                                 if ( performance_report > 1 )
 262                                         pinpoint_message(
 263                         "'^' operator results in sub-optimal performance" );
 264                                 }
 265                         }
 266
 267                 |  rule
 268                         {
 269                         pat = $1;
 270                         finish_rule( pat, variable_trail_rule,
 271                                 headcnt, trailcnt , previous_continued_action);
 272
 273                         if ( scon_stk_ptr > 0 )
 274                                 {
 275                                 for ( i = 1; i <= scon_stk_ptr; ++i )
 276                                         scset[scon_stk[i]] =
 277                                                 mkbranch( scset[scon_stk[i]],
 278                                                                 pat );
 279                                 }
 280
 281                         else
 282                                 {
                                     /* No prefix: extend every start
                                      * condition whose scxclu[] entry is
                                      * clear.
                                      */
 283                                 for ( i = 1; i <= lastsc; ++i )
 284                                         if ( ! scxclu[i] )
 285                                                 scset[i] =
 286                                                         mkbranch( scset[i],
 287                                                                 pat );
 288                                 }
 289                         }
 290
 291                 |  EOF_OP
 292                         {
                             /* With an explicit prefix the stacked conditions
                              * are used as they are.  Otherwise every
                              * condition without an sceof[] entry is pushed
                              * first; build_eof_action() presumably reads the
                              * stack -- confirm.  `sect2` restores
                              * scon_stk_ptr afterwards, which pops these
                              * temporary entries.
                              */
 293                         if ( scon_stk_ptr > 0 )
 294                                 build_eof_action();
 295
 296                         else
 297                                 {
 298                                 /* This EOF applies to all start conditions
 299                                  * which don't already have EOF actions.
 300                                  */
 301                                 for ( i = 1; i <= lastsc; ++i )
 302                                         if ( ! sceof[i] )
 303                                                 scon_stk[++scon_stk_ptr] = i;
 304
 305                                 if ( scon_stk_ptr == 0 )
 306                                         warn(
 307                         "all start conditions already have <<EOF>> rules" );
 308
 309                                 else
 310                                         build_eof_action();
 311                                 }
 312                         }
 313
 314                 |  error
 315                         { synerr( _("unrecognized rule") ); }
 316                 ;
 317
 318 scon_stk_ptr    :
                             /* Empty marker nonterminal (it shares its name
                              * with the C variable).  Its value is the stack
                              * depth at the moment '<' has been read, before
                              * `namelist2` pushes anything; `scon` passes the
                              * value on so that `sect2` can pop back to it.
                              */
 319                         { $$ = scon_stk_ptr; }
 320                 ;
 321
 322 scon            :  '<' scon_stk_ptr namelist2 '>'
                             /* Optional start-condition prefix of a rule.  In
                              * all three alternatives the value is the stack
                              * depth from before this prefix pushed anything.
                              */
 323                         { $$ = $2; }
 324
 325                 |  '<' '*' '>'
 326                         {
 327                         $$ = scon_stk_ptr;
 328
                             /* Push every start condition 1..lastsc that is
                              * not already on the stack.  The inner loop is a
                              * linear search; j > scon_stk_ptr afterwards
                              * means it ran to completion without a match.
                              */
 329                         for ( i = 1; i <= lastsc; ++i )
 330                                 {
 331                                 int j;
 332
 333                                 for ( j = 1; j <= scon_stk_ptr; ++j )
 334                                         if ( scon_stk[j] == i )
 335                                                 break;
 336
 337                                 if ( j > scon_stk_ptr )
 338                                         scon_stk[++scon_stk_ptr] = i;
 339                                 }
 340                         }
 341
 342                 |
 343                         { $$ = scon_stk_ptr; }
 344                 ;
 345
 346 namelist2       :  namelist2 ',' sconname
                     /* Comma-separated list of start-condition names inside
                      * '<' ... '>'; each `sconname` pushes its own condition.
                      */
 347
 348                 |  sconname
 349
 350                 |  error
 351                         { synerr( _("bad start condition list") ); }
 352                 ;
 353
 354 sconname        :  NAME
                             /* Look the name up with sclookup(); a result of 0
                              * is reported as an undeclared start condition.
                              * Otherwise the condition is pushed on scon_stk
                              * unless it is already there, in which case a
                              * "specified twice" warning is issued instead.
                              */
 355                         {
 356                         if ( (scnum = sclookup( nmstr )) == 0 )
 357                                 format_pinpoint_message(
 358                                         "undeclared start condition %s",
 359                                         nmstr );
 360                         else
 361                                 {
 362                                 for ( i = 1; i <= scon_stk_ptr; ++i )
 363                                         if ( scon_stk[i] == scnum )
 364                                                 {
 365                                                 format_warn(
 366                                                         "<%s> specified twice",
 367                                                         scname[scnum] );
 368                                                 break;
 369                                                 }
 370
                                     /* i runs past the top of the stack only
                                      * when the loop found no duplicate.
                                      */
 371                                 if ( i > scon_stk_ptr )
 372                                         scon_stk[++scon_stk_ptr] = scnum;
 373                                 }
 374                         }
 375                 ;
 376
 377 rule            :  re2 re
                             /* Pattern part of a rule.  Alternatives:
                              *   re2 re      head (re2 is `re '/'`) followed
                              *               by trailing context
                              *   re2 re '$'  rejected: trailing context twice
                              *   re '$'      pattern whose trailing context
                              *               is a single '\n'
                              *   re          plain pattern
                              * The actions settle headcnt / trailcnt and the
                              * variable_trail_rule flag that `flexrule` hands
                              * to finish_rule().
                              */
 378                         {
 379                         if ( transchar[lastst[$2]] != SYM_EPSILON )
 380                                 /* Provide final transition \now/ so it
 381                                  * will be marked as a trailing context
 382                                  * state.
 383                                  */
 384                                 $2 = link_machines( $2,
 385                                                 mkstate( SYM_EPSILON ) );
 386
 387                         mark_beginning_as_normal( $2 );
 388                         current_state_type = STATE_NORMAL;
 389
 390                         if ( previous_continued_action )
 391                                 {
 392                                 /* We need to treat this as variable trailing
 393                                  * context so that the backup does not happen
 394                                  * in the action but before the action switch
 395                                  * statement.  If the backup happens in the
 396                                  * action, then the rules "falling into" this
 397                                  * one's action will *also* do the backup,
 398                                  * erroneously.
 399                                  */
                                     /* Warn unless the rule already meets the
                                      * variable-trail test used below
                                      * (varlength && headcnt == 0).
                                      */
 400                                 if ( ! varlength || headcnt != 0 )
 401                                         warn(
 402                 "trailing context made variable due to preceding '|' action" );
 403
 404                                 /* Mark as variable. */
 405                                 varlength = true;
 406                                 headcnt = 0;
 407
 408                                 }
 409
 410                         if ( lex_compat || (varlength && headcnt == 0) )
 411                                 { /* variable trailing context rule */
 412                                 /* Mark the first part of the rule as the
 413                                  * accepting "head" part of a trailing
 414                                  * context rule.
 415                                  *
 416                                  * By the way, we didn't do this at the
 417                                  * beginning of this production because back
 418                                  * then current_state_type was set up for a
 419                                  * trail rule, and add_accept() can create
 420                                  * a new state ...
 421                                  */
 422                                 add_accept( $1,
 423                                         num_rules | YY_TRAILING_HEAD_MASK );
 424                                 variable_trail_rule = true;
 425                                 }
 426
 427                         else
                                     /* `re2` reset rulelen to 0, so it now
                                      * holds the length of the trail alone.
                                      */
 428                                 trailcnt = rulelen;
 429
 430                         $$ = link_machines( $1, $2 );
 431                         }
 432
 433                 |  re2 re '$'
 434                         { synerr( _("trailing context used twice") ); }
 435
 436                 |  re '$'
 437                         {
                             /* The trail is the single '\n' state linked in at
                              * the end of this action, hence the fixed counts.
                              */
 438                         headcnt = 0;
 439                         trailcnt = 1;
 440                         rulelen = 1;
 441                         varlength = false;
 442
 443                         current_state_type = STATE_TRAILING_CONTEXT;
 444
 445                         if ( trlcontxt )
 446                                 {
 447                                 synerr( _("trailing context used twice") );
                                     /* NOTE(review): this value is replaced by
                                      * the unconditional assignment to $$ at
                                      * the end of the action.
                                      */
 448                                 $$ = mkstate( SYM_EPSILON );
 449                                 }
 450
 451                         else if ( previous_continued_action )
 452                                 {
 453                                 /* See the comment in the rule for "re2 re"
 454                                  * above.
 455                                  */
 456                                 warn(
 457                 "trailing context made variable due to preceding '|' action" );
 458
 459                                 varlength = true;
 460                                 }
 461
 462                         if ( lex_compat || varlength )
 463                                 {
 464                                 /* Again, see the comment in the rule for
 465                                  * "re2 re" above.
 466                                  */
 467                                 add_accept( $1,
 468                                         num_rules | YY_TRAILING_HEAD_MASK );
 469                                 variable_trail_rule = true;
 470                                 }
 471
 472                         trlcontxt = true;
 473
 474                         eps = mkstate( SYM_EPSILON );
 475                         $$ = link_machines( $1,
 476                                 link_machines( eps, mkstate( '\n' ) ) );
 477                         }
 478
 479                 |  re
 480                         {
 481                         $$ = $1;
 482
                             /* Applies only when trlcontxt was already set
                              * while parsing this `re`.
                              */
 483                         if ( trlcontxt )
 484                                 {
 485                                 if ( lex_compat || (varlength && headcnt == 0) )
 486                                         /* Both head and trail are
 487                                          * variable-length.
 488                                          */
 489                                         variable_trail_rule = true;
 490                                 else
 491                                         trailcnt = rulelen;
 492                                 }
 493                         }
 494                 ;
 495
 496
 497 re              :  re '|' series
                             /* Alternation of series, combined with mkor().
                              * Any use of '|' marks the pattern as
                              * variable-length.
                              */
 498                         {
 499                         varlength = true;
 500                         $$ = mkor( $1, $3 );
 501                         }
 502
 503                 |  series
 504                         { $$ = $1; }
 505                 ;
 506
 507
 508 re2             :  re '/'
                             /* Head of a trailing-context rule.  Passes the
                              * head machine through as $$ and switches the
                              * per-rule counters over to measuring the trail.
                              */
 509                         {
 510                         /* This rule is written separately so the
 511                          * reduction will occur before the trailing
 512                          * series is parsed.
 513                          */
 514
 515                         if ( trlcontxt )
 516                                 synerr( _("trailing context used twice") );
 517                         else
 518                                 trlcontxt = true;
 519
                             /* A variable-length head leaves headcnt at the
                              * value it already had and clears varlength so
                              * that the flag tracks the trail from here on; a
                              * fixed-length head records its length.
                              */
 520                         if ( varlength )
 521                                 /* We hope the trailing context is
 522                                  * fixed-length.
 523                                  */
 524                                 varlength = false;
 525                         else
 526                                 headcnt = rulelen;
 527
                             /* Restart the count for the trail. */
 528                         rulelen = 0;
 529
 530                         current_state_type = STATE_TRAILING_CONTEXT;
 531                         $$ = $1;
 532                         }
 533                 ;
 534
 535 series          :  series singleton
                             /* Concatenation of singletons, plus the
                              * *_REPEAT_POSIX forms of the repeat operator,
                              * which apply to the whole series accumulated so
                              * far.  Every repeat form marks the pattern as
                              * variable-length and, after reporting a bad
                              * count with synerr(), falls back to the
                              * unrepeated series.
                              */
 536                         {
 537                         /* This is where concatenation of adjacent patterns
 538                          * gets done.
 539                          */
 540                         $$ = link_machines( $1, $2 );
 541                         }
 542
 543                 |  singleton
 544                         { $$ = $1; }
 545
 546                 |  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
 547                         {
 548                         varlength = true;
 549
 550                         if ( $3 > $5 || $3 < 0 )
 551                                 {
 552                                 synerr( _("bad iteration values") );
 553                                 $$ = $1;
 554                                 }
 555                         else
 556                                 {
                                     /* A lower bound of 0 is built as an
                                      * optional repetition with bounds 1 and
                                      * $5; an upper bound of 0 is rejected.
                                      */
 557                                 if ( $3 == 0 )
 558                                         {
 559                                         if ( $5 <= 0 )
 560                                                 {
 561                                                 synerr(
 562                                                 _("bad iteration values") );
 563                                                 $$ = $1;
 564                                                 }
 565                                         else
 566                                                 $$ = mkopt(
 567                                                         mkrep( $1, 1, $5 ) );
 568                                         }
 569                                 else
 570                                         $$ = mkrep( $1, $3, $5 );
 571                                 }
 572                         }
 573
 574                 |  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
 575                         {
 576                         varlength = true;
 577
                             /* Open-ended repeat: the lower bound must be at
                              * least 1.
                              */
 578                         if ( $3 <= 0 )
 579                                 {
 580                                 synerr( _("iteration value must be positive") );
 581                                 $$ = $1;
 582                                 }
 583
 584                         else
 585                                 $$ = mkrep( $1, $3, INFINITE_REPEAT );
 586                         }
 587
 588                 |  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
 589                         {
 590                         /* The series could be something like "(foo)",
 591                          * in which case we have no idea what its length
 592                          * is, so we punt here.
 593                          */
 594                         varlength = true;
 595
 596                         if ( $3 <= 0 )
 597                                 {
 598                                   synerr( _("iteration value must be positive")
 599                                           );
 600                                 $$ = $1;
 601                                 }
 602
 603                         else
                                     /* Exact count: the original machine is
                                      * linked to the result of
                                      * copysingl( $1, $3 - 1 ), presumably
                                      * $3 - 1 further copies -- confirm.
                                      */
 604                                 $$ = link_machines( $1,
 605                                                 copysingl( $1, $3 - 1 ) );
 606                         }
 607
 608                 ;
 609
 610 singleton       :  singleton '*'
                             /* Smallest pattern unit: a character, '.', a
                              * character class, a quoted string or a
                              * parenthesised `re`, optionally followed by a
                              * postfix operator.  The *_REPEAT_FLEX forms
                              * mirror the POSIX forms in `series` but apply
                              * to this singleton only.  Every postfix
                              * operator marks the pattern as variable-length.
                              */
 611                         {
 612                         varlength = true;
 613
 614                         $$ = mkclos( $1 );
 615                         }
 616
 617                 |  singleton '+'
 618                         {
 619                         varlength = true;
 620                         $$ = mkposcl( $1 );
 621                         }
 622
 623                 |  singleton '?'
 624                         {
 625                         varlength = true;
 626                         $$ = mkopt( $1 );
 627                         }
 628
 629                 |  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
 630                         {
 631                         varlength = true;
 632
 633                         if ( $3 > $5 || $3 < 0 )
 634                                 {
 635                                 synerr( _("bad iteration values") );
 636                                 $$ = $1;
 637                                 }
 638                         else
 639                                 {
                                     /* A lower bound of 0 is built as an
                                      * optional repetition with bounds 1 and
                                      * $5; an upper bound of 0 is rejected.
                                      */
 640                                 if ( $3 == 0 )
 641                                         {
 642                                         if ( $5 <= 0 )
 643                                                 {
 644                                                 synerr(
 645                                                 _("bad iteration values") );
 646                                                 $$ = $1;
 647                                                 }
 648                                         else
 649                                                 $$ = mkopt(
 650                                                         mkrep( $1, 1, $5 ) );
 651                                         }
 652                                 else
 653                                         $$ = mkrep( $1, $3, $5 );
 654                                 }
 655                         }
 656
 657                 |  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
 658                         {
 659                         varlength = true;
 660
 661                         if ( $3 <= 0 )
 662                                 {
 663                                 synerr( _("iteration value must be positive") );
 664                                 $$ = $1;
 665                                 }
 666
 667                         else
 668                                 $$ = mkrep( $1, $3, INFINITE_REPEAT );
 669                         }
 670
 671                 |  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
 672                         {
 673                         /* The singleton could be something like "(foo)",
 674                          * in which case we have no idea what its length
 675                          * is, so we punt here.
 676                          */
 677                         varlength = true;
 678
 679                         if ( $3 <= 0 )
 680                                 {
 681                                 synerr( _("iteration value must be positive") );
 682                                 $$ = $1;
 683                                 }
 684
 685                         else
 686                                 $$ = link_machines( $1,
 687                                                 copysingl( $1, $3 - 1 ) );
 688                         }
 689
 690                 |  '.'
 691                         {
                             /* Both dot classes are built once, on the first
                              * '.' seen, and reused afterwards (guarded by
                              * madeany).  When useecs is set each new class
                              * is also passed to mkeccl().
                              */
 692                         if ( ! madeany )
 693                                 {
 694                                 /* Create the '.' character class. */
 695                     ccldot = cclinit();
 696                     ccladd( ccldot, '\n' );
 697                     cclnegate( ccldot );
 698
 699                     if ( useecs )
 700                         mkeccl( ccltbl + cclmap[ccldot],
 701                             ccllen[ccldot], nextecm,
 702                             ecgroup, csize, csize );
 703
 704                                 /* Create the (?s:'.') character class. */
 705                     cclany = cclinit();
 706                     cclnegate( cclany );
 707
 708                     if ( useecs )
 709                         mkeccl( ccltbl + cclmap[cclany],
 710                             ccllen[cclany], nextecm,
 711                             ecgroup, csize, csize );
 712
 713                                 madeany = true;
 714                                 }
 715
 716                         ++rulelen;
 717
                             /* sf_dot_all() selects the class that was built
                              * without excluding '\n'.
                              */
 718             if (sf_dot_all())
 719                 $$ = mkstate( -cclany );
 720             else
 721                 $$ = mkstate( -ccldot );
 722                         }
 723
 724                 |  fullccl
 725                         {
 726                                 /* Sort characters for fast searching.
 727                                  */
 728                                 qsort( ccltbl + cclmap[$1], ccllen[$1], sizeof (*ccltbl), cclcmp );
 729
 730                         if ( useecs )
 731                                 mkeccl( ccltbl + cclmap[$1], ccllen[$1],
 732                                         nextecm, ecgroup, csize, csize );
 733
                             /* A class state counts as one position in the
                              * rule.
                              */
 734                         ++rulelen;
 735
 736                         if (ccl_has_nl[$1])
 737                                 rule_has_nl[num_rules] = true;
 738
                             /* Class transitions are passed to mkstate() as
                              * the negated class number.
                              */
 739                         $$ = mkstate( -$1 );
 740                         }
 741
 742                 |  PREVCCL
 743                         {
                             /* The token value is used directly as a class
                              * number; unlike the `fullccl` case no sort or
                              * mkeccl() call is made here.
                              */
 744                         ++rulelen;
 745
 746                         if (ccl_has_nl[$1])
 747                                 rule_has_nl[num_rules] = true;
 748
 749                         $$ = mkstate( -$1 );
 750                         }
 751
 752                 |  '"' string '"'
 753                         { $$ = $2; }
 754
 755                 |  '(' re ')'
 756                         { $$ = $2; }
 757
 758                 |  CHAR
 759                         {
 760                         ++rulelen;
 761
                             /* Flag the current rule when the character
                              * equals nlch.
                              */
 762                         if ($1 == nlch)
 763                                 rule_has_nl[num_rules] = true;
 764
 765             if (sf_case_ins() && has_case($1))
 766                 /* create an alternation, as in (a|A) */
 767                 $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
 768             else
 769                 $$ = mkstate( $1 );
 770                         }
 771                 ;
 772 fullccl:
 773         fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
 774     |   fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
 775     |   braceccl
 776     ;
 777
 778 braceccl:
 779
 780             '[' ccl ']' { $$ = $2; }
 781
 782                 |  '[' '^' ccl ']'
 783                         {
 784                         cclnegate( $3 );
 785                         $$ = $3;
 786                         }
 787                 ;
 788
 789 ccl             :  ccl CHAR '-' CHAR
 790                         {
 791
 792                         if (sf_case_ins())
 793                           {
 794
 795                             /* If one end of the range has case and the other
 796                              * does not, or the cases are different, then we're not
 797                              * sure what range the user is trying to express.
 798                              * Examples: [@-z] or [S-t]
 799                              */
 800                             if (has_case ($2) != has_case ($4)
 801                                      || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
 802                                      || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
 803                               format_warn3 (
 804                               _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
 805                                             $2, $4);
 806
 807                             /* If the range spans uppercase characters but not
 808                              * lowercase (or vice-versa), then should we automatically
 809                              * include lowercase characters in the range?
 810                              * Example: [@-_] spans [a-z] but not [A-Z]
 811                              */
 812                             else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
 813                               format_warn3 (
 814                               _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
 815                                             $2, $4);
 816                           }
 817
 818                         if ( $2 > $4 )
 819                                 synerr( _("negative range in character class") );
 820
 821                         else
 822                                 {
 823                                 for ( i = $2; i <= $4; ++i )
 824                                         ccladd( $1, i );
 825
 826                                 /* Keep track if this ccl is staying in
 827                                  * alphabetical order.
 828                                  */
 829                                 cclsorted = cclsorted && ($2 > lastchar);
 830                                 lastchar = $4;
 831
 832                 /* Do it again for upper/lowercase */
 833                 if (sf_case_ins() && has_case($2) && has_case($4)){
 834                     $2 = reverse_case ($2);
 835                     $4 = reverse_case ($4);
 836
 837                     for ( i = $2; i <= $4; ++i )
 838                         ccladd( $1, i );
 839
 840                     cclsorted = cclsorted && ($2 > lastchar);
 841                     lastchar = $4;
 842                 }
 843
 844                                 }
 845
 846                         $$ = $1;
 847                         }
 848
 849                 |  ccl CHAR
 850                         {
 851                         ccladd( $1, $2 );
 852                         cclsorted = cclsorted && ($2 > lastchar);
 853                         lastchar = $2;
 854
 855             /* Do it again for upper/lowercase */
 856             if (sf_case_ins() && has_case($2)){
 857                 $2 = reverse_case ($2);
 858                 ccladd ($1, $2);
 859
 860                 cclsorted = cclsorted && ($2 > lastchar);
 861                 lastchar = $2;
 862             }
 863
 864                         $$ = $1;
 865                         }
 866
 867                 |  ccl ccl_expr
 868                         {
 869                         /* Too hard to properly maintain cclsorted. */
 870                         cclsorted = false;
 871                         $$ = $1;
 872                         }
 873
 874                 |
 875                         {
 876                         cclsorted = true;
 877                         lastchar = 0;
 878                         currccl = $$ = cclinit();
 879                         }
 880                 ;
 881
 882 ccl_expr:
 883            CCE_ALNUM    { CCL_EXPR(isalnum); }
 884                 |  CCE_ALPHA    { CCL_EXPR(isalpha); }
 885                 |  CCE_BLANK    { CCL_EXPR(IS_BLANK); }
 886                 |  CCE_CNTRL    { CCL_EXPR(iscntrl); }
 887                 |  CCE_DIGIT    { CCL_EXPR(isdigit); }
 888                 |  CCE_GRAPH    { CCL_EXPR(isgraph); }
 889                 |  CCE_LOWER    {
 890                           CCL_EXPR(islower);
 891                           if (sf_case_ins())
 892                               CCL_EXPR(isupper);
 893                         }
 894                 |  CCE_PRINT    { CCL_EXPR(isprint); }
 895                 |  CCE_PUNCT    { CCL_EXPR(ispunct); }
 896                 |  CCE_SPACE    { CCL_EXPR(isspace); }
 897                 |  CCE_XDIGIT   { CCL_EXPR(isxdigit); }
 898                 |  CCE_UPPER    {
 899                     CCL_EXPR(isupper);
 900                     if (sf_case_ins())
 901                         CCL_EXPR(islower);
 902                                 }
 903
 904         |  CCE_NEG_ALNUM        { CCL_NEG_EXPR(isalnum); }
 905                 |  CCE_NEG_ALPHA        { CCL_NEG_EXPR(isalpha); }
 906                 |  CCE_NEG_BLANK        { CCL_NEG_EXPR(IS_BLANK); }
 907                 |  CCE_NEG_CNTRL        { CCL_NEG_EXPR(iscntrl); }
 908                 |  CCE_NEG_DIGIT        { CCL_NEG_EXPR(isdigit); }
 909                 |  CCE_NEG_GRAPH        { CCL_NEG_EXPR(isgraph); }
 910                 |  CCE_NEG_PRINT        { CCL_NEG_EXPR(isprint); }
 911                 |  CCE_NEG_PUNCT        { CCL_NEG_EXPR(ispunct); }
 912                 |  CCE_NEG_SPACE        { CCL_NEG_EXPR(isspace); }
 913                 |  CCE_NEG_XDIGIT       { CCL_NEG_EXPR(isxdigit); }
 914                 |  CCE_NEG_LOWER        {
 915                                 if ( sf_case_ins() )
 916                                         warn(_("[:^lower:] is ambiguous in case insensitive scanner"));
 917                                 else
 918                                         CCL_NEG_EXPR(islower);
 919                                 }
 920                 |  CCE_NEG_UPPER        {
 921                                 if ( sf_case_ins() )
 922                                         warn(_("[:^upper:] ambiguous in case insensitive scanner"));
 923                                 else
 924                                         CCL_NEG_EXPR(isupper);
 925                                 }
 926                 ;
 927
 928 string          :  string CHAR
 929                         {
 930                         if ( $2 == nlch )
 931                                 rule_has_nl[num_rules] = true;
 932
 933                         ++rulelen;
 934
 935             if (sf_case_ins() && has_case($2))
 936                 $$ = mkor (mkstate($2), mkstate(reverse_case($2)));
 937             else
 938                 $$ = mkstate ($2);
 939
 940                         $$ = link_machines( $1, $$);
 941                         }
 942
 943                 |
 944                         { $$ = mkstate( SYM_EPSILON ); }
 945                 ;
 946
 947 %%
 948
 949
 950 /* build_eof_action - build the "<<EOF>>" action for the active start
 951  *                    conditions
 952  */
 953
 954 void build_eof_action()
 955         {
 956         register int i;
 957         char action_text[MAXLINE];
 958
 959         for ( i = 1; i <= scon_stk_ptr; ++i )
 960                 {
 961                 if ( sceof[scon_stk[i]] )
 962                         format_pinpoint_message(
 963                                 "multiple <<EOF>> rules for start condition %s",
 964                                 scname[scon_stk[i]] );
 965
 966                 else
 967                         {
 968                         sceof[scon_stk[i]] = true;
 969
 970                         if (previous_continued_action /* && previous action was regular */)
 971                                 add_action("YY_RULE_SETUP\n");
 972
 973                         snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
 974                                 scname[scon_stk[i]] );
 975                         add_action( action_text );
 976                         }
 977                 }
 978
 979         line_directive_out( (FILE *) 0, 1 );
 980
 981         /* This isn't a normal rule after all - don't count it as
 982          * such, so we don't have any holes in the rule numbering
 983          * (which make generating "rule can never match" warnings
 984          * more difficult.
 985          */
 986         --num_rules;
 987         ++num_eof_rules;
 988         }
 989
 990
 991 /* format_synerr - write out formatted syntax error */
 992
 993 void format_synerr( msg, arg )
 994 const char *msg, arg[];
 995         {
 996         char errmsg[MAXLINE];
 997
 998         (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
 999         synerr( errmsg );
1000         }
1001
1002
1003 /* synerr - report a syntax error */
1004
1005 void synerr( str )
1006 const char *str;
1007         {
1008         syntaxerror = true;
1009         pinpoint_message( str );
1010         }
1011
1012
1013 /* format_warn - write out formatted warning */
1014
1015 void format_warn( msg, arg )
1016 const char *msg, arg[];
1017         {
1018         char warn_msg[MAXLINE];
1019
1020         snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1021         warn( warn_msg );
1022         }
1023
1024
1025 /* warn - report a warning, unless -w was given */
1026
1027 void warn( str )
1028 const char *str;
1029         {
1030         line_warning( str, linenum );
1031         }
1032
1033 /* format_pinpoint_message - write out a message formatted with one string,
1034  *                           pinpointing its location
1035  */
1036
1037 void format_pinpoint_message( msg, arg )
1038 const char *msg, arg[];
1039         {
1040         char errmsg[MAXLINE];
1041
1042         snprintf( errmsg, sizeof(errmsg), msg, arg );
1043         pinpoint_message( errmsg );
1044         }
1045
1046
1047 /* pinpoint_message - write out a message, pinpointing its location */
1048
1049 void pinpoint_message( str )
1050 const char *str;
1051         {
1052         line_pinpoint( str, linenum );
1053         }
1054
1055
1056 /* line_warning - report a warning at a given line, unless -w was given */
1057
1058 void line_warning( str, line )
1059 const char *str;
1060 int line;
1061         {
1062         char warning[MAXLINE];
1063
1064         if ( ! nowarn )
1065                 {
1066                 snprintf( warning, sizeof(warning), "warning, %s", str );
1067                 line_pinpoint( warning, line );
1068                 }
1069         }
1070
1071
1072 /* line_pinpoint - write out a message, pinpointing it at the given line */
1073
1074 void line_pinpoint( str, line )
1075 const char *str;
1076 int line;
1077         {
1078         fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1079         }
1080
1081
/* yyerror - eat up an error message from the parser;
 *           currently, messages are ignored
 */

void yyerror( const char *msg )
        {
        /* Deliberately empty: the message is discarded. */
        (void) msg;
        }