contrib/flex/dfa.c

   1 /* dfa - DFA construction routines */
   2
   3 /*  Copyright (c) 1990 The Regents of the University of California. */
   4 /*  All rights reserved. */
   5
   6 /*  This code is derived from software contributed to Berkeley by */
   7 /*  Vern Paxson. */
   8
   9 /*  The United States Government has rights in this work pursuant */
  10 /*  to contract no. DE-AC03-76SF00098 between the United States */
  11 /*  Department of Energy and the University of California. */
  12
  13 /*  Redistribution and use in source and binary forms, with or without */
  14 /*  modification, are permitted provided that the following conditions */
  15 /*  are met: */
  16
  17 /*  1. Redistributions of source code must retain the above copyright */
  18 /*     notice, this list of conditions and the following disclaimer. */
  19 /*  2. Redistributions in binary form must reproduce the above copyright */
  20 /*     notice, this list of conditions and the following disclaimer in the */
  21 /*     documentation and/or other materials provided with the distribution. */
  22
  23 /*  Neither the name of the University nor the names of its contributors */
  24 /*  may be used to endorse or promote products derived from this software */
  25 /*  without specific prior written permission. */
  26
  27 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
  28 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
  29 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
  30 /*  PURPOSE. */
  31
  32 #include "flexdef.h"
  33 #include "tables.h"
  34
  35 /* declare functions that have forward references */
  36
  37 void dump_associated_rules PROTO ((FILE *, int));
  38 void dump_transitions PROTO ((FILE *, int[]));
  39 void sympartition PROTO ((int[], int, int[], int[]));
  40 int symfollowset PROTO ((int[], int, int, int[]));
  41
  42
  43 /* check_for_backing_up - check a DFA state for backing up
  44  *
  45  * synopsis
  46  *     void check_for_backing_up( int ds, int state[numecs] );
  47  *
  48  * ds is the number of the state to check and state[] is its out-transitions,
  49  * indexed by equivalence class.
  50  */
  51
  52 void check_for_backing_up (ds, state)
  53      int ds;
  54      int state[];
  55 {
  56         if ((reject && !dfaacc[ds].dfaacc_set) || (!reject && !dfaacc[ds].dfaacc_state)) {      /* state is non-accepting */
  57                 ++num_backing_up;
  58
  59                 if (backing_up_report) {
  60                         fprintf (backing_up_file,
  61                                  _("State #%d is non-accepting -\n"), ds);
  62
  63                         /* identify the state */
  64                         dump_associated_rules (backing_up_file, ds);
  65
  66                         /* Now identify it further using the out- and
  67                          * jam-transitions.
  68                          */
  69                         dump_transitions (backing_up_file, state);
  70
  71                         putc ('\n', backing_up_file);
  72                 }
  73         }
  74 }
  75
  76
  77 /* check_trailing_context - check to see if NFA state set constitutes
  78  *                          "dangerous" trailing context
  79  *
  80  * synopsis
  81  *    void check_trailing_context( int nfa_states[num_states+1], int num_states,
  82  *                              int accset[nacc+1], int nacc );
  83  *
  84  * NOTES
  85  *  Trailing context is "dangerous" if both the head and the trailing
  86  *  part are of variable size \and/ there's a DFA state which contains
  87  *  both an accepting state for the head part of the rule and NFA states
  88  *  which occur after the beginning of the trailing context.
  89  *
  90  *  When such a rule is matched, it's impossible to tell if having been
  91  *  in the DFA state indicates the beginning of the trailing context or
  92  *  further-along scanning of the pattern.  In these cases, a warning
  93  *  message is issued.
  94  *
  95  *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
  96  *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
  97  */
  98
  99 void check_trailing_context (nfa_states, num_states, accset, nacc)
 100      int    *nfa_states, num_states;
 101      int    *accset;
 102      int nacc;
 103 {
 104         register int i, j;
 105
 106         for (i = 1; i <= num_states; ++i) {
 107                 int     ns = nfa_states[i];
 108                 register int type = state_type[ns];
 109                 register int ar = assoc_rule[ns];
 110
 111                 if (type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE) {   /* do nothing */
 112                 }
 113
 114                 else if (type == STATE_TRAILING_CONTEXT) {
 115                         /* Potential trouble.  Scan set of accepting numbers
 116                          * for the one marking the end of the "head".  We
 117                          * assume that this looping will be fairly cheap
 118                          * since it's rare that an accepting number set
 119                          * is large.
 120                          */
 121                         for (j = 1; j <= nacc; ++j)
 122                                 if (accset[j] & YY_TRAILING_HEAD_MASK) {
 123                                         line_warning (_
 124                                                       ("dangerous trailing context"),
 125                                                       rule_linenum[ar]);
 126                                         return;
 127                                 }
 128                 }
 129         }
 130 }
 131
 132
 133 /* dump_associated_rules - list the rules associated with a DFA state
 134  *
 135  * Goes through the set of NFA states associated with the DFA and
 136  * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
 137  * and writes a report to the given file.
 138  */
 139
 140 void dump_associated_rules (file, ds)
 141      FILE   *file;
 142      int ds;
 143 {
 144         register int i, j;
 145         register int num_associated_rules = 0;
 146         int     rule_set[MAX_ASSOC_RULES + 1];
 147         int    *dset = dss[ds];
 148         int     size = dfasiz[ds];
 149
 150         for (i = 1; i <= size; ++i) {
 151                 register int rule_num = rule_linenum[assoc_rule[dset[i]]];
 152
 153                 for (j = 1; j <= num_associated_rules; ++j)
 154                         if (rule_num == rule_set[j])
 155                                 break;
 156
 157                 if (j > num_associated_rules) { /* new rule */
 158                         if (num_associated_rules < MAX_ASSOC_RULES)
 159                                 rule_set[++num_associated_rules] =
 160                                         rule_num;
 161                 }
 162         }
 163
 164         qsort (&rule_set [1], num_associated_rules, sizeof (rule_set [1]), intcmp);
 165
 166         fprintf (file, _(" associated rule line numbers:"));
 167
 168         for (i = 1; i <= num_associated_rules; ++i) {
 169                 if (i % 8 == 1)
 170                         putc ('\n', file);
 171
 172                 fprintf (file, "\t%d", rule_set[i]);
 173         }
 174
 175         putc ('\n', file);
 176 }
 177
 178
 179 /* dump_transitions - list the transitions associated with a DFA state
 180  *
 181  * synopsis
 182  *     dump_transitions( FILE *file, int state[numecs] );
 183  *
 184  * Goes through the set of out-transitions and lists them in human-readable
 185  * form (i.e., not as equivalence classes); also lists jam transitions
 186  * (i.e., all those which are not out-transitions, plus EOF).  The dump
 187  * is done to the given file.
 188  */
 189
 190 void dump_transitions (file, state)
 191      FILE   *file;
 192      int state[];
 193 {
 194         register int i, ec;
 195         int     out_char_set[CSIZE];
 196
 197         for (i = 0; i < csize; ++i) {
 198                 ec = ABS (ecgroup[i]);
 199                 out_char_set[i] = state[ec];
 200         }
 201
 202         fprintf (file, _(" out-transitions: "));
 203
 204         list_character_set (file, out_char_set);
 205
 206         /* now invert the members of the set to get the jam transitions */
 207         for (i = 0; i < csize; ++i)
 208                 out_char_set[i] = !out_char_set[i];
 209
 210         fprintf (file, _("\n jam-transitions: EOF "));
 211
 212         list_character_set (file, out_char_set);
 213
 214         putc ('\n', file);
 215 }
 216
 217
 218 /* epsclosure - construct the epsilon closure of a set of ndfa states
 219  *
 220  * synopsis
 221  *    int *epsclosure( int t[num_states], int *numstates_addr,
 222  *                      int accset[num_rules+1], int *nacc_addr,
 223  *                      int *hashval_addr );
 224  *
 225  * NOTES
 226  *  The epsilon closure is the set of all states reachable by an arbitrary
 227  *  number of epsilon transitions, which themselves do not have epsilon
 228  *  transitions going out, unioned with the set of states which have non-null
 229  *  accepting numbers.  t is an array of size numstates of nfa state numbers.
 230  *  Upon return, t holds the epsilon closure and *numstates_addr is updated.
 231  *  accset holds a list of the accepting numbers, and the size of accset is
 232  *  given by *nacc_addr.  t may be subjected to reallocation if it is not
 233  *  large enough to hold the epsilon closure.
 234  *
 235  *  hashval is the hash value for the dfa corresponding to the state set.
 236  */
 237
 238 int    *epsclosure (t, ns_addr, accset, nacc_addr, hv_addr)
 239      int    *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
 240 {
 241         register int stkpos, ns, tsp;
 242         int     numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
 243         int     stkend, nstate;
 244         static int did_stk_init = false, *stk;
 245
 246 #define MARK_STATE(state) \
 247 do{ trans1[state] = trans1[state] - MARKER_DIFFERENCE;} while(0)
 248
 249 #define IS_MARKED(state) (trans1[state] < 0)
 250
 251 #define UNMARK_STATE(state) \
 252 do{ trans1[state] = trans1[state] + MARKER_DIFFERENCE;} while(0)
 253
 254 #define CHECK_ACCEPT(state) \
 255 do{ \
 256 nfaccnum = accptnum[state]; \
 257 if ( nfaccnum != NIL ) \
 258 accset[++nacc] = nfaccnum; \
 259 }while(0)
 260
 261 #define DO_REALLOCATION() \
 262 do { \
 263 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
 264 ++num_reallocs; \
 265 t = reallocate_integer_array( t, current_max_dfa_size ); \
 266 stk = reallocate_integer_array( stk, current_max_dfa_size ); \
 267 }while(0) \
 268
 269 #define PUT_ON_STACK(state) \
 270 do { \
 271 if ( ++stkend >= current_max_dfa_size ) \
 272 DO_REALLOCATION(); \
 273 stk[stkend] = state; \
 274 MARK_STATE(state); \
 275 }while(0)
 276
 277 #define ADD_STATE(state) \
 278 do { \
 279 if ( ++numstates >= current_max_dfa_size ) \
 280 DO_REALLOCATION(); \
 281 t[numstates] = state; \
 282 hashval += state; \
 283 }while(0)
 284
 285 #define STACK_STATE(state) \
 286 do { \
 287 PUT_ON_STACK(state); \
 288 CHECK_ACCEPT(state); \
 289 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
 290 ADD_STATE(state); \
 291 }while(0)
 292
 293
 294         if (!did_stk_init) {
 295                 stk = allocate_integer_array (current_max_dfa_size);
 296                 did_stk_init = true;
 297         }
 298
 299         nacc = stkend = hashval = 0;
 300
 301         for (nstate = 1; nstate <= numstates; ++nstate) {
 302                 ns = t[nstate];
 303
 304                 /* The state could be marked if we've already pushed it onto
 305                  * the stack.
 306                  */
 307                 if (!IS_MARKED (ns)) {
 308                         PUT_ON_STACK (ns);
 309                         CHECK_ACCEPT (ns);
 310                         hashval += ns;
 311                 }
 312         }
 313
 314         for (stkpos = 1; stkpos <= stkend; ++stkpos) {
 315                 ns = stk[stkpos];
 316                 transsym = transchar[ns];
 317
 318                 if (transsym == SYM_EPSILON) {
 319                         tsp = trans1[ns] + MARKER_DIFFERENCE;
 320
 321                         if (tsp != NO_TRANSITION) {
 322                                 if (!IS_MARKED (tsp))
 323                                         STACK_STATE (tsp);
 324
 325                                 tsp = trans2[ns];
 326
 327                                 if (tsp != NO_TRANSITION
 328                                     && !IS_MARKED (tsp))
 329                                         STACK_STATE (tsp);
 330                         }
 331                 }
 332         }
 333
 334         /* Clear out "visit" markers. */
 335
 336         for (stkpos = 1; stkpos <= stkend; ++stkpos) {
 337                 if (IS_MARKED (stk[stkpos]))
 338                         UNMARK_STATE (stk[stkpos]);
 339                 else
 340                         flexfatal (_
 341                                    ("consistency check failed in epsclosure()"));
 342         }
 343
 344         *ns_addr = numstates;
 345         *hv_addr = hashval;
 346         *nacc_addr = nacc;
 347
 348         return t;
 349 }
 350
 351
 352 /* increase_max_dfas - increase the maximum number of DFAs */
 353
 354 void increase_max_dfas ()
 355 {
 356         current_max_dfas += MAX_DFAS_INCREMENT;
 357
 358         ++num_reallocs;
 359
 360         base = reallocate_integer_array (base, current_max_dfas);
 361         def = reallocate_integer_array (def, current_max_dfas);
 362         dfasiz = reallocate_integer_array (dfasiz, current_max_dfas);
 363         accsiz = reallocate_integer_array (accsiz, current_max_dfas);
 364         dhash = reallocate_integer_array (dhash, current_max_dfas);
 365         dss = reallocate_int_ptr_array (dss, current_max_dfas);
 366         dfaacc = reallocate_dfaacc_union (dfaacc, current_max_dfas);
 367
 368         if (nultrans)
 369                 nultrans =
 370                         reallocate_integer_array (nultrans,
 371                                                   current_max_dfas);
 372 }
 373
 374
 375 /* ntod - convert an ndfa to a dfa
 376  *
 377  * Creates the dfa corresponding to the ndfa we've constructed.  The
 378  * dfa starts out in state #1.
 379  */
 380
 381 void ntod ()
 382 {
 383         int    *accset, ds, nacc, newds;
 384         int     sym, hashval, numstates, dsize;
 385         int     num_full_table_rows=0;  /* used only for -f */
 386         int    *nset, *dset;
 387         int     targptr, totaltrans, i, comstate, comfreq, targ;
 388         int     symlist[CSIZE + 1];
 389         int     num_start_states;
 390         int     todo_head, todo_next;
 391
 392         struct yytbl_data *yynxt_tbl = 0;
 393         flex_int32_t *yynxt_data = 0, yynxt_curr = 0;
 394
 395         /* Note that the following are indexed by *equivalence classes*
 396          * and not by characters.  Since equivalence classes are indexed
 397          * beginning with 1, even if the scanner accepts NUL's, this
 398          * means that (since every character is potentially in its own
 399          * equivalence class) these arrays must have room for indices
 400          * from 1 to CSIZE, so their size must be CSIZE + 1.
 401          */
 402         int     duplist[CSIZE + 1], state[CSIZE + 1];
 403         int     targfreq[CSIZE + 1], targstate[CSIZE + 1];
 404
 405         /* accset needs to be large enough to hold all of the rules present
 406          * in the input, *plus* their YY_TRAILING_HEAD_MASK variants.
 407          */
 408         accset = allocate_integer_array ((num_rules + 1) * 2);
 409         nset = allocate_integer_array (current_max_dfa_size);
 410
 411         /* The "todo" queue is represented by the head, which is the DFA
 412          * state currently being processed, and the "next", which is the
 413          * next DFA state number available (not in use).  We depend on the
 414          * fact that snstods() returns DFA's \in increasing order/, and thus
 415          * need only know the bounds of the dfas to be processed.
 416          */
 417         todo_head = todo_next = 0;
 418
 419         for (i = 0; i <= csize; ++i) {
 420                 duplist[i] = NIL;
 421                 symlist[i] = false;
 422         }
 423
 424         for (i = 0; i <= num_rules; ++i)
 425                 accset[i] = NIL;
 426
 427         if (trace) {
 428                 dumpnfa (scset[1]);
 429                 fputs (_("\n\nDFA Dump:\n\n"), stderr);
 430         }
 431
 432         inittbl ();
 433
 434         /* Check to see whether we should build a separate table for
 435          * transitions on NUL characters.  We don't do this for full-speed
 436          * (-F) scanners, since for them we don't have a simple state
 437          * number lying around with which to index the table.  We also
 438          * don't bother doing it for scanners unless (1) NUL is in its own
 439          * equivalence class (indicated by a positive value of
 440          * ecgroup[NUL]), (2) NUL's equivalence class is the last
 441          * equivalence class, and (3) the number of equivalence classes is
 442          * the same as the number of characters.  This latter case comes
 443          * about when useecs is false or when it's true but every character
 444          * still manages to land in its own class (unlikely, but it's
 445          * cheap to check for).  If all these things are true then the
 446          * character code needed to represent NUL's equivalence class for
 447          * indexing the tables is going to take one more bit than the
 448          * number of characters, and therefore we won't be assured of
 449          * being able to fit it into a YY_CHAR variable.  This rules out
 450          * storing the transitions in a compressed table, since the code
 451          * for interpreting them uses a YY_CHAR variable (perhaps it
 452          * should just use an integer, though; this is worth pondering ...
 453          * ###).
 454          *
 455          * Finally, for full tables, we want the number of entries in the
 456          * table to be a power of two so the array references go fast (it
 457          * will just take a shift to compute the major index).  If
 458          * encoding NUL's transitions in the table will spoil this, we
 459          * give it its own table (note that this will be the case if we're
 460          * not using equivalence classes).
 461          */
 462
 463         /* Note that the test for ecgroup[0] == numecs below accomplishes
 464          * both (1) and (2) above
 465          */
 466         if (!fullspd && ecgroup[0] == numecs) {
 467                 /* NUL is alone in its equivalence class, which is the
 468                  * last one.
 469                  */
 470                 int     use_NUL_table = (numecs == csize);
 471
 472                 if (fulltbl && !use_NUL_table) {
 473                         /* We still may want to use the table if numecs
 474                          * is a power of 2.
 475                          */
 476                         int     power_of_two;
 477
 478                         for (power_of_two = 1; power_of_two <= csize;
 479                              power_of_two *= 2)
 480                                 if (numecs == power_of_two) {
 481                                         use_NUL_table = true;
 482                                         break;
 483                                 }
 484                 }
 485
 486                 if (use_NUL_table)
 487                         nultrans =
 488                                 allocate_integer_array (current_max_dfas);
 489
 490                 /* From now on, nultrans != nil indicates that we're
 491                  * saving null transitions for later, separate encoding.
 492                  */
 493         }
 494
 495
 496         if (fullspd) {
 497                 for (i = 0; i <= numecs; ++i)
 498                         state[i] = 0;
 499
 500                 place_state (state, 0, 0);
 501                 dfaacc[0].dfaacc_state = 0;
 502         }
 503
 504         else if (fulltbl) {
 505                 if (nultrans)
 506                         /* We won't be including NUL's transitions in the
 507                          * table, so build it for entries from 0 .. numecs - 1.
 508                          */
 509                         num_full_table_rows = numecs;
 510
 511                 else
 512                         /* Take into account the fact that we'll be including
 513                          * the NUL entries in the transition table.  Build it
 514                          * from 0 .. numecs.
 515                          */
 516                         num_full_table_rows = numecs + 1;
 517
 518                 /* Begin generating yy_nxt[][]
 519                  * This spans the entire LONG function.
 520                  * This table is tricky because we don't know how big it will be.
 521                  * So we'll have to realloc() on the way...
 522                  * we'll wait until we can calculate yynxt_tbl->td_hilen.
 523                  */
 524                 yynxt_tbl =
 525                         (struct yytbl_data *) calloc (1,
 526                                                       sizeof (struct
 527                                                               yytbl_data));
 528                 yytbl_data_init (yynxt_tbl, YYTD_ID_NXT);
 529                 yynxt_tbl->td_hilen = 1;
 530                 yynxt_tbl->td_lolen = num_full_table_rows;
 531                 yynxt_tbl->td_data = yynxt_data =
 532                         (flex_int32_t *) calloc (yynxt_tbl->td_lolen *
 533                                             yynxt_tbl->td_hilen,
 534                                             sizeof (flex_int32_t));
 535                 yynxt_curr = 0;
 536
 537                 buf_prints (&yydmap_buf,
 538                             "\t{YYTD_ID_NXT, (void**)&yy_nxt, sizeof(%s)},\n",
 539                             long_align ? "flex_int32_t" : "flex_int16_t");
 540
 541                 /* Unless -Ca, declare it "short" because it's a real
 542                  * long-shot that that won't be large enough.
 543                  */
 544                 if (gentables)
 545                         out_str_dec
 546                                 ("static yyconst %s yy_nxt[][%d] =\n    {\n",
 547                                  long_align ? "flex_int32_t" : "flex_int16_t",
 548                                  num_full_table_rows);
 549                 else {
 550                         out_dec ("#undef YY_NXT_LOLEN\n#define YY_NXT_LOLEN (%d)\n", num_full_table_rows);
 551                         out_str ("static yyconst %s *yy_nxt =0;\n",
 552                                  long_align ? "flex_int32_t" : "flex_int16_t");
 553                 }
 554
 555
 556                 if (gentables)
 557                         outn ("    {");
 558
 559                 /* Generate 0 entries for state #0. */
 560                 for (i = 0; i < num_full_table_rows; ++i) {
 561                         mk2data (0);
 562                         yynxt_data[yynxt_curr++] = 0;
 563                 }
 564
 565                 dataflush ();
 566                 if (gentables)
 567                         outn ("    },\n");
 568         }
 569
 570         /* Create the first states. */
 571
 572         num_start_states = lastsc * 2;
 573
 574         for (i = 1; i <= num_start_states; ++i) {
 575                 numstates = 1;
 576
 577                 /* For each start condition, make one state for the case when
 578                  * we're at the beginning of the line (the '^' operator) and
 579                  * one for the case when we're not.
 580                  */
 581                 if (i % 2 == 1)
 582                         nset[numstates] = scset[(i / 2) + 1];
 583                 else
 584                         nset[numstates] =
 585                                 mkbranch (scbol[i / 2], scset[i / 2]);
 586
 587                 nset = epsclosure (nset, &numstates, accset, &nacc,
 588                                    &hashval);
 589
 590                 if (snstods (nset, numstates, accset, nacc, hashval, &ds)) {
 591                         numas += nacc;
 592                         totnst += numstates;
 593                         ++todo_next;
 594
 595                         if (variable_trailing_context_rules && nacc > 0)
 596                                 check_trailing_context (nset, numstates,
 597                                                         accset, nacc);
 598                 }
 599         }
 600
 601         if (!fullspd) {
 602                 if (!snstods (nset, 0, accset, 0, 0, &end_of_buffer_state))
 603                         flexfatal (_
 604                                    ("could not create unique end-of-buffer state"));
 605
 606                 ++numas;
 607                 ++num_start_states;
 608                 ++todo_next;
 609         }
 610
 611
 612         while (todo_head < todo_next) {
 613                 targptr = 0;
 614                 totaltrans = 0;
 615
 616                 for (i = 1; i <= numecs; ++i)
 617                         state[i] = 0;
 618
 619                 ds = ++todo_head;
 620
 621                 dset = dss[ds];
 622                 dsize = dfasiz[ds];
 623
 624                 if (trace)
 625                         fprintf (stderr, _("state # %d:\n"), ds);
 626
 627                 sympartition (dset, dsize, symlist, duplist);
 628
 629                 for (sym = 1; sym <= numecs; ++sym) {
 630                         if (symlist[sym]) {
 631                                 symlist[sym] = 0;
 632
 633                                 if (duplist[sym] == NIL) {
 634                                         /* Symbol has unique out-transitions. */
 635                                         numstates =
 636                                                 symfollowset (dset, dsize,
 637                                                               sym, nset);
 638                                         nset = epsclosure (nset,
 639                                                            &numstates,
 640                                                            accset, &nacc,
 641                                                            &hashval);
 642
 643                                         if (snstods
 644                                             (nset, numstates, accset, nacc,
 645                                              hashval, &newds)) {
 646                                                 totnst = totnst +
 647                                                         numstates;
 648                                                 ++todo_next;
 649                                                 numas += nacc;
 650
 651                                                 if (variable_trailing_context_rules && nacc > 0)
 652                                                         check_trailing_context
 653                                                                 (nset,
 654                                                                  numstates,
 655                                                                  accset,
 656                                                                  nacc);
 657                                         }
 658
 659                                         state[sym] = newds;
 660
 661                                         if (trace)
 662                                                 fprintf (stderr,
 663                                                          "\t%d\t%d\n", sym,
 664                                                          newds);
 665
 666                                         targfreq[++targptr] = 1;
 667                                         targstate[targptr] = newds;
 668                                         ++numuniq;
 669                                 }
 670
 671                                 else {
 672                                         /* sym's equivalence class has the same
 673                                          * transitions as duplist(sym)'s
 674                                          * equivalence class.
 675                                          */
 676                                         targ = state[duplist[sym]];
 677                                         state[sym] = targ;
 678
 679                                         if (trace)
 680                                                 fprintf (stderr,
 681                                                          "\t%d\t%d\n", sym,
 682                                                          targ);
 683
 684                                         /* Update frequency count for
 685                                          * destination state.
 686                                          */
 687
 688                                         i = 0;
 689                                         while (targstate[++i] != targ) ;
 690
 691                                         ++targfreq[i];
 692                                         ++numdup;
 693                                 }
 694
 695                                 ++totaltrans;
 696                                 duplist[sym] = NIL;
 697                         }
 698                 }
 699
 700
 701                 numsnpairs += totaltrans;
 702
 703                 if (ds > num_start_states)
 704                         check_for_backing_up (ds, state);
 705
 706                 if (nultrans) {
 707                         nultrans[ds] = state[NUL_ec];
 708                         state[NUL_ec] = 0;      /* remove transition */
 709                 }
 710
 711                 if (fulltbl) {
 712
 713                         /* Each time we hit here, it's another td_hilen, so we realloc. */
 714                         yynxt_tbl->td_hilen++;
 715                         yynxt_tbl->td_data = yynxt_data =
 716                                 (flex_int32_t *) realloc (yynxt_data,
 717                                                      yynxt_tbl->td_hilen *
 718                                                      yynxt_tbl->td_lolen *
 719                                                      sizeof (flex_int32_t));
 720
 721
 722                         if (gentables)
 723                                 outn ("    {");
 724
 725                         /* Supply array's 0-element. */
 726                         if (ds == end_of_buffer_state) {
 727                                 mk2data (-end_of_buffer_state);
 728                                 yynxt_data[yynxt_curr++] =
 729                                         -end_of_buffer_state;
 730                         }
 731                         else {
 732                                 mk2data (end_of_buffer_state);
 733                                 yynxt_data[yynxt_curr++] =
 734                                         end_of_buffer_state;
 735                         }
 736
 737                         for (i = 1; i < num_full_table_rows; ++i) {
 738                                 /* Jams are marked by negative of state
 739                                  * number.
 740                                  */
 741                                 mk2data (state[i] ? state[i] : -ds);
 742                                 yynxt_data[yynxt_curr++] =
 743                                         state[i] ? state[i] : -ds;
 744                         }
 745
 746                         dataflush ();
 747                         if (gentables)
 748                                 outn ("    },\n");
 749                 }
 750
 751                 else if (fullspd)
 752                         place_state (state, ds, totaltrans);
 753
 754                 else if (ds == end_of_buffer_state)
 755                         /* Special case this state to make sure it does what
 756                          * it's supposed to, i.e., jam on end-of-buffer.
 757                          */
 758                         stack1 (ds, 0, 0, JAMSTATE);
 759
 760                 else {          /* normal, compressed state */
 761
 762                         /* Determine which destination state is the most
 763                          * common, and how many transitions to it there are.
 764                          */
 765
 766                         comfreq = 0;
 767                         comstate = 0;
 768
 769                         for (i = 1; i <= targptr; ++i)
 770                                 if (targfreq[i] > comfreq) {
 771                                         comfreq = targfreq[i];
 772                                         comstate = targstate[i];
 773                                 }
 774
 775                         bldtbl (state, ds, totaltrans, comstate, comfreq);
 776                 }
 777         }
 778
 779         if (fulltbl) {
 780                 dataend ();
 781                 if (tablesext) {
 782                         yytbl_data_compress (yynxt_tbl);
 783                         if (yytbl_data_fwrite (&tableswr, yynxt_tbl) < 0)
 784                                 flexerror (_
 785                                            ("Could not write yynxt_tbl[][]"));
 786                 }
 787                 if (yynxt_tbl) {
 788                         yytbl_data_destroy (yynxt_tbl);
 789                         yynxt_tbl = 0;
 790                 }
 791         }
 792
 793         else if (!fullspd) {
 794                 cmptmps ();     /* create compressed template entries */
 795
 796                 /* Create tables for all the states with only one
 797                  * out-transition.
 798                  */
 799                 while (onesp > 0) {
 800                         mk1tbl (onestate[onesp], onesym[onesp],
 801                                 onenext[onesp], onedef[onesp]);
 802                         --onesp;
 803                 }
 804
 805                 mkdeftbl ();
 806         }
 807
 808         flex_free ((void *) accset);
 809         flex_free ((void *) nset);
 810 }
 811
 812
 813 /* snstods - converts a set of ndfa states into a dfa state
 814  *
 815  * synopsis
 816  *    is_new_state = snstods( int sns[numstates], int numstates,
 817  *                              int accset[num_rules+1], int nacc,
 818  *                              int hashval, int *newds_addr );
 819  *
 820  * On return, the dfa state number is in newds.
 821  */
 822
 823 int snstods (sns, numstates, accset, nacc, hashval, newds_addr)
 824      int sns[], numstates, accset[], nacc, hashval, *newds_addr;
 825 {
 826         int     didsort = 0;
 827         register int i, j;
 828         int     newds, *oldsns;
 829
 830         for (i = 1; i <= lastdfa; ++i)
 831                 if (hashval == dhash[i]) {
 832                         if (numstates == dfasiz[i]) {
 833                                 oldsns = dss[i];
 834
 835                                 if (!didsort) {
 836                                         /* We sort the states in sns so we
 837                                          * can compare it to oldsns quickly.
 838                                          */
 839                                         qsort (&sns [1], numstates, sizeof (sns [1]), intcmp);
 840                                         didsort = 1;
 841                                 }
 842
 843                                 for (j = 1; j <= numstates; ++j)
 844                                         if (sns[j] != oldsns[j])
 845                                                 break;
 846
 847                                 if (j > numstates) {
 848                                         ++dfaeql;
 849                                         *newds_addr = i;
 850                                         return 0;
 851                                 }
 852
 853                                 ++hshcol;
 854                         }
 855
 856                         else
 857                                 ++hshsave;
 858                 }
 859
 860         /* Make a new dfa. */
 861
 862         if (++lastdfa >= current_max_dfas)
 863                 increase_max_dfas ();
 864
 865         newds = lastdfa;
 866
 867         dss[newds] = allocate_integer_array (numstates + 1);
 868
 869         /* If we haven't already sorted the states in sns, we do so now,
 870          * so that future comparisons with it can be made quickly.
 871          */
 872
 873         if (!didsort)
 874                 qsort (&sns [1], numstates, sizeof (sns [1]), intcmp);
 875
 876         for (i = 1; i <= numstates; ++i)
 877                 dss[newds][i] = sns[i];
 878
 879         dfasiz[newds] = numstates;
 880         dhash[newds] = hashval;
 881
 882         if (nacc == 0) {
 883                 if (reject)
 884                         dfaacc[newds].dfaacc_set = (int *) 0;
 885                 else
 886                         dfaacc[newds].dfaacc_state = 0;
 887
 888                 accsiz[newds] = 0;
 889         }
 890
 891         else if (reject) {
 892                 /* We sort the accepting set in increasing order so the
 893                  * disambiguating rule that the first rule listed is considered
 894                  * match in the event of ties will work.
 895                  */
 896
 897                 qsort (&accset [1], nacc, sizeof (accset [1]), intcmp);
 898
 899                 dfaacc[newds].dfaacc_set =
 900                         allocate_integer_array (nacc + 1);
 901
 902                 /* Save the accepting set for later */
 903                 for (i = 1; i <= nacc; ++i) {
 904                         dfaacc[newds].dfaacc_set[i] = accset[i];
 905
 906                         if (accset[i] <= num_rules)
 907                                 /* Who knows, perhaps a REJECT can yield
 908                                  * this rule.
 909                                  */
 910                                 rule_useful[accset[i]] = true;
 911                 }
 912
 913                 accsiz[newds] = nacc;
 914         }
 915
 916         else {
 917                 /* Find lowest numbered rule so the disambiguating rule
 918                  * will work.
 919                  */
 920                 j = num_rules + 1;
 921
 922                 for (i = 1; i <= nacc; ++i)
 923                         if (accset[i] < j)
 924                                 j = accset[i];
 925
 926                 dfaacc[newds].dfaacc_state = j;
 927
 928                 if (j <= num_rules)
 929                         rule_useful[j] = true;
 930         }
 931
 932         *newds_addr = newds;
 933
 934         return 1;
 935 }
 936
 937
 938 /* symfollowset - follow the symbol transitions one step
 939  *
 940  * synopsis
 941  *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
 942  *                              int transsym, int nset[current_max_dfa_size] );
 943  */
 944
 945 int symfollowset (ds, dsize, transsym, nset)
 946      int ds[], dsize, transsym, nset[];
 947 {
 948         int     ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
 949
 950         numstates = 0;
 951
 952         for (i = 1; i <= dsize; ++i) {  /* for each nfa state ns in the state set of ds */
 953                 ns = ds[i];
 954                 sym = transchar[ns];
 955                 tsp = trans1[ns];
 956
 957                 if (sym < 0) {  /* it's a character class */
 958                         sym = -sym;
 959                         ccllist = cclmap[sym];
 960                         lenccl = ccllen[sym];
 961
 962                         if (cclng[sym]) {
 963                                 for (j = 0; j < lenccl; ++j) {
 964                                         /* Loop through negated character
 965                                          * class.
 966                                          */
 967                                         ch = ccltbl[ccllist + j];
 968
 969                                         if (ch == 0)
 970                                                 ch = NUL_ec;
 971
 972                                         if (ch > transsym)
 973                                                 /* Transsym isn't in negated
 974                                                  * ccl.
 975                                                  */
 976                                                 break;
 977
 978                                         else if (ch == transsym)
 979                                                 /* next 2 */
 980                                                 goto bottom;
 981                                 }
 982
 983                                 /* Didn't find transsym in ccl. */
 984                                 nset[++numstates] = tsp;
 985                         }
 986
 987                         else
 988                                 for (j = 0; j < lenccl; ++j) {
 989                                         ch = ccltbl[ccllist + j];
 990
 991                                         if (ch == 0)
 992                                                 ch = NUL_ec;
 993
 994                                         if (ch > transsym)
 995                                                 break;
 996                                         else if (ch == transsym) {
 997                                                 nset[++numstates] = tsp;
 998                                                 break;
 999                                         }
1000                                 }
1001                 }
1002
1003                 else if (sym == SYM_EPSILON) {  /* do nothing */
1004                 }
1005
1006                 else if (ABS (ecgroup[sym]) == transsym)
1007                         nset[++numstates] = tsp;
1008
1009               bottom:;
1010         }
1011
1012         return numstates;
1013 }
1014
1015
1016 /* sympartition - partition characters with same out-transitions
1017  *
1018  * synopsis
1019  *    sympartition( int ds[current_max_dfa_size], int numstates,
1020  *                      int symlist[numecs], int duplist[numecs] );
1021  */
1022
1023 void sympartition (ds, numstates, symlist, duplist)
1024      int ds[], numstates;
1025      int symlist[], duplist[];
1026 {
1027         int     tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
1028
1029         /* Partitioning is done by creating equivalence classes for those
1030          * characters which have out-transitions from the given state.  Thus
1031          * we are really creating equivalence classes of equivalence classes.
1032          */
1033
1034         for (i = 1; i <= numecs; ++i) { /* initialize equivalence class list */
1035                 duplist[i] = i - 1;
1036                 dupfwd[i] = i + 1;
1037         }
1038
1039         duplist[1] = NIL;
1040         dupfwd[numecs] = NIL;
1041
1042         for (i = 1; i <= numstates; ++i) {
1043                 ns = ds[i];
1044                 tch = transchar[ns];
1045
1046                 if (tch != SYM_EPSILON) {
1047                         if (tch < -lastccl || tch >= csize) {
1048                                 flexfatal (_
1049                                            ("bad transition character detected in sympartition()"));
1050                         }
1051
1052                         if (tch >= 0) { /* character transition */
1053                                 int     ec = ecgroup[tch];
1054
1055                                 mkechar (ec, dupfwd, duplist);
1056                                 symlist[ec] = 1;
1057                         }
1058
1059                         else {  /* character class */
1060                                 tch = -tch;
1061
1062                                 lenccl = ccllen[tch];
1063                                 cclp = cclmap[tch];
1064                                 mkeccl (ccltbl + cclp, lenccl, dupfwd,
1065                                         duplist, numecs, NUL_ec);
1066
1067                                 if (cclng[tch]) {
1068                                         j = 0;
1069
1070                                         for (k = 0; k < lenccl; ++k) {
1071                                                 ich = ccltbl[cclp + k];
1072
1073                                                 if (ich == 0)
1074                                                         ich = NUL_ec;
1075
1076                                                 for (++j; j < ich; ++j)
1077                                                         symlist[j] = 1;
1078                                         }
1079
1080                                         for (++j; j <= numecs; ++j)
1081                                                 symlist[j] = 1;
1082                                 }
1083
1084                                 else
1085                                         for (k = 0; k < lenccl; ++k) {
1086                                                 ich = ccltbl[cclp + k];
1087
1088                                                 if (ich == 0)
1089                                                         ich = NUL_ec;
1090
1091                                                 symlist[ich] = 1;
1092                                         }
1093                         }
1094                 }
1095         }
1096 }