FYI: Reply from HP-UX
[git/dscho.git] / flex-2.5.33 / nfa.c
blob09fedcf65a494c106fcdae3be80600f5cce76c37
1 /* nfa - NFA construction routines */
3 /* Copyright (c) 1990 The Regents of the University of California. */
4 /* All rights reserved. */
6 /* This code is derived from software contributed to Berkeley by */
7 /* Vern Paxson. */
9 /* The United States Government has rights in this work pursuant */
10 /* to contract no. DE-AC03-76SF00098 between the United States */
11 /* Department of Energy and the University of California. */
13 /* This file is part of flex. */
15 /* Redistribution and use in source and binary forms, with or without */
16 /* modification, are permitted provided that the following conditions */
17 /* are met: */
19 /* 1. Redistributions of source code must retain the above copyright */
20 /* notice, this list of conditions and the following disclaimer. */
21 /* 2. Redistributions in binary form must reproduce the above copyright */
22 /* notice, this list of conditions and the following disclaimer in the */
23 /* documentation and/or other materials provided with the distribution. */
25 /* Neither the name of the University nor the names of its contributors */
26 /* may be used to endorse or promote products derived from this software */
27 /* without specific prior written permission. */
29 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
30 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
31 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
32 /* PURPOSE. */
34 #include "flexdef.h"
37 /* declare functions that have forward references */
39 int dupmachine PROTO ((int));
40 void mkxtion PROTO ((int, int));
43 /* add_accept - add an accepting state to a machine
45 * accepting_number becomes mach's accepting number.
48 void add_accept (mach, accepting_number)
49 int mach, accepting_number;
51 /* Hang the accepting number off an epsilon state. if it is associated
52 * with a state that has a non-epsilon out-transition, then the state
53 * will accept BEFORE it makes that transition, i.e., one character
54 * too soon.
57 if (transchar[finalst[mach]] == SYM_EPSILON)
58 accptnum[finalst[mach]] = accepting_number;
60 else {
61 int astate = mkstate (SYM_EPSILON);
63 accptnum[astate] = accepting_number;
64 (void) link_machines (mach, astate);
69 /* copysingl - make a given number of copies of a singleton machine
71 * synopsis
73 * newsng = copysingl( singl, num );
75 * newsng - a new singleton composed of num copies of singl
76 * singl - a singleton machine
77 * num - the number of copies of singl to be present in newsng
80 int copysingl (singl, num)
81 int singl, num;
83 int copy, i;
85 copy = mkstate (SYM_EPSILON);
87 for (i = 1; i <= num; ++i)
88 copy = link_machines (copy, dupmachine (singl));
90 return copy;
94 /* dumpnfa - debugging routine to write out an nfa */
96 void dumpnfa (state1)
97 int state1;
100 int sym, tsp1, tsp2, anum, ns;
102 fprintf (stderr,
104 ("\n\n********** beginning dump of nfa with start state %d\n"),
105 state1);
107 /* We probably should loop starting at firstst[state1] and going to
108 * lastst[state1], but they're not maintained properly when we "or"
109 * all of the rules together. So we use our knowledge that the machine
110 * starts at state 1 and ends at lastnfa.
113 /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
114 for (ns = 1; ns <= lastnfa; ++ns) {
115 fprintf (stderr, _("state # %4d\t"), ns);
117 sym = transchar[ns];
118 tsp1 = trans1[ns];
119 tsp2 = trans2[ns];
120 anum = accptnum[ns];
122 fprintf (stderr, "%3d: %4d, %4d", sym, tsp1, tsp2);
124 if (anum != NIL)
125 fprintf (stderr, " [%d]", anum);
127 fprintf (stderr, "\n");
130 fprintf (stderr, _("********** end of dump\n"));
134 /* dupmachine - make a duplicate of a given machine
136 * synopsis
138 * copy = dupmachine( mach );
140 * copy - holds duplicate of mach
141 * mach - machine to be duplicated
143 * note that the copy of mach is NOT an exact duplicate; rather, all the
144 * transition states values are adjusted so that the copy is self-contained,
145 * as the original should have been.
147 * also note that the original MUST be contiguous, with its low and high
148 * states accessible by the arrays firstst and lastst
151 int dupmachine (mach)
152 int mach;
154 int i, init, state_offset;
155 int state = 0;
156 int last = lastst[mach];
158 for (i = firstst[mach]; i <= last; ++i) {
159 state = mkstate (transchar[i]);
161 if (trans1[i] != NO_TRANSITION) {
162 mkxtion (finalst[state], trans1[i] + state - i);
164 if (transchar[i] == SYM_EPSILON &&
165 trans2[i] != NO_TRANSITION)
166 mkxtion (finalst[state],
167 trans2[i] + state - i);
170 accptnum[state] = accptnum[i];
173 if (state == 0)
174 flexfatal (_("empty machine in dupmachine()"));
176 state_offset = state - i + 1;
178 init = mach + state_offset;
179 firstst[init] = firstst[mach] + state_offset;
180 finalst[init] = finalst[mach] + state_offset;
181 lastst[init] = lastst[mach] + state_offset;
183 return init;
187 /* finish_rule - finish up the processing for a rule
189 * An accepting number is added to the given machine. If variable_trail_rule
190 * is true then the rule has trailing context and both the head and trail
191 * are variable size. Otherwise if headcnt or trailcnt is non-zero then
192 * the machine recognizes a pattern with trailing context and headcnt is
193 * the number of characters in the matched part of the pattern, or zero
194 * if the matched part has variable length. trailcnt is the number of
195 * trailing context characters in the pattern, or zero if the trailing
196 * context has variable length.
199 void finish_rule (mach, variable_trail_rule, headcnt, trailcnt,
200 pcont_act)
201 int mach, variable_trail_rule, headcnt, trailcnt, pcont_act;
203 char action_text[MAXLINE];
205 add_accept (mach, num_rules);
207 /* We did this in new_rule(), but it often gets the wrong
208 * number because we do it before we start parsing the current rule.
210 rule_linenum[num_rules] = linenum;
212 /* If this is a continued action, then the line-number has already
213 * been updated, giving us the wrong number.
215 if (continued_action)
216 --rule_linenum[num_rules];
219 /* If the previous rule was continued action, then we inherit the
220 * previous newline flag, possibly overriding the current one.
222 if (pcont_act && rule_has_nl[num_rules - 1])
223 rule_has_nl[num_rules] = true;
225 sprintf (action_text, "case %d:\n", num_rules);
226 add_action (action_text);
227 if (rule_has_nl[num_rules]) {
228 sprintf (action_text, "/* rule %d can match eol */\n",
229 num_rules);
230 add_action (action_text);
234 if (variable_trail_rule) {
235 rule_type[num_rules] = RULE_VARIABLE;
237 if (performance_report > 0)
238 fprintf (stderr,
240 ("Variable trailing context rule at line %d\n"),
241 rule_linenum[num_rules]);
243 variable_trailing_context_rules = true;
246 else {
247 rule_type[num_rules] = RULE_NORMAL;
249 if (headcnt > 0 || trailcnt > 0) {
250 /* Do trailing context magic to not match the trailing
251 * characters.
253 char *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp";
254 char *scanner_bp = "yy_bp";
256 add_action
257 ("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n");
259 if (headcnt > 0) {
260 sprintf (action_text, "%s = %s + %d;\n",
261 scanner_cp, scanner_bp, headcnt);
262 add_action (action_text);
265 else {
266 sprintf (action_text, "%s -= %d;\n",
267 scanner_cp, trailcnt);
268 add_action (action_text);
271 add_action
272 ("YY_DO_BEFORE_ACTION; /* set up yytext again */\n");
276 /* Okay, in the action code at this point yytext and yyleng have
277 * their proper final values for this rule, so here's the point
278 * to do any user action. But don't do it for continued actions,
279 * as that'll result in multiple YY_RULE_SETUP's.
281 if (!continued_action)
282 add_action ("YY_RULE_SETUP\n");
284 line_directive_out ((FILE *) 0, 1);
288 /* link_machines - connect two machines together
290 * synopsis
292 * new = link_machines( first, last );
294 * new - a machine constructed by connecting first to last
295 * first - the machine whose successor is to be last
296 * last - the machine whose predecessor is to be first
298 * note: this routine concatenates the machine first with the machine
299 * last to produce a machine new which will pattern-match first first
300 * and then last, and will fail if either of the sub-patterns fails.
301 * FIRST is set to new by the operation. last is unmolested.
304 int link_machines (first, last)
305 int first, last;
307 if (first == NIL)
308 return last;
310 else if (last == NIL)
311 return first;
313 else {
314 mkxtion (finalst[first], last);
315 finalst[first] = finalst[last];
316 lastst[first] = MAX (lastst[first], lastst[last]);
317 firstst[first] = MIN (firstst[first], firstst[last]);
319 return first;
324 /* mark_beginning_as_normal - mark each "beginning" state in a machine
325 * as being a "normal" (i.e., not trailing context-
326 * associated) states
328 * The "beginning" states are the epsilon closure of the first state
331 void mark_beginning_as_normal (mach)
332 register int mach;
334 switch (state_type[mach]) {
335 case STATE_NORMAL:
336 /* Oh, we've already visited here. */
337 return;
339 case STATE_TRAILING_CONTEXT:
340 state_type[mach] = STATE_NORMAL;
342 if (transchar[mach] == SYM_EPSILON) {
343 if (trans1[mach] != NO_TRANSITION)
344 mark_beginning_as_normal (trans1[mach]);
346 if (trans2[mach] != NO_TRANSITION)
347 mark_beginning_as_normal (trans2[mach]);
349 break;
351 default:
352 flexerror (_
353 ("bad state type in mark_beginning_as_normal()"));
354 break;
359 /* mkbranch - make a machine that branches to two machines
361 * synopsis
363 * branch = mkbranch( first, second );
365 * branch - a machine which matches either first's pattern or second's
366 * first, second - machines whose patterns are to be or'ed (the | operator)
368 * Note that first and second are NEITHER destroyed by the operation. Also,
369 * the resulting machine CANNOT be used with any other "mk" operation except
370 * more mkbranch's. Compare with mkor()
373 int mkbranch (first, second)
374 int first, second;
376 int eps;
378 if (first == NO_TRANSITION)
379 return second;
381 else if (second == NO_TRANSITION)
382 return first;
384 eps = mkstate (SYM_EPSILON);
386 mkxtion (eps, first);
387 mkxtion (eps, second);
389 return eps;
393 /* mkclos - convert a machine into a closure
395 * synopsis
396 * new = mkclos( state );
398 * new - a new state which matches the closure of "state"
401 int mkclos (state)
402 int state;
404 return mkopt (mkposcl (state));
408 /* mkopt - make a machine optional
410 * synopsis
412 * new = mkopt( mach );
414 * new - a machine which optionally matches whatever mach matched
415 * mach - the machine to make optional
417 * notes:
418 * 1. mach must be the last machine created
419 * 2. mach is destroyed by the call
422 int mkopt (mach)
423 int mach;
425 int eps;
427 if (!SUPER_FREE_EPSILON (finalst[mach])) {
428 eps = mkstate (SYM_EPSILON);
429 mach = link_machines (mach, eps);
432 /* Can't skimp on the following if FREE_EPSILON(mach) is true because
433 * some state interior to "mach" might point back to the beginning
434 * for a closure.
436 eps = mkstate (SYM_EPSILON);
437 mach = link_machines (eps, mach);
439 mkxtion (mach, finalst[mach]);
441 return mach;
445 /* mkor - make a machine that matches either one of two machines
447 * synopsis
449 * new = mkor( first, second );
451 * new - a machine which matches either first's pattern or second's
452 * first, second - machines whose patterns are to be or'ed (the | operator)
454 * note that first and second are both destroyed by the operation
455 * the code is rather convoluted because an attempt is made to minimize
456 * the number of epsilon states needed
459 int mkor (first, second)
460 int first, second;
462 int eps, orend;
464 if (first == NIL)
465 return second;
467 else if (second == NIL)
468 return first;
470 else {
471 /* See comment in mkopt() about why we can't use the first
472 * state of "first" or "second" if they satisfy "FREE_EPSILON".
474 eps = mkstate (SYM_EPSILON);
476 first = link_machines (eps, first);
478 mkxtion (first, second);
480 if (SUPER_FREE_EPSILON (finalst[first]) &&
481 accptnum[finalst[first]] == NIL) {
482 orend = finalst[first];
483 mkxtion (finalst[second], orend);
486 else if (SUPER_FREE_EPSILON (finalst[second]) &&
487 accptnum[finalst[second]] == NIL) {
488 orend = finalst[second];
489 mkxtion (finalst[first], orend);
492 else {
493 eps = mkstate (SYM_EPSILON);
495 first = link_machines (first, eps);
496 orend = finalst[first];
498 mkxtion (finalst[second], orend);
502 finalst[first] = orend;
503 return first;
507 /* mkposcl - convert a machine into a positive closure
509 * synopsis
510 * new = mkposcl( state );
512 * new - a machine matching the positive closure of "state"
515 int mkposcl (state)
516 int state;
518 int eps;
520 if (SUPER_FREE_EPSILON (finalst[state])) {
521 mkxtion (finalst[state], state);
522 return state;
525 else {
526 eps = mkstate (SYM_EPSILON);
527 mkxtion (eps, state);
528 return link_machines (state, eps);
533 /* mkrep - make a replicated machine
535 * synopsis
536 * new = mkrep( mach, lb, ub );
538 * new - a machine that matches whatever "mach" matched from "lb"
539 * number of times to "ub" number of times
541 * note
542 * if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
545 int mkrep (mach, lb, ub)
546 int mach, lb, ub;
548 int base_mach, tail, copy, i;
550 base_mach = copysingl (mach, lb - 1);
552 if (ub == INFINITE_REPEAT) {
553 copy = dupmachine (mach);
554 mach = link_machines (mach,
555 link_machines (base_mach,
556 mkclos (copy)));
559 else {
560 tail = mkstate (SYM_EPSILON);
562 for (i = lb; i < ub; ++i) {
563 copy = dupmachine (mach);
564 tail = mkopt (link_machines (copy, tail));
567 mach =
568 link_machines (mach,
569 link_machines (base_mach, tail));
572 return mach;
576 /* mkstate - create a state with a transition on a given symbol
578 * synopsis
580 * state = mkstate( sym );
582 * state - a new state matching sym
583 * sym - the symbol the new state is to have an out-transition on
585 * note that this routine makes new states in ascending order through the
586 * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
587 * relies on machines being made in ascending order and that they are
588 * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
589 * that it admittedly is)
592 int mkstate (sym)
593 int sym;
595 if (++lastnfa >= current_mns) {
596 if ((current_mns += MNS_INCREMENT) >= maximum_mns)
597 lerrif (_
598 ("input rules are too complicated (>= %d NFA states)"),
599 current_mns);
601 ++num_reallocs;
603 firstst = reallocate_integer_array (firstst, current_mns);
604 lastst = reallocate_integer_array (lastst, current_mns);
605 finalst = reallocate_integer_array (finalst, current_mns);
606 transchar =
607 reallocate_integer_array (transchar, current_mns);
608 trans1 = reallocate_integer_array (trans1, current_mns);
609 trans2 = reallocate_integer_array (trans2, current_mns);
610 accptnum =
611 reallocate_integer_array (accptnum, current_mns);
612 assoc_rule =
613 reallocate_integer_array (assoc_rule, current_mns);
614 state_type =
615 reallocate_integer_array (state_type, current_mns);
618 firstst[lastnfa] = lastnfa;
619 finalst[lastnfa] = lastnfa;
620 lastst[lastnfa] = lastnfa;
621 transchar[lastnfa] = sym;
622 trans1[lastnfa] = NO_TRANSITION;
623 trans2[lastnfa] = NO_TRANSITION;
624 accptnum[lastnfa] = NIL;
625 assoc_rule[lastnfa] = num_rules;
626 state_type[lastnfa] = current_state_type;
628 /* Fix up equivalence classes base on this transition. Note that any
629 * character which has its own transition gets its own equivalence
630 * class. Thus only characters which are only in character classes
631 * have a chance at being in the same equivalence class. E.g. "a|b"
632 * puts 'a' and 'b' into two different equivalence classes. "[ab]"
633 * puts them in the same equivalence class (barring other differences
634 * elsewhere in the input).
637 if (sym < 0) {
638 /* We don't have to update the equivalence classes since
639 * that was already done when the ccl was created for the
640 * first time.
644 else if (sym == SYM_EPSILON)
645 ++numeps;
647 else {
648 check_char (sym);
650 if (useecs)
651 /* Map NUL's to csize. */
652 mkechar (sym ? sym : csize, nextecm, ecgroup);
655 return lastnfa;
659 /* mkxtion - make a transition from one state to another
661 * synopsis
663 * mkxtion( statefrom, stateto );
665 * statefrom - the state from which the transition is to be made
666 * stateto - the state to which the transition is to be made
669 void mkxtion (statefrom, stateto)
670 int statefrom, stateto;
672 if (trans1[statefrom] == NO_TRANSITION)
673 trans1[statefrom] = stateto;
675 else if ((transchar[statefrom] != SYM_EPSILON) ||
676 (trans2[statefrom] != NO_TRANSITION))
677 flexfatal (_("found too many transitions in mkxtion()"));
679 else { /* second out-transition for an epsilon state */
680 ++eps2;
681 trans2[statefrom] = stateto;
685 /* new_rule - initialize for a new rule */
687 void new_rule ()
689 if (++num_rules >= current_max_rules) {
690 ++num_reallocs;
691 current_max_rules += MAX_RULES_INCREMENT;
692 rule_type = reallocate_integer_array (rule_type,
693 current_max_rules);
694 rule_linenum = reallocate_integer_array (rule_linenum,
695 current_max_rules);
696 rule_useful = reallocate_integer_array (rule_useful,
697 current_max_rules);
698 rule_has_nl = reallocate_bool_array (rule_has_nl,
699 current_max_rules);
702 if (num_rules > MAX_RULE)
703 lerrif (_("too many rules (> %d)!"), MAX_RULE);
705 rule_linenum[num_rules] = linenum;
706 rule_useful[num_rules] = false;
707 rule_has_nl[num_rules] = false;