Notes on version 6.3.
[ragel.git] / ragel / fsmmin.cpp
blobc9ace1b039201fe12269717a6bade26178a1a35a
1 /*
2 * Copyright 2002 Adrian Thurston <thurston@cs.queensu.ca>
3 */
5 /* This file is part of Ragel.
7 * Ragel is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * Ragel is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Ragel; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 #include "fsmgraph.h"
23 #include "mergesort.h"
25 int FsmAp::partitionRound( StateAp **statePtrs, MinPartition *parts, int numParts )
27 /* Need a mergesort object and a single partition compare. */
28 MergeSort<StateAp*, PartitionCompare> mergeSort;
29 PartitionCompare partCompare;
31 /* For each partition. */
32 for ( int p = 0; p < numParts; p++ ) {
33 /* Fill the pointer array with the states in the partition. */
34 StateList::Iter state = parts[p].list;
35 for ( int s = 0; state.lte(); state++, s++ )
36 statePtrs[s] = state;
38 /* Sort the states using the partitioning compare. */
39 int numStates = parts[p].list.length();
40 mergeSort.sort( statePtrs, numStates );
42 /* Assign the states into partitions based on the results of the sort. */
43 int destPart = p, firstNewPart = numParts;
44 for ( int s = 1; s < numStates; s++ ) {
45 /* If this state differs from the last then move to the next partition. */
46 if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
47 /* The new partition is the next avail spot. */
48 destPart = numParts;
49 numParts += 1;
52 /* If the state is not staying in the first partition, then
53 * transfer it to its destination partition. */
54 if ( destPart != p ) {
55 StateAp *state = parts[p].list.detach( statePtrs[s] );
56 parts[destPart].list.append( state );
60 /* Fix the partition pointer for all the states that got moved to a new
61 * partition. This must be done after the states are transfered so the
62 * result of the sort is not altered. */
63 for ( int newPart = firstNewPart; newPart < numParts; newPart++ ) {
64 StateList::Iter state = parts[newPart].list;
65 for ( ; state.lte(); state++ )
66 state->alg.partition = &parts[newPart];
70 return numParts;
73 /**
74 * \brief Minimize by partitioning version 1.
76 * Repeatedly tries to split partitions until all partitions are unsplittable.
77 * Produces the most minimal FSM possible.
79 void FsmAp::minimizePartition1()
81 /* Need one mergesort object and partition compares. */
82 MergeSort<StateAp*, InitPartitionCompare> mergeSort;
83 InitPartitionCompare initPartCompare;
85 /* Nothing to do if there are no states. */
86 if ( stateList.length() == 0 )
87 return;
89 /*
90 * First thing is to partition the states by final state status and
91 * transition functions. This gives us an initial partitioning to work
92 * with.
95 /* Make a array of pointers to states. */
96 int numStates = stateList.length();
97 StateAp** statePtrs = new StateAp*[numStates];
99 /* Fill up an array of pointers to the states for easy sorting. */
100 StateList::Iter state = stateList;
101 for ( int s = 0; state.lte(); state++, s++ )
102 statePtrs[s] = state;
104 /* Sort the states using the array of states. */
105 mergeSort.sort( statePtrs, numStates );
107 /* An array of lists of states is used to partition the states. */
108 MinPartition *parts = new MinPartition[numStates];
110 /* Assign the states into partitions. */
111 int destPart = 0;
112 for ( int s = 0; s < numStates; s++ ) {
113 /* If this state differs from the last then move to the next partition. */
114 if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
115 /* Move to the next partition. */
116 destPart += 1;
119 /* Put the state into its partition. */
120 statePtrs[s]->alg.partition = &parts[destPart];
121 parts[destPart].list.append( statePtrs[s] );
124 /* We just moved all the states from the main list into partitions without
125 * taking them off the main list. So clean up the main list now. */
126 stateList.abandon();
128 /* Split partitions. */
129 int numParts = destPart + 1;
130 while ( true ) {
131 /* Test all partitions for splitting. */
132 int newNum = partitionRound( statePtrs, parts, numParts );
134 /* When no partitions can be split, stop. */
135 if ( newNum == numParts )
136 break;
138 numParts = newNum;
141 /* Fuse states in the same partition. The states will end up back on the
142 * main list. */
143 fusePartitions( parts, numParts );
145 /* Cleanup. */
146 delete[] statePtrs;
147 delete[] parts;
150 /* Split partitions that need splittting, decide which partitions might need
151 * to be split as a result, continue until there are no more that might need
152 * to be split. */
153 int FsmAp::splitCandidates( StateAp **statePtrs, MinPartition *parts, int numParts )
155 /* Need a mergesort and a partition compare. */
156 MergeSort<StateAp*, PartitionCompare> mergeSort;
157 PartitionCompare partCompare;
159 /* The lists of unsplitable (partList) and splitable partitions.
160 * Only partitions in the splitable list are check for needing splitting. */
161 PartitionList partList, splittable;
163 /* Initially, all partitions are born from a split (the initial
164 * partitioning) and can cause other partitions to be split. So any
165 * partition with a state with a transition out to another partition is a
166 * candidate for splitting. This will make every partition except possibly
167 * partitions of final states split candidates. */
168 for ( int p = 0; p < numParts; p++ ) {
169 /* Assume not active. */
170 parts[p].active = false;
172 /* Look for a trans out of any state in the partition. */
173 for ( StateList::Iter state = parts[p].list; state.lte(); state++ ) {
174 /* If there is at least one transition out to another state then
175 * the partition becomes splittable. */
176 if ( state->outList.length() > 0 ) {
177 parts[p].active = true;
178 break;
182 /* If it was found active then it goes on the splittable list. */
183 if ( parts[p].active )
184 splittable.append( &parts[p] );
185 else
186 partList.append( &parts[p] );
189 /* While there are partitions that are splittable, pull one off and try
190 * to split it. If it splits, determine which partitions may now be split
191 * as a result of the newly split partition. */
192 while ( splittable.length() > 0 ) {
193 MinPartition *partition = splittable.detachFirst();
195 /* Fill the pointer array with the states in the partition. */
196 StateList::Iter state = partition->list;
197 for ( int s = 0; state.lte(); state++, s++ )
198 statePtrs[s] = state;
200 /* Sort the states using the partitioning compare. */
201 int numStates = partition->list.length();
202 mergeSort.sort( statePtrs, numStates );
204 /* Assign the states into partitions based on the results of the sort. */
205 MinPartition *destPart = partition;
206 int firstNewPart = numParts;
207 for ( int s = 1; s < numStates; s++ ) {
208 /* If this state differs from the last then move to the next partition. */
209 if ( partCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
210 /* The new partition is the next avail spot. */
211 destPart = &parts[numParts];
212 numParts += 1;
215 /* If the state is not staying in the first partition, then
216 * transfer it to its destination partition. */
217 if ( destPart != partition ) {
218 StateAp *state = partition->list.detach( statePtrs[s] );
219 destPart->list.append( state );
223 /* Fix the partition pointer for all the states that got moved to a new
224 * partition. This must be done after the states are transfered so the
225 * result of the sort is not altered. */
226 int newPart;
227 for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
228 StateList::Iter state = parts[newPart].list;
229 for ( ; state.lte(); state++ )
230 state->alg.partition = &parts[newPart];
233 /* Put the partition we just split and any new partitions that came out
234 * of the split onto the inactive list. */
235 partition->active = false;
236 partList.append( partition );
237 for ( newPart = firstNewPart; newPart < numParts; newPart++ ) {
238 parts[newPart].active = false;
239 partList.append( &parts[newPart] );
242 if ( destPart == partition )
243 continue;
245 /* Now determine which partitions are splittable as a result of
246 * splitting partition by walking the in lists of the states in
247 * partitions that got split. Partition is the faked first item in the
248 * loop. */
249 MinPartition *causalPart = partition;
250 newPart = firstNewPart - 1;
251 while ( newPart < numParts ) {
252 /* Loop all states in the causal partition. */
253 StateList::Iter state = causalPart->list;
254 for ( ; state.lte(); state++ ) {
255 /* Walk all transition into the state and put the partition
256 * that the from state is in onto the splittable list. */
257 for ( TransInList::Iter trans = state->inList; trans.lte(); trans++ ) {
258 MinPartition *fromPart = trans->fromState->alg.partition;
259 if ( ! fromPart->active ) {
260 fromPart->active = true;
261 partList.detach( fromPart );
262 splittable.append( fromPart );
267 newPart += 1;
268 causalPart = &parts[newPart];
271 return numParts;
276 * \brief Minimize by partitioning version 2 (best alg).
278 * Repeatedly tries to split partitions that may splittable until there are no
279 * more partitions that might possibly need splitting. Runs faster than
280 * version 1. Produces the most minimal fsm possible.
282 void FsmAp::minimizePartition2()
284 /* Need a mergesort and an initial partition compare. */
285 MergeSort<StateAp*, InitPartitionCompare> mergeSort;
286 InitPartitionCompare initPartCompare;
288 /* Nothing to do if there are no states. */
289 if ( stateList.length() == 0 )
290 return;
293 * First thing is to partition the states by final state status and
294 * transition functions. This gives us an initial partitioning to work
295 * with.
298 /* Make a array of pointers to states. */
299 int numStates = stateList.length();
300 StateAp** statePtrs = new StateAp*[numStates];
302 /* Fill up an array of pointers to the states for easy sorting. */
303 StateList::Iter state = stateList;
304 for ( int s = 0; state.lte(); state++, s++ )
305 statePtrs[s] = state;
307 /* Sort the states using the array of states. */
308 mergeSort.sort( statePtrs, numStates );
310 /* An array of lists of states is used to partition the states. */
311 MinPartition *parts = new MinPartition[numStates];
313 /* Assign the states into partitions. */
314 int destPart = 0;
315 for ( int s = 0; s < numStates; s++ ) {
316 /* If this state differs from the last then move to the next partition. */
317 if ( s > 0 && initPartCompare.compare( statePtrs[s-1], statePtrs[s] ) < 0 ) {
318 /* Move to the next partition. */
319 destPart += 1;
322 /* Put the state into its partition. */
323 statePtrs[s]->alg.partition = &parts[destPart];
324 parts[destPart].list.append( statePtrs[s] );
327 /* We just moved all the states from the main list into partitions without
328 * taking them off the main list. So clean up the main list now. */
329 stateList.abandon();
331 /* Split partitions. */
332 int numParts = splitCandidates( statePtrs, parts, destPart+1 );
334 /* Fuse states in the same partition. The states will end up back on the
335 * main list. */
336 fusePartitions( parts, numParts );
338 /* Cleanup. */
339 delete[] statePtrs;
340 delete[] parts;
343 void FsmAp::initialMarkRound( MarkIndex &markIndex )
345 /* P and q for walking pairs. */
346 StateAp *p = stateList.head, *q;
348 /* Need an initial partition compare. */
349 InitPartitionCompare initPartCompare;
351 /* Walk all unordered pairs of (p, q) where p != q.
352 * The second depth of the walk stops before reaching p. This
353 * gives us all unordered pairs of states (p, q) where p != q. */
354 while ( p != 0 ) {
355 q = stateList.head;
356 while ( q != p ) {
357 /* If the states differ on final state status, out transitions or
358 * any transition data then they should be separated on the initial
359 * round. */
360 if ( initPartCompare.compare( p, q ) != 0 )
361 markIndex.markPair( p->alg.stateNum, q->alg.stateNum );
363 q = q->next;
365 p = p->next;
369 bool FsmAp::markRound( MarkIndex &markIndex )
371 /* P an q for walking pairs. Take note if any pair gets marked. */
372 StateAp *p = stateList.head, *q;
373 bool pairWasMarked = false;
375 /* Need a mark comparison. */
376 MarkCompare markCompare;
378 /* Walk all unordered pairs of (p, q) where p != q.
379 * The second depth of the walk stops before reaching p. This
380 * gives us all unordered pairs of states (p, q) where p != q. */
381 while ( p != 0 ) {
382 q = stateList.head;
383 while ( q != p ) {
384 /* Should we mark the pair? */
385 if ( !markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) {
386 if ( markCompare.shouldMark( markIndex, p, q ) ) {
387 markIndex.markPair( p->alg.stateNum, q->alg.stateNum );
388 pairWasMarked = true;
391 q = q->next;
393 p = p->next;
396 return pairWasMarked;
401 * \brief Minimize by pair marking.
403 * Decides if each pair of states is distinct or not. Uses O(n^2) memory and
404 * should only be used on small graphs. Produces the most minmimal FSM
405 * possible.
407 void FsmAp::minimizeStable()
409 /* Set the state numbers. */
410 setStateNumbers( 0 );
412 /* This keeps track of which pairs have been marked. */
413 MarkIndex markIndex( stateList.length() );
415 /* Mark pairs where final stateness, out trans, or trans data differ. */
416 initialMarkRound( markIndex );
418 /* While the last round of marking succeeded in marking a state
419 * continue to do another round. */
420 int modified = markRound( markIndex );
421 while (modified)
422 modified = markRound( markIndex );
424 /* Merge pairs that are unmarked. */
425 fuseUnmarkedPairs( markIndex );
428 bool FsmAp::minimizeRound()
430 /* Nothing to do if there are no states. */
431 if ( stateList.length() == 0 )
432 return false;
434 /* Need a mergesort on approx compare and an approx compare. */
435 MergeSort<StateAp*, ApproxCompare> mergeSort;
436 ApproxCompare approxCompare;
438 /* Fill up an array of pointers to the states. */
439 StateAp **statePtrs = new StateAp*[stateList.length()];
440 StateList::Iter state = stateList;
441 for ( int s = 0; state.lte(); state++, s++ )
442 statePtrs[s] = state;
444 bool modified = false;
446 /* Sort The list. */
447 mergeSort.sort( statePtrs, stateList.length() );
449 /* Walk the list looking for duplicates next to each other,
450 * merge in any duplicates. */
451 StateAp **pLast = statePtrs;
452 StateAp **pState = statePtrs + 1;
453 for ( int i = 1; i < stateList.length(); i++, pState++ ) {
454 if ( approxCompare.compare( *pLast, *pState ) == 0 ) {
455 /* Last and pState are the same, so fuse together. Move forward
456 * with pState but not with pLast. If any more are identical, we
457 * must */
458 fuseEquivStates( *pLast, *pState );
459 modified = true;
461 else {
462 /* Last and this are different, do not set to merge them. Move
463 * pLast to the current (it may be way behind from merging many
464 * states) and pState forward one to consider the next pair. */
465 pLast = pState;
468 delete[] statePtrs;
469 return modified;
473 * \brief Minmimize by an approximation.
475 * Repeatedly tries to find states with transitions out to the same set of
476 * states on the same set of keys until no more identical states can be found.
477 * Does not produce the most minimial FSM possible.
479 void FsmAp::minimizeApproximate()
481 /* While the last minimization round succeeded in compacting states,
482 * continue to try to compact states. */
483 while ( true ) {
484 bool modified = minimizeRound();
485 if ( ! modified )
486 break;
491 /* Remove states that have no path to them from the start state. Recursively
492 * traverses the graph marking states that have paths into them. Then removes
493 * all states that did not get marked. */
494 void FsmAp::removeUnreachableStates()
496 /* Misfit accounting should be off and there should be no states on the
497 * misfit list. */
498 assert( !misfitAccounting && misfitList.length() == 0 );
500 /* Mark all the states that can be reached
501 * through the existing set of entry points. */
502 markReachableFromHere( startState );
503 for ( EntryMap::Iter en = entryPoints; en.lte(); en++ )
504 markReachableFromHere( en->value );
506 /* Delete all states that are not marked
507 * and unmark the ones that are marked. */
508 StateAp *state = stateList.head;
509 while ( state ) {
510 StateAp *next = state->next;
512 if ( state->stateBits & STB_ISMARKED )
513 state->stateBits &= ~ STB_ISMARKED;
514 else {
515 detachState( state );
516 stateList.detach( state );
517 delete state;
520 state = next;
524 bool FsmAp::outListCovers( StateAp *state )
526 /* Must be at least one range to cover. */
527 if ( state->outList.length() == 0 )
528 return false;
530 /* The first must start at the lower bound. */
531 TransList::Iter trans = state->outList.first();
532 if ( keyOps->minKey < trans->lowKey )
533 return false;
535 /* Loop starts at second el. */
536 trans.increment();
538 /* Loop checks lower against prev upper. */
539 for ( ; trans.lte(); trans++ ) {
540 /* Lower end of the trans must be one greater than the
541 * previous' high end. */
542 Key lowKey = trans->lowKey;
543 lowKey.decrement();
544 if ( trans->prev->highKey < lowKey )
545 return false;
548 /* Require that the last range extends to the upper bound. */
549 trans = state->outList.last();
550 if ( trans->highKey < keyOps->maxKey )
551 return false;
553 return true;
556 /* Remove states that that do not lead to a final states. Works recursivly traversing
557 * the graph in reverse (starting from all final states) and marking seen states. Then
558 * removes states that did not get marked. */
559 void FsmAp::removeDeadEndStates()
561 /* Misfit accounting should be off and there should be no states on the
562 * misfit list. */
563 assert( !misfitAccounting && misfitList.length() == 0 );
565 /* Mark all states that have paths to the final states. */
566 StateAp **st = finStateSet.data;
567 int nst = finStateSet.length();
568 for ( int i = 0; i < nst; i++, st++ )
569 markReachableFromHereReverse( *st );
571 /* Start state gets honorary marking. If the machine accepts nothing we
572 * still want the start state to hang around. This must be done after the
573 * recursive call on all the final states so that it does not cause the
574 * start state in transitions to be skipped when the start state is
575 * visited by the traversal. */
576 startState->stateBits |= STB_ISMARKED;
578 /* Delete all states that are not marked
579 * and unmark the ones that are marked. */
580 StateAp *state = stateList.head;
581 while ( state != 0 ) {
582 StateAp *next = state->next;
584 if ( state->stateBits & STB_ISMARKED )
585 state->stateBits &= ~ STB_ISMARKED;
586 else {
587 detachState( state );
588 stateList.detach( state );
589 delete state;
592 state = next;
596 /* Remove states on the misfit list. To work properly misfit accounting should
597 * be on when this is called. The detaching of a state will likely cause
598 * another misfit to be collected and it can then be removed. */
599 void FsmAp::removeMisfits()
601 while ( misfitList.length() > 0 ) {
602 /* Get the first state. */
603 StateAp *state = misfitList.head;
605 /* Detach and delete. */
606 detachState( state );
608 /* The state was previously on the misfit list and detaching can only
609 * remove in transitions so the state must still be on the misfit
610 * list. */
611 misfitList.detach( state );
612 delete state;
616 /* Fuse src into dest because they have been deemed equivalent states.
617 * Involves moving transitions into src to go into dest and invoking
618 * callbacks. Src is deleted detached from the graph and deleted. */
619 void FsmAp::fuseEquivStates( StateAp *dest, StateAp *src )
621 /* This would get ugly. */
622 assert( dest != src );
624 /* Cur is a duplicate. We can merge it with trail. */
625 inTransMove( dest, src );
627 detachState( src );
628 stateList.detach( src );
629 delete src;
632 void FsmAp::fuseUnmarkedPairs( MarkIndex &markIndex )
634 StateAp *p = stateList.head, *nextP, *q;
636 /* Definition: The primary state of an equivalence class is the first state
637 * encounterd that belongs to the equivalence class. All equivalence
638 * classes have primary state including equivalence classes with one state
639 * in it. */
641 /* For each unmarked pair merge p into q and delete p. q is always the
642 * primary state of it's equivalence class. We wouldn't have landed on it
643 * here if it were not, because it would have been deleted.
645 * Proof that q is the primaray state of it's equivalence class: Assume q
646 * is not the primary state of it's equivalence class, then it would be
647 * merged into some state that came before it and thus p would be
648 * equivalent to that state. But q is the first state that p is equivalent
649 * to so we have a contradiction. */
651 /* Walk all unordered pairs of (p, q) where p != q.
652 * The second depth of the walk stops before reaching p. This
653 * gives us all unordered pairs of states (p, q) where p != q. */
654 while ( p != 0 ) {
655 nextP = p->next;
657 q = stateList.head;
658 while ( q != p ) {
659 /* If one of p or q is a final state then mark. */
660 if ( ! markIndex.isPairMarked( p->alg.stateNum, q->alg.stateNum ) ) {
661 fuseEquivStates( q, p );
662 break;
664 q = q->next;
666 p = nextP;
670 void FsmAp::fusePartitions( MinPartition *parts, int numParts )
672 /* For each partition, fuse state 2, 3, ... into state 1. */
673 for ( int p = 0; p < numParts; p++ ) {
674 /* Assume that there will always be at least one state. */
675 StateAp *first = parts[p].list.head, *toFuse = first->next;
677 /* Put the first state back onto the main state list. Don't bother
678 * removing it from the partition list first. */
679 stateList.append( first );
681 /* Fuse the rest of the state into the first. */
682 while ( toFuse != 0 ) {
683 /* Save the next. We will trash it before it is needed. */
684 StateAp *next = toFuse->next;
686 /* Put the state to be fused in to the first back onto the main
687 * list before it is fuse. the graph. The state needs to be on
688 * the main list for the detach from the graph to work. Don't
689 * bother removing the state from the partition list first. We
690 * need not maintain it. */
691 stateList.append( toFuse );
693 /* Now fuse to the first. */
694 fuseEquivStates( first, toFuse );
696 /* Go to the next that we saved before trashing the next pointer. */
697 toFuse = next;
700 /* We transfered the states from the partition list into the main list without
701 * removing the states from the partition list first. Clean it up. */
702 parts[p].list.abandon();
707 /* Merge neighboring transitions go to the same state and have the same
708 * transitions data. */
709 void FsmAp::compressTransitions()
711 for ( StateList::Iter st = stateList; st.lte(); st++ ) {
712 if ( st->outList.length() > 1 ) {
713 for ( TransList::Iter trans = st->outList, next = trans.next(); next.lte(); ) {
714 Key nextLow = next->lowKey;
715 nextLow.decrement();
716 if ( trans->highKey == nextLow && trans->toState == next->toState &&
717 CmpActionTable::compare( trans->actionTable, next->actionTable ) == 0 )
719 trans->highKey = next->highKey;
720 st->outList.detach( next );
721 detachTrans( next->fromState, next->toState, next );
722 delete next;
723 next = trans.next();
725 else {
726 trans.increment();
727 next.increment();