Bugfix: Really fix the crash in bm.c, do memory checking after each allocation, corre...
[2dmatching.git] / aho.c
blob8b5949b0e00159011f7be1eaa37d5e96d0915cab
1 #define DRIVER 1
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
7 #include "code.h"
9 struct kword {
10 unsigned char *word;
11 struct kword *next;
14 #define MAXCHAR 128
15 //#define MAXCHAR 260
16 //#define MAXCHAR 1030
18 static int MaxState;
20 static int *MatchArray;
22 #define MULTI_WAY -1
23 #define EMPTY_SLOT -2
25 union GotoTable {
26 int GotoState;
27 int *BranchTable;
28 } static *GotoArray;
30 #define FAIL_STATE -1
32 static struct kword **OutArray;
34 static int *FailArray;
36 static int HighState;
38 static void AddStateTrans ( int, int ,int );
39 static void ComputeFail ( void );
40 static void Enter ( unsigned char * );
41 static void FindFail ( int state, int s, int a );
42 static void QueueAdd ( int *queue, int qbeg, int new );
44 int RetrieveChar ( char **text );
46 int textcolumn = 0;
48 void MsrchInit ( struct kword *klist )
50 int i;
52 struct kword *ktemp;
54 MaxState = 1;
56 /* MaxState equals the number of characters */
57 for ( ktemp = klist; ktemp != NULL; ktemp = ktemp->next)
58 MaxState += strlen( ktemp->word );
60 MatchArray = (int *) malloc ( sizeof(int) * MaxState );
61 GotoArray = (union GotoTable *) malloc (sizeof (union GotoTable) * MaxState );
62 OutArray = (struct kword **) malloc (sizeof (struct kword *) * MaxState );
63 FailArray = (int *) malloc (sizeof (int) * MaxState );
65 for ( i = 0; i < MaxState; i++) {
66 MatchArray[i] = EMPTY_SLOT;
67 OutArray[i] = NULL;
70 HighState = 0;
71 AddStateTrans ( 0, 'a', FAIL_STATE );
72 AddStateTrans ( 0, 'b', FAIL_STATE );
74 for ( ; klist != NULL; klist = klist->next )
75 Enter ( klist->word );
77 for ( i = 0; i < MAXCHAR; i++)
78 if ( GotoArray[0].BranchTable[i] == FAIL_STATE )
79 GotoArray[0].BranchTable[i] = 0;
81 ComputeFail();
84 /* Add transition from OldState to NewState for MatchChar */
85 static void AddStateTrans ( int OldState, int MatchChar, int NewState )
87 int i, *temp;
89 if ( MatchArray[OldState] == EMPTY_SLOT ) {
90 MatchArray[OldState] = MatchChar;
91 GotoArray[OldState].GotoState = NewState;
94 else
95 if ( MatchArray[OldState] == MULTI_WAY )
96 GotoArray[OldState].BranchTable[MatchChar] = NewState;
98 else {
100 temp = (int *) malloc ( sizeof(int) * MAXCHAR );
102 for ( i = 0; i < MAXCHAR; i++ )
103 temp[i] = FAIL_STATE;
106 temp[MatchArray[OldState]] = GotoArray[OldState].GotoState;
108 temp[MatchChar] = NewState;
110 MatchArray[OldState] = MULTI_WAY;
111 GotoArray[OldState].BranchTable = temp;
115 /* Add word to the list of words the program recognises */
116 static void Enter ( unsigned char *kword )
118 int state, k;
119 char *save;
120 struct kword *ktemp;
122 state = 0;
124 /* keep a copy */
125 save = kword;
127 for ( ; *kword != '\0'; kword++ ) {
128 if ( MatchArray[state] == *kword )
129 state = GotoArray[state].GotoState;
131 else
132 if ( MatchArray[state] == MULTI_WAY ) {
133 if (( k = GotoArray[state].BranchTable[*kword] ) == FAIL_STATE)
134 break;
135 else
136 state = k;
138 else
139 break;
142 for ( ; *kword != '\0'; kword++ ) {
143 HighState += 1;
145 if ( HighState >= MaxState) {
146 printf("Too many states!\n");
147 exit(EXIT_FAILURE);
150 AddStateTrans ( state, *kword, HighState );
151 state = HighState;
154 ktemp = (struct kword *) malloc ( sizeof ( struct kword ));
155 ktemp->word = save;
156 ktemp->next = OutArray[state];
157 OutArray[state] = ktemp;
160 static void ComputeFail()
162 int *queue, qbeg, r, s;
163 int i;
165 /* Allocate a queue */
166 queue = (int *) malloc ( sizeof( int ) * MaxState);
167 qbeg = 0;
168 queue[0] = 0;
170 for ( i = 0; i < MAXCHAR; i++ )
171 if (( s = GotoArray[0].BranchTable[i] ) != 0 ) {
172 FailArray[s] = 0;
173 QueueAdd ( queue, qbeg, s );
176 while ( queue[qbeg] != 0)
178 r = queue[qbeg];
179 qbeg = r;
181 if ( MatchArray[r] == EMPTY_SLOT )
182 continue;
184 else
185 if ( MatchArray[r] == MULTI_WAY ) {
186 for ( i = 0; i < MAXCHAR; i++ )
187 if (( s = GotoArray[r].BranchTable[i] ) != FAIL_STATE ) {
188 QueueAdd ( queue, qbeg, s );
189 FindFail ( FailArray[r], s, i);
192 else {
193 QueueAdd ( queue, qbeg, GotoArray[r].GotoState );
194 FindFail ( FailArray[r], GotoArray[r].GotoState, MatchArray[r] );
197 free ( queue );
201 static void FindFail ( int s1, int s2, int a )
203 int on_fail;
204 struct kword *ktemp, kdummy, *out_copy, *kscan;
206 for ( ; ; s1 = FailArray[s1] )
207 if ( MatchArray[s1] == a ) {
208 if (( on_fail = GotoArray[s1].GotoState ) != FAIL_STATE )
209 break;
211 else
212 if ( MatchArray[s1] == MULTI_WAY )
213 if (( on_fail = GotoArray[s1].BranchTable[a] ) != FAIL_STATE )
214 break;
216 FailArray[s2] = on_fail;
218 if( OutArray[on_fail] == NULL )
219 out_copy = NULL;
220 else {
222 kscan = OutArray[on_fail];
223 out_copy = malloc ( sizeof (struct kword ));
224 out_copy->word = kscan->word;
225 out_copy->next = NULL;
227 for ( kscan = kscan->next; kscan != NULL; kscan = kscan->next ) {
229 ktemp = malloc ( sizeof ( struct kword ));
230 ktemp->word = kscan->word;
231 ktemp->next = out_copy->next;
232 out_copy->next = ktemp;
236 if (( kdummy.next = OutArray[s2] ) != NULL ) {
237 ktemp = &kdummy;
238 for ( ; ktemp->next->next != NULL; ktemp = ktemp->next )
240 ktemp->next->next = out_copy;
243 else
244 OutArray[s2] = out_copy;
247 static void QueueAdd ( int *queue, int qbeg, int new )
249 int q;
251 q = queue[qbeg];
253 if ( q == 0 )
254 queue[qbeg] = new;
255 else {
256 for ( ; queue[q] != 0; q = queue[q] )
258 queue[q] = new;
261 queue[new] = 0;
264 /* Do the actual search */
265 int MsrchGo ( char **text )
267 int state, c, g, mm;
268 struct kword *kscan;
270 state = 0;
272 while (( c = RetrieveChar( text ) ) != EOF ) {
274 for ( ;; ) {
276 if ( state == 0 || ( mm = MatchArray[state] ) == MULTI_WAY )
277 g = GotoArray[state].BranchTable[c];
279 else
280 if ( mm == c )
281 g = GotoArray[state].GotoState;
283 else
284 g = FAIL_STATE;
286 if ( g != FAIL_STATE )
287 break;
289 state = FailArray[state];
291 state = g;
293 /* Output results */
294 if (( kscan = OutArray[state] ) != NULL )
295 for ( ; kscan != NULL; kscan = kscan->next )
296 /* Break on the first result */
297 return (textcolumn - strlen(kscan->word));
299 return -1;
302 void MsrchEnd ( void )
304 int i;
306 struct kword *kscan;
308 for ( i = 0; i < MaxState; i++ )
309 if ( MatchArray[i] == MULTI_WAY )
310 free ( GotoArray[i].BranchTable );
312 free ( MatchArray );
313 free ( GotoArray );
314 free ( FailArray );
316 for ( i = 0; i < MaxState; i++ )
317 if ( OutArray[i] != NULL )
318 for ( kscan = OutArray[i]; kscan != NULL; kscan = kscan->next )
319 free ( kscan );
321 free ( OutArray );
325 #ifdef DRIVER
327 #define BUFSIZE 200
329 FILE *infile;
330 char inbuf[BUFSIZE];
331 char *inbufptr;
332 int linetextcolumn;
334 int textrow;
335 int textlength;
337 int aho( char **text, char **pattern, int m, int n, int trow, int tcolumn, int prow, int tlength )
339 textrow = trow;
340 textlength = tlength;
341 textcolumn = tcolumn;
343 struct kword *khead, *ktemp;
345 int i;
347 linetextcolumn = 0;
348 inbufptr = NULL;
350 khead = NULL;
352 char *pattern_small;
354 pattern_small = malloc (m + 1);
356 for( i = 0; i < m; i++)
357 pattern_small[i] = pattern[prow][i];
359 ktemp = ( struct kword * ) malloc ( sizeof ( struct kword ));
360 ktemp->word = pattern_small;
361 ktemp->next = khead;
363 if (khead != 0 && khead != ktemp)
364 if (khead->word == ktemp->word)
365 printf("Words point to the same thing - this is not valid\n");
367 khead = ktemp;
369 /* Setup system and pass list of words */
370 MsrchInit ( khead );
372 /* Now search */
373 int result = MsrchGo( text );
375 /* Do a clean up */
376 MsrchEnd();
378 free ( pattern_small );
380 return ( result );
383 int RetrieveChar( char **text )
385 if ( textcolumn < textlength ) {
386 textcolumn ++;
387 return text[textrow][textcolumn - 1];
390 else
391 return ( EOF );
394 #endif