From 5b4b229d02dd15834f90362725c87c3776cf21ba Mon Sep 17 00:00:00 2001 From: Kouzinopoulos Charis Date: Wed, 9 Jan 2008 12:28:00 +0200 Subject: [PATCH] Initial Commit --- Makefile | 50 +++++++ aho.c | 489 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ baeza.c | 68 +++++++++ baeza2.c | 94 ++++++++++++ bird.c | 51 +++++++ bm.c | 137 ++++++++++++++++++ code.c | 84 +++++++++++ code.h | 49 +++++++ helper.c | 115 +++++++++++++++ karp.c | 93 ++++++++++++ kmp.c | 60 ++++++++ naive.c | 41 ++++++ zhu.c | 34 +++++ 13 files changed, 1365 insertions(+) create mode 100644 Makefile create mode 100644 aho.c create mode 100644 baeza.c create mode 100644 baeza2.c create mode 100644 bird.c create mode 100644 bm.c create mode 100644 code.c create mode 100644 code.h create mode 100644 helper.c create mode 100644 karp.c create mode 100644 kmp.c create mode 100644 naive.c create mode 100644 zhu.c diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..720c4f3 --- /dev/null +++ b/Makefile @@ -0,0 +1,50 @@ +CC = gcc -Wall -O2 -funroll-loops +#CC = gcc -Wall -g + +TARGET = code + +OBJS= naive.o karp.o code.o helper.o zhu.o bird.o aho.o baeza.o baeza2.o bm.o kmp.o + +LDFLAGS=-lmpi + +all: $(TARGET) + +$(TARGET): $(OBJS) + $(CC) $(OBJS) -o $(TARGET) $(LDFLAGS) + +naive.o: naive.c + $(CC) -c naive.c + +karp.o: karp.c + $(CC) -c karp.c + +code.o: code.c + $(CC) -c code.c + +helper.o: helper.c + $(CC) -c helper.c + +zhu.o: zhu.c + $(CC) -c zhu.c + +bird.o: bird.c + $(CC) -c bird.c + +aho.o: aho.c + $(CC) -c aho.c + +baeza.o: baeza.c + $(CC) -c baeza.c + +baeza2.o: baeza2.c + $(CC) -c baeza2.c + +bm.o: bm.c + $(CC) -c bm.c + +kmp.o: kmp.c + $(CC) -c kmp.c + +clean: + rm -f *.o $(TARGET) core *.txt + diff --git a/aho.c b/aho.c new file mode 100644 index 0000000..4c214b3 --- /dev/null +++ b/aho.c @@ -0,0 +1,489 @@ +#define DRIVER 1 + +//#define DEBUG 1 + +#include +#include +#include +#include "code.h" + +struct kword { + unsigned char *word; + struct kword *next; +}; + +#define MAXCHAR 128 +//#define MAXCHAR 260 +//#define MAXCHAR 1030 + +static int MaxState; + +static int *MatchArray; + +#define MULTI_WAY -1 +#define EMPTY_SLOT -2 + +union GotoTable { + int GotoState; + int *BranchTable; +} static *GotoArray; + +#define FAIL_STATE -1 + +static struct kword **OutArray; + +static int *FailArray; + +static int HighState; + +static void AddStateTrans ( int, int ,int ); +static void ComputeFail ( void ); +static void Enter ( unsigned char * ); +static void FindFail ( int state, int s, int a ); +static void QueueAdd ( int *queue, int qbeg, int new ); + +double aho_steps = 0; + +double loops = 0; + +int textcolumn = 0; + +void MsrchInit ( struct kword *klist ) +{ + #ifdef DEBUG + printf("MsrchInit\n"); + #endif + + int i; + + struct kword *ktemp; + + MaxState = 1; + + /* MaxState equals the number of characters */ +// #warning possible bug: MaxState should be 2, not 4 + for ( ktemp = klist; ktemp != NULL; ktemp = ktemp->next) + MaxState += strlen( ktemp->word ); + + #ifdef DEBUG + printf("MaxState = %i\n", MaxState); + #endif + + MatchArray = (int *) malloc ( sizeof(int) * MaxState ); + GotoArray = (union GotoTable *) malloc (sizeof (union GotoTable) * MaxState ); + OutArray = (struct kword **) malloc (sizeof (struct kword *) * MaxState ); + FailArray = (int *) malloc (sizeof (int) * MaxState ); + + for ( i = 0; i < MaxState; i++) { + MatchArray[i] = EMPTY_SLOT; + OutArray[i] = NULL; + } + + HighState = 0; + AddStateTrans ( 0, 'a', FAIL_STATE ); + AddStateTrans ( 0, 'b', FAIL_STATE ); + + for ( ; klist != NULL; klist = klist->next ) + Enter ( klist->word ); + + for ( i = 0; i < MAXCHAR; i++) { + aho_steps++; + if ( GotoArray[0].BranchTable[i] == FAIL_STATE ) + GotoArray[0].BranchTable[i] = 0; + } + + + ComputeFail(); +} + +/* Add transition from OldState to NewState for MatchChar */ +static void AddStateTrans ( int OldState, int MatchChar, int NewState ) +{ + #ifdef DEBUG + printf("AddStateTrans\n"); + #endif + + int i, *temp; + + #ifdef DEBUG + printf("MatchArray[%i] = %i GotoArray[%i].GotoState = %i\n",OldState,MatchArray[OldState],OldState,GotoArray[OldState].GotoState); + #endif + + if ( MatchArray[OldState] == EMPTY_SLOT ) { + MatchArray[OldState] = MatchChar; + GotoArray[OldState].GotoState = NewState; + } + + else + if ( MatchArray[OldState] == MULTI_WAY ) + GotoArray[OldState].BranchTable[MatchChar] = NewState; + + else { + + temp = (int *) malloc ( sizeof(int) * MAXCHAR ); + + for ( i = 0; i < MAXCHAR; i++ ) + temp[i] = FAIL_STATE; + + + temp[MatchArray[OldState]] = GotoArray[OldState].GotoState; + + temp[MatchChar] = NewState; + + MatchArray[OldState] = MULTI_WAY; + GotoArray[OldState].BranchTable = temp; + + #ifdef DEBUG + printf("GotoArray[%i].BranchTable = %i\n",OldState, *temp); + #endif + } + + #ifdef DEBUG + printf("Now MatchArray[%i] = %i GotoArray[%i].GotoState = %i\n",OldState,MatchArray[OldState],OldState,GotoArray[OldState].GotoState); + #endif +} + +/* Add word to the list of words the program recognises */ +static void Enter ( unsigned char *kword ) +{ + #ifdef DEBUG + printf("Enter\n"); + #endif + + int state, k; + char *save; + struct kword *ktemp; + + state = 0; + + /* keep a copy */ + save = kword; + + for ( ; *kword != '\0'; kword++ ) { + aho_steps++; + if ( MatchArray[state] == *kword ) + state = GotoArray[state].GotoState; + + else + if ( MatchArray[state] == MULTI_WAY ) { + if (( k = GotoArray[state].BranchTable[*kword] ) == FAIL_STATE) + break; + else + state = k; + } + else + break; + } + + for ( ; *kword != '\0'; kword++ ) { + HighState += 1; + + if ( HighState >= MaxState) { + printf("Too many states!\n"); + exit(EXIT_FAILURE); + } + + AddStateTrans ( state, *kword, HighState ); + state = HighState; + } + + ktemp = (struct kword *) malloc ( sizeof ( struct kword )); + ktemp->word = save; + ktemp->next = OutArray[state]; + OutArray[state] = ktemp; +} + +static void ComputeFail() +{ + #ifdef DEBUG + printf("ComputeFail\n"); + #endif + + int *queue, qbeg, r, s; + int i; + + /* Allocate a queue */ + queue = (int *) malloc ( sizeof( int ) * MaxState); + qbeg = 0; + queue[0] = 0; + + for ( i = 0; i < MAXCHAR; i++ ) { + aho_steps++; + if (( s = GotoArray[0].BranchTable[i] ) != 0 ) { + FailArray[s] = 0; + QueueAdd ( queue, qbeg, s ); + } + } + + while ( queue[qbeg] != 0) + { + r = queue[qbeg]; + qbeg = r; + + #ifdef DEBUG + printf("MatchArray[r] = %i\n", MatchArray[r]); + #endif + + if ( MatchArray[r] == EMPTY_SLOT ) + continue; + + else + if ( MatchArray[r] == MULTI_WAY ) { + for ( i = 0; i < MAXCHAR; i++ ) { + aho_steps++; + if (( s = GotoArray[r].BranchTable[i] ) != FAIL_STATE ) { + QueueAdd ( queue, qbeg, s ); + FindFail ( FailArray[r], s, i); + } + } + } + else { + QueueAdd ( queue, qbeg, GotoArray[r].GotoState ); + FindFail ( FailArray[r], GotoArray[r].GotoState, MatchArray[r] ); + } + } + + free ( queue ); +} + + +static void FindFail ( int s1, int s2, int a ) +{ + #ifdef DEBUG + printf("FindFail\n"); + #endif + + int on_fail; + struct kword *ktemp, kdummy, *out_copy, *kscan; + + for ( ; ; s1 = FailArray[s1] ) + if ( MatchArray[s1] == a ) { + if (( on_fail = GotoArray[s1].GotoState ) != FAIL_STATE ) + break; + } + else + if ( MatchArray[s1] == MULTI_WAY ) + if (( on_fail = GotoArray[s1].BranchTable[a] ) != FAIL_STATE ) + break; + + FailArray[s2] = on_fail; + + if( OutArray[on_fail] == NULL ) + out_copy = NULL; + else { + + kscan = OutArray[on_fail]; + out_copy = malloc ( sizeof (struct kword )); + out_copy->word = kscan->word; + out_copy->next = NULL; + + for ( kscan = kscan->next; kscan != NULL; kscan = kscan->next ) { + + ktemp = malloc ( sizeof ( struct kword )); + ktemp->word = kscan->word; + ktemp->next = out_copy->next; + out_copy->next = ktemp; + } + } + + if (( kdummy.next = OutArray[s2] ) != NULL ) { + ktemp = &kdummy; + for ( ; ktemp->next->next != NULL; ktemp = ktemp->next ) + ; + ktemp->next->next = out_copy; + } + + else + OutArray[s2] = out_copy; +} + +static void QueueAdd ( int *queue, int qbeg, int new ) +{ + #ifdef DEBUG + printf("QueueAdd\n"); + #endif + + int q; + + q = queue[qbeg]; + + if ( q == 0 ) + queue[qbeg] = new; + else { + for ( ; queue[q] != 0; q = queue[q] ) + ; + queue[q] = new; + } + + queue[new] = 0; +} + +/* Do the actual search */ +int MsrchGo ( int (*MsrchData) () ) +{ + #ifdef DEBUG + printf("MsrchGo\n"); + #endif + + int state, c, g, mm; + struct kword *kscan; + + state = 0; + + while ((c = MsrchData() ) != EOF ) { + + for ( ;; ) { + aho_steps++; + + if ( state == 0 || ( mm = MatchArray[state] ) == MULTI_WAY ) + g = GotoArray[state].BranchTable[c]; + + else + if ( mm == c ) + g = GotoArray[state].GotoState; + + else + g = FAIL_STATE; + + #ifdef DEBUG + printf("character %c state %i mm %i g %i OutArray[state] %i\n",c,state,mm,g,OutArray[state]); + #endif + + if ( g != FAIL_STATE ) + break; + + state = FailArray[state]; + } + state = g; + + /* Output results */ + if (( kscan = OutArray[state] ) != NULL ) + for ( ; kscan != NULL; kscan = kscan->next ) + /* Break on the first result */ + return (textcolumn - strlen(kscan->word)); + } + return -1; +} + +void MsrchEnd ( void ) +{ + #ifdef DEBUG + printf("MsrchEnd\n"); + #endif + + int i; + + struct kword *kscan; + + for ( i = 0; i < MaxState; i++ ) + if ( MatchArray[i] == MULTI_WAY ) + free ( GotoArray[i].BranchTable ); + + free ( MatchArray ); + free ( GotoArray ); + free ( FailArray ); + + for ( i = 0; i < MaxState; i++ ) + if ( OutArray[i] != NULL ) + for ( kscan = OutArray[i]; kscan != NULL; kscan = kscan->next ) + free ( kscan ); + + free ( OutArray ); +} + + +#ifdef DRIVER + +#define BUFSIZE 200 + +FILE *infile; +char inbuf[BUFSIZE]; +char *inbufptr; +int linetextcolumn; + +int textrow; +int textlength; + +int RetrieveChar ( void ); + +int aho( int trow, int tcolumn, int prow, int tlength ) +{ + #ifdef DEBUG + printf("main\n"); + #endif + + textrow = trow; + textlength = tlength; + textcolumn = tcolumn; + + struct kword *khead, *ktemp; + + int i; + + linetextcolumn = 0; + inbufptr = NULL; + + khead = NULL; + + char *pattern_small; + + pattern_small = malloc (m + 1); + + for( i = 0; i < m; i++) + pattern_small[i] = pattern[prow][i]; + + ktemp = ( struct kword * ) malloc ( sizeof ( struct kword )); + ktemp->word = pattern_small; + ktemp->next = khead; + + if (khead != 0 && khead != ktemp) + if (khead->word == ktemp->word) + printf("Words point to the same thing - this is not valid\n"); + + khead = ktemp; + +// #ifdef DEBUG +// printf("ktemp->word = %s\n",ktemp->word); +// #endif + + /* Setup system and pass list of words */ + MsrchInit ( khead ); + + /* Now search */ + int result = MsrchGo ( RetrieveChar ); + + /* Do a clean up */ + MsrchEnd(); + + free ( pattern_small ); + +// loops++; + +// if(loops == 1) +// printf("%.0lf) aho: %.0lf steps\n",loops, aho_steps); + + aho_steps = 0; + + return ( result ); +} + +int RetrieveChar( void ) +{ + #ifdef DEBUG + printf("RetrieveChar\n"); + #endif + +// printf("if textcolumn %i < textlength + 1 %i\n",textcolumn, textlength); + + if ( textcolumn < textlength ) { + textcolumn ++; +// printf("%c",text[textrow][textcolumn - 1]); + return text[textrow][textcolumn - 1]; + } + + else + return ( EOF ); + +} + +#endif diff --git a/baeza.c b/baeza.c new file mode 100644 index 0000000..7fa9aa6 --- /dev/null +++ b/baeza.c @@ -0,0 +1,68 @@ +#include "code.h" + +void baeza() +{ + unsigned int i, j, row, matches = 0; + + int x, y; + + int steps = 0; + + /* Iterate every m rows (first check lines 0 to m - 1) */ + for ( row = m - 1; row < n; row += m ) { + + /* Search the same row for occurences of the different lines of the pattern */ + for ( i = 0; i < m; i++ ) { + + /* In each row start searching from the begining */ + x = 0; + + while ( x != -1) { + + /* Check text's row against each pattern's row */ + x = aho( row, x, i, n ); + + steps++; + + /* If a row is not found procceed to the next row */ + if ( x != -1 ) { + + /* If there is a match at i = 3, check vertically the text above for i = 0-2 and below for 4-m*/ + + /* Check lines above the current */ + for ( j = 0; j < i; j++ ) { + y = aho( row - i + j, x, j, n ); + steps++; + if ( y != x) + goto newline; + } + + /* Check lines below the current */ + for ( j = 1; j < m - i; j++ ) { + + /* Avoid crashing when x = n or n-1 */ + if ( row + j >= n ) + goto newline; + + y = aho( row + j, x, i + j, n ); + steps++; + if ( y != x ) + goto newline; + } + + /* If we reached here we should have a match */ + matches++; + + /* If a mismatch is found, don't bother searching for the other y lines */ + newline: + + /* Now increase counter to continue search in the same row */ + x++; + } + else break; + } + } + } + + printf("Baeza: %i\n",steps); +} diff --git a/baeza2.c b/baeza2.c new file mode 100644 index 0000000..b13219c --- /dev/null +++ b/baeza2.c @@ -0,0 +1,94 @@ +#include "code.h" + +int naive2( int textrow, int textcolumn, int patternrow ) +{ + unsigned int i, j, k; + + for(i = textcolumn; i < n - m; i++) { + + k = i; + + for(j = 0; j < m; j++) { + + if( pattern[patternrow][j] == text[textrow][k]) + k++; + + else + break; + } + + if (j == m) + return k-m; + + else + j = 0; + + } + /* No match found on this row */ + return -1; +} + +void baeza2() +{ + + int steps = 0; + int x, y, i, j, row, matches = 0; + + /* Iterate every m rows (first check lines 0 to m - 1) */ + for ( row = m - 1; row < n; row += m ) { + + /* Search the same row for occurences of the different lines of the pattern */ + for ( i = 0; i < m; i++ ) { + + /* In each row start searching from the begining */ + x = 0; + + while ( x != -1) { + + /* Check text's row against each pattern's row */ + x = naive2( row, x, i); + + steps++; + + /* If a row is not found procceed to the next row */ + if ( x != -1 ) { + + /* If there is a match at i = 3, check vertically the text above for i = 0-2 and below for 4-m*/ + + /* Check lines above the current */ + for ( j = 0; j < i; j++ ) { + y = naive2( row - i + j, x, j); + steps++; + if ( y != x) + goto newline; + } + + /* Check lines below the current */ + for ( j = 1; j < m - i; j++ ) { + + /* Avoid crashing when x = n or n-1 */ + if ( row + j >= n ) + goto newline; + + y = naive2( row + j, x, i + j); + steps++; + if ( y != x ) + goto newline; + } + + /* If we reached here we should have a match */ + matches++; + + /* If a mismatch is found, don't bother searching for the other y lines */ + newline: + + /* Now increase counter to continue searching on the same row */ + x++; + } + else break; + } + } + } + + printf("Baeza2: %i\n",steps); +} diff --git a/bird.c b/bird.c new file mode 100644 index 0000000..c1ccd21 --- /dev/null +++ b/bird.c @@ -0,0 +1,51 @@ +#include "code.h" + +void bird() +{ + int steps = 0; + + /* x and y can be -1 */ + int x, y; + + unsigned int i, row, matches = 0; + + /* Iterate rows */ + for ( row = 0; row < n - m + 1; row++ ) { + + x = 0; + + /* Iterate through results on the same row */ + while ( x != -1 ) { + + x = aho( row, x, 0, n ); + + steps++; + +// printf("x row %i column %i\n",row, x); + + /* If a row is found, check the m - 1 rows underneath it for matches */ + if ( x != -1 ) { + + for ( i = 1; i < m; i++) { + /* Optimization: instead of checking against n chars on + subsequent rows, just check against the next m chars after x */ + y = aho( row + i, x, i, x + m ); + + steps++; + +// printf("y row %i column %i\n",row + i, y); + + if ( y != x ) + break; + + /* If we reached to the final line above x without breaking, we have a match */ + else if ( i == m -1) + matches++; + } + /* After each attempt,increase the counter + 1 */ + x++; + } + } + } + printf("Bird: %i\n",steps); +} diff --git a/bm.c b/bm.c new file mode 100644 index 0000000..779f04c --- /dev/null +++ b/bm.c @@ -0,0 +1,137 @@ +#include "code.h" + +unsigned int CharJump[m]; + +unsigned int *MatchJump; + +unsigned int *BackUp; + +void heur1() +{ + unsigned int i; + + for ( i = 0; i < m; i++ ) { + CharJump[i] = m - i -1; + //bm_preprocessing++; + } +} + +void heur2() +{ + unsigned int i, ia, ib; + + for ( i = 1; i < m; i++ ) { + MatchJump[i] = 2 * m - i; + //bm_preprocessing++; + } + + + i = n; + ia = n + 1; + + while ( i > 0 ) { + + BackUp[i] = ia; + + while ( ia <= m && pattern2[i - 1] != pattern2[i -1] ) { + + //bm_preprocessing++; + if ( MatchJump[ia] > m - i ) + MatchJump[ia] = m - i; + ia = BackUp[ia]; + } + i--; + ia--; + } + + for ( i = 1; i < ia; i++) { + //bm_preprocessing++; + if ( MatchJump[i] > m + ia - i ) + MatchJump[i] = m + ia - i; + } + + ib = BackUp[ia]; + + while ( ia <= m ) { + while ( ia <= ib ) { + //bm_preprocessing++; + if ( MatchJump[ia] > ib - ia + m ) + MatchJump[ia] = ib - ia + m; + ia++; + } + ib = BackUp[ib]; + } +} + +int jump( int mark ) +{ + unsigned int i; + + for ( i = 0; i < m; i++ ) { + //bm_preprocessing++; + if( text2[mark] == pattern2[i] ) + return CharJump[i]; + } + + return m; +} + +int search_bm() +{ + #ifndef max + #define max(a,b) ((a) > (b) ? (a) : (b)) + #endif + + unsigned int steps = 0; + + unsigned int matches = 0; + + unsigned int ia, ib; + + unsigned int pattern_marker, text_marker; + + pattern_marker = m; + + text_marker = m -1; + + MatchJump = ( unsigned * ) malloc ( 2 * sizeof ( unsigned ) * ( m + 1 ) ); + + BackUp = MatchJump + m + 1; + + /* bm uses two heuristics */ + heur1(); + + heur2(); + + const int limit = n * n - ( n * ( m - 1 ) ); + + while ( text_marker < limit ) + { + + steps++; + + /* We have a partial match */ + if ( text2[text_marker] == pattern2[pattern_marker -1] ) { + pattern_marker--; + text_marker--; + } + + + else { + ia = jump( text_marker ); + ib = MatchJump[pattern_marker]; + text_marker += max( ia, ib ); + pattern_marker = m; + } + + /* We have a match */ + if ( pattern_marker == 0 ) { + matches++; + text_marker += m + 1; + pattern_marker = m; + } + } +// printf("bm: %i steps %i preprocessing %i matches\n", bm_steps, bm_preprocessing, matches); + printf("Zhu2: %i\n",steps); + return steps; +} diff --git a/code.c b/code.c new file mode 100644 index 0000000..a97bb2b --- /dev/null +++ b/code.c @@ -0,0 +1,84 @@ +#include "code.h" + +void run_tests() +{ + + double t1, t2; + + int i; + + int times = 1; + + printf("Executing all algorithms %i times without preprocessing for m = %i and n = %i\n",times, m, n); + +// for (i = 0; i < times; i++) + +// printf("Average elapsed time for naive: \n"); + + t1 = MPI_Wtime(); + for (i = 0; i < times; i++) + naive(); + t2 = MPI_Wtime(); + +// printf("Average elapsed time for karp: \n"); + + t1 = MPI_Wtime(); + for (i = 0; i < times; i++) + karp(); + t2 = MPI_Wtime(); + + t1 = MPI_Wtime(); + for (i = 0; i < times; i++) + zhu(1); + t2 = MPI_Wtime(); + +// printf("Average elapsed time for zhu1 : %f Comparisons: %i\n", (t2-t1)/times, x); + + t1 = MPI_Wtime(); + for (i = 0; i < times; i++) + zhu(2); + t2 = MPI_Wtime(); + +// printf("Average elapsed time for zhu2 : %f Comparisons: %i\n", (t2-t1)/times, x); + + t1 = MPI_Wtime(); + for (i = 0; i < times; i++) + bird(); + t2 = MPI_Wtime(); + +// printf("Average elapsed time for bird : %f Comparisons: %i\n", (t2-t1)/times, x); + + t1 = MPI_Wtime(); + for (i = 0; i < times; i++) + baeza(); + t2 = MPI_Wtime(); + +// printf("Average elapsed time for baeza: %f Comparisons: %i\n", (t2-t1)/times, x); + + t1 = MPI_Wtime(); + for (i = 0; i < times; i++) + baeza2(); + t2 = MPI_Wtime(); + +// printf("Average elapsed time for baeza2: %f Comparisons: %i\n", (t2-t1)/times, x); + + +} + +int main(int argc, char *argv[]) +{ + + MPI_Init( &argc, &argv ); + + create_files( m , n, ALPHABET_SIZE ); + + load_files(); + +// print_pattern(); + + run_tests(); + + MPI_Finalize(); + + return 0; +} diff --git a/code.h b/code.h new file mode 100644 index 0000000..aa9bf52 --- /dev/null +++ b/code.h @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include + +extern void load_files(); + +extern void print_pattern(); + +extern void create_files(int pattern_size, int text_size, int alphabet); + +extern int power(int number, int power); + +extern void naive(); + +extern void karp(); + +extern void zhu( int version ); + +extern void bird(); + +extern void baeza(); + +extern void baeza2(); + +extern int search_kmp(); + +extern int search_bm(); + +extern int aho( int trow, int tcolumn, int prow, int tlength ); + +#define Q 16647133 /* A big prime number for the Zhu algorithm*/ + +#define m 4/*pattern size*/ +#define n 10000/*text size*/ + +/*Don't forget to change MAXCHAR on aho.c */ + +#define ALPHABET_SIZE 2 + +int pattern[m][m]; + +int text[n][n]; + +double pattern2[m]; + +double text2[n*n]; + diff --git a/helper.c b/helper.c new file mode 100644 index 0000000..f401918 --- /dev/null +++ b/helper.c @@ -0,0 +1,115 @@ +/*A list of helper functions*/ + +#include "code.h" +#include + +/*Prints text and pattern*/ +void print_pattern() +{ + int i,j; + + /*Prints the pattern*/ + for(j = 0; j < m; j++) { + + for(i = 0; i < m; i++) + printf("%i",pattern[j][i]); + + printf("\n"); + } + + /*Prints the text*/ +// for(j = 0; j < n; j++) { +// printf("\n"); +// +// for(i = 0; i < n; i++) +// printf("%c",text[j][i]); +// } +} + + +/*Returns the square of a number*/ +int power(int number, int power) +{ + int i; + + int returned_number = number; + + for(i=1;i -1 && pattern2[i] != pattern2[j]) { + //preprocessing++; + j = kmpNext[j]; + } + //preprocessing++; + i++; + j++; + + if (pattern2[i] == pattern2[j]) + kmpNext[i] = kmpNext[j]; + else + kmpNext[i] = j; + } + +// printf("kmp: %i preprocessing\n", preprocessing); +} + + +int search_kmp() +{ + prep_kmp(); + + unsigned int i, j, matches = 0; + + int steps = 0; + + i = j = 0; + while (i < n * n) { + while (j > -1 && pattern2[j] != text2[i]) { + steps++; + j = kmpNext[j]; + + } + i++; j++; steps++; + + if (j == m) { + //printf("An exact occurrence at %d\n", i - j); + matches++; + j = kmpNext[j]; + } + } +// printf("kmp: %i comparisons %i matches\n", steps, matches); + printf("zhu1: %i\n",steps); + return(matches); +} + diff --git a/naive.c b/naive.c new file mode 100644 index 0000000..8d76411 --- /dev/null +++ b/naive.c @@ -0,0 +1,41 @@ +#include "code.h" + +void naive() +{ +int steps = 0; + unsigned int i, j, k, l; + unsigned int matches = 0; + unsigned int x, y; + + /* Text size 7, pattern size 3, so the last element would be 7 - 3 + 1 = 5 */ + for (y = 0; y < n - m + 1; y++) { + + for (x = 0; x < n - m + 1; x++) { + + k = y; /* reset text's vertical k (not to 0 but to the number of vertical loops) */ + + for (l = 0; l < m; l++) { + + i = x; /* reset text's horizontal i (not to 0 but to the number of horizontal loops) */ + + for (j = 0; j < m; j++) { + + steps++; + + if ( pattern[l][j] != text[k][i] ) + goto newline; + + i++; /*increase horizontal drift*/ + } + + k++; /*increase vertical drift*/ + } + + /* If we reached here we should have a match */ + matches++; + + newline: ; + } + } + printf("Naive: %i\n",steps); +} diff --git a/zhu.c b/zhu.c new file mode 100644 index 0000000..421230b --- /dev/null +++ b/zhu.c @@ -0,0 +1,34 @@ +#include "code.h" + +void zhu( int version ) +{ + unsigned int i = 0, j = 0, k = 0; + + /* Huge optimization instead of unsigned int B */ + #define B 128 + + #define limit n * n - (n * ( m - 1 )) + + /* Reduce text to 1d and calculate hash */ + for( k = 0; k < limit; k = n + k ) + for( j = 0 + k; j < n + k; j++ ) { + text2[j] = 0; + + for( i = 0; i < m; i++ ) + text2[j] = text2[j] * B + text[i][j] % Q; + } + + /* Reduce pattern to 1d and calculate hash */ + for( j = 0; j < m; j++ ) { + pattern2[j] = 0; + + for( i = 0; i < m; i++ ) + pattern2[j] = ( pattern2[j] * B ) + pattern[i][j] % Q; + } + + if ( version == 1 ) + search_kmp(); + + else + search_bm(); +} -- 2.11.4.GIT