// Constructor: receives the paths of the index file and the data file and
// passes both to thInitialize().
// NOTE(review): only fragments of the body are present in this listing, so
// the comments below describe the visible statements only.
10 MyThes::MyThes(const char* idxpath
, const char * datpath
)
// Any return value other than 1 from thInitialize() is handled as a failure.
17 if (thInitialize(idxpath
, datpath
) != 1) {
// Report both paths on stderr; the message does not say which file failed.
18 fprintf(stderr
,"Error - can't open %s or %s\n",idxpath
, datpath
);
// Release whatever was allocated before the failure; each pointer is
// checked for null before it is passed to free().
20 if (encoding
) free((void*)encoding
);
21 if (list
) free((void*)list
);
22 if (offst
) free((void*)offst
);
23 // did not initialize properly - throw exception?
// NOTE(review): the header of the enclosing definition is not part of this
// listing - presumably the destructor; confirm against the full file.
// Any return value other than 1 from thCleanup() is handled as a failure.
30 if (thCleanup() != 1) {
31 /* did not cleanup properly - throw exception? */
// Free the encoding string if it is set.
33 if (encoding
) free((void*)encoding
);
// Loads the thesaurus index from idxpath and opens the data file at datpath.
// Visible steps: read the encoding line, read the entry count, allocate the
// list/offst arrays, parse the remaining index lines (text before '|' goes to
// list[nw], the number after '|' goes to offst[nw]), then open the data file
// into pdfile.
// The constructor compares the result against 1, so 1 is the success value.
// NOTE(review): the return statements, the loop header and the error paths are
// not present in this listing.
40 int MyThes::thInitialize(const char* idxpath
, const char* datpath
)
43 // open the index file
44 FILE * pifile
= fopen(idxpath
,"r");
50 // parse in encoding and index size
// wrd is a zero-filled scratch buffer of MAX_WD_LEN bytes reused for every line.
52 wrd
= (char *)calloc(1, MAX_WD_LEN
);
// First line of the index: kept as the encoding string (copied by mystrdup).
53 int len
= readLine(pifile
,wrd
,MAX_WD_LEN
);
54 encoding
= mystrdup(wrd
);
// Second line of the index: converted with atoi and used as the array size.
55 len
= readLine(pifile
,wrd
,MAX_WD_LEN
);
56 int idxsz
= atoi(wrd
);
59 // now allocate list, offst for the given size
60 list
= (char**) calloc(idxsz
,sizeof(char*));
61 offst
= (unsigned int*) calloc(idxsz
,sizeof(unsigned int));
// Either allocation failing is reported on stderr.
63 if ( (!(list
)) || (!(offst
)) ) {
64 fprintf(stderr
,"Error - bad memory allocation\n");
69 // now parse the remaining lines of the index
70 len
= readLine(pifile
,wrd
,MAX_WD_LEN
);
// np is the position of the '|' separator within the current line.
73 int np
= mystr_indexOfChar(wrd
,'|');
// np+1 zero-filled bytes are allocated and np bytes copied, so the stored
// entry stays NUL-terminated.
// NOTE(review): any guard on np or on nw versus idxsz sits on lines that are
// not present in this listing - confirm in the full file.
77 list
[nw
] = (char *)calloc(1,(np
+1));
78 memcpy((list
[nw
]),wrd
,np
);
// The text after the separator is parsed with atoi and stored in offst[nw].
79 offst
[nw
] = atoi(wrd
+np
+1);
// Fetch the next index line.
83 len
= readLine(pifile
,wrd
,MAX_WD_LEN
);
90 /* next open the data file */
// The handle is kept in pdfile; Lookup checks it for null and seeks in it.
91 pdfile
= fopen(datpath
,"r");
// Releases the index data: visits the nw list entries, then frees the list
// and offst arrays themselves (each checked for null first).
// A caller in this file compares the result against 1.
// NOTE(review): the file-closing code, the loop body and the return statement
// are not present in this listing.
101 int MyThes::thCleanup()
103 /* first close the data file */
109 /* now free up all the allocated strings on the list */
110 for (int i
=0; i
< nw
; i
++)
// After the per-entry pass, release the two arrays.
118 if (list
) free((void*)list
);
119 if (offst
) free((void*)offst
);
127 // look up text in the index, yielding the count of meanings and a list of
128 // meaning entries, with each entry having a synonym count and a pointer to an
129 // array of char * (i.e. the synonyms)
131 // note: the calling routine should call CleanUpAfterLookup with the original
132 // meaning pointer and count to properly deallocate memory
// Looks up the first len bytes of pText in the in-memory index and, on a hit,
// reads the matching record from the data file into a newly allocated array
// of mentry stored in *pme.
//   pText - search text; len bytes are copied, so it need not be terminated
//   len   - number of bytes of pText to use
//   pme   - receives the malloc'ed array of meaning entries
// Returns 0 when the data file is not open or the word is not in the index.
// NOTE(review): the final return and several intermediate statements are not
// present in this listing; the declarations of offset, buf, p, d, pm, nf and k
// are likewise not visible here.
134 int MyThes::Lookup(const char * pText
, int len
, mentry
** pme
)
139 // handle the case of missing file or file related errors
140 if (! pdfile
) return 0;
144 /* copy search word and make sure null terminated */
// calloc zero-fills len+1 bytes, so the byte after the copied text is NUL.
145 char * wrd
= (char *) calloc(1,(len
+1));
146 memcpy(wrd
,pText
,len
);
148 /* find it in the list */
149 int idx
= binsearch(wrd
,list
,nw
);
// A negative index means the word is not in the index.
151 if (idx
< 0) return 0;
153 // now seek to the offset
// The stored unsigned offset is converted to long for fseek.
154 offset
= (long) offst
[idx
];
155 int rc
= fseek(pdfile
,offset
,SEEK_SET
);
160 // grab the count of the number of meanings
161 // and allocate a list of meaning entries
// buf is a MAX_LN_LEN-byte line buffer; reads are limited to MAX_LN_LEN-1.
163 buf
= (char *) malloc( MAX_LN_LEN
);
165 readLine(pdfile
, buf
, (MAX_LN_LEN
-1));
// The first record line is split at '|'; the number after it is the meaning
// count.
166 int np
= mystr_indexOfChar(buf
,'|');
171 int nmeanings
= atoi(buf
+np
+1);
// One mentry per meaning; the memory is not zeroed by malloc.
172 *pme
= (mentry
*) malloc( nmeanings
* sizeof(mentry
) );
178 // now read in each meaning and parse it to get defn, count and synonym lists
// dfn is a fixed MAX_WD_LEN-byte scratch buffer used to build the definition.
180 char dfn
[MAX_WD_LEN
];
// One data-file line is read per meaning.
182 for (int j
= 0; j
< nmeanings
; j
++) {
183 readLine(pdfile
, buf
, (MAX_LN_LEN
-1));
189 // store away the part of speech for later use
192 np
= mystr_indexOfChar(p
,'|');
201 // count the number of fields in the remaining line
204 np
= mystr_indexOfChar(d
,'|');
208 np
= mystr_indexOfChar(d
,'|');
// The synonym pointer array has room for nf entries.
211 pm
->psyns
= (char **) malloc(nf
*sizeof(char*));
213 // fill in the synonym list
// NOTE(review): this loop declares its own j; if it is nested inside the
// per-meaning loop above (braces are not visible here) it shadows that j.
215 for (int j
= 0; j
< nf
; j
++) {
216 np
= mystr_indexOfChar(d
,'|');
// Two assignments to psyns[j] are visible; presumably they sit in the
// separator-found and separator-missing branches - confirm in the full file.
219 pm
->psyns
[j
] = mystrdup(d
);
222 pm
->psyns
[j
] = mystrdup(d
);
226 // add pos to first synonym to create the definition
228 int m
= strlen(pm
->psyns
[0]);
// The size test keeps the m+1 bytes written at dfn+k+1 inside the
// MAX_WD_LEN-byte dfn buffer.
229 if ((k
+m
) < (MAX_WD_LEN
- 1)) {
232 strncpy((dfn
+k
+1),(pm
->psyns
[0]),m
+1);
233 pm
->defn
= mystrdup(dfn
);
// Presumably the fallback branch: the first synonym alone is the definition.
235 pm
->defn
= mystrdup(pm
->psyns
[0]);
// Frees the per-meaning allocations of a lookup result: every synonym string,
// the synonym pointer array and the definition string of each entry.
//   pme       - address of the meaning-entry array pointer
//   nmeanings - number of entries in that array
// Does nothing when nmeanings is 0 or *pme is NULL.
// NOTE(review): the declaration and advancement of pm, and any release of the
// entry array itself, are not present in this listing.
248 void MyThes::CleanUpAfterLookup(mentry
** pme
, int nmeanings
)
251 if (nmeanings
== 0) return;
252 if ((*pme
) == NULL
) return;
// One pass per meaning entry.
256 for (int i
= 0; i
< nmeanings
; i
++) {
// count bounds the walk over this entry's synonym strings.
257 int count
= pm
->count
;
258 for (int j
= 0; j
< count
; j
++) {
259 if (pm
->psyns
[j
]) free(pm
->psyns
[j
]);
// Then the pointer array and the definition string, each null-checked.
262 if (pm
->psyns
) free(pm
->psyns
);
264 if (pm
->defn
) free(pm
->defn
);
276 // read a line of text from a text file stripping
277 // off the line terminator and replacing it with
278 // a null string terminator.
279 // returns: -1 on error or the number of characters in
280 // the returned string
282 // A maximum of nc characters will be returned
// Reads one line from pf into buf using fgets with a limit of nc.
//   pf  - open input stream
//   buf - destination buffer
//   nc  - size limit handed to fgets (fgets stores at most nc-1 characters
//         plus the terminating NUL)
// NOTE(review): the line-end stripping and the return statements are not
// present in this listing.
284 int MyThes::readLine(FILE * pf
, char * buf
, int nc
)
// A non-null result from fgets means some text was read.
287 if (fgets(buf
,nc
,pf
)) {
296 // performs a binary search on null terminated character
299 // returns: -1 on not found
300 // index of wrd in the list[]
// Searches list for the string sw by comparing with strcmp at a midpoint
// between a lower bound lp and an upper bound up.
//   sw   - NUL-terminated string to find
//   list - array of NUL-terminated strings (presumably sorted in strcmp
//          order; confirm against the index file format)
//   nlst - number of entries in list
// Returns -1 when sw orders before list[lp], after list[up], or when the
// bounds cross.
// NOTE(review): the initialization of lp/up, the loop, the found-case return
// and any handling of nlst == 0 are not present in this listing.
302 int MyThes::binsearch(char * sw
, char* list
[], int nlst
)
304 int lp
, up
, mp
, j
, indx
;
// Reject early when sw lies outside the range covered by the two end entries.
308 if (strcmp(sw
,list
[lp
]) < 0) return -1;
309 if (strcmp(sw
,list
[up
]) > 0) return -1;
// Midpoint of the current bounds (sum shifted right by one).
311 mp
= (int)((lp
+up
) >> 1);
// j holds the three-way comparison of sw against the midpoint entry.
312 j
= strcmp(sw
,list
[mp
]);
// Crossed bounds: nothing left to examine.
320 if (lp
> up
) return -1;
// Returns the stored encoding string when it is set. The pointer returned is
// the member itself, not a copy.
// NOTE(review): the return for the unset case is not present in this listing.
325 char * MyThes::get_th_encoding()
327 if (encoding
) return encoding
;
332 // string duplication routine
// Allocates a buffer sized for a copy of the NUL-terminated string p.
// NOTE(review): the copy itself and the return statement are not present in
// this listing.
333 char * MyThes::mystrdup(const char * p
)
// sl counts the characters of p plus one for the terminating NUL.
335 int sl
= strlen(p
) + 1;
336 char * d
= (char *)malloc(sl
);
344 // remove cross-platform text line end characters
// Strips trailing line-end characters from s in place.
// NOTE(review): the declaration of k is not present in this listing -
// presumably the length of s; confirm in the full file.
345 void MyThes::mychomp(char * s
)
// If the last character is '\r' or '\n', overwrite it with NUL.
348 if ((k
> 0) && ((*(s
+k
-1)=='\r') || (*(s
+k
-1)=='\n'))) *(s
+k
-1) = '\0';
// If the character before that is '\r' (as in a "\r\n" ending), cut there as
// well. As written, this test does not depend on whether the previous one
// matched.
349 if ((k
> 1) && (*(s
+k
-2) == '\r')) *(s
+k
-2) = '\0';
353 // return index of char in string
// Finds the first occurrence of character c in the NUL-terminated string d.
// Returns the zero-based offset of that character when it is found.
// NOTE(review): the not-found return is not present in this listing.
354 int MyThes::mystr_indexOfChar(const char * d
, int c
)
// The cast removes const only to match the strchr call; d is not modified here.
356 char * p
= strchr((char *)d
,c
);
// Pointer difference gives the index of the match.
357 if (p
) return (int)(p
-d
);