// Constructor. Forwards idxpath and datpath to thInitialize and handles
// the case where initialization does not succeed.
//   idxpath - path passed to thInitialize as the index file
//   datpath - path passed to thInitialize as the data file
// NOTE(review): parts of the body are not visible in this excerpt
// (any member setup before the call, closing braces).
10 MyThes::MyThes(const char* idxpath
, const char * datpath
)
// thInitialize reports success by returning 1; any other value is treated
// as a failure here.
17 if (thInitialize(idxpath
, datpath
) != 1) {
// Report the failure on stderr, naming both paths.
18 fprintf(stderr
,"Error - can't open %s or %s\n",idxpath
, datpath
);
// Release whichever of encoding, list and offst are non-null.
// NOTE(review): the pointers are not reset after free in the visible lines;
// confirm that no later cleanup path frees them a second time.
20 if (encoding
) free((void*)encoding
);
21 if (list
) free((void*)list
);
22 if (offst
) free((void*)offst
);
23 // did not initialize properly - throw exception?
// NOTE(review): the header of the enclosing function is not visible in this
// excerpt; presumably this is the destructor - confirm.
// Calls thCleanup and treats any return value other than 1 as a failed
// cleanup (no action is visible for that case beyond the comment below).
30 if (thCleanup() != 1) {
31 /* did not cleanup properly - throw exception? */
// Free the encoding string when it is non-null.
33 if (encoding
) free((void*)encoding
);
// Reads the index file at idxpath into the encoding, list and offst
// members, then opens the data file at datpath.
//   idxpath - path of the index file, opened for reading
//   datpath - path of the data file, opened for reading
// Returns 1 when the data file could be opened and 0 otherwise (see the
// final return). Earlier exit paths, if any, are not visible in this excerpt.
40 int MyThes::thInitialize(const char* idxpath
, const char* datpath
)
43 // open the index file
// NOTE(review): a check of the fopen result is not visible in this excerpt;
// confirm pifile is validated before it is read from.
44 FILE * pifile
= fopen(idxpath
,"r");
// Scratch line buffer of MAX_WD_LEN bytes, zero-filled by calloc.
49 char * wrd
= (char *)calloc(1, MAX_WD_LEN
);
51 // parse in encoding and index size
// First line of the index: stored (as a copy made by mystrdup) in encoding.
52 int len
= readLine(pifile
,wrd
,MAX_WD_LEN
);
53 encoding
= mystrdup(wrd
);
// Second line of the index: converted with atoi and used as the entry count.
// NOTE(review): atoi gives no error indication and no range check on idxsz
// is visible here; confirm a negative or huge value is handled.
54 len
= readLine(pifile
,wrd
,MAX_WD_LEN
);
55 int idxsz
= atoi(wrd
);
58 // now allocate list, offst for the given size
// Both arrays get idxsz zero-initialized slots.
59 list
= (char**)calloc(idxsz
,sizeof(char*));
60 offst
= (unsigned int*) calloc(idxsz
,sizeof(unsigned int));
// Either allocation failing is reported on stderr.
62 if ( (!(list
)) || (!(offst
)) ) {
63 fprintf(stderr
,"Error - bad memory allocation\n");
70 // now parse the remaining lines of the index
71 len
= readLine(pifile
,wrd
,MAX_WD_LEN
);
// np is the position of the first | separator in the line.
// NOTE(review): what happens when the line has no | is not visible here;
// confirm np is checked before it is used as a length below.
74 int np
= mystr_indexOfChar(wrd
,'|');
// Store the text before the separator in list[nw]. The buffer is np+1
// zero-filled bytes and only np bytes are copied, so it stays terminated.
78 list
[nw
] = (char *)calloc(1,(np
+1));
79 memcpy((list
[nw
]),wrd
,np
);
// The text after the separator is converted with atoi and stored in offst[nw].
80 offst
[nw
] = atoi(wrd
+np
+1);
// Read the next index line.
84 len
= readLine(pifile
,wrd
,MAX_WD_LEN
);
91 /* next open the data file */
92 pdfile
= fopen(datpath
,"r");
// Success is defined solely by whether the data file was opened.
93 return pdfile
? 1 : 0;
// Releases resources held by the object. The visible lines iterate i over
// the range 0 to nw-1 and then free list and offst when they are non-null.
// NOTE(review): the statements that close the data file, the loop body and
// the return statement are not visible in this excerpt.
97 int MyThes::thCleanup()
99 /* first close the data file */
105 /* now free up all the allocated strings on the list */
106 for (int i
=0; i
< nw
; i
++)
// Free the two arrays themselves.
114 if (list
) free((void*)list
);
115 if (offst
) free((void*)offst
);
123 // lookup text in index and count of meanings and a list of meaning entries
124 // with each entry having a synonym count and pointer to an
125 // array of char * (i.e. the synonyms)
127 // note: calling routine should call CleanUpAfterLookup with the original
128 // meaning pointer and count to properly deallocate memory
//
//   pText - text to look up; only the first len bytes are used
//   len   - number of bytes of pText to copy into the search word
//   pme   - out parameter; *pme receives the allocated array of mentry
// Visible early exits return 0 (data file not open, word not found).
// NOTE(review): the final return and several declarations (offset, buf,
// p, d, k, nf, pm) are not visible in this excerpt.
130 int MyThes::Lookup(const char * pText
, int len
, mentry
** pme
)
135 // handle the case of missing file or file related errors
136 if (! pdfile
) return 0;
140 /* copy search word and make sure null terminated */
// The buffer is len+1 zero-filled bytes and len bytes are copied into it.
141 char * wrd
= (char *) calloc(1,(len
+1));
142 memcpy(wrd
,pText
,len
);
144 /* find it in the list */
145 int idx
= binsearch(wrd
,list
,nw
);
// A negative index means the word is not in the index.
// NOTE(review): confirm wrd is released before this return; the release
// is not visible in this excerpt.
147 if (idx
< 0) return 0;
149 // now seek to the offset
150 offset
= (long) offst
[idx
];
151 int rc
= fseek(pdfile
,offset
,SEEK_SET
);
156 // grab the count of the number of meanings
157 // and allocate a list of meaning entries
// Line buffer of MAX_LN_LEN bytes; reads below are limited to MAX_LN_LEN-1.
159 buf
= (char *) malloc( MAX_LN_LEN
);
161 readLine(pdfile
, buf
, (MAX_LN_LEN
-1));
// The meaning count is the number that follows the first | on the line.
162 int np
= mystr_indexOfChar(buf
,'|');
167 int nmeanings
= atoi(buf
+np
+1);
// NOTE(review): nmeanings comes from the file through atoi; no validation
// is visible before it is used as an allocation size - confirm.
168 *pme
= (mentry
*) malloc( nmeanings
* sizeof(mentry
) );
174 // now read in each meaning and parse it to get defn, count and synonym lists
// Scratch buffer used below to assemble the definition string.
176 char dfn
[MAX_WD_LEN
];
// One data-file line is read per meaning.
178 for (int j
= 0; j
< nmeanings
; j
++) {
179 readLine(pdfile
, buf
, (MAX_LN_LEN
-1));
185 // store away the part of speech for later use
188 np
= mystr_indexOfChar(p
,'|');
197 // count the number of fields in the remaining line
200 np
= mystr_indexOfChar(d
,'|');
204 np
= mystr_indexOfChar(d
,'|');
// Array of nf string pointers for this meaning.
207 pm
->psyns
= (char **) malloc(nf
*sizeof(char*));
209 // fill in the synonym list
// NOTE(review): this loop reuses the name j; if it is nested inside the
// per-meaning loop above it shadows that loop variable - confirm intent.
211 for (int j
= 0; j
< nf
; j
++) {
212 np
= mystr_indexOfChar(d
,'|');
// Each synonym is stored as a copy made by mystrdup.
// NOTE(review): the two assignments below are presumably alternative
// branches (separator found or not); the branching is not visible here.
215 pm
->psyns
[j
] = mystrdup(d
);
218 pm
->psyns
[j
] = mystrdup(d
);
222 // add pos to first synonym to create the definition
224 int m
= strlen(pm
->psyns
[0]);
// Only append the first synonym to dfn when the combined length fits in
// the MAX_WD_LEN buffer; m+1 bytes are copied so the terminator is included.
225 if ((k
+m
) < (MAX_WD_LEN
- 1)) {
228 strncpy((dfn
+k
+1),(pm
->psyns
[0]),m
+1);
229 pm
->defn
= mystrdup(dfn
);
// NOTE(review): presumably the fallback branch when the text does not fit;
// the definition is then a copy of the first synonym alone.
231 pm
->defn
= mystrdup(pm
->psyns
[0]);
// Frees the memory attached to the meaning entries produced by a lookup.
//   pme       - pointer to the array of mentry to release
//   nmeanings - number of entries in that array
// NOTE(review): the declaration and advancing of pm, and the release of
// the array itself, are not visible in this excerpt.
244 void MyThes::CleanUpAfterLookup(mentry
** pme
, int nmeanings
)
// Nothing to do for an empty result or a null array.
247 if (nmeanings
== 0) return;
248 if ((*pme
) == NULL
) return;
252 for (int i
= 0; i
< nmeanings
; i
++) {
// count gives the number of synonym strings held by this entry.
253 int count
= pm
->count
;
254 for (int j
= 0; j
< count
; j
++) {
// Free each non-null synonym string.
255 if (pm
->psyns
[j
]) free(pm
->psyns
[j
]);
// Then the synonym pointer array and the definition string.
258 if (pm
->psyns
) free(pm
->psyns
);
260 if (pm
->defn
) free(pm
->defn
);
272 // read a line of text from a text file stripping
273 // off the line terminator and replacing it with
274 // a null string terminator.
275 // returns: -1 on error or the number of characters in
276 // the returned string
278 // A maximum of nc characters will be returned
//
//   pf  - stream to read from
//   buf - destination buffer
//   nc  - size limit passed to fgets
// NOTE(review): fgets stores at most nc-1 characters plus the terminator,
// so buf must hold at least nc bytes. The rest of the body (terminator
// stripping and the return values) is not visible in this excerpt.
280 int MyThes::readLine(FILE * pf
, char * buf
, int nc
)
// fgets returns a null pointer on end of file or error.
283 if (fgets(buf
,nc
,pf
)) {
292 // performs a binary search on null terminated character strings
295 // returns: -1 on not found
296 // index of wrd in the list[]
//
//   sw   - string to search for
//   list - array of strings, compared with strcmp
//   nlst - number of entries in list (its use is not visible in this excerpt)
// NOTE(review): the initialisation of lp and up, the loop structure and the
// found-case return are not visible in this excerpt. The comparisons below
// only make sense if list is sorted in strcmp order - confirm with callers.
298 int MyThes::binsearch(char * sw
, char* list
[], int nlst
)
300 int lp
, up
, mp
, j
, indx
;
// Reject early when sw sorts before the entry at lp or after the entry at up.
304 if (strcmp(sw
,list
[lp
]) < 0) return -1;
305 if (strcmp(sw
,list
[up
]) > 0) return -1;
// Midpoint of the current range (sum halved with a right shift).
307 mp
= (int)((lp
+up
) >> 1);
// j holds the strcmp result of sw against the midpoint entry.
308 j
= strcmp(sw
,list
[mp
]);
// The range is exhausted once the bounds cross.
316 if (lp
> up
) return -1;
// Returns the encoding member when it is non-null.
// NOTE(review): the value returned when encoding is null is not visible
// in this excerpt. The pointer returned is the member itself, not a copy.
321 char * MyThes::get_th_encoding()
323 if (encoding
) return encoding
;
328 // string duplication routine
//   p - null terminated string to duplicate
// NOTE(review): the copy into d and the return statement are not visible
// in this excerpt; callers in this file release the result with free.
329 char * MyThes::mystrdup(const char * p
)
// sl counts the terminator as well as the characters.
331 int sl
= strlen(p
) + 1;
// malloc may return a null pointer; confirm the invisible lines check d.
332 char * d
= (char *)malloc(sl
);
340 // remove cross-platform text line end characters
//   s - string modified in place
// NOTE(review): k is not declared in the visible lines; presumably it is
// the length of s - confirm.
341 void MyThes::mychomp(char * s
)
// If the last character is CR or LF, overwrite it with the terminator.
344 if ((k
> 0) && ((*(s
+k
-1)=='\r') || (*(s
+k
-1)=='\n'))) *(s
+k
-1) = '\0';
// If the character before it is CR (a CR LF pair), overwrite that as well.
345 if ((k
> 1) && (*(s
+k
-2) == '\r')) *(s
+k
-2) = '\0';
349 // return index of char in string
//   d - null terminated string to search
//   c - character to look for
// Returns the zero-based offset of the first occurrence of c in d.
// NOTE(review): the value returned when c is absent is not visible in this
// excerpt; confirm it before relying on the result as a length.
350 int MyThes::mystr_indexOfChar(const char * d
, int c
)
// strchr returns a pointer to the first match or a null pointer.
352 char * p
= strchr((char *)d
,c
);
// The pointer difference is the index of the match.
353 if (p
) return (int)(p
-d
);