optical cleanup
[TortoiseGit.git] / ext / hunspell / mythes.cxx
blob081d778c95bd3ae304da7e58e46e6e343e26ca5a
1 #include <stdio.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <errno.h>
6 #include "mythes.hxx"
10 MyThes::MyThes(const char* idxpath, const char * datpath)
12 nw = 0;
13 encoding = NULL;
14 list = NULL;
15 offst = NULL;
17 if (thInitialize(idxpath, datpath) != 1) {
18 fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
19 fflush(stderr);
20 if (encoding) free((void*)encoding);
21 if (list) free((void*)list);
22 if (offst) free((void*)offst);
23 // did not initialize properly - throw exception?
28 MyThes::~MyThes()
30 if (thCleanup() != 1) {
31 /* did not cleanup properly - throw exception? */
33 if (encoding) free((void*)encoding);
34 encoding = NULL;
35 list = NULL;
36 offst = NULL;
40 int MyThes::thInitialize(const char* idxpath, const char* datpath)
43 // open the index file
44 FILE * pifile = fopen(idxpath,"r");
45 if (!pifile) {
46 pifile = NULL;
47 return 0;
50 // parse in encoding and index size */
51 char * wrd;
52 wrd = (char *)calloc(1, MAX_WD_LEN);
53 int len = readLine(pifile,wrd,MAX_WD_LEN);
54 encoding = mystrdup(wrd);
55 len = readLine(pifile,wrd,MAX_WD_LEN);
56 int idxsz = atoi(wrd);
59 // now allocate list, offst for the given size
60 list = (char**) calloc(idxsz,sizeof(char*));
61 offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
63 if ( (!(list)) || (!(offst)) ) {
64 fprintf(stderr,"Error - bad memory allocation\n");
65 fflush(stderr);
66 return 0;
69 // now parse the remaining lines of the index
70 len = readLine(pifile,wrd,MAX_WD_LEN);
71 while (len > 0)
73 int np = mystr_indexOfChar(wrd,'|');
74 if (nw < idxsz) {
75 if (np >= 0) {
76 *(wrd+np) = '\0';
77 list[nw] = (char *)calloc(1,(np+1));
78 memcpy((list[nw]),wrd,np);
79 offst[nw] = atoi(wrd+np+1);
80 nw++;
83 len = readLine(pifile,wrd,MAX_WD_LEN);
86 free((void *)wrd);
87 fclose(pifile);
88 pifile=NULL;
90 /* next open the data file */
91 pdfile = fopen(datpath,"r");
92 if (!pdfile) {
93 pdfile = NULL;
94 return 0;
97 return 1;
101 int MyThes::thCleanup()
103 /* first close the data file */
104 if (pdfile) {
105 fclose(pdfile);
106 pdfile=NULL;
109 /* now free up all the allocated strings on the list */
110 for (int i=0; i < nw; i++)
112 if (list[i]) {
113 free(list[i]);
114 list[i] = 0;
118 if (list) free((void*)list);
119 if (offst) free((void*)offst);
121 nw = 0;
122 return 1;
127 // lookup text in index and count of meanings and a list of meaning entries
128 // with each entry having a synonym count and pointer to an
129 // array of char * (i.e the synonyms)
131 // note: calling routine should call CleanUpAfterLookup with the original
132 // meaning point and count to properly deallocate memory
134 int MyThes::Lookup(const char * pText, int len, mentry** pme)
137 *pme = NULL;
139 // handle the case of missing file or file related errors
140 if (! pdfile) return 0;
142 long offset = 0;
144 /* copy search word and make sure null terminated */
145 char * wrd = (char *) calloc(1,(len+1));
146 memcpy(wrd,pText,len);
148 /* find it in the list */
149 int idx = binsearch(wrd,list,nw);
150 free(wrd);
151 if (idx < 0) return 0;
153 // now seek to the offset
154 offset = (long) offst[idx];
155 int rc = fseek(pdfile,offset,SEEK_SET);
156 if (rc) {
157 return 0;
160 // grab the count of the number of meanings
161 // and allocate a list of meaning entries
162 char * buf = NULL;
163 buf = (char *) malloc( MAX_LN_LEN );
164 if (!buf) return 0;
165 readLine(pdfile, buf, (MAX_LN_LEN-1));
166 int np = mystr_indexOfChar(buf,'|');
167 if (np < 0) {
168 free(buf);
169 return 0;
171 int nmeanings = atoi(buf+np+1);
172 *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
173 if (!(*pme)) {
174 free(buf);
175 return 0;
178 // now read in each meaning and parse it to get defn, count and synonym lists
179 mentry* pm = *(pme);
180 char dfn[MAX_WD_LEN];
182 for (int j = 0; j < nmeanings; j++) {
183 readLine(pdfile, buf, (MAX_LN_LEN-1));
185 pm->count = 0;
186 pm->psyns = NULL;
187 pm->defn = NULL;
189 // store away the part of speech for later use
190 char * p = buf;
191 char * pos = NULL;
192 np = mystr_indexOfChar(p,'|');
193 if (np >= 0) {
194 *(buf+np) = '\0';
195 pos = mystrdup(p);
196 p = p + np + 1;
197 } else {
198 pos = mystrdup("");
201 // count the number of fields in the remaining line
202 int nf = 1;
203 char * d = p;
204 np = mystr_indexOfChar(d,'|');
205 while ( np >= 0 ) {
206 nf++;
207 d = d + np + 1;
208 np = mystr_indexOfChar(d,'|');
210 pm->count = nf;
211 pm->psyns = (char **) malloc(nf*sizeof(char*));
213 // fill in the synonym list
214 d = p;
215 for (int j = 0; j < nf; j++) {
216 np = mystr_indexOfChar(d,'|');
217 if (np > 0) {
218 *(d+np) = '\0';
219 pm->psyns[j] = mystrdup(d);
220 d = d + np + 1;
221 } else {
222 pm->psyns[j] = mystrdup(d);
226 // add pos to first synonym to create the definition
227 int k = strlen(pos);
228 int m = strlen(pm->psyns[0]);
229 if ((k+m) < (MAX_WD_LEN - 1)) {
230 strncpy(dfn,pos,k);
231 *(dfn+k) = ' ';
232 strncpy((dfn+k+1),(pm->psyns[0]),m+1);
233 pm->defn = mystrdup(dfn);
234 } else {
235 pm->defn = mystrdup(pm->psyns[0]);
237 free(pos);
238 pm++;
241 free(buf);
243 return nmeanings;
248 void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
251 if (nmeanings == 0) return;
252 if ((*pme) == NULL) return;
254 mentry * pm = *pme;
256 for (int i = 0; i < nmeanings; i++) {
257 int count = pm->count;
258 for (int j = 0; j < count; j++) {
259 if (pm->psyns[j]) free(pm->psyns[j]);
260 pm->psyns[j] = NULL;
262 if (pm->psyns) free(pm->psyns);
263 pm->psyns = NULL;
264 if (pm->defn) free(pm->defn);
265 pm->defn = NULL;
266 pm->count = 0;
267 pm++;
269 pm = *pme;
270 free(pm);
271 *pme = NULL;
272 return;
276 // read a line of text from a text file stripping
277 // off the line terminator and replacing it with
278 // a null string terminator.
279 // returns: -1 on error or the number of characters in
280 // in the returning string
282 // A maximum of nc characters will be returned
284 int MyThes::readLine(FILE * pf, char * buf, int nc)
287 if (fgets(buf,nc,pf)) {
288 mychomp(buf);
289 return strlen(buf);
291 return -1;
296 // performs a binary search on null terminated character
297 // strings
299 // returns: -1 on not found
300 // index of wrd in the list[]
302 int MyThes::binsearch(char * sw, char* list[], int nlst)
304 int lp, up, mp, j, indx;
305 lp = 0;
306 up = nlst-1;
307 indx = -1;
308 if (strcmp(sw,list[lp]) < 0) return -1;
309 if (strcmp(sw,list[up]) > 0) return -1;
310 while (indx < 0 ) {
311 mp = (int)((lp+up) >> 1);
312 j = strcmp(sw,list[mp]);
313 if ( j > 0) {
314 lp = mp + 1;
315 } else if (j < 0 ) {
316 up = mp - 1;
317 } else {
318 indx = mp;
320 if (lp > up) return -1;
322 return indx;
325 char * MyThes::get_th_encoding()
327 if (encoding) return encoding;
328 return NULL;
332 // string duplication routine
333 char * MyThes::mystrdup(const char * p)
335 int sl = strlen(p) + 1;
336 char * d = (char *)malloc(sl);
337 if (d) {
338 memcpy(d,p,sl);
339 return d;
341 return NULL;
344 // remove cross-platform text line end characters
345 void MyThes::mychomp(char * s)
347 int k = strlen(s);
348 if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
349 if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
353 // return index of char in string
354 int MyThes::mystr_indexOfChar(const char * d, int c)
356 char * p = strchr((char *)d,c);
357 if (p) return (int)(p-d);
358 return -1;