Merge branch 'git-describe'
[TortoiseGit.git] / ext / hunspell / mythes.cxx
blobbd6a5403ee0e612f859439415ad9ee49e0aaec50
1 #include <stdio.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <errno.h>
6 #include "mythes.hxx"
10 MyThes::MyThes(const char* idxpath, const char * datpath)
12 nw = 0;
13 encoding = NULL;
14 list = NULL;
15 offst = NULL;
17 if (thInitialize(idxpath, datpath) != 1) {
18 fprintf(stderr,"Error - can't open %s or %s\n",idxpath, datpath);
19 fflush(stderr);
20 if (encoding) free((void*)encoding);
21 if (list) free((void*)list);
22 if (offst) free((void*)offst);
23 // did not initialize properly - throw exception?
28 MyThes::~MyThes()
30 if (thCleanup() != 1) {
31 /* did not cleanup properly - throw exception? */
33 if (encoding) free((void*)encoding);
34 encoding = NULL;
35 list = NULL;
36 offst = NULL;
40 int MyThes::thInitialize(const char* idxpath, const char* datpath)
43 // open the index file
44 FILE * pifile = fopen(idxpath,"r");
45 if (!pifile) {
46 return 0;
49 char * wrd = (char *)calloc(1, MAX_WD_LEN);
51 // parse in encoding and index size */
52 int len = readLine(pifile,wrd,MAX_WD_LEN);
53 encoding = mystrdup(wrd);
54 len = readLine(pifile,wrd,MAX_WD_LEN);
55 int idxsz = atoi(wrd);
58 // now allocate list, offst for the given size
59 list = (char**)calloc(idxsz,sizeof(char*));
60 offst = (unsigned int*) calloc(idxsz,sizeof(unsigned int));
62 if ( (!(list)) || (!(offst)) ) {
63 fprintf(stderr,"Error - bad memory allocation\n");
64 fflush(stderr);
65 free((void *)wrd);
66 fclose(pifile);
67 return 0;
70 // now parse the remaining lines of the index
71 len = readLine(pifile,wrd,MAX_WD_LEN);
72 while (len > 0)
74 int np = mystr_indexOfChar(wrd,'|');
75 if (nw < idxsz) {
76 if (np >= 0) {
77 *(wrd+np) = '\0';
78 list[nw] = (char *)calloc(1,(np+1));
79 memcpy((list[nw]),wrd,np);
80 offst[nw] = atoi(wrd+np+1);
81 nw++;
84 len = readLine(pifile,wrd,MAX_WD_LEN);
87 free((void *)wrd);
88 fclose(pifile);
89 pifile=NULL;
91 /* next open the data file */
92 pdfile = fopen(datpath,"r");
93 return pdfile ? 1 : 0;
97 int MyThes::thCleanup()
99 /* first close the data file */
100 if (pdfile) {
101 fclose(pdfile);
102 pdfile=NULL;
105 /* now free up all the allocated strings on the list */
106 for (int i=0; i < nw; i++)
108 if (list[i]) {
109 free(list[i]);
110 list[i] = 0;
114 if (list) free((void*)list);
115 if (offst) free((void*)offst);
117 nw = 0;
118 return 1;
123 // lookup text in index and count of meanings and a list of meaning entries
124 // with each entry having a synonym count and pointer to an
125 // array of char * (i.e the synonyms)
127 // note: calling routine should call CleanUpAfterLookup with the original
128 // meaning point and count to properly deallocate memory
130 int MyThes::Lookup(const char * pText, int len, mentry** pme)
133 *pme = NULL;
135 // handle the case of missing file or file related errors
136 if (! pdfile) return 0;
138 long offset = 0;
140 /* copy search word and make sure null terminated */
141 char * wrd = (char *) calloc(1,(len+1));
142 memcpy(wrd,pText,len);
144 /* find it in the list */
145 int idx = binsearch(wrd,list,nw);
146 free(wrd);
147 if (idx < 0) return 0;
149 // now seek to the offset
150 offset = (long) offst[idx];
151 int rc = fseek(pdfile,offset,SEEK_SET);
152 if (rc) {
153 return 0;
156 // grab the count of the number of meanings
157 // and allocate a list of meaning entries
158 char * buf = NULL;
159 buf = (char *) malloc( MAX_LN_LEN );
160 if (!buf) return 0;
161 readLine(pdfile, buf, (MAX_LN_LEN-1));
162 int np = mystr_indexOfChar(buf,'|');
163 if (np < 0) {
164 free(buf);
165 return 0;
167 int nmeanings = atoi(buf+np+1);
168 *pme = (mentry*) malloc( nmeanings * sizeof(mentry) );
169 if (!(*pme)) {
170 free(buf);
171 return 0;
174 // now read in each meaning and parse it to get defn, count and synonym lists
175 mentry* pm = *(pme);
176 char dfn[MAX_WD_LEN];
178 for (int j = 0; j < nmeanings; j++) {
179 readLine(pdfile, buf, (MAX_LN_LEN-1));
181 pm->count = 0;
182 pm->psyns = NULL;
183 pm->defn = NULL;
185 // store away the part of speech for later use
186 char * p = buf;
187 char * pos = NULL;
188 np = mystr_indexOfChar(p,'|');
189 if (np >= 0) {
190 *(buf+np) = '\0';
191 pos = mystrdup(p);
192 p = p + np + 1;
193 } else {
194 pos = mystrdup("");
197 // count the number of fields in the remaining line
198 int nf = 1;
199 char * d = p;
200 np = mystr_indexOfChar(d,'|');
201 while ( np >= 0 ) {
202 nf++;
203 d = d + np + 1;
204 np = mystr_indexOfChar(d,'|');
206 pm->count = nf;
207 pm->psyns = (char **) malloc(nf*sizeof(char*));
209 // fill in the synonym list
210 d = p;
211 for (int j = 0; j < nf; j++) {
212 np = mystr_indexOfChar(d,'|');
213 if (np > 0) {
214 *(d+np) = '\0';
215 pm->psyns[j] = mystrdup(d);
216 d = d + np + 1;
217 } else {
218 pm->psyns[j] = mystrdup(d);
222 // add pos to first synonym to create the definition
223 int k = strlen(pos);
224 int m = strlen(pm->psyns[0]);
225 if ((k+m) < (MAX_WD_LEN - 1)) {
226 strncpy(dfn,pos,k);
227 *(dfn+k) = ' ';
228 strncpy((dfn+k+1),(pm->psyns[0]),m+1);
229 pm->defn = mystrdup(dfn);
230 } else {
231 pm->defn = mystrdup(pm->psyns[0]);
233 free(pos);
234 pm++;
237 free(buf);
239 return nmeanings;
244 void MyThes::CleanUpAfterLookup(mentry ** pme, int nmeanings)
247 if (nmeanings == 0) return;
248 if ((*pme) == NULL) return;
250 mentry * pm = *pme;
252 for (int i = 0; i < nmeanings; i++) {
253 int count = pm->count;
254 for (int j = 0; j < count; j++) {
255 if (pm->psyns[j]) free(pm->psyns[j]);
256 pm->psyns[j] = NULL;
258 if (pm->psyns) free(pm->psyns);
259 pm->psyns = NULL;
260 if (pm->defn) free(pm->defn);
261 pm->defn = NULL;
262 pm->count = 0;
263 pm++;
265 pm = *pme;
266 free(pm);
267 *pme = NULL;
268 return;
272 // read a line of text from a text file stripping
273 // off the line terminator and replacing it with
274 // a null string terminator.
275 // returns: -1 on error or the number of characters in
276 // in the returning string
278 // A maximum of nc characters will be returned
280 int MyThes::readLine(FILE * pf, char * buf, int nc)
283 if (fgets(buf,nc,pf)) {
284 mychomp(buf);
285 return strlen(buf);
287 return -1;
292 // performs a binary search on null terminated character
293 // strings
295 // returns: -1 on not found
296 // index of wrd in the list[]
298 int MyThes::binsearch(char * sw, char* list[], int nlst)
300 int lp, up, mp, j, indx;
301 lp = 0;
302 up = nlst-1;
303 indx = -1;
304 if (strcmp(sw,list[lp]) < 0) return -1;
305 if (strcmp(sw,list[up]) > 0) return -1;
306 while (indx < 0 ) {
307 mp = (int)((lp+up) >> 1);
308 j = strcmp(sw,list[mp]);
309 if ( j > 0) {
310 lp = mp + 1;
311 } else if (j < 0 ) {
312 up = mp - 1;
313 } else {
314 indx = mp;
316 if (lp > up) return -1;
318 return indx;
321 char * MyThes::get_th_encoding()
323 if (encoding) return encoding;
324 return NULL;
328 // string duplication routine
329 char * MyThes::mystrdup(const char * p)
331 int sl = strlen(p) + 1;
332 char * d = (char *)malloc(sl);
333 if (d) {
334 memcpy(d,p,sl);
335 return d;
337 return NULL;
340 // remove cross-platform text line end characters
341 void MyThes::mychomp(char * s)
343 int k = strlen(s);
344 if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
345 if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
349 // return index of char in string
350 int MyThes::mystr_indexOfChar(const char * d, int c)
352 char * p = strchr((char *)d,c);
353 if (p) return (int)(p-d);
354 return -1;