pulled latest translations from Transifex
[TortoiseGit.git] / ext / hunspell / hunspell.cxx
blob9aab3480e97304865b84f41f50115268411300ca
1 #include "license.hunspell"
2 #include "license.myspell"
4 #ifndef MOZILLA_CLIENT
5 #include <cstdlib>
6 #include <cstring>
7 #include <cstdio>
8 #else
9 #include <stdlib.h>
10 #include <string.h>
11 #include <stdio.h>
12 #endif
14 #include "hunspell.hxx"
15 #include "hunspell.h"
17 #ifndef MOZILLA_CLIENT
18 #ifndef W32
19 using namespace std;
20 #endif
21 #endif
23 Hunspell::Hunspell(const char * affpath, const char * dpath)
25 encoding = NULL;
26 csconv = NULL;
27 utf8 = 0;
28 complexprefixes = 0;
30 /* first set up the hash manager */
31 pHMgr = new HashMgr(dpath, affpath);
33 /* next set up the affix manager */
34 /* it needs access to the hash manager lookup methods */
35 pAMgr = new AffixMgr(affpath,pHMgr);
37 /* get the preferred try string and the dictionary */
38 /* encoding from the Affix Manager for that dictionary */
39 char * try_string = pAMgr->get_try_string();
40 encoding = pAMgr->get_encoding();
41 csconv = get_current_cs(encoding);
42 langnum = pAMgr->get_langnum();
43 utf8 = pAMgr->get_utf8();
44 complexprefixes = pAMgr->get_complexprefixes();
45 wordbreak = pAMgr->get_breaktable();
47 /* and finally set up the suggestion manager */
48 pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
49 if (try_string) free(try_string);
53 Hunspell::~Hunspell()
55 if (pSMgr) delete pSMgr;
56 if (pAMgr) delete pAMgr;
57 if (pHMgr) delete pHMgr;
58 pSMgr = NULL;
59 pAMgr = NULL;
60 pHMgr = NULL;
61 csconv= NULL;
62 if (encoding) free(encoding);
63 encoding = NULL;
67 // make a copy of src at destination while removing all leading
68 // blanks and removing any trailing periods after recording
69 // their presence with the abbreviation flag
70 // also since already going through character by character,
71 // set the capitalization type
72 // return the length of the "cleaned" (and UTF-8 encoded) word
74 int Hunspell::cleanword2(char * dest, const char * src,
75 w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
77 unsigned char * p = (unsigned char *) dest;
78 const unsigned char * q = (const unsigned char * ) src;
79 int firstcap = 0;
81 // first skip over any leading blanks
82 while ((*q != '\0') && (*q == ' ')) q++;
84 // now strip off any trailing periods (recording their presence)
85 *pabbrev = 0;
86 int nl = strlen((const char *)q);
87 while ((nl > 0) && (*(q+nl-1)=='.')) {
88 nl--;
89 (*pabbrev)++;
92 // if no characters are left it can't be capitalized
93 if (nl <= 0) {
94 *pcaptype = NOCAP;
95 *p = '\0';
96 return 0;
99 // now determine the capitalization type of the first nl letters
100 int ncap = 0;
101 int nneutral = 0;
102 *nc = 0;
104 if (!utf8) {
105 while (nl > 0) {
106 (*nc)++;
107 if (csconv[(*q)].ccase) ncap++;
108 if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
109 *p++ = *q++;
110 nl--;
112 // remember to terminate the destination string
113 *p = '\0';
114 if (ncap) {
115 firstcap = csconv[(unsigned char)(*dest)].ccase;
117 } else {
118 unsigned short idx;
119 *nc = u8_u16(dest_utf, MAXWORDLEN, (const char *) q);
120 // don't check too long words
121 if (*nc >= MAXWORDLEN) return 0;
122 if (*nc == -1) { // big Unicode character (non BMP area)
123 *pcaptype = NOCAP;
124 strcpy((char *) p, (char *) q);
125 return strlen(dest);
127 *nc -= *pabbrev;
128 for (int i = 0; i < *nc; i++) {
129 idx = (dest_utf[i].h << 8) + dest_utf[i].l;
130 if (idx != unicodetolower(idx, langnum)) ncap++;
131 if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) nneutral++;
133 u16_u8(dest, MAXWORDUTF8LEN, dest_utf, *nc);
134 if (ncap) {
135 idx = (dest_utf[0].h << 8) + dest_utf[0].l;
136 firstcap = (idx != unicodetolower(idx, langnum));
140 // now finally set the captype
141 if (ncap == 0) {
142 *pcaptype = NOCAP;
143 } else if ((ncap == 1) && firstcap) {
144 *pcaptype = INITCAP;
145 } else if ((ncap == *nc) || ((ncap + nneutral) == *nc)) {
146 *pcaptype = ALLCAP;
147 } else if ((ncap > 1) && firstcap) {
148 *pcaptype = HUHINITCAP;
149 } else {
150 *pcaptype = HUHCAP;
152 return strlen(dest);
155 int Hunspell::cleanword(char * dest, const char * src,
156 int * pcaptype, int * pabbrev)
158 unsigned char * p = (unsigned char *) dest;
159 const unsigned char * q = (const unsigned char * ) src;
160 int firstcap = 0;
162 // first skip over any leading blanks
163 while ((*q != '\0') && (*q == ' ')) q++;
165 // now strip off any trailing periods (recording their presence)
166 *pabbrev = 0;
167 int nl = strlen((const char *)q);
168 while ((nl > 0) && (*(q+nl-1)=='.')) {
169 nl--;
170 (*pabbrev)++;
173 // if no characters are left it can't be capitalized
174 if (nl <= 0) {
175 *pcaptype = NOCAP;
176 *p = '\0';
177 return 0;
180 // now determine the capitalization type of the first nl letters
181 int ncap = 0;
182 int nneutral = 0;
183 int nc = 0;
185 if (!utf8) {
186 while (nl > 0) {
187 nc++;
188 if (csconv[(*q)].ccase) ncap++;
189 if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
190 *p++ = *q++;
191 nl--;
193 // remember to terminate the destination string
194 *p = '\0';
195 firstcap = csconv[(unsigned char)(*dest)].ccase;
196 } else {
197 unsigned short idx;
198 w_char t[MAXWORDLEN];
199 nc = u8_u16(t, MAXWORDLEN, src);
200 for (int i = 0; i < nc; i++) {
201 idx = (t[i].h << 8) + t[i].l;
202 if (idx != unicodetolower(idx, langnum)) ncap++;
203 if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) nneutral++;
205 u16_u8(dest, MAXWORDUTF8LEN, t, nc);
206 if (ncap) {
207 idx = (t[0].h << 8) + t[0].l;
208 firstcap = (idx != unicodetolower(idx, langnum));
212 // now finally set the captype
213 if (ncap == 0) {
214 *pcaptype = NOCAP;
215 } else if ((ncap == 1) && firstcap) {
216 *pcaptype = INITCAP;
217 } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
218 *pcaptype = ALLCAP;
219 } else if ((ncap > 1) && firstcap) {
220 *pcaptype = HUHINITCAP;
221 } else {
222 *pcaptype = HUHCAP;
224 return strlen(dest);
228 void Hunspell::mkallcap(char * p)
230 if (utf8) {
231 w_char u[MAXWORDLEN];
232 int nc = u8_u16(u, MAXWORDLEN, p);
233 unsigned short idx;
234 for (int i = 0; i < nc; i++) {
235 idx = (u[i].h << 8) + u[i].l;
236 if (idx != unicodetoupper(idx, langnum)) {
237 u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
238 u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
241 u16_u8(p, MAXWORDUTF8LEN, u, nc);
242 } else {
243 while (*p != '\0') {
244 *p = csconv[((unsigned char) *p)].cupper;
245 p++;
250 int Hunspell::mkallcap2(char * p, w_char * u, int nc)
252 if (utf8) {
253 unsigned short idx;
254 for (int i = 0; i < nc; i++) {
255 idx = (u[i].h << 8) + u[i].l;
256 if (idx != unicodetoupper(idx, langnum)) {
257 u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
258 u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
261 u16_u8(p, MAXWORDUTF8LEN, u, nc);
262 return strlen(p);
263 } else {
264 while (*p != '\0') {
265 *p = csconv[((unsigned char) *p)].cupper;
266 p++;
269 return nc;
273 void Hunspell::mkallsmall(char * p)
275 while (*p != '\0') {
276 *p = csconv[((unsigned char) *p)].clower;
277 p++;
281 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
283 if (utf8) {
284 unsigned short idx;
285 for (int i = 0; i < nc; i++) {
286 idx = (u[i].h << 8) + u[i].l;
287 if (idx != unicodetolower(idx, langnum)) {
288 u[i].h = (unsigned char) (unicodetolower(idx, langnum) >> 8);
289 u[i].l = (unsigned char) (unicodetolower(idx, langnum) & 0x00FF);
292 u16_u8(p, MAXWORDUTF8LEN, u, nc);
293 return strlen(p);
294 } else {
295 while (*p != '\0') {
296 *p = csconv[((unsigned char) *p)].clower;
297 p++;
300 return nc;
303 // convert UTF-8 sharp S codes to latin 1
304 char * Hunspell::sharps_u8_l1(char * dest, char * source) {
305 char * p = dest;
306 *p = *source;
307 for (p++, source++; *(source - 1); p++, source++) {
308 *p = *source;
309 if (*source == '?') *--p = '?';
311 return dest;
314 // recursive search for right ss-?permutations
315 hentry * Hunspell::spellsharps(char * base, char * pos, int n,
316 int repnum, char * tmp, int * info, char **root) {
317 pos = strstr(pos, "ss");
318 if (pos && (n < MAXSHARPS)) {
319 *pos = '?';
320 *(pos + 1) = '?';
321 hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
322 if (h) return h;
323 *pos = 's';
324 *(pos + 1) = 's';
325 h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
326 if (h) return h;
327 } else if (repnum > 0) {
328 if (utf8) return checkword(base, info, root);
329 return checkword(sharps_u8_l1(tmp, base), info, root);
331 return NULL;
334 int Hunspell::is_keepcase(const hentry * rv) {
335 return pAMgr && rv->astr && pAMgr->get_keepcase() &&
336 TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
339 /* check and insert a word to beginning of the suggestion array */
340 int Hunspell::insert_sug(char ***slst, char * word, int *ns) {
341 if (spell(word)) {
342 if (*ns == MAXSUGGESTION) {
343 (*ns)--;
344 free((*slst)[*ns]);
346 for (int k = *ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
347 (*slst)[0] = mystrdup(word);
348 (*ns)++;
350 return 0;
// Check the spelling of word.
// Returns 1 when the word is accepted and 0 otherwise. Words whose byte
// length reaches MAXWORDUTF8LEN (UTF-8 mode) or MAXWORDLEN are rejected
// without being examined; a word that is empty after cleanword2() is
// accepted.
//   info - when non-NULL it is reset to 0 (only after the early returns
//          above) and then handed to checkword()
//   root - when non-NULL it is reset to NULL (same caveat) and then handed
//          to checkword()
// NOTE(review): this text looks like a web-page extraction - every line
// still carries its original line number, brace-only lines are missing and
// several non-ASCII literals appear damaged. Confirm against the repository
// copy before relying on the literals flagged below.
353 int Hunspell::spell(const char * word, int * info, char ** root)
355 struct hentry * rv=NULL;
356 // need larger vector. For example, Turkish capital letter I converted a
357 // 2-byte UTF-8 character (dotless i) by mkallsmall.
// cw: cleaned word; wspace: scratch copy used for case / dot variants;
// unicw: UTF-16 form filled by cleanword2()
358 char cw[MAXWORDUTF8LEN + 4];
359 char wspace[MAXWORDUTF8LEN + 4];
360 w_char unicw[MAXWORDLEN + 1];
361 int nc = strlen(word);
362 int wl2 = 0;
363 if (utf8) {
364 if (nc >= MAXWORDUTF8LEN) return 0;
365 } else {
366 if (nc >= MAXWORDLEN) return 0;
368 int captype = 0;
369 int abbv = 0;
// after this call: wl = byte length of cw, nc = character count,
// abbv = number of trailing periods removed
370 int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
372 if (wl == 0) return 1;
374 if (info) *info = 0;
375 if (root) *root = NULL;
377 // allow numbers with dots and commas (but forbid double separators: "..", ",," etc.)
// small state machine: the scan stops at the first character that is not a
// digit or a single separator; a separator may not come first or follow
// another separator
378 enum { NBEGIN, NNUM, NSEP };
379 int nstate = NBEGIN;
380 int i;
382 for (i = 0; (i < wl); i++) {
383 if ((cw[i] <= '9') && (cw[i] >= '0')) {
384 nstate = NNUM;
385 } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
386 if ((nstate == NSEP) || (i == 0)) break;
387 nstate = NSEP;
388 } else break;
// the whole word was consumed and it ends in a digit: accept as a number
390 if ((i == wl) && (nstate == NNUM)) return 1;
392 // LANG_hu section: number(s) + (percent or degree) with suffixes
// NOTE(review): the '?' below is probably a damaged non-ASCII literal (the
// comment above mentions a degree sign) - confirm the intended byte
393 if (langnum == LANG_hu) {
394 if ((nstate == NNUM) && ((cw[i] == '%') || (cw[i] == '?'))
395 && checkword(cw + i, info, root)) return 1;
397 // END of LANG_hu section
// try the word in the forms that fit its capitalization type
399 switch(captype) {
400 case HUHCAP:
401 case HUHINITCAP:
402 case NOCAP: {
403 rv = checkword(cw, info, root);
// words that had trailing periods are retried with a single '.' appended
404 if ((abbv) && !(rv)) {
405 memcpy(wspace,cw,wl);
406 *(wspace+wl) = '.';
407 *(wspace+wl+1) = '\0';
408 rv = checkword(wspace, info, root);
410 break;
412 case ALLCAP: {
413 rv = checkword(cw, info, root);
414 if (rv) break;
415 if (abbv) {
416 memcpy(wspace,cw,wl);
417 *(wspace+wl) = '.';
418 *(wspace+wl+1) = '\0';
419 rv = checkword(wspace, info, root);
420 if (rv) break;
// with CHECKSHARPS enabled, an upper-case "SS" is also tried as sharp s in
// the all-lower-case and initial-capital forms (with and without the dot)
422 if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
423 char tmpword[MAXWORDUTF8LEN];
424 wl = mkallsmall2(cw, unicw, nc);
425 memcpy(wspace,cw,(wl+1));
426 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
427 if (!rv) {
428 wl2 = mkinitcap2(cw, unicw, nc);
429 rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
431 if ((abbv) && !(rv)) {
432 *(wspace+wl) = '.';
433 *(wspace+wl+1) = '\0';
434 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
435 if (!rv) {
436 memcpy(wspace, cw, wl2);
437 *(wspace+wl2) = '.';
438 *(wspace+wl2+1) = '\0';
439 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
442 if (rv) break;
// NOTE(review): no break is visible between the end of the ALLCAP case and
// this label, and the code below tests captype == ALLCAP, so the
// fall-through from ALLCAP appears to be intentional
445 case INITCAP: {
// first the all-lower-case form, then the initial-capital form; entries
// flagged keepcase are only accepted in the cases spelled out below
446 wl = mkallsmall2(cw, unicw, nc);
447 memcpy(wspace,cw,(wl+1));
448 rv = checkword(wspace, info, root);
449 if (!rv || (is_keepcase(rv) && !((captype == INITCAP) &&
450 // if CHECKSHARPS: KEEPCASE words with ?are allowed
451 // in INITCAP form, too.
// NOTE(review): both literals on the next two lines look mis-encoded; they
// presumably stand for sharp s in UTF-8 and in the 8-bit encoding - confirm
452 pAMgr->get_checksharps() && ((utf8 && strstr(wspace, "脽")) ||
453 (!utf8 && strchr(wspace, '?')))))) {
454 wl2 = mkinitcap2(cw, unicw, nc);
455 rv = checkword(cw, info, root);
456 if (rv && (captype == ALLCAP) && is_keepcase(rv)) rv = NULL;
458 if (abbv && !rv) {
459 *(wspace+wl) = '.';
460 *(wspace+wl+1) = '\0';
461 rv = checkword(wspace, info, root);
462 if (!rv || is_keepcase(rv)) {
463 memcpy(wspace, cw, wl2);
464 *(wspace+wl2) = '.';
465 *(wspace+wl2+1) = '\0';
466 rv = checkword(wspace, info, root);
467 if (rv && ((captype == ALLCAP) && is_keepcase(rv))) rv = NULL;
470 break;
474 if (rv) return 1;
476 // recursive breaking at break points (not good for morphological analysis)
// cw is cut at the first occurrence of each break string in turn; the word
// is accepted when both sides spell correctly. The overwritten byte is
// always restored.
477 if (wordbreak) {
478 char * s;
479 char r;
480 for (int j = 0; j < pAMgr->get_numbreak(); j++) {
481 s=(char *) strstr(cw, wordbreak[j]);
482 if (s) {
483 r = *s;
484 *s = '\0';
485 // examine 2 sides of the break point
486 if (spell(cw) && spell(s + strlen(wordbreak[j]))) {
487 *s = r;
488 return 1;
490 *s = r;
495 // LANG_hu: compoundings with dashes and n-dashes XXX deprecated!
496 if (langnum == LANG_hu) {
497 int n;
498 // compound word with dash (HU) I18n
499 char * dash;
500 int result = 0;
501 // n-dash
// NOTE(review): the search string is shown as "-" while the code skips
// three bytes (dash + 3) and restores '?'; the literals were presumably a
// three-byte n-dash sequence and its first byte before being damaged -
// confirm before use
502 dash = (char *) strstr(cw,"-");
503 if (dash && !wordbreak) {
504 *dash = '\0';
505 // examine 2 sides of the dash
506 if (spell(cw) && spell(dash + 3)) {
507 *dash = '?';
508 return 1;
510 *dash = '?';
512 dash = (char *) strchr(cw,'-');
513 if (dash) {
514 *dash='\0';
515 // examine 2 sides of the dash
516 if (dash[1] == '\0') { // base word ending with dash
517 if (spell(cw)) return 1;
518 } else {
519 // first word ending with dash: word-
// temporarily terminate right after the dash to test "word-", then cut at
// the dash again so dash+1 is the second part
520 char r2 = *(dash + 1);
521 dash[0]='-';
522 dash[1]='\0';
523 result = spell(cw);
524 dash[1] = r2;
525 dash[0]='\0';
526 if (result && spell(dash+1) && ((strlen(dash+1) > 1) || (dash[1] == 'e') ||
527 ((dash[1] > '0') && (dash[1] < '9')))) return 1;
529 // affixed number in correct word
// NOTE(review): ">= '.'" makes the preceding digit test redundant; "== '.'"
// may have been intended - confirm
530 if (result && (dash > cw) && (((*(dash-1)<='9') && (*(dash-1)>='0')) || (*(dash-1)>='.'))) {
531 *dash='-';
532 n = 1;
533 if (*(dash - n) == '.') n++;
534 // search first not a number character to left from dash
535 while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
536 n++;
538 if ((dash - n) < cw) n--;
539 // numbers: deprecated
540 for(; n >= 1; n--) {
541 if ((*(dash - n) >= '0') && (*(dash - n) <= '9') &&
542 checkword(dash - n, info, root)) return 1;
547 return 0;
550 //int Hunspell::spell(const char * word) {
551 // return spell(word, NULL, NULL);
554 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
556 struct hentry * he = NULL;
557 int len;
558 char w2[MAXWORDUTF8LEN];
559 const char * word;
561 char * ignoredchars = pAMgr->get_ignore();
562 if (ignoredchars != NULL) {
563 strcpy(w2, w);
564 if (utf8) {
565 int ignoredchars_utf16_len;
566 unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
567 remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
568 } else {
569 remove_ignored_chars(w2,ignoredchars);
571 word = w2;
572 free(ignoredchars);
573 } else word = w;
575 // word reversing wrapper for complex prefixes
576 if (complexprefixes) {
577 if (word != w2) {
578 strcpy(w2, word);
579 word = w2;
581 if (utf8) reverseword_utf(w2); else reverseword(w2);
584 // look word in hash table
585 if (pHMgr) he = pHMgr->lookup(word);
587 // check forbidden and onlyincompound words
588 if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
589 info += SPELL_FORBIDDEN;
590 // LANG_hu section: set dash information for suggestions
591 if (langnum == LANG_hu) {
592 if (pAMgr->get_compoundflag() &&
593 TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
594 info += SPELL_COMPOUND;
597 return NULL;
600 // he = next not pseudoroot and not onlyincompound homonym or NULL
601 while (he && (he->astr) &&
602 ((pAMgr->get_pseudoroot() && TESTAFF(he->astr, pAMgr->get_pseudoroot(), he->alen)) ||
603 (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen))
604 )) he = he->next_homonym;
606 // check with affixes
607 if (!he && pAMgr) {
608 // try stripping off affixes */
609 len = strlen(word);
610 he = pAMgr->affix_check(word, len, 0);
612 // check compound restriction
613 if (he && he->astr && pAMgr->get_onlyincompound() &&
614 TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) he = NULL;
616 if (he) {
617 if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
618 info += SPELL_FORBIDDEN;
619 return NULL;
621 if (root) {
622 *root = mystrdup(he->word);
623 if (complexprefixes) {
624 if (utf8) reverseword_utf(*root); else reverseword(*root);
627 // try check compound word
628 } else if (pAMgr->get_compound()) {
629 he = pAMgr->compound_check(word, len,
630 0,0,100,0,NULL,0,NULL,NULL,0);
631 // LANG_hu section: `moving rule' with last dash
632 if ((!he) && (langnum == LANG_hu) && (word[len-1]=='-')) {
633 char * dup = mystrdup(word);
634 dup[len-1] = '\0';
635 he = pAMgr->compound_check(dup, len-1,
636 -5,0,100,0,NULL,1,NULL,NULL,0);
637 free(dup);
639 // end of LANG speficic region
640 if (he) {
641 if (root) {
642 *root = mystrdup(he->word);
643 if (complexprefixes) {
644 if (utf8) reverseword_utf(*root); else reverseword(*root);
647 if (info) *info += SPELL_COMPOUND;
653 return he;
656 int Hunspell::suggest(char*** slst, const char * word)
658 char cw[MAXWORDUTF8LEN + 4];
659 char wspace[MAXWORDUTF8LEN + 4];
660 if (! pSMgr) return 0;
661 w_char unicw[MAXWORDLEN + 1];
662 int nc = strlen(word);
663 if (utf8) {
664 if (nc >= MAXWORDUTF8LEN) return 0;
665 } else {
666 if (nc >= MAXWORDLEN) return 0;
668 int captype = 0;
669 int abbv = 0;
670 int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
671 if (wl == 0) return 0;
672 int ns = 0;
673 *slst = NULL;
674 int capwords = 0;
675 int ngramsugs = 0;
677 switch(captype) {
678 case NOCAP: {
679 ns = pSMgr->suggest(slst, cw, ns);
680 break;
683 case INITCAP: {
684 capwords = 1;
685 ns = pSMgr->suggest(slst, cw, ns);
686 if (ns == -1) break;
687 memcpy(wspace,cw,(wl+1));
688 mkallsmall2(wspace, unicw, nc);
689 ns = pSMgr->suggest(slst, wspace, ns);
690 break;
692 case HUHINITCAP:
693 capwords = 1;
694 case HUHCAP: {
695 ns = pSMgr->suggest(slst, cw, ns);
696 if (ns != -1) {
697 int prevns;
698 if (captype == HUHINITCAP) {
699 // TheOpenOffice.org -> The OpenOffice.org
700 memcpy(wspace,cw,(wl+1));
701 mkinitsmall2(wspace, unicw, nc);
702 ns = pSMgr->suggest(slst, wspace, ns);
704 memcpy(wspace,cw,(wl+1));
705 mkallsmall2(wspace, unicw, nc);
706 insert_sug(slst, wspace, &ns);
707 prevns = ns;
708 ns = pSMgr->suggest(slst, wspace, ns);
709 if (captype == HUHINITCAP) {
710 mkinitcap2(wspace, unicw, nc);
711 insert_sug(slst, wspace, &ns);
712 ns = pSMgr->suggest(slst, wspace, ns);
714 // aNew -> "a New" (instead of "a new")
715 for (int j = prevns; j < ns; j++) {
716 char * space = strchr((*slst)[j],' ');
717 if (space) {
718 int slen = strlen(space + 1);
719 // different case after space (need capitalisation)
720 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
721 w_char w[MAXWORDLEN + 1];
722 int wc = 0;
723 char * r = (*slst)[j];
724 if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
725 mkinitcap2(space + 1, w, wc);
726 // set as first suggestion
727 for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
728 (*slst)[0] = r;
733 break;
736 case ALLCAP: {
737 memcpy(wspace, cw, (wl+1));
738 mkallsmall2(wspace, unicw, nc);
739 ns = pSMgr->suggest(slst, wspace, ns);
740 if (ns == -1) break;
741 if (pAMgr && pAMgr->get_keepcase()) insert_sug(slst, wspace, &ns);
742 mkinitcap2(wspace, unicw, nc);
743 ns = pSMgr->suggest(slst, wspace, ns);
744 for (int j=0; j < ns; j++) {
745 mkallcap((*slst)[j]);
746 if (pAMgr && pAMgr->get_checksharps()) {
747 char * pos;
748 if (utf8) {
749 pos = strstr((*slst)[j], "脽");
750 while (pos) {
751 *pos = 'S';
752 *(pos+1) = 'S';
753 pos = strstr(pos+2, "脽");
755 } else {
756 pos = strchr((*slst)[j], '?');
757 while (pos) {
758 (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
759 mystrrep((*slst)[j], "?", "SS");
760 pos = strchr((*slst)[j], '?');
765 break;
769 // LANG_hu section: replace '-' with ' ' in Hungarian
770 if (langnum == LANG_hu) {
771 for (int j=0; j < ns; j++) {
772 char * pos = strchr((*slst)[j],'-');
773 if (pos) {
774 int info;
775 char w[MAXWORDUTF8LEN];
776 *pos = '\0';
777 strcpy(w, (*slst)[j]);
778 strcat(w, pos + 1);
779 spell(w, &info, NULL);
780 if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
781 *pos = ' ';
782 } else *pos = '-';
786 // END OF LANG_hu section
788 // try ngram approach since found nothing
789 if ((ns == 0) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {
790 ngramsugs = 1;
791 switch(captype) {
792 case NOCAP: {
793 ns = pSMgr->ngsuggest(*slst, cw, pHMgr);
794 break;
796 case HUHCAP: {
797 memcpy(wspace,cw,(wl+1));
798 mkallsmall2(wspace, unicw, nc);
799 ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
800 break;
802 case INITCAP: {
803 capwords = 1;
804 memcpy(wspace,cw,(wl+1));
805 mkallsmall2(wspace, unicw, nc);
806 ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
807 break;
809 case ALLCAP: {
810 memcpy(wspace,cw,(wl+1));
811 mkallsmall2(wspace, unicw, nc);
812 ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
813 for (int j=0; j < ns; j++)
814 mkallcap((*slst)[j]);
815 break;
820 // word reversing wrapper for complex prefixes
821 if (complexprefixes) {
822 for (int j = 0; j < ns; j++) {
823 if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
827 // capitalize
828 if (capwords) for (int j=0; j < ns; j++) {
829 mkinitcap((*slst)[j]);
832 // expand suggestions with dot(s)
833 if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
834 for (int j = 0; j < ns; j++) {
835 (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
836 strcat((*slst)[j], word + strlen(word) - abbv);
840 // suggest keepcase
841 if (pAMgr->get_keepcase()) {
842 switch (captype) {
843 case INITCAP:
844 case ALLCAP: {
845 int l = 0;
846 for (int j=0; j < ns; j++) {
847 if (!spell((*slst)[j])) {
848 char s[MAXSWUTF8L];
849 w_char w[MAXSWL];
850 int len;
851 if (utf8) {
852 len = u8_u16(w, MAXSWL, (*slst)[j]);
853 } else {
854 strcpy(s, (*slst)[j]);
855 len = strlen(s);
857 mkallsmall2(s, w, len);
858 free((*slst)[j]);
859 if (spell(s)) {
860 (*slst)[l] = mystrdup(s);
861 l++;
862 } else {
863 mkinitcap2(s, w, len);
864 if (spell(s)) {
865 (*slst)[l] = mystrdup(s);
866 l++;
869 } else {
870 (*slst)[l] = (*slst)[j];
871 l++;
874 ns = l;
879 // remove duplications
880 int l = 0;
881 for (int j = 0; j < ns; j++) {
882 (*slst)[l] = (*slst)[j];
883 for (int k = 0; k < l; k++) {
884 if (strcmp((*slst)[k], (*slst)[j]) == 0) {
885 free((*slst)[j]);
886 l--;
889 l++;
891 return l;
894 char * Hunspell::get_dic_encoding()
896 return encoding;
899 #ifdef HUNSPELL_EXPERIMENTAL
900 // XXX need UTF-8 support
901 int Hunspell::suggest_auto(char*** slst, const char * word)
903 char cw[MAXWORDUTF8LEN + 4];
904 char wspace[MAXWORDUTF8LEN + 4];
905 if (! pSMgr) return 0;
906 int wl = strlen(word);
907 if (utf8) {
908 if (wl >= MAXWORDUTF8LEN) return 0;
909 } else {
910 if (wl >= MAXWORDLEN) return 0;
912 int captype = 0;
913 int abbv = 0;
914 wl = cleanword(cw, word, &captype, &abbv);
915 if (wl == 0) return 0;
916 int ns = 0;
917 *slst = NULL; // HU, nsug in pSMgr->suggest
919 switch(captype) {
920 case NOCAP: {
921 ns = pSMgr->suggest_auto(slst, cw, ns);
922 if (ns>0) break;
923 break;
926 case INITCAP: {
927 memcpy(wspace,cw,(wl+1));
928 mkallsmall(wspace);
929 ns = pSMgr->suggest_auto(slst, wspace, ns);
930 for (int j=0; j < ns; j++)
931 mkinitcap((*slst)[j]);
932 ns = pSMgr->suggest_auto(slst, cw, ns);
933 break;
937 case HUHCAP: {
938 ns = pSMgr->suggest_auto(slst, cw, ns);
939 if (ns == 0) {
940 memcpy(wspace,cw,(wl+1));
941 mkallsmall(wspace);
942 ns = pSMgr->suggest_auto(slst, wspace, ns);
944 break;
947 case ALLCAP: {
948 memcpy(wspace,cw,(wl+1));
949 mkallsmall(wspace);
950 ns = pSMgr->suggest_auto(slst, wspace, ns);
952 mkinitcap(wspace);
953 ns = pSMgr->suggest_auto(slst, wspace, ns);
955 for (int j=0; j < ns; j++)
956 mkallcap((*slst)[j]);
957 break;
961 // word reversing wrapper for complex prefixes
962 if (complexprefixes) {
963 for (int j = 0; j < ns; j++) {
964 if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
968 // expand suggestions with dot(s)
969 if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
970 for (int j = 0; j < ns; j++) {
971 (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
972 strcat((*slst)[j], word + strlen(word) - abbv);
976 // LANG_hu section: replace '-' with ' ' in Hungarian
977 if (langnum == LANG_hu) {
978 for (int j=0; j < ns; j++) {
979 char * pos = strchr((*slst)[j],'-');
980 if (pos) {
981 int info;
982 char w[MAXWORDUTF8LEN];
983 *pos = '\0';
984 strcpy(w, (*slst)[j]);
985 strcat(w, pos + 1);
986 spell(w, &info, NULL);
987 if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
988 *pos = ' ';
989 } else *pos = '-';
993 // END OF LANG_hu section
994 return ns;
997 // XXX need UTF-8 support
998 int Hunspell::stem(char*** slst, const char * word)
1000 char cw[MAXWORDUTF8LEN + 4];
1001 char wspace[MAXWORDUTF8LEN + 4];
1002 if (! pSMgr) return 0;
1003 int wl = strlen(word);
1004 if (utf8) {
1005 if (wl >= MAXWORDUTF8LEN) return 0;
1006 } else {
1007 if (wl >= MAXWORDLEN) return 0;
1009 int captype = 0;
1010 int abbv = 0;
1011 wl = cleanword(cw, word, &captype, &abbv);
1012 if (wl == 0) return 0;
1014 int ns = 0;
1016 *slst = NULL; // HU, nsug in pSMgr->suggest
1018 switch(captype) {
1019 case HUHCAP:
1020 case NOCAP: {
1021 ns = pSMgr->suggest_stems(slst, cw, ns);
1023 if ((abbv) && (ns == 0)) {
1024 memcpy(wspace,cw,wl);
1025 *(wspace+wl) = '.';
1026 *(wspace+wl+1) = '\0';
1027 ns = pSMgr->suggest_stems(slst, wspace, ns);
1030 break;
1033 case INITCAP: {
1035 ns = pSMgr->suggest_stems(slst, cw, ns);
1037 if (ns == 0) {
1038 memcpy(wspace,cw,(wl+1));
1039 mkallsmall(wspace);
1040 ns = pSMgr->suggest_stems(slst, wspace, ns);
1044 if ((abbv) && (ns == 0)) {
1045 memcpy(wspace,cw,wl);
1046 mkallsmall(wspace);
1047 *(wspace+wl) = '.';
1048 *(wspace+wl+1) = '\0';
1049 ns = pSMgr->suggest_stems(slst, wspace, ns);
1052 break;
1056 case ALLCAP: {
1057 ns = pSMgr->suggest_stems(slst, cw, ns);
1058 if (ns != 0) break;
1060 memcpy(wspace,cw,(wl+1));
1061 mkallsmall(wspace);
1062 ns = pSMgr->suggest_stems(slst, wspace, ns);
1064 if (ns == 0) {
1065 mkinitcap(wspace);
1066 ns = pSMgr->suggest_stems(slst, wspace, ns);
1069 if ((abbv) && (ns == 0)) {
1070 memcpy(wspace,cw,wl);
1071 mkallsmall(wspace);
1072 *(wspace+wl) = '.';
1073 *(wspace+wl+1) = '\0';
1074 ns = pSMgr->suggest_stems(slst, wspace, ns);
1078 break;
1082 return ns;
1085 int Hunspell::suggest_pos_stems(char*** slst, const char * word)
1087 char cw[MAXWORDUTF8LEN + 4];
1088 char wspace[MAXWORDUTF8LEN + 4];
1089 if (! pSMgr) return 0;
1090 int wl = strlen(word);
1091 if (utf8) {
1092 if (wl >= MAXWORDUTF8LEN) return 0;
1093 } else {
1094 if (wl >= MAXWORDLEN) return 0;
1096 int captype = 0;
1097 int abbv = 0;
1098 wl = cleanword(cw, word, &captype, &abbv);
1099 if (wl == 0) return 0;
1101 int ns = 0; // ns=0 = normalized input
1103 *slst = NULL; // HU, nsug in pSMgr->suggest
1105 switch(captype) {
1106 case HUHCAP:
1107 case NOCAP: {
1108 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1110 if ((abbv) && (ns == 0)) {
1111 memcpy(wspace,cw,wl);
1112 *(wspace+wl) = '.';
1113 *(wspace+wl+1) = '\0';
1114 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1117 break;
1120 case INITCAP: {
1122 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1124 if (ns == 0 || ((*slst)[0][0] == '#')) {
1125 memcpy(wspace,cw,(wl+1));
1126 mkallsmall(wspace);
1127 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1130 break;
1134 case ALLCAP: {
1135 ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1136 if (ns != 0) break;
1138 memcpy(wspace,cw,(wl+1));
1139 mkallsmall(wspace);
1140 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1142 if (ns == 0) {
1143 mkinitcap(wspace);
1144 ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1146 break;
1150 return ns;
1152 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1154 const char * Hunspell::get_wordchars()
1156 return pAMgr->get_wordchars();
1159 unsigned short * Hunspell::get_wordchars_utf16(int * len)
1161 return pAMgr->get_wordchars_utf16(len);
1164 void Hunspell::mkinitcap(char * p)
1166 if (!utf8) {
1167 if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1168 } else {
1169 int len;
1170 w_char u[MAXWORDLEN];
1171 len = u8_u16(u, MAXWORDLEN, p);
1172 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1173 u[0].h = (unsigned char) (i >> 8);
1174 u[0].l = (unsigned char) (i & 0x00FF);
1175 u16_u8(p, MAXWORDUTF8LEN, u, len);
1179 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
1181 if (!utf8) {
1182 if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1183 } else if (nc > 0) {
1184 unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1185 u[0].h = (unsigned char) (i >> 8);
1186 u[0].l = (unsigned char) (i & 0x00FF);
1187 u16_u8(p, MAXWORDUTF8LEN, u, nc);
1188 return strlen(p);
1190 return nc;
1193 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
1195 if (!utf8) {
1196 if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
1197 } else if (nc > 0) {
1198 unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
1199 u[0].h = (unsigned char) (i >> 8);
1200 u[0].l = (unsigned char) (i & 0x00FF);
1201 u16_u8(p, MAXWORDUTF8LEN, u, nc);
1202 return strlen(p);
1204 return nc;
1207 int Hunspell::put_word(const char * word)
1209 if (pHMgr) {
1210 return pHMgr->put_word(word, strlen(word), NULL);
1212 return 0;
1215 int Hunspell::put_word_pattern(const char * word, const char * pattern)
1217 if (pHMgr) {
1218 return pHMgr->put_word_pattern(word, strlen(word), pattern);
1220 return 0;
1223 const char * Hunspell::get_version()
1225 return pAMgr->get_version();
1228 struct cs_info * Hunspell::get_csconv()
1230 return csconv;
1233 #ifdef HUNSPELL_EXPERIMENTAL
1234 // XXX need UTF-8 support
1235 char * Hunspell::morph(const char * word)
1237 char cw[MAXWORDUTF8LEN + 4];
1238 char wspace[MAXWORDUTF8LEN + 4];
1239 if (! pSMgr) return 0;
1240 int wl = strlen(word);
1241 if (utf8) {
1242 if (wl >= MAXWORDUTF8LEN) return 0;
1243 } else {
1244 if (wl >= MAXWORDLEN) return 0;
1246 int captype = 0;
1247 int abbv = 0;
1248 wl = cleanword(cw, word, &captype, &abbv);
1249 if (wl == 0) {
1250 if (abbv) {
1251 for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
1252 cw[wl] = '\0';
1253 abbv = 0;
1254 } else return 0;
1257 char result[MAXLNLEN];
1258 char * st = NULL;
1260 *result = '\0';
1262 int n = 0;
1263 int n2 = 0;
1264 int n3 = 0;
1266 // test numbers
1267 // LANG_hu section: set dash information for suggestions
1268 if (langnum == LANG_hu) {
1269 while ((n < wl) &&
1270 (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
1271 n++;
1272 if ((cw[n] == '.') || (cw[n] == ',')) {
1273 if (((n2 == 0) && (n > 3)) ||
1274 ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
1275 n2++;
1276 n3 = n;
1280 if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return NULL;
1281 if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='?)) && checkword(cw+n, NULL, NULL))) {
1282 strcat(result, cw);
1283 result[n - 1] = '\0';
1284 if (n == wl) {
1285 st = pSMgr->suggest_morph(cw + n - 1);
1286 if (st) {
1287 strcat(result, st);
1288 free(st);
1290 } else {
1291 char sign = cw[n];
1292 cw[n] = '\0';
1293 st = pSMgr->suggest_morph(cw + n - 1);
1294 if (st) {
1295 strcat(result, st);
1296 free(st);
1298 strcat(result, "+"); // XXX SPEC. MORPHCODE
1299 cw[n] = sign;
1300 st = pSMgr->suggest_morph(cw + n);
1301 if (st) {
1302 strcat(result, st);
1303 free(st);
1306 return mystrdup(result);
1309 // END OF LANG_hu section
1311 switch(captype) {
1312 case NOCAP: {
1313 st = pSMgr->suggest_morph(cw);
1314 if (st) {
1315 strcat(result, st);
1316 free(st);
1318 if (abbv) {
1319 memcpy(wspace,cw,wl);
1320 *(wspace+wl) = '.';
1321 *(wspace+wl+1) = '\0';
1322 st = pSMgr->suggest_morph(wspace);
1323 if (st) {
1324 if (*result) strcat(result, "\n");
1325 strcat(result, st);
1326 free(st);
1329 break;
1331 case INITCAP: {
1332 memcpy(wspace,cw,(wl+1));
1333 mkallsmall(wspace);
1334 st = pSMgr->suggest_morph(wspace);
1335 if (st) {
1336 strcat(result, st);
1337 free(st);
1339 st = pSMgr->suggest_morph(cw);
1340 if (st) {
1341 if (*result) strcat(result, "\n");
1342 strcat(result, st);
1343 free(st);
1345 if (abbv) {
1346 memcpy(wspace,cw,wl);
1347 *(wspace+wl) = '.';
1348 *(wspace+wl+1) = '\0';
1349 mkallsmall(wspace);
1350 st = pSMgr->suggest_morph(wspace);
1351 if (st) {
1352 if (*result) strcat(result, "\n");
1353 strcat(result, st);
1354 free(st);
1356 mkinitcap(wspace);
1357 st = pSMgr->suggest_morph(wspace);
1358 if (st) {
1359 if (*result) strcat(result, "\n");
1360 strcat(result, st);
1361 free(st);
1364 break;
1366 case HUHCAP: {
1367 st = pSMgr->suggest_morph(cw);
1368 if (st) {
1369 strcat(result, st);
1370 free(st);
1372 #if 0
1373 memcpy(wspace,cw,(wl+1));
1374 mkallsmall(wspace);
1375 st = pSMgr->suggest_morph(wspace);
1376 if (st) {
1377 if (*result) strcat(result, "\n");
1378 strcat(result, st);
1379 free(st);
1381 #endif
1382 break;
1384 case ALLCAP: {
1385 memcpy(wspace,cw,(wl+1));
1386 st = pSMgr->suggest_morph(wspace);
1387 if (st) {
1388 strcat(result, st);
1389 free(st);
1391 mkallsmall(wspace);
1392 st = pSMgr->suggest_morph(wspace);
1393 if (st) {
1394 if (*result) strcat(result, "\n");
1395 strcat(result, st);
1396 free(st);
1398 mkinitcap(wspace);
1399 st = pSMgr->suggest_morph(wspace);
1400 if (st) {
1401 if (*result) strcat(result, "\n");
1402 strcat(result, st);
1403 free(st);
1405 if (abbv) {
1406 memcpy(wspace,cw,(wl+1));
1407 *(wspace+wl) = '.';
1408 *(wspace+wl+1) = '\0';
1409 if (*result) strcat(result, "\n");
1410 st = pSMgr->suggest_morph(wspace);
1411 if (st) {
1412 strcat(result, st);
1413 free(st);
1415 mkallsmall(wspace);
1416 st = pSMgr->suggest_morph(wspace);
1417 if (st) {
1418 if (*result) strcat(result, "\n");
1419 strcat(result, st);
1420 free(st);
1422 mkinitcap(wspace);
1423 st = pSMgr->suggest_morph(wspace);
1424 if (st) {
1425 if (*result) strcat(result, "\n");
1426 strcat(result, st);
1427 free(st);
1430 break;
1434 if (result && (*result)) {
1435 // word reversing wrapper for complex prefixes
1436 if (complexprefixes) {
1437 if (utf8) reverseword_utf(result); else reverseword(result);
1439 return mystrdup(result);
1442 // compound word with dash (HU) I18n
1443 char * dash = NULL;
1444 int nresult = 0;
1445 // LANG_hu section: set dash information for suggestions
1446 if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
1447 if ((langnum == LANG_hu) && dash) {
1448 *dash='\0';
1449 // examine 2 sides of the dash
1450 if (dash[1] == '\0') { // base word ending with dash
1451 if (spell(cw)) return pSMgr->suggest_morph(cw);
1452 } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
1453 if (spell(cw) && (spell("-e"))) {
1454 st = pSMgr->suggest_morph(cw);
1455 if (st) {
1456 strcat(result, st);
1457 free(st);
1459 strcat(result,"+"); // XXX spec. separator in MORPHCODE
1460 st = pSMgr->suggest_morph("-e");
1461 if (st) {
1462 strcat(result, st);
1463 free(st);
1465 return mystrdup(result);
1467 } else {
1468 // first word ending with dash: word- XXX ???
1469 char r2 = *(dash + 1);
1470 dash[0]='-';
1471 dash[1]='\0';
1472 nresult = spell(cw);
1473 dash[1] = r2;
1474 dash[0]='\0';
1475 if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
1476 ((dash[1] > '0') && (dash[1] < '9')))) {
1477 st = morph(cw);
1478 if (st) {
1479 strcat(result, st);
1480 free(st);
1481 strcat(result,"+"); // XXX spec. separator in MORPHCODE
1483 st = morph(dash+1);
1484 if (st) {
1485 strcat(result, st);
1486 free(st);
1488 return mystrdup(result);
1491 // affixed number in correct word
1492 if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
1493 (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
1494 *dash='-';
1495 n = 1;
1496 if (*(dash - n) == '.') n++;
1497 // search first not a number character to left from dash
1498 while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
1499 n++;
1501 if ((dash - n) < cw) n--;
1502 // numbers: valami1000000-hoz
1503 // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1504 // 56-hoz, 6-hoz
1505 for(; n >= 1; n--) {
1506 if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
1507 strcat(result, cw);
1508 result[dash - cw - n] = '\0';
1509 st = pSMgr->suggest_morph(dash - n);
1510 if (st) {
1511 strcat(result, st);
1512 free(st);
1514 return mystrdup(result);
1519 return NULL;
1522 // XXX need UTF-8 support
1523 char * Hunspell::morph_with_correction(const char * word)
1525 char cw[MAXWORDUTF8LEN + 4];
1526 char wspace[MAXWORDUTF8LEN + 4];
1527 if (! pSMgr) return 0;
1528 int wl = strlen(word);
1529 if (utf8) {
1530 if (wl >= MAXWORDUTF8LEN) return 0;
1531 } else {
1532 if (wl >= MAXWORDLEN) return 0;
1534 int captype = 0;
1535 int abbv = 0;
1536 wl = cleanword(cw, word, &captype, &abbv);
1537 if (wl == 0) return 0;
1539 char result[MAXLNLEN];
1540 char * st = NULL;
1542 *result = '\0';
1545 switch(captype) {
1546 case NOCAP: {
1547 st = pSMgr->suggest_morph_for_spelling_error(cw);
1548 if (st) {
1549 strcat(result, st);
1550 free(st);
1552 if (abbv) {
1553 memcpy(wspace,cw,wl);
1554 *(wspace+wl) = '.';
1555 *(wspace+wl+1) = '\0';
1556 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1557 if (st) {
1558 if (*result) strcat(result, "\n");
1559 strcat(result, st);
1560 free(st);
1563 break;
1565 case INITCAP: {
1566 memcpy(wspace,cw,(wl+1));
1567 mkallsmall(wspace);
1568 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1569 if (st) {
1570 strcat(result, st);
1571 free(st);
1573 st = pSMgr->suggest_morph_for_spelling_error(cw);
1574 if (st) {
1575 if (*result) strcat(result, "\n");
1576 strcat(result, st);
1577 free(st);
1579 if (abbv) {
1580 memcpy(wspace,cw,wl);
1581 *(wspace+wl) = '.';
1582 *(wspace+wl+1) = '\0';
1583 mkallsmall(wspace);
1584 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1585 if (st) {
1586 if (*result) strcat(result, "\n");
1587 strcat(result, st);
1588 free(st);
1590 mkinitcap(wspace);
1591 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1592 if (st) {
1593 if (*result) strcat(result, "\n");
1594 strcat(result, st);
1595 free(st);
1598 break;
1600 case HUHCAP: {
1601 st = pSMgr->suggest_morph_for_spelling_error(cw);
1602 if (st) {
1603 strcat(result, st);
1604 free(st);
1606 memcpy(wspace,cw,(wl+1));
1607 mkallsmall(wspace);
1608 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1609 if (st) {
1610 if (*result) strcat(result, "\n");
1611 strcat(result, st);
1612 free(st);
1614 break;
1616 case ALLCAP: {
1617 memcpy(wspace,cw,(wl+1));
1618 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1619 if (st) {
1620 strcat(result, st);
1621 free(st);
1623 mkallsmall(wspace);
1624 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1625 if (st) {
1626 if (*result) strcat(result, "\n");
1627 strcat(result, st);
1628 free(st);
1630 mkinitcap(wspace);
1631 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1632 if (st) {
1633 if (*result) strcat(result, "\n");
1634 strcat(result, st);
1635 free(st);
1637 if (abbv) {
1638 memcpy(wspace,cw,(wl+1));
1639 *(wspace+wl) = '.';
1640 *(wspace+wl+1) = '\0';
1641 if (*result) strcat(result, "\n");
1642 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1643 if (st) {
1644 strcat(result, st);
1645 free(st);
1647 mkallsmall(wspace);
1648 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1649 if (st) {
1650 if (*result) strcat(result, "\n");
1651 strcat(result, st);
1652 free(st);
1654 mkinitcap(wspace);
1655 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1656 if (st) {
1657 if (*result) strcat(result, "\n");
1658 strcat(result, st);
1659 free(st);
1662 break;
1666 if (result) return mystrdup(result);
1667 return NULL;
1670 /* analyze word
1671 * return line count
1672 * XXX need a better data structure for morphological analysis */
1673 int Hunspell::analyze(char ***out, const char *word) {
1674 int n = 0;
1675 if (!word) return 0;
1676 char * m = morph(word);
1677 if(!m) return 0;
1678 if (!out) return line_tok(m, out);
1680 // without memory allocation
1681 /* BUG missing buffer size checking */
1682 int i, p;
1683 for(p = 0, i = 0; m[i]; i++) {
1684 if(m[i] == '\n' || !m[i+1]) {
1685 n++;
1686 strncpy((*out)[n++], m + p, i - p + 1);
1687 if (m[i] == '\n') (*out)[n++][i - p] = '\0';
1688 if(!m[i+1]) break;
1689 p = i + 1;
1692 free(m);
1693 return n;
1696 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1698 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
1700 return (Hunhandle*)(new Hunspell(affpath, dpath));
1703 void Hunspell_destroy(Hunhandle *pHunspell)
1705 delete (Hunspell*)(pHunspell);
1708 int Hunspell_spell(Hunhandle *pHunspell, const char *word)
1710 return ((Hunspell*)pHunspell)->spell(word);
1713 char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
1715 return ((Hunspell*)pHunspell)->get_dic_encoding();
1718 int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
1720 return ((Hunspell*)pHunspell)->suggest(slst, word);