ext/hunspell/hunspell.cxx

   1 #include "license.hunspell"
   2 #include "license.myspell"
   3
   4 #ifndef MOZILLA_CLIENT
   5 #include <cstdlib>
   6 #include <cstring>
   7 #include <cstdio>
   8 #else
   9 #include <stdlib.h>
  10 #include <string.h>
  11 #include <stdio.h>
  12 #endif
  13
  14 #include "hunspell.hxx"
  15 #include "hunspell.h"
  16
  17 #ifndef MOZILLA_CLIENT
  18 #ifndef W32
  19 using namespace std;
  20 #endif
  21 #endif
  22
  23 Hunspell::Hunspell(const char * affpath, const char * dpath)
  24 {
  25     encoding = NULL;
  26     csconv = NULL;
  27     utf8 = 0;
  28     complexprefixes = 0;
  29
  30     /* first set up the hash manager */
  31     pHMgr = new HashMgr(dpath, affpath);
  32
  33     /* next set up the affix manager */
  34     /* it needs access to the hash manager lookup methods */
  35     pAMgr = new AffixMgr(affpath,pHMgr);
  36
  37     /* get the preferred try string and the dictionary */
  38     /* encoding from the Affix Manager for that dictionary */
  39     char * try_string = pAMgr->get_try_string();
  40     encoding = pAMgr->get_encoding();
  41     csconv = get_current_cs(encoding);
  42     langnum = pAMgr->get_langnum();
  43     utf8 = pAMgr->get_utf8();
  44     complexprefixes = pAMgr->get_complexprefixes();
  45     wordbreak = pAMgr->get_breaktable();
  46
  47     /* and finally set up the suggestion manager */
  48     pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
  49     if (try_string) free(try_string);
  50
  51 }
  52
  53 Hunspell::~Hunspell()
  54 {
  55     if (pSMgr) delete pSMgr;
  56     if (pAMgr) delete pAMgr;
  57     if (pHMgr) delete pHMgr;
  58     pSMgr = NULL;
  59     pAMgr = NULL;
  60     pHMgr = NULL;
  61     csconv= NULL;
  62     if (encoding) free(encoding);
  63     encoding = NULL;
  64 }
  65
  66
  67 // make a copy of src at destination while removing all leading
  68 // blanks and removing any trailing periods after recording
  69 // their presence with the abbreviation flag
  70 // also since already going through character by character,
  71 // set the capitalization type
  72 // return the length of the "cleaned" (and UTF-8 encoded) word
  73
  74 int Hunspell::cleanword2(char * dest, const char * src,
  75     w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
  76 {
  77    unsigned char * p = (unsigned char *) dest;
  78    const unsigned char * q = (const unsigned char * ) src;
  79    int firstcap = 0;
  80
  81    // first skip over any leading blanks
  82    while ((*q != '\0') && (*q == ' ')) q++;
  83
  84    // now strip off any trailing periods (recording their presence)
  85    *pabbrev = 0;
  86    int nl = strlen((const char *)q);
  87    while ((nl > 0) && (*(q+nl-1)=='.')) {
  88        nl--;
  89        (*pabbrev)++;
  90    }
  91
  92    // if no characters are left it can't be capitalized
  93    if (nl <= 0) {
  94        *pcaptype = NOCAP;
  95        *p = '\0';
  96        return 0;
  97    }
  98
  99    // now determine the capitalization type of the first nl letters
 100    int ncap = 0;
 101    int nneutral = 0;
 102    *nc = 0;
 103
 104    if (!utf8) {
 105       while (nl > 0) {
 106          (*nc)++;
 107          if (csconv[(*q)].ccase) ncap++;
 108          if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
 109          *p++ = *q++;
 110          nl--;
 111       }
 112       // remember to terminate the destination string
 113       *p = '\0';
 114       if (ncap) {
 115         firstcap = csconv[(unsigned char)(*dest)].ccase;
 116       }
 117    } else {
 118       unsigned short idx;
 119       *nc = u8_u16(dest_utf, MAXWORDLEN, (const char *) q);
 120       // don't check too long words
 121       if (*nc >= MAXWORDLEN) return 0;
 122       if (*nc == -1) { // big Unicode character (non BMP area)
 123          *pcaptype = NOCAP;
 124          strcpy((char *) p, (char *) q);
 125          return strlen(dest);
 126       }
 127       *nc -= *pabbrev;
 128       for (int i = 0; i < *nc; i++) {
 129          idx = (dest_utf[i].h << 8) + dest_utf[i].l;
 130          if (idx != unicodetolower(idx, langnum)) ncap++;
 131          if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) nneutral++;
 132       }
 133       u16_u8(dest, MAXWORDUTF8LEN, dest_utf, *nc);
 134       if (ncap) {
 135          idx = (dest_utf[0].h << 8) + dest_utf[0].l;
 136          firstcap = (idx != unicodetolower(idx, langnum));
 137       }
 138    }
 139
 140    // now finally set the captype
 141    if (ncap == 0) {
 142         *pcaptype = NOCAP;
 143    } else if ((ncap == 1) && firstcap) {
 144         *pcaptype = INITCAP;
 145    } else if ((ncap == *nc) || ((ncap + nneutral) == *nc)) {
 146         *pcaptype = ALLCAP;
 147    } else if ((ncap > 1) && firstcap) {
 148         *pcaptype = HUHINITCAP;
 149    } else {
 150         *pcaptype = HUHCAP;
 151    }
 152    return strlen(dest);
 153 }
 154
 155 int Hunspell::cleanword(char * dest, const char * src,
 156     int * pcaptype, int * pabbrev)
 157 {
 158    unsigned char * p = (unsigned char *) dest;
 159    const unsigned char * q = (const unsigned char * ) src;
 160    int firstcap = 0;
 161
 162    // first skip over any leading blanks
 163    while ((*q != '\0') && (*q == ' ')) q++;
 164
 165    // now strip off any trailing periods (recording their presence)
 166    *pabbrev = 0;
 167    int nl = strlen((const char *)q);
 168    while ((nl > 0) && (*(q+nl-1)=='.')) {
 169        nl--;
 170        (*pabbrev)++;
 171    }
 172
 173    // if no characters are left it can't be capitalized
 174    if (nl <= 0) {
 175        *pcaptype = NOCAP;
 176        *p = '\0';
 177        return 0;
 178    }
 179
 180    // now determine the capitalization type of the first nl letters
 181    int ncap = 0;
 182    int nneutral = 0;
 183    int nc = 0;
 184
 185    if (!utf8) {
 186       while (nl > 0) {
 187          nc++;
 188          if (csconv[(*q)].ccase) ncap++;
 189          if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
 190          *p++ = *q++;
 191          nl--;
 192       }
 193       // remember to terminate the destination string
 194       *p = '\0';
 195       firstcap = csconv[(unsigned char)(*dest)].ccase;
 196    } else {
 197       unsigned short idx;
 198       w_char t[MAXWORDLEN];
 199       nc = u8_u16(t, MAXWORDLEN, src);
 200       for (int i = 0; i < nc; i++) {
 201          idx = (t[i].h << 8) + t[i].l;
 202          if (idx != unicodetolower(idx, langnum)) ncap++;
 203          if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) nneutral++;
 204       }
 205       u16_u8(dest, MAXWORDUTF8LEN, t, nc);
 206       if (ncap) {
 207          idx = (t[0].h << 8) + t[0].l;
 208          firstcap = (idx != unicodetolower(idx, langnum));
 209       }
 210    }
 211
 212    // now finally set the captype
 213    if (ncap == 0) {
 214         *pcaptype = NOCAP;
 215    } else if ((ncap == 1) && firstcap) {
 216         *pcaptype = INITCAP;
 217    } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
 218         *pcaptype = ALLCAP;
 219    } else if ((ncap > 1) && firstcap) {
 220         *pcaptype = HUHINITCAP;
 221    } else {
 222         *pcaptype = HUHCAP;
 223    }
 224    return strlen(dest);
 225 }
 226
 227
 228 void Hunspell::mkallcap(char * p)
 229 {
 230   if (utf8) {
 231       w_char u[MAXWORDLEN];
 232       int nc = u8_u16(u, MAXWORDLEN, p);
 233       unsigned short idx;
 234       for (int i = 0; i < nc; i++) {
 235          idx = (u[i].h << 8) + u[i].l;
 236          if (idx != unicodetoupper(idx, langnum)) {
 237             u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
 238             u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
 239          }
 240       }
 241       u16_u8(p, MAXWORDUTF8LEN, u, nc);
 242   } else {
 243     while (*p != '\0') {
 244         *p = csconv[((unsigned char) *p)].cupper;
 245         p++;
 246     }
 247   }
 248 }
 249
 250 int Hunspell::mkallcap2(char * p, w_char * u, int nc)
 251 {
 252   if (utf8) {
 253       unsigned short idx;
 254       for (int i = 0; i < nc; i++) {
 255          idx = (u[i].h << 8) + u[i].l;
 256          if (idx != unicodetoupper(idx, langnum)) {
 257             u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
 258             u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
 259          }
 260       }
 261       u16_u8(p, MAXWORDUTF8LEN, u, nc);
 262       return strlen(p);
 263   } else {
 264     while (*p != '\0') {
 265         *p = csconv[((unsigned char) *p)].cupper;
 266         p++;
 267     }
 268   }
 269   return nc;
 270 }
 271
 272
 273 void Hunspell::mkallsmall(char * p)
 274 {
 275     while (*p != '\0') {
 276         *p = csconv[((unsigned char) *p)].clower;
 277         p++;
 278     }
 279 }
 280
 281 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
 282 {
 283   if (utf8) {
 284       unsigned short idx;
 285       for (int i = 0; i < nc; i++) {
 286          idx = (u[i].h << 8) + u[i].l;
 287          if (idx != unicodetolower(idx, langnum)) {
 288             u[i].h = (unsigned char) (unicodetolower(idx, langnum) >> 8);
 289             u[i].l = (unsigned char) (unicodetolower(idx, langnum) & 0x00FF);
 290          }
 291       }
 292       u16_u8(p, MAXWORDUTF8LEN, u, nc);
 293       return strlen(p);
 294   } else {
 295     while (*p != '\0') {
 296         *p = csconv[((unsigned char) *p)].clower;
 297         p++;
 298     }
 299   }
 300   return nc;
 301 }
 302
 303 // convert UTF-8 sharp S codes to latin 1
 304 char * Hunspell::sharps_u8_l1(char * dest, char * source) {
 305     char * p = dest;
 306     *p = *source;
 307     for (p++, source++; *(source - 1); p++, source++) {
 308         *p = *source;
 309         if (*source == '?') *--p = '?';
 310     }
 311     return dest;
 312 }
 313
 314 // recursive search for right ss-?permutations
 315 hentry * Hunspell::spellsharps(char * base, char * pos, int n,
 316         int repnum, char * tmp, int * info, char **root) {
 317     pos = strstr(pos, "ss");
 318     if (pos && (n < MAXSHARPS)) {
 319         *pos = '?';
 320         *(pos + 1) = '?';
 321         hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
 322         if (h) return h;
 323         *pos = 's';
 324         *(pos + 1) = 's';
 325         h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
 326         if (h) return h;
 327     } else if (repnum > 0) {
 328         if (utf8) return checkword(base, info, root);
 329         return checkword(sharps_u8_l1(tmp, base), info, root);
 330     }
 331     return NULL;
 332 }
 333
 334 int Hunspell::is_keepcase(const hentry * rv) {
 335     return pAMgr && rv->astr && pAMgr->get_keepcase() &&
 336         TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
 337 }
 338
 339 /* check and insert a word to beginning of the suggestion array */
 340 int Hunspell::insert_sug(char ***slst, char * word, int *ns) {
 341     if (spell(word)) {
 342         if (*ns == MAXSUGGESTION) {
 343             (*ns)--;
 344             free((*slst)[*ns]);
 345         }
 346         for (int k = *ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
 347         (*slst)[0] = mystrdup(word);
 348         (*ns)++;
 349     }
 350     return 0;
 351 }
 352
 353 int Hunspell::spell(const char * word, int * info, char ** root)
 354 {
 355   struct hentry * rv=NULL;
 356   // need larger vector. For example, Turkish capital letter I converted a
 357   // 2-byte UTF-8 character (dotless i) by mkallsmall.
 358   char cw[MAXWORDUTF8LEN + 4];
 359   char wspace[MAXWORDUTF8LEN + 4];
 360   w_char unicw[MAXWORDLEN + 1];
 361   int nc = strlen(word);
 362   int wl2 = 0;
 363   if (utf8) {
 364     if (nc >= MAXWORDUTF8LEN) return 0;
 365   } else {
 366     if (nc >= MAXWORDLEN) return 0;
 367   }
 368   int captype = 0;
 369   int abbv = 0;
 370   int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
 371
 372   if (wl == 0) return 1;
 373
 374   if (info) *info = 0;
 375   if (root) *root = NULL;
 376
 377   // allow numbers with dots and commas (but forbid double separators: "..", ",," etc.)
 378   enum { NBEGIN, NNUM, NSEP };
 379   int nstate = NBEGIN;
 380   int i;
 381
 382   for (i = 0; (i < wl); i++) {
 383     if ((cw[i] <= '9') && (cw[i] >= '0')) {
 384         nstate = NNUM;
 385     } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
 386         if ((nstate == NSEP) || (i == 0)) break;
 387         nstate = NSEP;
 388     } else break;
 389   }
 390   if ((i == wl) && (nstate == NNUM)) return 1;
 391
 392   // LANG_hu section: number(s) + (percent or degree) with suffixes
 393   if (langnum == LANG_hu) {
 394     if ((nstate == NNUM) && ((cw[i] == '%') || (cw[i] == '?'))
 395         && checkword(cw + i, info, root)) return 1;
 396   }
 397   // END of LANG_hu section
 398
 399   switch(captype) {
 400      case HUHCAP:
 401      case HUHINITCAP:
 402      case NOCAP: {
 403                     rv = checkword(cw, info, root);
 404                     if ((abbv) && !(rv)) {
 405                         memcpy(wspace,cw,wl);
 406                         *(wspace+wl) = '.';
 407                         *(wspace+wl+1) = '\0';
 408                         rv = checkword(wspace, info, root);
 409                     }
 410                     break;
 411                  }
 412      case ALLCAP: {
 413                     rv = checkword(cw, info, root);
 414                     if (rv) break;
 415                     if (abbv) {
 416                         memcpy(wspace,cw,wl);
 417                         *(wspace+wl) = '.';
 418                         *(wspace+wl+1) = '\0';
 419                         rv = checkword(wspace, info, root);
 420                         if (rv) break;
 421                     }
 422                     if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
 423                         char tmpword[MAXWORDUTF8LEN];
 424                         wl = mkallsmall2(cw, unicw, nc);
 425                         memcpy(wspace,cw,(wl+1));
 426                         rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
 427                         if (!rv) {
 428                             wl2 = mkinitcap2(cw, unicw, nc);
 429                             rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
 430                         }
 431                         if ((abbv) && !(rv)) {
 432                             *(wspace+wl) = '.';
 433                             *(wspace+wl+1) = '\0';
 434                             rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
 435                             if (!rv) {
 436                                 memcpy(wspace, cw, wl2);
 437                                 *(wspace+wl2) = '.';
 438                                 *(wspace+wl2+1) = '\0';
 439                                 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
 440                             }
 441                         }
 442                         if (rv) break;
 443                     }
 444                 }
 445      case INITCAP: {
 446                      wl = mkallsmall2(cw, unicw, nc);
 447                      memcpy(wspace,cw,(wl+1));
 448                      rv = checkword(wspace, info, root);
 449                      if (!rv || (is_keepcase(rv) && !((captype == INITCAP) &&
 450                            // if CHECKSHARPS: KEEPCASE words with ?are allowed
 451                            // in INITCAP form, too.
 452                            pAMgr->get_checksharps() && ((utf8 && strstr(wspace, "脽")) ||
 453                             (!utf8 && strchr(wspace, '?')))))) {
 454                         wl2 = mkinitcap2(cw, unicw, nc);
 455                         rv = checkword(cw, info, root);
 456                         if (rv && (captype == ALLCAP) && is_keepcase(rv)) rv = NULL;
 457                      }
 458                      if (abbv && !rv) {
 459                          *(wspace+wl) = '.';
 460                          *(wspace+wl+1) = '\0';
 461                          rv = checkword(wspace, info, root);
 462                          if (!rv || is_keepcase(rv)) {
 463                             memcpy(wspace, cw, wl2);
 464                             *(wspace+wl2) = '.';
 465                             *(wspace+wl2+1) = '\0';
 466                             rv = checkword(wspace, info, root);
 467                             if (rv && ((captype == ALLCAP) && is_keepcase(rv))) rv = NULL;
 468                          }
 469                      }
 470                      break;
 471                    }
 472   }
 473
 474   if (rv) return 1;
 475
 476   // recursive breaking at break points (not good for morphological analysis)
 477   if (wordbreak) {
 478     char * s;
 479     char r;
 480     for (int j = 0; j < pAMgr->get_numbreak(); j++) {
 481       s=(char *) strstr(cw, wordbreak[j]);
 482       if (s) {
 483         r = *s;
 484         *s = '\0';
 485         // examine 2 sides of the break point
 486         if (spell(cw) && spell(s + strlen(wordbreak[j]))) {
 487             *s = r;
 488             return 1;
 489         }
 490         *s = r;
 491       }
 492     }
 493   }
 494
 495   // LANG_hu: compoundings with dashes and n-dashes XXX deprecated!
 496   if (langnum == LANG_hu) {
 497     int n;
 498     // compound word with dash (HU) I18n
 499     char * dash;
 500     int result = 0;
 501     // n-dash
 502     dash = (char *) strstr(cw,"-");
 503     if (dash && !wordbreak) {
 504         *dash = '\0';
 505         // examine 2 sides of the dash
 506         if (spell(cw) && spell(dash + 3)) {
 507             *dash = '?';
 508             return 1;
 509         }
 510         *dash = '?';
 511     }
 512     dash = (char *) strchr(cw,'-');
 513     if (dash) {
 514         *dash='\0';
 515         // examine 2 sides of the dash
 516         if (dash[1] == '\0') { // base word ending with dash
 517             if (spell(cw)) return 1;
 518         } else {
 519             // first word ending with dash: word-
 520             char r2 = *(dash + 1);
 521             dash[0]='-';
 522             dash[1]='\0';
 523             result = spell(cw);
 524             dash[1] = r2;
 525             dash[0]='\0';
 526             if (result && spell(dash+1) && ((strlen(dash+1) > 1) || (dash[1] == 'e') ||
 527                 ((dash[1] > '0') && (dash[1] < '9')))) return 1;
 528         }
 529         // affixed number in correct word
 530         if (result && (dash > cw) && (((*(dash-1)<='9') && (*(dash-1)>='0')) || (*(dash-1)>='.'))) {
 531             *dash='-';
 532             n = 1;
 533             if (*(dash - n) == '.') n++;
 534             // search first not a number character to left from dash
 535             while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
 536                 n++;
 537             }
 538             if ((dash - n) < cw) n--;
 539             // numbers: deprecated
 540             for(; n >= 1; n--) {
 541                 if ((*(dash - n) >= '0') && (*(dash - n) <= '9') &&
 542                     checkword(dash - n, info, root)) return 1;
 543             }
 544         }
 545     }
 546   }
 547   return 0;
 548 }
 549
 550 //int Hunspell::spell(const char * word) {
 551 //  return spell(word, NULL, NULL);
 552 //}
 553
 554 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
 555 {
 556   struct hentry * he = NULL;
 557   int len;
 558   char w2[MAXWORDUTF8LEN];
 559   const char * word;
 560
 561   char * ignoredchars = pAMgr->get_ignore();
 562   if (ignoredchars != NULL) {
 563      strcpy(w2, w);
 564      if (utf8) {
 565         int ignoredchars_utf16_len;
 566         unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
 567         remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
 568      } else {
 569         remove_ignored_chars(w2,ignoredchars);
 570      }
 571      word = w2;
 572      free(ignoredchars);
 573   } else word = w;
 574
 575   // word reversing wrapper for complex prefixes
 576   if (complexprefixes) {
 577     if (word != w2) {
 578       strcpy(w2, word);
 579       word = w2;
 580     }
 581     if (utf8) reverseword_utf(w2); else reverseword(w2);
 582   }
 583
 584   // look word in hash table
 585   if (pHMgr) he = pHMgr->lookup(word);
 586
 587   // check forbidden and onlyincompound words
 588   if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
 589     info += SPELL_FORBIDDEN;
 590     // LANG_hu section: set dash information for suggestions
 591     if (langnum == LANG_hu) {
 592         if (pAMgr->get_compoundflag() &&
 593             TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
 594                 info += SPELL_COMPOUND;
 595         }
 596     }
 597     return NULL;
 598   }
 599
 600   // he = next not pseudoroot and not onlyincompound homonym or NULL
 601   while (he && (he->astr) &&
 602     ((pAMgr->get_pseudoroot() && TESTAFF(he->astr, pAMgr->get_pseudoroot(), he->alen)) ||
 603        (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen))
 604     )) he = he->next_homonym;
 605
 606   // check with affixes
 607   if (!he && pAMgr) {
 608      // try stripping off affixes */
 609      len = strlen(word);
 610      he = pAMgr->affix_check(word, len, 0);
 611
 612      // check compound restriction
 613      if (he && he->astr && pAMgr->get_onlyincompound() &&
 614          TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) he = NULL;
 615
 616      if (he) {
 617         if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
 618             info += SPELL_FORBIDDEN;
 619             return NULL;
 620         }
 621         if (root) {
 622             *root = mystrdup(he->word);
 623             if (complexprefixes) {
 624                 if (utf8) reverseword_utf(*root); else reverseword(*root);
 625             }
 626         }
 627      // try check compound word
 628      } else if (pAMgr->get_compound()) {
 629           he = pAMgr->compound_check(word, len,
 630                                   0,0,100,0,NULL,0,NULL,NULL,0);
 631           // LANG_hu section: `moving rule' with last dash
 632           if ((!he) && (langnum == LANG_hu) && (word[len-1]=='-')) {
 633              char * dup = mystrdup(word);
 634              dup[len-1] = '\0';
 635              he = pAMgr->compound_check(dup, len-1,
 636                                   -5,0,100,0,NULL,1,NULL,NULL,0);
 637              free(dup);
 638           }
 639           // end of LANG speficic region
 640           if (he) {
 641                 if (root) {
 642                     *root = mystrdup(he->word);
 643                     if (complexprefixes) {
 644                         if (utf8) reverseword_utf(*root); else reverseword(*root);
 645                     }
 646                 }
 647                 if (info) *info += SPELL_COMPOUND;
 648           }
 649      }
 650
 651   }
 652
 653   return he;
 654 }
 655
 656 int Hunspell::suggest(char*** slst, const char * word)
 657 {
 658   char cw[MAXWORDUTF8LEN + 4];
 659   char wspace[MAXWORDUTF8LEN + 4];
 660   if (! pSMgr) return 0;
 661   w_char unicw[MAXWORDLEN + 1];
 662   int nc = strlen(word);
 663   if (utf8) {
 664     if (nc >= MAXWORDUTF8LEN) return 0;
 665   } else {
 666     if (nc >= MAXWORDLEN) return 0;
 667   }
 668   int captype = 0;
 669   int abbv = 0;
 670   int wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
 671   if (wl == 0) return 0;
 672   int ns = 0;
 673   *slst = NULL;
 674   int capwords = 0;
 675   int ngramsugs = 0;
 676
 677   switch(captype) {
 678      case NOCAP:   {
 679                      ns = pSMgr->suggest(slst, cw, ns);
 680                      break;
 681                    }
 682
 683      case INITCAP: {
 684                      capwords = 1;
 685                      ns = pSMgr->suggest(slst, cw, ns);
 686                      if (ns == -1) break;
 687                      memcpy(wspace,cw,(wl+1));
 688                      mkallsmall2(wspace, unicw, nc);
 689                      ns = pSMgr->suggest(slst, wspace, ns);
 690                      break;
 691                    }
 692      case HUHINITCAP:
 693                     capwords = 1;
 694      case HUHCAP: {
 695                      ns = pSMgr->suggest(slst, cw, ns);
 696                      if (ns != -1) {
 697                         int prevns;
 698                         if (captype == HUHINITCAP) {
 699                             // TheOpenOffice.org -> The OpenOffice.org
 700                             memcpy(wspace,cw,(wl+1));
 701                             mkinitsmall2(wspace, unicw, nc);
 702                             ns = pSMgr->suggest(slst, wspace, ns);
 703                         }
 704                         memcpy(wspace,cw,(wl+1));
 705                         mkallsmall2(wspace, unicw, nc);
 706                         insert_sug(slst, wspace, &ns);
 707                         prevns = ns;
 708                         ns = pSMgr->suggest(slst, wspace, ns);
 709                         if (captype == HUHINITCAP) {
 710                             mkinitcap2(wspace, unicw, nc);
 711                             insert_sug(slst, wspace, &ns);
 712                             ns = pSMgr->suggest(slst, wspace, ns);
 713                         }
 714                         // aNew -> "a New" (instead of "a new")
 715                         for (int j = prevns; j < ns; j++) {
 716                            char * space = strchr((*slst)[j],' ');
 717                            if (space) {
 718                                 int slen = strlen(space + 1);
 719                                 // different case after space (need capitalisation)
 720                                 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
 721                                     w_char w[MAXWORDLEN + 1];
 722                                     int wc = 0;
 723                                     char * r = (*slst)[j];
 724                                     if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
 725                                     mkinitcap2(space + 1, w, wc);
 726                                     // set as first suggestion
 727                                     for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
 728                                     (*slst)[0] = r;
 729                                 }
 730                            }
 731                         }
 732                      }
 733                      break;
 734                    }
 735
 736      case ALLCAP: {
 737                      memcpy(wspace, cw, (wl+1));
 738                      mkallsmall2(wspace, unicw, nc);
 739                      ns = pSMgr->suggest(slst, wspace, ns);
 740                      if (ns == -1) break;
 741                      if (pAMgr && pAMgr->get_keepcase()) insert_sug(slst, wspace, &ns);
 742                      mkinitcap2(wspace, unicw, nc);
 743                      ns = pSMgr->suggest(slst, wspace, ns);
 744                      for (int j=0; j < ns; j++) {
 745                         mkallcap((*slst)[j]);
 746                         if (pAMgr && pAMgr->get_checksharps()) {
 747                             char * pos;
 748                             if (utf8) {
 749                                 pos = strstr((*slst)[j], "脽");
 750                                 while (pos) {
 751                                     *pos = 'S';
 752                                     *(pos+1) = 'S';
 753                                     pos = strstr(pos+2, "脽");
 754                                 }
 755                             } else {
 756                                 pos = strchr((*slst)[j], '?');
 757                                 while (pos) {
 758                                     (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
 759                                     mystrrep((*slst)[j], "?", "SS");
 760                                     pos = strchr((*slst)[j], '?');
 761                                 }
 762                             }
 763                         }
 764                      }
 765                      break;
 766                    }
 767   }
 768
 769   // LANG_hu section: replace '-' with ' ' in Hungarian
 770   if (langnum == LANG_hu) {
 771       for (int j=0; j < ns; j++) {
 772           char * pos = strchr((*slst)[j],'-');
 773           if (pos) {
 774               int info;
 775               char w[MAXWORDUTF8LEN];
 776               *pos = '\0';
 777               strcpy(w, (*slst)[j]);
 778               strcat(w, pos + 1);
 779               spell(w, &info, NULL);
 780               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
 781                   *pos = ' ';
 782               } else *pos = '-';
 783           }
 784       }
 785   }
 786   // END OF LANG_hu section
 787
 788   // try ngram approach since found nothing
 789   if ((ns == 0) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {
 790       ngramsugs = 1;
 791       switch(captype) {
 792           case NOCAP: {
 793               ns = pSMgr->ngsuggest(*slst, cw, pHMgr);
 794               break;
 795           }
 796           case HUHCAP: {
 797               memcpy(wspace,cw,(wl+1));
 798               mkallsmall2(wspace, unicw, nc);
 799               ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
 800               break;
 801           }
 802           case INITCAP: {
 803               capwords = 1;
 804               memcpy(wspace,cw,(wl+1));
 805               mkallsmall2(wspace, unicw, nc);
 806               ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
 807               break;
 808           }
 809           case ALLCAP: {
 810               memcpy(wspace,cw,(wl+1));
 811               mkallsmall2(wspace, unicw, nc);
 812               ns = pSMgr->ngsuggest(*slst, wspace, pHMgr);
 813               for (int j=0; j < ns; j++)
 814                   mkallcap((*slst)[j]);
 815               break;
 816          }
 817       }
 818   }
 819
 820   // word reversing wrapper for complex prefixes
 821   if (complexprefixes) {
 822     for (int j = 0; j < ns; j++) {
 823       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
 824     }
 825   }
 826
 827   // capitalize
 828   if (capwords) for (int j=0; j < ns; j++) {
 829       mkinitcap((*slst)[j]);
 830   }
 831
 832   // expand suggestions with dot(s)
 833   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
 834     for (int j = 0; j < ns; j++) {
 835       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
 836       strcat((*slst)[j], word + strlen(word) - abbv);
 837     }
 838   }
 839
 840   // suggest keepcase
 841   if (pAMgr->get_keepcase()) {
 842   switch (captype) {
 843     case INITCAP:
 844     case ALLCAP: {
 845       int l = 0;
 846       for (int j=0; j < ns; j++) {
 847         if (!spell((*slst)[j])) {
 848           char s[MAXSWUTF8L];
 849           w_char w[MAXSWL];
 850           int len;
 851           if (utf8) {
 852             len = u8_u16(w, MAXSWL, (*slst)[j]);
 853           } else {
 854             strcpy(s, (*slst)[j]);
 855             len = strlen(s);
 856           }
 857           mkallsmall2(s, w, len);
 858           free((*slst)[j]);
 859           if (spell(s)) {
 860             (*slst)[l] = mystrdup(s);
 861             l++;
 862           } else {
 863             mkinitcap2(s, w, len);
 864             if (spell(s)) {
 865               (*slst)[l] = mystrdup(s);
 866               l++;
 867             }
 868           }
 869         } else {
 870           (*slst)[l] = (*slst)[j];
 871           l++;
 872         }
 873       }
 874       ns = l;
 875     }
 876   }
 877   }
 878
 879   // remove duplications
 880   int l = 0;
 881   for (int j = 0; j < ns; j++) {
 882     (*slst)[l] = (*slst)[j];
 883     for (int k = 0; k < l; k++) {
 884       if (strcmp((*slst)[k], (*slst)[j]) == 0) {
 885         free((*slst)[j]);
 886         l--;
 887       }
 888     }
 889     l++;
 890   }
 891   return l;
 892 }
 893
 894 char * Hunspell::get_dic_encoding()
 895 {
 896   return encoding;
 897 }
 898
 899 #ifdef HUNSPELL_EXPERIMENTAL
 900 // XXX need UTF-8 support
 901 int Hunspell::suggest_auto(char*** slst, const char * word)
 902 {
 903   char cw[MAXWORDUTF8LEN + 4];
 904   char wspace[MAXWORDUTF8LEN + 4];
 905   if (! pSMgr) return 0;
 906   int wl = strlen(word);
 907   if (utf8) {
 908     if (wl >= MAXWORDUTF8LEN) return 0;
 909   } else {
 910     if (wl >= MAXWORDLEN) return 0;
 911   }
 912   int captype = 0;
 913   int abbv = 0;
 914   wl = cleanword(cw, word, &captype, &abbv);
 915   if (wl == 0) return 0;
 916   int ns = 0;
 917   *slst = NULL; // HU, nsug in pSMgr->suggest
 918
 919   switch(captype) {
 920      case NOCAP:   {
 921                      ns = pSMgr->suggest_auto(slst, cw, ns);
 922                      if (ns>0) break;
 923                      break;
 924                    }
 925
 926      case INITCAP: {
 927                      memcpy(wspace,cw,(wl+1));
 928                      mkallsmall(wspace);
 929                      ns = pSMgr->suggest_auto(slst, wspace, ns);
 930                      for (int j=0; j < ns; j++)
 931                        mkinitcap((*slst)[j]);
 932                      ns = pSMgr->suggest_auto(slst, cw, ns);
 933                      break;
 934
 935                    }
 936
 937      case HUHCAP: {
 938                      ns = pSMgr->suggest_auto(slst, cw, ns);
 939                      if (ns == 0) {
 940                         memcpy(wspace,cw,(wl+1));
 941                         mkallsmall(wspace);
 942                         ns = pSMgr->suggest_auto(slst, wspace, ns);
 943                      }
 944                      break;
 945                    }
 946
 947      case ALLCAP: {
 948                      memcpy(wspace,cw,(wl+1));
 949                      mkallsmall(wspace);
 950                      ns = pSMgr->suggest_auto(slst, wspace, ns);
 951
 952                      mkinitcap(wspace);
 953                      ns = pSMgr->suggest_auto(slst, wspace, ns);
 954
 955                      for (int j=0; j < ns; j++)
 956                        mkallcap((*slst)[j]);
 957                      break;
 958                    }
 959   }
 960
 961   // word reversing wrapper for complex prefixes
 962   if (complexprefixes) {
 963     for (int j = 0; j < ns; j++) {
 964       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
 965     }
 966   }
 967
 968   // expand suggestions with dot(s)
 969   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
 970     for (int j = 0; j < ns; j++) {
 971       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
 972       strcat((*slst)[j], word + strlen(word) - abbv);
 973     }
 974   }
 975
 976   // LANG_hu section: replace '-' with ' ' in Hungarian
 977   if (langnum == LANG_hu) {
 978       for (int j=0; j < ns; j++) {
 979           char * pos = strchr((*slst)[j],'-');
 980           if (pos) {
 981               int info;
 982               char w[MAXWORDUTF8LEN];
 983               *pos = '\0';
 984               strcpy(w, (*slst)[j]);
 985               strcat(w, pos + 1);
 986               spell(w, &info, NULL);
 987               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
 988                   *pos = ' ';
 989               } else *pos = '-';
 990           }
 991       }
 992   }
 993   // END OF LANG_hu section
 994   return ns;
 995 }
 996
 997 // XXX need UTF-8 support
 998 int Hunspell::stem(char*** slst, const char * word)
 999 {
1000   char cw[MAXWORDUTF8LEN + 4];
1001   char wspace[MAXWORDUTF8LEN + 4];
1002   if (! pSMgr) return 0;
1003   int wl = strlen(word);
1004   if (utf8) {
1005     if (wl >= MAXWORDUTF8LEN) return 0;
1006   } else {
1007     if (wl >= MAXWORDLEN) return 0;
1008   }
1009   int captype = 0;
1010   int abbv = 0;
1011   wl = cleanword(cw, word, &captype, &abbv);
1012   if (wl == 0) return 0;
1013
1014   int ns = 0;
1015
1016   *slst = NULL; // HU, nsug in pSMgr->suggest
1017
1018   switch(captype) {
1019      case HUHCAP:
1020      case NOCAP:   {
1021                      ns = pSMgr->suggest_stems(slst, cw, ns);
1022
1023                      if ((abbv) && (ns == 0)) {
1024                          memcpy(wspace,cw,wl);
1025                          *(wspace+wl) = '.';
1026                          *(wspace+wl+1) = '\0';
1027                          ns = pSMgr->suggest_stems(slst, wspace, ns);
1028                      }
1029
1030                      break;
1031                    }
1032
1033      case INITCAP: {
1034
1035                      ns = pSMgr->suggest_stems(slst, cw, ns);
1036
1037                      if (ns == 0) {
1038                         memcpy(wspace,cw,(wl+1));
1039                         mkallsmall(wspace);
1040                         ns = pSMgr->suggest_stems(slst, wspace, ns);
1041
1042                      }
1043
1044                      if ((abbv) && (ns == 0)) {
1045                          memcpy(wspace,cw,wl);
1046                          mkallsmall(wspace);
1047                          *(wspace+wl) = '.';
1048                          *(wspace+wl+1) = '\0';
1049                          ns = pSMgr->suggest_stems(slst, wspace, ns);
1050                      }
1051
1052                      break;
1053
1054                    }
1055
1056      case ALLCAP: {
1057                      ns = pSMgr->suggest_stems(slst, cw, ns);
1058                      if (ns != 0) break;
1059
1060                      memcpy(wspace,cw,(wl+1));
1061                      mkallsmall(wspace);
1062                      ns = pSMgr->suggest_stems(slst, wspace, ns);
1063
1064                      if (ns == 0) {
1065                          mkinitcap(wspace);
1066                          ns = pSMgr->suggest_stems(slst, wspace, ns);
1067                      }
1068
1069                      if ((abbv) && (ns == 0)) {
1070                          memcpy(wspace,cw,wl);
1071                          mkallsmall(wspace);
1072                          *(wspace+wl) = '.';
1073                          *(wspace+wl+1) = '\0';
1074                          ns = pSMgr->suggest_stems(slst, wspace, ns);
1075                      }
1076
1077
1078                      break;
1079                    }
1080   }
1081
1082   return ns;
1083 }
1084
1085 int Hunspell::suggest_pos_stems(char*** slst, const char * word)
1086 {
1087   char cw[MAXWORDUTF8LEN + 4];
1088   char wspace[MAXWORDUTF8LEN + 4];
1089   if (! pSMgr) return 0;
1090   int wl = strlen(word);
1091   if (utf8) {
1092     if (wl >= MAXWORDUTF8LEN) return 0;
1093   } else {
1094     if (wl >= MAXWORDLEN) return 0;
1095   }
1096   int captype = 0;
1097   int abbv = 0;
1098   wl = cleanword(cw, word, &captype, &abbv);
1099   if (wl == 0) return 0;
1100
1101   int ns = 0; // ns=0 = normalized input
1102
1103   *slst = NULL; // HU, nsug in pSMgr->suggest
1104
1105   switch(captype) {
1106      case HUHCAP:
1107      case NOCAP:   {
1108                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1109
1110                      if ((abbv) && (ns == 0)) {
1111                          memcpy(wspace,cw,wl);
1112                          *(wspace+wl) = '.';
1113                          *(wspace+wl+1) = '\0';
1114                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1115                      }
1116
1117                      break;
1118                    }
1119
1120      case INITCAP: {
1121
1122                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1123
1124                      if (ns == 0 || ((*slst)[0][0] == '#')) {
1125                         memcpy(wspace,cw,(wl+1));
1126                         mkallsmall(wspace);
1127                         ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1128                      }
1129
1130                      break;
1131
1132                    }
1133
1134      case ALLCAP: {
1135                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1136                      if (ns != 0) break;
1137
1138                      memcpy(wspace,cw,(wl+1));
1139                      mkallsmall(wspace);
1140                      ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1141
1142                      if (ns == 0) {
1143                          mkinitcap(wspace);
1144                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1145                      }
1146                      break;
1147                    }
1148   }
1149
1150   return ns;
1151 }
1152 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1153
1154 const char * Hunspell::get_wordchars()
1155 {
1156   return pAMgr->get_wordchars();
1157 }
1158
1159 unsigned short * Hunspell::get_wordchars_utf16(int * len)
1160 {
1161   return pAMgr->get_wordchars_utf16(len);
1162 }
1163
1164 void Hunspell::mkinitcap(char * p)
1165 {
1166   if (!utf8) {
1167     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1168   } else {
1169       int len;
1170       w_char u[MAXWORDLEN];
1171       len = u8_u16(u, MAXWORDLEN, p);
1172       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1173       u[0].h = (unsigned char) (i >> 8);
1174       u[0].l = (unsigned char) (i & 0x00FF);
1175       u16_u8(p, MAXWORDUTF8LEN, u, len);
1176   }
1177 }
1178
1179 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
1180 {
1181   if (!utf8) {
1182     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1183   } else if (nc > 0) {
1184       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1185       u[0].h = (unsigned char) (i >> 8);
1186       u[0].l = (unsigned char) (i & 0x00FF);
1187       u16_u8(p, MAXWORDUTF8LEN, u, nc);
1188       return strlen(p);
1189   }
1190   return nc;
1191 }
1192
1193 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
1194 {
1195   if (!utf8) {
1196     if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
1197   } else if (nc > 0) {
1198       unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
1199       u[0].h = (unsigned char) (i >> 8);
1200       u[0].l = (unsigned char) (i & 0x00FF);
1201       u16_u8(p, MAXWORDUTF8LEN, u, nc);
1202       return strlen(p);
1203   }
1204   return nc;
1205 }
1206
1207 int Hunspell::put_word(const char * word)
1208 {
1209     if (pHMgr) {
1210         return pHMgr->put_word(word, strlen(word), NULL);
1211     }
1212     return 0;
1213 }
1214
1215 int Hunspell::put_word_pattern(const char * word, const char * pattern)
1216 {
1217     if (pHMgr) {
1218         return pHMgr->put_word_pattern(word, strlen(word), pattern);
1219     }
1220     return 0;
1221 }
1222
1223 const char * Hunspell::get_version()
1224 {
1225   return pAMgr->get_version();
1226 }
1227
1228 struct cs_info * Hunspell::get_csconv()
1229 {
1230   return csconv;
1231 }
1232
1233 #ifdef HUNSPELL_EXPERIMENTAL
1234 // XXX need UTF-8 support
1235 char * Hunspell::morph(const char * word)
1236 {
1237   char cw[MAXWORDUTF8LEN + 4];
1238   char wspace[MAXWORDUTF8LEN + 4];
1239   if (! pSMgr) return 0;
1240   int wl = strlen(word);
1241   if (utf8) {
1242     if (wl >= MAXWORDUTF8LEN) return 0;
1243   } else {
1244     if (wl >= MAXWORDLEN) return 0;
1245   }
1246   int captype = 0;
1247   int abbv = 0;
1248   wl = cleanword(cw, word, &captype, &abbv);
1249   if (wl == 0) {
1250       if (abbv) {
1251           for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
1252           cw[wl] = '\0';
1253           abbv = 0;
1254       } else return 0;
1255   }
1256
1257   char result[MAXLNLEN];
1258   char * st = NULL;
1259
1260   *result = '\0';
1261
1262   int n = 0;
1263   int n2 = 0;
1264   int n3 = 0;
1265
1266   // test numbers
1267   // LANG_hu section: set dash information for suggestions
1268   if (langnum == LANG_hu) {
1269   while ((n < wl) &&
1270         (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
1271         n++;
1272         if ((cw[n] == '.') || (cw[n] == ',')) {
1273                 if (((n2 == 0) && (n > 3)) ||
1274                         ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
1275                 n2++;
1276                 n3 = n;
1277         }
1278   }
1279
1280   if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return NULL;
1281   if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='?)) && checkword(cw+n, NULL, NULL))) {
1282         strcat(result, cw);
1283         result[n - 1] = '\0';
1284         if (n == wl) {
1285                 st = pSMgr->suggest_morph(cw + n - 1);
1286                 if (st) {
1287                         strcat(result, st);
1288                         free(st);
1289                 }
1290         } else {
1291                 char sign = cw[n];
1292                 cw[n] = '\0';
1293                 st = pSMgr->suggest_morph(cw + n - 1);
1294                 if (st) {
1295                         strcat(result, st);
1296                         free(st);
1297                 }
1298                 strcat(result, "+"); // XXX SPEC. MORPHCODE
1299                 cw[n] = sign;
1300                 st = pSMgr->suggest_morph(cw + n);
1301                 if (st) {
1302                         strcat(result, st);
1303                         free(st);
1304                 }
1305         }
1306         return mystrdup(result);
1307   }
1308   }
1309   // END OF LANG_hu section
1310
1311   switch(captype) {
1312      case NOCAP:   {
1313                      st = pSMgr->suggest_morph(cw);
1314                      if (st) {
1315                         strcat(result, st);
1316                         free(st);
1317                      }
1318                                          if (abbv) {
1319                                         memcpy(wspace,cw,wl);
1320                          *(wspace+wl) = '.';
1321                          *(wspace+wl+1) = '\0';
1322                          st = pSMgr->suggest_morph(wspace);
1323                          if (st) {
1324                             if (*result) strcat(result, "\n");
1325                             strcat(result, st);
1326                             free(st);
1327                                                  }
1328                      }
1329                                          break;
1330                    }
1331      case INITCAP: {
1332                      memcpy(wspace,cw,(wl+1));
1333                      mkallsmall(wspace);
1334                      st = pSMgr->suggest_morph(wspace);
1335                      if (st) {
1336                         strcat(result, st);
1337                         free(st);
1338                      }
1339                          st = pSMgr->suggest_morph(cw);
1340                      if (st) {
1341                         if (*result) strcat(result, "\n");
1342                         strcat(result, st);
1343                         free(st);
1344                      }
1345                                          if (abbv) {
1346                                          memcpy(wspace,cw,wl);
1347                          *(wspace+wl) = '.';
1348                          *(wspace+wl+1) = '\0';
1349                          mkallsmall(wspace);
1350                          st = pSMgr->suggest_morph(wspace);
1351                          if (st) {
1352                             if (*result) strcat(result, "\n");
1353                             strcat(result, st);
1354                             free(st);
1355                                                  }
1356                          mkinitcap(wspace);
1357                          st = pSMgr->suggest_morph(wspace);
1358                          if (st) {
1359                             if (*result) strcat(result, "\n");
1360                             strcat(result, st);
1361                             free(st);
1362                                                  }
1363                      }
1364                      break;
1365                    }
1366      case HUHCAP: {
1367                      st = pSMgr->suggest_morph(cw);
1368                      if (st) {
1369                         strcat(result, st);
1370                         free(st);
1371                      }
1372 #if 0
1373                      memcpy(wspace,cw,(wl+1));
1374                      mkallsmall(wspace);
1375                      st = pSMgr->suggest_morph(wspace);
1376                      if (st) {
1377                         if (*result) strcat(result, "\n");
1378                         strcat(result, st);
1379                         free(st);
1380                      }
1381 #endif
1382                      break;
1383                  }
1384      case ALLCAP: {
1385                      memcpy(wspace,cw,(wl+1));
1386                      st = pSMgr->suggest_morph(wspace);
1387                      if (st) {
1388                         strcat(result, st);
1389                         free(st);
1390                      }
1391                      mkallsmall(wspace);
1392                      st = pSMgr->suggest_morph(wspace);
1393                      if (st) {
1394                         if (*result) strcat(result, "\n");
1395                         strcat(result, st);
1396                         free(st);
1397                      }
1398                              mkinitcap(wspace);
1399                              st = pSMgr->suggest_morph(wspace);
1400                      if (st) {
1401                         if (*result) strcat(result, "\n");
1402                         strcat(result, st);
1403                         free(st);
1404                      }
1405                                          if (abbv) {
1406                         memcpy(wspace,cw,(wl+1));
1407                         *(wspace+wl) = '.';
1408                         *(wspace+wl+1) = '\0';
1409                         if (*result) strcat(result, "\n");
1410                         st = pSMgr->suggest_morph(wspace);
1411                         if (st) {
1412                                 strcat(result, st);
1413                                 free(st);
1414                         }
1415                         mkallsmall(wspace);
1416                         st = pSMgr->suggest_morph(wspace);
1417                         if (st) {
1418                           if (*result) strcat(result, "\n");
1419                           strcat(result, st);
1420                           free(st);
1421                         }
1422                                 mkinitcap(wspace);
1423                                 st = pSMgr->suggest_morph(wspace);
1424                         if (st) {
1425                           if (*result) strcat(result, "\n");
1426                           strcat(result, st);
1427                           free(st);
1428                         }
1429                                          }
1430                      break;
1431                    }
1432   }
1433
1434   if (result && (*result)) {
1435     // word reversing wrapper for complex prefixes
1436     if (complexprefixes) {
1437       if (utf8) reverseword_utf(result); else reverseword(result);
1438     }
1439     return mystrdup(result);
1440   }
1441
1442   // compound word with dash (HU) I18n
1443   char * dash = NULL;
1444   int nresult = 0;
1445   // LANG_hu section: set dash information for suggestions
1446   if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
1447   if ((langnum == LANG_hu) && dash) {
1448       *dash='\0';
1449       // examine 2 sides of the dash
1450       if (dash[1] == '\0') { // base word ending with dash
1451         if (spell(cw)) return pSMgr->suggest_morph(cw);
1452       } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
1453         if (spell(cw) && (spell("-e"))) {
1454                         st = pSMgr->suggest_morph(cw);
1455                         if (st) {
1456                                 strcat(result, st);
1457                                 free(st);
1458                         }
1459                         strcat(result,"+"); // XXX spec. separator in MORPHCODE
1460                         st = pSMgr->suggest_morph("-e");
1461                         if (st) {
1462                                 strcat(result, st);
1463                                 free(st);
1464                         }
1465                         return mystrdup(result);
1466                 }
1467       } else {
1468       // first word ending with dash: word- XXX ???
1469         char r2 = *(dash + 1);
1470         dash[0]='-';
1471         dash[1]='\0';
1472         nresult = spell(cw);
1473         dash[1] = r2;
1474         dash[0]='\0';
1475         if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
1476                 ((dash[1] > '0') && (dash[1] < '9')))) {
1477                             st = morph(cw);
1478                             if (st) {
1479                                 strcat(result, st);
1480                                     free(st);
1481                                 strcat(result,"+"); // XXX spec. separator in MORPHCODE
1482                             }
1483                             st = morph(dash+1);
1484                             if (st) {
1485                                     strcat(result, st);
1486                                     free(st);
1487                             }
1488                             return mystrdup(result);
1489                         }
1490       }
1491       // affixed number in correct word
1492      if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
1493                         (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
1494          *dash='-';
1495          n = 1;
1496          if (*(dash - n) == '.') n++;
1497          // search first not a number character to left from dash
1498          while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
1499             n++;
1500          }
1501          if ((dash - n) < cw) n--;
1502          // numbers: valami1000000-hoz
1503          // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1504          // 56-hoz, 6-hoz
1505          for(; n >= 1; n--) {
1506             if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
1507                     strcat(result, cw);
1508                     result[dash - cw - n] = '\0';
1509                         st = pSMgr->suggest_morph(dash - n);
1510                         if (st) {
1511                         strcat(result, st);
1512                                 free(st);
1513                         }
1514                     return mystrdup(result);
1515             }
1516          }
1517      }
1518   }
1519   return NULL;
1520 }
1521
1522 // XXX need UTF-8 support
1523 char * Hunspell::morph_with_correction(const char * word)
1524 {
1525   char cw[MAXWORDUTF8LEN + 4];
1526   char wspace[MAXWORDUTF8LEN + 4];
1527   if (! pSMgr) return 0;
1528   int wl = strlen(word);
1529   if (utf8) {
1530     if (wl >= MAXWORDUTF8LEN) return 0;
1531   } else {
1532     if (wl >= MAXWORDLEN) return 0;
1533   }
1534   int captype = 0;
1535   int abbv = 0;
1536   wl = cleanword(cw, word, &captype, &abbv);
1537   if (wl == 0) return 0;
1538
1539   char result[MAXLNLEN];
1540   char * st = NULL;
1541
1542   *result = '\0';
1543
1544
1545   switch(captype) {
1546      case NOCAP:   {
1547                      st = pSMgr->suggest_morph_for_spelling_error(cw);
1548                      if (st) {
1549                         strcat(result, st);
1550                         free(st);
1551                      }
1552                                          if (abbv) {
1553                                         memcpy(wspace,cw,wl);
1554                          *(wspace+wl) = '.';
1555                          *(wspace+wl+1) = '\0';
1556                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1557                          if (st) {
1558                             if (*result) strcat(result, "\n");
1559                             strcat(result, st);
1560                             free(st);
1561                                                  }
1562                      }
1563                                          break;
1564                    }
1565      case INITCAP: {
1566                      memcpy(wspace,cw,(wl+1));
1567                      mkallsmall(wspace);
1568                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1569                      if (st) {
1570                         strcat(result, st);
1571                         free(st);
1572                      }
1573                          st = pSMgr->suggest_morph_for_spelling_error(cw);
1574                      if (st) {
1575                         if (*result) strcat(result, "\n");
1576                         strcat(result, st);
1577                         free(st);
1578                      }
1579                                          if (abbv) {
1580                                          memcpy(wspace,cw,wl);
1581                          *(wspace+wl) = '.';
1582                          *(wspace+wl+1) = '\0';
1583                          mkallsmall(wspace);
1584                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1585                          if (st) {
1586                             if (*result) strcat(result, "\n");
1587                             strcat(result, st);
1588                             free(st);
1589                                                  }
1590                          mkinitcap(wspace);
1591                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1592                          if (st) {
1593                             if (*result) strcat(result, "\n");
1594                             strcat(result, st);
1595                             free(st);
1596                                                  }
1597                      }
1598                      break;
1599                    }
1600      case HUHCAP: {
1601                      st = pSMgr->suggest_morph_for_spelling_error(cw);
1602                      if (st) {
1603                         strcat(result, st);
1604                         free(st);
1605                      }
1606                      memcpy(wspace,cw,(wl+1));
1607                      mkallsmall(wspace);
1608                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1609                      if (st) {
1610                         if (*result) strcat(result, "\n");
1611                         strcat(result, st);
1612                         free(st);
1613                      }
1614                      break;
1615                  }
1616      case ALLCAP: {
1617                      memcpy(wspace,cw,(wl+1));
1618                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1619                      if (st) {
1620                         strcat(result, st);
1621                         free(st);
1622                      }
1623                      mkallsmall(wspace);
1624                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1625                      if (st) {
1626                         if (*result) strcat(result, "\n");
1627                         strcat(result, st);
1628                         free(st);
1629                      }
1630                              mkinitcap(wspace);
1631                              st = pSMgr->suggest_morph_for_spelling_error(wspace);
1632                      if (st) {
1633                         if (*result) strcat(result, "\n");
1634                         strcat(result, st);
1635                         free(st);
1636                      }
1637                                          if (abbv) {
1638                         memcpy(wspace,cw,(wl+1));
1639                         *(wspace+wl) = '.';
1640                         *(wspace+wl+1) = '\0';
1641                         if (*result) strcat(result, "\n");
1642                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
1643                         if (st) {
1644                                 strcat(result, st);
1645                                 free(st);
1646                         }
1647                         mkallsmall(wspace);
1648                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
1649                         if (st) {
1650                           if (*result) strcat(result, "\n");
1651                           strcat(result, st);
1652                           free(st);
1653                         }
1654                                 mkinitcap(wspace);
1655                                 st = pSMgr->suggest_morph_for_spelling_error(wspace);
1656                         if (st) {
1657                           if (*result) strcat(result, "\n");
1658                           strcat(result, st);
1659                           free(st);
1660                         }
1661                                          }
1662                      break;
1663                    }
1664   }
1665
1666   if (result) return mystrdup(result);
1667   return NULL;
1668 }
1669
1670 /* analyze word
1671  * return line count
1672  * XXX need a better data structure for morphological analysis */
1673 int Hunspell::analyze(char ***out, const char *word) {
1674   int  n = 0;
1675   if (!word) return 0;
1676   char * m = morph(word);
1677   if(!m) return 0;
1678   if (!out) return line_tok(m, out);
1679
1680   // without memory allocation
1681   /* BUG missing buffer size checking */
1682   int i, p;
1683   for(p = 0, i = 0; m[i]; i++) {
1684      if(m[i] == '\n' || !m[i+1]) {
1685        n++;
1686        strncpy((*out)[n++], m + p, i - p + 1);
1687        if (m[i] == '\n') (*out)[n++][i - p] = '\0';
1688        if(!m[i+1]) break;
1689        p = i + 1;
1690      }
1691   }
1692   free(m);
1693   return n;
1694 }
1695
1696 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1697
1698 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
1699 {
1700         return (Hunhandle*)(new Hunspell(affpath, dpath));
1701 }
1702
1703 void Hunspell_destroy(Hunhandle *pHunspell)
1704 {
1705         delete (Hunspell*)(pHunspell);
1706 }
1707
1708 int Hunspell_spell(Hunhandle *pHunspell, const char *word)
1709 {
1710         return ((Hunspell*)pHunspell)->spell(word);
1711 }
1712
1713 char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
1714 {
1715         return ((Hunspell*)pHunspell)->get_dic_encoding();
1716 }
1717
1718 int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
1719 {
1720         return ((Hunspell*)pHunspell)->suggest(slst, word);
1721 }
1722