ext/hunspell/hunspell.cxx

   1 #include "license.hunspell"
   2 #include "license.myspell"
   3
   4 #include <stdlib.h>
   5 #include <string.h>
   6 #include <stdio.h>
   7
   8 #include "hunspell.hxx"
   9 #include "hunspell.h"
  10 #ifndef MOZILLA_CLIENT
  11 #    include "config.h"
  12 #endif
  13 #include "csutil.hxx"
  14
  15 Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
  16 {
  17     encoding = NULL;
  18     csconv = NULL;
  19     utf8 = 0;
  20     complexprefixes = 0;
  21     affixpath = mystrdup(affpath);
  22     maxdic = 0;
  23
  24     /* first set up the hash manager */
  25     pHMgr[0] = new HashMgr(dpath, affpath, key);
  26     if (pHMgr[0]) maxdic = 1;
  27
  28     /* next set up the affix manager */
  29     /* it needs access to the hash manager lookup methods */
  30     pAMgr = new AffixMgr(affpath, pHMgr, &maxdic, key);
  31
  32     /* get the preferred try string and the dictionary */
  33     /* encoding from the Affix Manager for that dictionary */
  34     char * try_string = pAMgr->get_try_string();
  35     encoding = pAMgr->get_encoding();
  36     langnum = pAMgr->get_langnum();
  37     utf8 = pAMgr->get_utf8();
  38     if (!utf8)
  39         csconv = get_current_cs(encoding);
  40     complexprefixes = pAMgr->get_complexprefixes();
  41     wordbreak = pAMgr->get_breaktable();
  42
  43     /* and finally set up the suggestion manager */
  44     pSMgr = new SuggestMgr(try_string, MAXSUGGESTION, pAMgr);
  45     if (try_string) free(try_string);
  46 }
  47
  48 Hunspell::~Hunspell()
  49 {
  50     if (pSMgr) delete pSMgr;
  51     if (pAMgr) delete pAMgr;
  52     for (int i = 0; i < maxdic; i++) delete pHMgr[i];
  53     maxdic = 0;
  54     pSMgr = NULL;
  55     pAMgr = NULL;
  56 #ifdef MOZILLA_CLIENT
  57     delete [] csconv;
  58 #endif
  59     csconv= NULL;
  60     if (encoding) free(encoding);
  61     encoding = NULL;
  62     if (affixpath) free(affixpath);
  63     affixpath = NULL;
  64 }
  65
  66 // load extra dictionaries
  67 int Hunspell::add_dic(const char * dpath, const char * key) {
  68     if (maxdic == MAXDIC || !affixpath) return 1;
  69     pHMgr[maxdic] = new HashMgr(dpath, affixpath, key);
  70     if (pHMgr[maxdic]) maxdic++; else return 1;
  71     return 0;
  72 }
  73
  74 // make a copy of src at destination while removing all leading
  75 // blanks and removing any trailing periods after recording
  76 // their presence with the abbreviation flag
  77 // also since already going through character by character,
  78 // set the capitalization type
  79 // return the length of the "cleaned" (and UTF-8 encoded) word
  80
  81 int Hunspell::cleanword2(char * dest, const char * src,
  82     w_char * dest_utf, int * nc, int * pcaptype, int * pabbrev)
  83 {
  84    unsigned char * p = (unsigned char *) dest;
  85    const unsigned char * q = (const unsigned char * ) src;
  86
  87    // first skip over any leading blanks
  88    while ((*q != '\0') && (*q == ' ')) q++;
  89
  90    // now strip off any trailing periods (recording their presence)
  91    *pabbrev = 0;
  92    int nl = strlen((const char *)q);
  93    while ((nl > 0) && (*(q+nl-1)=='.')) {
  94        nl--;
  95        (*pabbrev)++;
  96    }
  97
  98    // if no characters are left it can't be capitalized
  99    if (nl <= 0) {
 100        *pcaptype = NOCAP;
 101        *p = '\0';
 102        return 0;
 103    }
 104
 105    strncpy(dest, (char *) q, nl);
 106    *(dest + nl) = '\0';
 107    nl = strlen(dest);
 108    if (utf8) {
 109       *nc = u8_u16(dest_utf, MAXWORDLEN, dest);
 110       // don't check too long words
 111       if (*nc >= MAXWORDLEN) return 0;
 112       if (*nc == -1) { // big Unicode character (non BMP area)
 113          *pcaptype = NOCAP;
 114          return nl;
 115       }
 116      *pcaptype = get_captype_utf8(dest_utf, *nc, langnum);
 117    } else {
 118      *pcaptype = get_captype(dest, nl, csconv);
 119      *nc = nl;
 120    }
 121    return nl;
 122 }
 123
 124 int Hunspell::cleanword(char * dest, const char * src,
 125     int * pcaptype, int * pabbrev)
 126 {
 127    unsigned char * p = (unsigned char *) dest;
 128    const unsigned char * q = (const unsigned char * ) src;
 129    int firstcap = 0;
 130
 131    // first skip over any leading blanks
 132    while ((*q != '\0') && (*q == ' ')) q++;
 133
 134    // now strip off any trailing periods (recording their presence)
 135    *pabbrev = 0;
 136    int nl = strlen((const char *)q);
 137    while ((nl > 0) && (*(q+nl-1)=='.')) {
 138        nl--;
 139        (*pabbrev)++;
 140    }
 141
 142    // if no characters are left it can't be capitalized
 143    if (nl <= 0) {
 144        *pcaptype = NOCAP;
 145        *p = '\0';
 146        return 0;
 147    }
 148
 149    // now determine the capitalization type of the first nl letters
 150    int ncap = 0;
 151    int nneutral = 0;
 152    int nc = 0;
 153
 154    if (!utf8) {
 155       while (nl > 0) {
 156          nc++;
 157          if (csconv[(*q)].ccase) ncap++;
 158          if (csconv[(*q)].cupper == csconv[(*q)].clower) nneutral++;
 159          *p++ = *q++;
 160          nl--;
 161       }
 162       // remember to terminate the destination string
 163       *p = '\0';
 164       firstcap = csconv[(unsigned char)(*dest)].ccase;
 165    } else {
 166       unsigned short idx;
 167       w_char t[MAXWORDLEN];
 168       nc = u8_u16(t, MAXWORDLEN, src);
 169       for (int i = 0; i < nc; i++) {
 170          idx = (t[i].h << 8) + t[i].l;
 171          unsigned short low = unicodetolower(idx, langnum);
 172          if (idx != low) ncap++;
 173          if (unicodetoupper(idx, langnum) == low) nneutral++;
 174       }
 175       u16_u8(dest, MAXWORDUTF8LEN, t, nc);
 176       if (ncap) {
 177          idx = (t[0].h << 8) + t[0].l;
 178          firstcap = (idx != unicodetolower(idx, langnum));
 179       }
 180    }
 181
 182    // now finally set the captype
 183    if (ncap == 0) {
 184         *pcaptype = NOCAP;
 185    } else if ((ncap == 1) && firstcap) {
 186         *pcaptype = INITCAP;
 187    } else if ((ncap == nc) || ((ncap + nneutral) == nc)){
 188         *pcaptype = ALLCAP;
 189    } else if ((ncap > 1) && firstcap) {
 190         *pcaptype = HUHINITCAP;
 191    } else {
 192         *pcaptype = HUHCAP;
 193    }
 194    return strlen(dest);
 195 }
 196
 197 void Hunspell::mkallcap(char * p)
 198 {
 199   if (utf8) {
 200       w_char u[MAXWORDLEN];
 201       int nc = u8_u16(u, MAXWORDLEN, p);
 202       unsigned short idx;
 203       for (int i = 0; i < nc; i++) {
 204          idx = (u[i].h << 8) + u[i].l;
 205          if (idx != unicodetoupper(idx, langnum)) {
 206             u[i].h = (unsigned char) (unicodetoupper(idx, langnum) >> 8);
 207             u[i].l = (unsigned char) (unicodetoupper(idx, langnum) & 0x00FF);
 208          }
 209       }
 210       u16_u8(p, MAXWORDUTF8LEN, u, nc);
 211   } else {
 212     while (*p != '\0') {
 213         *p = csconv[((unsigned char) *p)].cupper;
 214         p++;
 215     }
 216   }
 217 }
 218
 219 int Hunspell::mkallcap2(char * p, w_char * u, int nc)
 220 {
 221   if (utf8) {
 222       unsigned short idx;
 223       for (int i = 0; i < nc; i++) {
 224          idx = (u[i].h << 8) + u[i].l;
 225          unsigned short up = unicodetoupper(idx, langnum);
 226          if (idx != up) {
 227             u[i].h = (unsigned char) (up >> 8);
 228             u[i].l = (unsigned char) (up & 0x00FF);
 229          }
 230       }
 231       u16_u8(p, MAXWORDUTF8LEN, u, nc);
 232       return strlen(p);
 233   } else {
 234     while (*p != '\0') {
 235         *p = csconv[((unsigned char) *p)].cupper;
 236         p++;
 237     }
 238   }
 239   return nc;
 240 }
 241
 242
 243 void Hunspell::mkallsmall(char * p)
 244 {
 245     while (*p != '\0') {
 246         *p = csconv[((unsigned char) *p)].clower;
 247         p++;
 248     }
 249 }
 250
 251 int Hunspell::mkallsmall2(char * p, w_char * u, int nc)
 252 {
 253   if (utf8) {
 254       unsigned short idx;
 255       for (int i = 0; i < nc; i++) {
 256          idx = (u[i].h << 8) + u[i].l;
 257          unsigned short low = unicodetolower(idx, langnum);
 258          if (idx != low) {
 259             u[i].h = (unsigned char) (low >> 8);
 260             u[i].l = (unsigned char) (low & 0x00FF);
 261          }
 262       }
 263       u16_u8(p, MAXWORDUTF8LEN, u, nc);
 264       return strlen(p);
 265   } else {
 266     while (*p != '\0') {
 267         *p = csconv[((unsigned char) *p)].clower;
 268         p++;
 269     }
 270   }
 271   return nc;
 272 }
 273
 274 // convert UTF-8 sharp S codes to latin 1
 275 char * Hunspell::sharps_u8_l1(char * dest, char * source) {
 276     char * p = dest;
 277     *p = *source;
 278     for (p++, source++; *(source - 1); p++, source++) {
 279         *p = *source;
 280         if (*source == '\x9F') *--p = '\xDF';
 281     }
 282     return dest;
 283 }
 284
 285 // recursive search for right ss - sharp s permutations
 286 hentry * Hunspell::spellsharps(char * base, char * pos, int n,
 287         int repnum, char * tmp, int * info, char **root) {
 288     pos = strstr(pos, "ss");
 289     if (pos && (n < MAXSHARPS)) {
 290         *pos = '\xC3';
 291         *(pos + 1) = '\x9F';
 292         hentry * h = spellsharps(base, pos + 2, n + 1, repnum + 1, tmp, info, root);
 293         if (h) return h;
 294         *pos = 's';
 295         *(pos + 1) = 's';
 296         h = spellsharps(base, pos + 2, n + 1, repnum, tmp, info, root);
 297         if (h) return h;
 298     } else if (repnum > 0) {
 299         if (utf8) return checkword(base, info, root);
 300         return checkword(sharps_u8_l1(tmp, base), info, root);
 301     }
 302     return NULL;
 303 }
 304
 305 int Hunspell::is_keepcase(const hentry * rv) {
 306     return pAMgr && rv->astr && pAMgr->get_keepcase() &&
 307         TESTAFF(rv->astr, pAMgr->get_keepcase(), rv->alen);
 308 }
 309
 310 /* insert a word to the beginning of the suggestion array and return ns */
 311 int Hunspell::insert_sug(char ***slst, char * word, int ns) {
 312     char * dup = mystrdup(word);
 313     if (!dup) return ns;
 314     if (ns == MAXSUGGESTION) {
 315         ns--;
 316         free((*slst)[ns]);
 317     }
 318     for (int k = ns; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
 319     (*slst)[0] = dup;
 320     return ns + 1;
 321 }
 322
 323 int Hunspell::spell(const char * word, int * info, char ** root)
 324 {
 325   struct hentry * rv=NULL;
 326   // need larger vector. For example, Turkish capital letter I converted a
 327   // 2-byte UTF-8 character (dotless i) by mkallsmall.
 328   char cw[MAXWORDUTF8LEN];
 329   char wspace[MAXWORDUTF8LEN];
 330   w_char unicw[MAXWORDLEN];
 331   // Hunspell supports XML input of the simplified API (see manual)
 332   if (strcmp(word, SPELL_XML) == 0) return 1;
 333   int nc = strlen(word);
 334   int wl2 = 0;
 335   if (utf8) {
 336     if (nc >= MAXWORDUTF8LEN) return 0;
 337   } else {
 338     if (nc >= MAXWORDLEN) return 0;
 339   }
 340   int captype = 0;
 341   int abbv = 0;
 342   int wl = 0;
 343
 344   // input conversion
 345   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
 346   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
 347   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
 348
 349   int info2 = 0;
 350   if (wl == 0 || maxdic == 0) return 1;
 351   if (root) *root = NULL;
 352
 353   // allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
 354   enum { NBEGIN, NNUM, NSEP };
 355   int nstate = NBEGIN;
 356   int i;
 357
 358   for (i = 0; (i < wl); i++) {
 359     if ((cw[i] <= '9') && (cw[i] >= '0')) {
 360         nstate = NNUM;
 361     } else if ((cw[i] == ',') || (cw[i] == '.') || (cw[i] == '-')) {
 362         if ((nstate == NSEP) || (i == 0)) break;
 363         nstate = NSEP;
 364     } else break;
 365   }
 366   if ((i == wl) && (nstate == NNUM)) return 1;
 367   if (!info) info = &info2; else *info = 0;
 368
 369   switch(captype) {
 370      case HUHCAP:
 371      case HUHINITCAP:
 372             *info += SPELL_ORIGCAP;
 373      case NOCAP: {
 374             rv = checkword(cw, info, root);
 375             if ((abbv) && !(rv)) {
 376                 memcpy(wspace,cw,wl);
 377                 *(wspace+wl) = '.';
 378                 *(wspace+wl+1) = '\0';
 379                 rv = checkword(wspace, info, root);
 380             }
 381             break;
 382          }
 383      case ALLCAP: {
 384             *info += SPELL_ORIGCAP;
 385             rv = checkword(cw, info, root);
 386             if (rv) break;
 387             if (abbv) {
 388                 memcpy(wspace,cw,wl);
 389                 *(wspace+wl) = '.';
 390                 *(wspace+wl+1) = '\0';
 391                 rv = checkword(wspace, info, root);
 392                 if (rv) break;
 393             }
 394             // Spec. prefix handling for Catalan, French, Italian:
 395             // prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
 396             if (pAMgr && strchr(cw, '\'')) {
 397                 wl = mkallsmall2(cw, unicw, nc);
 398                 //There are no really sane circumstances where this could fail,
 399                 //but anyway...
 400                 if (char * apostrophe = strchr(cw, '\'')) {
 401                     if (utf8) {
 402                         w_char tmpword[MAXWORDLEN];
 403                         *apostrophe = '\0';
 404                         wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
 405                         *apostrophe = '\'';
 406                         if (wl2 < nc) {
 407                             mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
 408                             rv = checkword(cw, info, root);
 409                             if (rv) break;
 410                         }
 411                     } else {
 412                         mkinitcap2(apostrophe + 1, unicw, nc);
 413                         rv = checkword(cw, info, root);
 414                         if (rv) break;
 415                     }
 416                 }
 417                 mkinitcap2(cw, unicw, nc);
 418                 rv = checkword(cw, info, root);
 419                 if (rv) break;
 420             }
 421             if (pAMgr && pAMgr->get_checksharps() && strstr(cw, "SS")) {
 422                 char tmpword[MAXWORDUTF8LEN];
 423                 wl = mkallsmall2(cw, unicw, nc);
 424                 memcpy(wspace,cw,(wl+1));
 425                 rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
 426                 if (!rv) {
 427                     wl2 = mkinitcap2(cw, unicw, nc);
 428                     rv = spellsharps(cw, cw, 0, 0, tmpword, info, root);
 429                 }
 430                 if ((abbv) && !(rv)) {
 431                     *(wspace+wl) = '.';
 432                     *(wspace+wl+1) = '\0';
 433                     rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
 434                     if (!rv) {
 435                         memcpy(wspace, cw, wl2);
 436                         *(wspace+wl2) = '.';
 437                         *(wspace+wl2+1) = '\0';
 438                         rv = spellsharps(wspace, wspace, 0, 0, tmpword, info, root);
 439                     }
 440                 }
 441                 if (rv) break;
 442             }
 443         }
 444      case INITCAP: {
 445              *info += SPELL_ORIGCAP;
 446              wl = mkallsmall2(cw, unicw, nc);
 447              memcpy(wspace,cw,(wl+1));
 448              wl2 = mkinitcap2(cw, unicw, nc);
 449              if (captype == INITCAP) *info += SPELL_INITCAP;
 450              rv = checkword(cw, info, root);
 451              if (captype == INITCAP) *info -= SPELL_INITCAP;
 452              // forbid bad capitalization
 453              // (for example, ijs -> Ijs instead of IJs in Dutch)
 454              // use explicit forms in dic: Ijs/F (F = FORBIDDENWORD flag)
 455              if (*info & SPELL_FORBIDDEN) {
 456                 rv = NULL;
 457                 break;
 458              }
 459              if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
 460              if (rv) break;
 461
 462              rv = checkword(wspace, info, root);
 463              if (abbv && !rv) {
 464
 465                  *(wspace+wl) = '.';
 466                  *(wspace+wl+1) = '\0';
 467                  rv = checkword(wspace, info, root);
 468                  if (!rv) {
 469                     memcpy(wspace, cw, wl2);
 470                     *(wspace+wl2) = '.';
 471                     *(wspace+wl2+1) = '\0';
 472                     if (captype == INITCAP) *info += SPELL_INITCAP;
 473                     rv = checkword(wspace, info, root);
 474                     if (captype == INITCAP) *info -= SPELL_INITCAP;
 475                     if (rv && is_keepcase(rv) && (captype == ALLCAP)) rv = NULL;
 476                     break;
 477                  }
 478              }
 479              if (rv && is_keepcase(rv) &&
 480                 ((captype == ALLCAP) ||
 481                    // if CHECKSHARPS: KEEPCASE words with \xDF  are allowed
 482                    // in INITCAP form, too.
 483                    !(pAMgr->get_checksharps() &&
 484                       ((utf8 && strstr(wspace, "\xC3\x9F")) ||
 485                       (!utf8 && strchr(wspace, '\xDF')))))) rv = NULL;
 486              break;
 487            }
 488   }
 489
 490   if (rv) {
 491       if (pAMgr && pAMgr->get_warn() && rv->astr &&
 492           TESTAFF(rv->astr, pAMgr->get_warn(), rv->alen)) {
 493               *info += SPELL_WARN;
 494               if (pAMgr->get_forbidwarn()) return 0;
 495               return HUNSPELL_OK_WARN;
 496       }
 497       return HUNSPELL_OK;
 498   }
 499
 500   // recursive breaking at break points
 501   if (wordbreak) {
 502     char * s;
 503     char r;
 504     int nbr = 0;
 505     wl = strlen(cw);
 506     int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
 507
 508     // calculate break points for recursion limit
 509     for (int j = 0; j < numbreak; j++) {
 510       s = cw;
 511       do {
 512         s = (char *) strstr(s, wordbreak[j]);
 513         if (s) {
 514                 nbr++;
 515                 s++;
 516         }
 517       } while (s);
 518     }
 519     if (nbr >= 10) return 0;
 520
 521     // check boundary patterns (^begin and end$)
 522     for (int j = 0; j < numbreak; j++) {
 523       int plen = strlen(wordbreak[j]);
 524       if (plen == 1 || plen > wl) continue;
 525       if (wordbreak[j][0] == '^' && strncmp(cw, wordbreak[j] + 1, plen - 1) == 0
 526         && spell(cw + plen - 1)) return 1;
 527       if (wordbreak[j][plen - 1] == '$' &&
 528         strncmp(cw + wl - plen + 1, wordbreak[j], plen - 1) == 0) {
 529             r = cw[wl - plen + 1];
 530             cw[wl - plen + 1] = '\0';
 531             if (spell(cw)) return 1;
 532             cw[wl - plen + 1] = r;
 533         }
 534     }
 535
 536     // other patterns
 537     for (int j = 0; j < numbreak; j++) {
 538       int plen = strlen(wordbreak[j]);
 539       s=(char *) strstr(cw, wordbreak[j]);
 540       if (s && (s > cw) && (s < cw + wl - plen)) {
 541         if (!spell(s + plen)) continue;
 542         r = *s;
 543         *s = '\0';
 544         // examine 2 sides of the break point
 545         if (spell(cw)) return 1;
 546         *s = r;
 547
 548         // LANG_hu: spec. dash rule
 549         if (langnum == LANG_hu && strcmp(wordbreak[j], "-") == 0) {
 550           r = s[1];
 551           s[1] = '\0';
 552           if (spell(cw)) return 1; // check the first part with dash
 553           s[1] = r;
 554         }
 555         // end of LANG speficic region
 556
 557       }
 558     }
 559   }
 560
 561   return 0;
 562 }
 563
 564 struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
 565 {
 566   struct hentry * he = NULL;
 567   int len, i;
 568   char w2[MAXWORDUTF8LEN];
 569   const char * word;
 570
 571   char * ignoredchars = pAMgr->get_ignore();
 572   if (ignoredchars != NULL) {
 573      strcpy(w2, w);
 574      if (utf8) {
 575         int ignoredchars_utf16_len;
 576         unsigned short * ignoredchars_utf16 = pAMgr->get_ignore_utf16(&ignoredchars_utf16_len);
 577         remove_ignored_chars_utf(w2, ignoredchars_utf16, ignoredchars_utf16_len);
 578      } else {
 579         remove_ignored_chars(w2,ignoredchars);
 580      }
 581      word = w2;
 582   } else word = w;
 583
 584   len = strlen(word);
 585
 586   if (!len)
 587       return NULL;
 588
 589   // word reversing wrapper for complex prefixes
 590   if (complexprefixes) {
 591     if (word != w2) {
 592       strcpy(w2, word);
 593       word = w2;
 594     }
 595     if (utf8) reverseword_utf(w2); else reverseword(w2);
 596   }
 597
 598   // look word in hash table
 599   for (i = 0; (i < maxdic) && !he; i ++) {
 600   he = (pHMgr[i])->lookup(word);
 601
 602   // check forbidden and onlyincompound words
 603   if ((he) && (he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
 604     if (info) *info += SPELL_FORBIDDEN;
 605     // LANG_hu section: set dash information for suggestions
 606     if (langnum == LANG_hu) {
 607         if (pAMgr->get_compoundflag() &&
 608             TESTAFF(he->astr, pAMgr->get_compoundflag(), he->alen)) {
 609                 if (info) *info += SPELL_COMPOUND;
 610         }
 611     }
 612     return NULL;
 613   }
 614
 615   // he = next not needaffix, onlyincompound homonym or onlyupcase word
 616   while (he && (he->astr) &&
 617     ((pAMgr->get_needaffix() && TESTAFF(he->astr, pAMgr->get_needaffix(), he->alen)) ||
 618        (pAMgr->get_onlyincompound() && TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
 619        (info && (*info & SPELL_INITCAP) && TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen))
 620     )) he = he->next_homonym;
 621   }
 622
 623   // check with affixes
 624   if (!he && pAMgr) {
 625      // try stripping off affixes */
 626      he = pAMgr->affix_check(word, len, 0);
 627
 628      // check compound restriction and onlyupcase
 629      if (he && he->astr && (
 630         (pAMgr->get_onlyincompound() &&
 631             TESTAFF(he->astr, pAMgr->get_onlyincompound(), he->alen)) ||
 632         (info && (*info & SPELL_INITCAP) &&
 633             TESTAFF(he->astr, ONLYUPCASEFLAG, he->alen)))) {
 634             he = NULL;
 635      }
 636
 637      if (he) {
 638         if ((he->astr) && (pAMgr) && TESTAFF(he->astr, pAMgr->get_forbiddenword(), he->alen)) {
 639             if (info) *info += SPELL_FORBIDDEN;
 640             return NULL;
 641         }
 642         if (root) {
 643             *root = mystrdup(he->word);
 644             if (*root && complexprefixes) {
 645                 if (utf8) reverseword_utf(*root); else reverseword(*root);
 646             }
 647         }
 648      // try check compound word
 649      } else if (pAMgr->get_compound()) {
 650           he = pAMgr->compound_check(word, len, 0, 0, 100, 0, NULL, 0, 0, info);
 651           // LANG_hu section: `moving rule' with last dash
 652           if ((!he) && (langnum == LANG_hu) && (word[len-1] == '-')) {
 653              char * dup = mystrdup(word);
 654              if (!dup) return NULL;
 655              dup[len-1] = '\0';
 656              he = pAMgr->compound_check(dup, len-1, -5, 0, 100, 0, NULL, 1, 0, info);
 657              free(dup);
 658           }
 659           // end of LANG speficic region
 660           if (he) {
 661                 if (root) {
 662                     *root = mystrdup(he->word);
 663                     if (*root && complexprefixes) {
 664                         if (utf8) reverseword_utf(*root); else reverseword(*root);
 665                     }
 666                 }
 667                 if (info) *info += SPELL_COMPOUND;
 668           }
 669      }
 670
 671   }
 672
 673   return he;
 674 }
 675
 676 int Hunspell::suggest(char*** slst, const char * word)
 677 {
 678   int onlycmpdsug = 0;
 679   char cw[MAXWORDUTF8LEN];
 680   char wspace[MAXWORDUTF8LEN];
 681   if (!pSMgr || maxdic == 0) return 0;
 682   w_char unicw[MAXWORDLEN];
 683   *slst = NULL;
 684   // process XML input of the simplified API (see manual)
 685   if (strncmp(word, SPELL_XML, sizeof(SPELL_XML) - 3) == 0) {
 686      return spellml(slst, word);
 687   }
 688   int nc = strlen(word);
 689   if (utf8) {
 690     if (nc >= MAXWORDUTF8LEN) return 0;
 691   } else {
 692     if (nc >= MAXWORDLEN) return 0;
 693   }
 694   int captype = 0;
 695   int abbv = 0;
 696   int wl = 0;
 697
 698   // input conversion
 699   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
 700   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
 701   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
 702
 703   if (wl == 0) return 0;
 704   int ns = 0;
 705   int capwords = 0;
 706
 707   // check capitalized form for FORCEUCASE
 708   if (pAMgr && captype == NOCAP && pAMgr->get_forceucase()) {
 709     int info = SPELL_ORIGCAP;
 710     char ** wlst;
 711     if (checkword(cw, &info, NULL)) {
 712         if (*slst) {
 713             wlst = *slst;
 714         } else {
 715             wlst = (char **) malloc(MAXSUGGESTION * sizeof(char *));
 716             if (wlst == NULL) return -1;
 717             *slst = wlst;
 718             for (int i = 0; i < MAXSUGGESTION; i++) {
 719                 wlst[i] = NULL;
 720             }
 721         }
 722         wlst[0] = mystrdup(cw);
 723         mkinitcap(wlst[0]);
 724         return 1;
 725     }
 726   }
 727
 728   switch(captype) {
 729      case NOCAP:   {
 730                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
 731                      break;
 732                    }
 733
 734      case INITCAP: {
 735                      capwords = 1;
 736                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
 737                      if (ns == -1) break;
 738                      memcpy(wspace,cw,(wl+1));
 739                      mkallsmall2(wspace, unicw, nc);
 740                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 741                      break;
 742                    }
 743      case HUHINITCAP:
 744                     capwords = 1;
 745      case HUHCAP: {
 746                      ns = pSMgr->suggest(slst, cw, ns, &onlycmpdsug);
 747                      if (ns != -1) {
 748                         int prevns;
 749                         // something.The -> something. The
 750                         char * dot = strchr(cw, '.');
 751                         if (dot && (dot > cw)) {
 752                             int captype_;
 753                             if (utf8) {
 754                                w_char w_[MAXWORDLEN];
 755                                int wl_ = u8_u16(w_, MAXWORDLEN, dot + 1);
 756                                captype_ = get_captype_utf8(w_, wl_, langnum);
 757                             } else captype_ = get_captype(dot+1, strlen(dot+1), csconv);
 758                             if (captype_ == INITCAP) {
 759                                 char * st = mystrdup(cw);
 760                                 if (st) st = (char *) realloc(st, wl + 2);
 761                                 if (st) {
 762                                         st[(dot - cw) + 1] = ' ';
 763                                         strcpy(st + (dot - cw) + 2, dot + 1);
 764                                         ns = insert_sug(slst, st, ns);
 765                                         free(st);
 766                                 }
 767                             }
 768                         }
 769                         if (captype == HUHINITCAP) {
 770                             // TheOpenOffice.org -> The OpenOffice.org
 771                             memcpy(wspace,cw,(wl+1));
 772                             mkinitsmall2(wspace, unicw, nc);
 773                             ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 774                         }
 775                         memcpy(wspace,cw,(wl+1));
 776                         mkallsmall2(wspace, unicw, nc);
 777                         if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
 778                         prevns = ns;
 779                         ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 780                         if (captype == HUHINITCAP) {
 781                             mkinitcap2(wspace, unicw, nc);
 782                             if (spell(wspace)) ns = insert_sug(slst, wspace, ns);
 783                             ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 784                         }
 785                         // aNew -> "a New" (instead of "a new")
 786                         for (int j = prevns; j < ns; j++) {
 787                            char * space = strchr((*slst)[j],' ');
 788                            if (space) {
 789                                 int slen = strlen(space + 1);
 790                                 // different case after space (need capitalisation)
 791                                 if ((slen < wl) && strcmp(cw + wl - slen, space + 1)) {
 792                                     w_char w[MAXWORDLEN];
 793                                     int wc = 0;
 794                                     char * r = (*slst)[j];
 795                                     if (utf8) wc = u8_u16(w, MAXWORDLEN, space + 1);
 796                                     mkinitcap2(space + 1, w, wc);
 797                                     // set as first suggestion
 798                                     for (int k = j; k > 0; k--) (*slst)[k] = (*slst)[k - 1];
 799                                     (*slst)[0] = r;
 800                                 }
 801                            }
 802                         }
 803                      }
 804                      break;
 805                    }
 806
 807      case ALLCAP: {
 808                      memcpy(wspace, cw, (wl+1));
 809                      mkallsmall2(wspace, unicw, nc);
 810                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 811                      if (ns == -1) break;
 812                      if (pAMgr && pAMgr->get_keepcase() && spell(wspace))
 813                         ns = insert_sug(slst, wspace, ns);
 814                      mkinitcap2(wspace, unicw, nc);
 815                      ns = pSMgr->suggest(slst, wspace, ns, &onlycmpdsug);
 816                      for (int j=0; j < ns; j++) {
 817                         mkallcap((*slst)[j]);
 818                         if (pAMgr && pAMgr->get_checksharps()) {
 819                             char * pos;
 820                             if (utf8) {
 821                                 pos = strstr((*slst)[j], "\xC3\x9F");
 822                                 while (pos) {
 823                                     *pos = 'S';
 824                                     *(pos+1) = 'S';
 825                                     pos = strstr(pos+2, "\xC3\x9F");
 826                                 }
 827                             } else {
 828                                 pos = strchr((*slst)[j], '\xDF');
 829                                 while (pos) {
 830                                     (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 2);
 831                                     mystrrep((*slst)[j], "\xDF", "SS");
 832                                     pos = strchr((*slst)[j], '\xDF');
 833                                 }
 834                             }
 835                         }
 836                      }
 837                      break;
 838                    }
 839   }
 840
 841  // LANG_hu section: replace '-' with ' ' in Hungarian
 842   if (langnum == LANG_hu) {
 843       for (int j=0; j < ns; j++) {
 844           char * pos = strchr((*slst)[j],'-');
 845           if (pos) {
 846               int info;
 847               char w[MAXWORDUTF8LEN];
 848               *pos = '\0';
 849               strcpy(w, (*slst)[j]);
 850               strcat(w, pos + 1);
 851               spell(w, &info, NULL);
 852               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
 853                   *pos = ' ';
 854               } else *pos = '-';
 855           }
 856       }
 857   }
 858   // END OF LANG_hu section
 859
 860   // try ngram approach since found nothing or only compound words
 861   if (pAMgr && (ns == 0 || onlycmpdsug) && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
 862       switch(captype) {
 863           case NOCAP: {
 864               ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
 865               break;
 866           }
 867           case HUHINITCAP:
 868               capwords = 1;
 869           case HUHCAP: {
 870               memcpy(wspace,cw,(wl+1));
 871               mkallsmall2(wspace, unicw, nc);
 872               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
 873               break;
 874           }
 875          case INITCAP: {
 876               capwords = 1;
 877               memcpy(wspace,cw,(wl+1));
 878               mkallsmall2(wspace, unicw, nc);
 879               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
 880               break;
 881           }
 882           case ALLCAP: {
 883               memcpy(wspace,cw,(wl+1));
 884               mkallsmall2(wspace, unicw, nc);
 885               int oldns = ns;
 886               ns = pSMgr->ngsuggest(*slst, wspace, ns, pHMgr, maxdic);
 887               for (int j = oldns; j < ns; j++)
 888                   mkallcap((*slst)[j]);
 889               break;
 890          }
 891       }
 892   }
 893
 894   // try dash suggestion (Afo-American -> Afro-American)
 895   if (char * pos = strchr(cw, '-')) {
 896      char * ppos = cw;
 897      int nodashsug = 1;
 898      char ** nlst = NULL;
 899      int nn = 0;
 900      int last = 0;
 901      if (*slst) {
 902         for (int j = 0; j < ns && nodashsug == 1; j++) {
 903            if (strchr((*slst)[j], '-')) nodashsug = 0;
 904         }
 905      }
 906      while (nodashsug && !last) {
 907         if (*pos == '\0') last = 1; else *pos = '\0';
 908         if (!spell(ppos)) {
 909           nn = suggest(&nlst, ppos);
 910           for (int j = nn - 1; j >= 0; j--) {
 911             strncpy(wspace, cw, ppos - cw);
 912             strcpy(wspace + (ppos - cw), nlst[j]);
 913             if (!last) {
 914                 strcat(wspace, "-");
 915                 strcat(wspace, pos + 1);
 916             }
 917             ns = insert_sug(slst, wspace, ns);
 918             free(nlst[j]);
 919           }
 920           if (nlst != NULL) free(nlst);
 921           nodashsug = 0;
 922         }
 923         if (!last) {
 924           *pos = '-';
 925           ppos = pos + 1;
 926           pos = strchr(ppos, '-');
 927         }
 928         if (!pos) pos = cw + strlen(cw);
 929      }
 930   }
 931
 932   // word reversing wrapper for complex prefixes
 933   if (complexprefixes) {
 934     for (int j = 0; j < ns; j++) {
 935       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
 936     }
 937   }
 938
 939   // capitalize
 940   if (capwords) for (int j=0; j < ns; j++) {
 941       mkinitcap((*slst)[j]);
 942   }
 943
 944   // expand suggestions with dot(s)
 945   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
 946     for (int j = 0; j < ns; j++) {
 947       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
 948       strcat((*slst)[j], word + strlen(word) - abbv);
 949     }
 950   }
 951
 952   // remove bad capitalized and forbidden forms
 953   if (pAMgr && (pAMgr->get_keepcase() || pAMgr->get_forbiddenword())) {
 954   switch (captype) {
 955     case INITCAP:
 956     case ALLCAP: {
 957       int l = 0;
 958       for (int j=0; j < ns; j++) {
 959         if (!strchr((*slst)[j],' ') && !spell((*slst)[j])) {
 960           char s[MAXSWUTF8L];
 961           w_char w[MAXSWL];
 962           int len;
 963           if (utf8) {
 964             len = u8_u16(w, MAXSWL, (*slst)[j]);
 965           } else {
 966             strcpy(s, (*slst)[j]);
 967             len = strlen(s);
 968           }
 969           mkallsmall2(s, w, len);
 970           free((*slst)[j]);
 971           if (spell(s)) {
 972             (*slst)[l] = mystrdup(s);
 973             if ((*slst)[l]) l++;
 974           } else {
 975             mkinitcap2(s, w, len);
 976             if (spell(s)) {
 977               (*slst)[l] = mystrdup(s);
 978               if ((*slst)[l]) l++;
 979             }
 980           }
 981         } else {
 982           (*slst)[l] = (*slst)[j];
 983           l++;
 984         }
 985       }
 986       ns = l;
 987     }
 988   }
 989   }
 990
 991   // remove duplications
 992   int l = 0;
 993   for (int j = 0; j < ns; j++) {
 994     (*slst)[l] = (*slst)[j];
 995     for (int k = 0; k < l; k++) {
 996       if (strcmp((*slst)[k], (*slst)[j]) == 0) {
 997         free((*slst)[j]);
 998         l--;
 999         break;
1000       }
1001     }
1002     l++;
1003   }
1004   ns = l;
1005
1006   // output conversion
1007   rl = (pAMgr) ? pAMgr->get_oconvtable() : NULL;
1008   for (int j = 0; rl && j < ns; j++) {
1009     if (rl->conv((*slst)[j], wspace)) {
1010       free((*slst)[j]);
1011       (*slst)[j] = mystrdup(wspace);
1012     }
1013   }
1014
1015   // if suggestions removed by nosuggest, onlyincompound parameters
1016   if (l == 0 && *slst) {
1017     free(*slst);
1018     *slst = NULL;
1019   }
1020   return l;
1021 }
1022
1023 void Hunspell::free_list(char *** slst, int n) {
1024         freelist(slst, n);
1025 }
1026
1027 char * Hunspell::get_dic_encoding()
1028 {
1029   return encoding;
1030 }
1031
1032 #ifdef HUNSPELL_EXPERIMENTAL
1033 // XXX need UTF-8 support
1034 int Hunspell::suggest_auto(char*** slst, const char * word)
1035 {
1036   char cw[MAXWORDUTF8LEN];
1037   char wspace[MAXWORDUTF8LEN];
1038   if (!pSMgr || maxdic == 0) return 0;
1039   int wl = strlen(word);
1040   if (utf8) {
1041     if (wl >= MAXWORDUTF8LEN) return 0;
1042   } else {
1043     if (wl >= MAXWORDLEN) return 0;
1044   }
1045   int captype = 0;
1046   int abbv = 0;
1047   wl = cleanword(cw, word, &captype, &abbv);
1048   if (wl == 0) return 0;
1049   int ns = 0;
1050   *slst = NULL; // HU, nsug in pSMgr->suggest
1051
1052   switch(captype) {
1053      case NOCAP:   {
1054                      ns = pSMgr->suggest_auto(slst, cw, ns);
1055                      if (ns>0) break;
1056                      break;
1057                    }
1058
1059      case INITCAP: {
1060                      memcpy(wspace,cw,(wl+1));
1061                      mkallsmall(wspace);
1062                      ns = pSMgr->suggest_auto(slst, wspace, ns);
1063                      for (int j=0; j < ns; j++)
1064                        mkinitcap((*slst)[j]);
1065                      ns = pSMgr->suggest_auto(slst, cw, ns);
1066                      break;
1067
1068                    }
1069
1070      case HUHINITCAP:
1071      case HUHCAP: {
1072                      ns = pSMgr->suggest_auto(slst, cw, ns);
1073                      if (ns == 0) {
1074                         memcpy(wspace,cw,(wl+1));
1075                         mkallsmall(wspace);
1076                         ns = pSMgr->suggest_auto(slst, wspace, ns);
1077                      }
1078                      break;
1079                    }
1080
1081      case ALLCAP: {
1082                      memcpy(wspace,cw,(wl+1));
1083                      mkallsmall(wspace);
1084                      ns = pSMgr->suggest_auto(slst, wspace, ns);
1085
1086                      mkinitcap(wspace);
1087                      ns = pSMgr->suggest_auto(slst, wspace, ns);
1088
1089                      for (int j=0; j < ns; j++)
1090                        mkallcap((*slst)[j]);
1091                      break;
1092                    }
1093   }
1094
1095   // word reversing wrapper for complex prefixes
1096   if (complexprefixes) {
1097     for (int j = 0; j < ns; j++) {
1098       if (utf8) reverseword_utf((*slst)[j]); else reverseword((*slst)[j]);
1099     }
1100   }
1101
1102   // expand suggestions with dot(s)
1103   if (abbv && pAMgr && pAMgr->get_sugswithdots()) {
1104     for (int j = 0; j < ns; j++) {
1105       (*slst)[j] = (char *) realloc((*slst)[j], strlen((*slst)[j]) + 1 + abbv);
1106       strcat((*slst)[j], word + strlen(word) - abbv);
1107     }
1108   }
1109
1110   // LANG_hu section: replace '-' with ' ' in Hungarian
1111   if (langnum == LANG_hu) {
1112       for (int j=0; j < ns; j++) {
1113           char * pos = strchr((*slst)[j],'-');
1114           if (pos) {
1115               int info;
1116               char w[MAXWORDUTF8LEN];
1117               *pos = '\0';
1118               strcpy(w, (*slst)[j]);
1119               strcat(w, pos + 1);
1120               spell(w, &info, NULL);
1121               if ((info & SPELL_COMPOUND) && (info & SPELL_FORBIDDEN)) {
1122                   *pos = ' ';
1123               } else *pos = '-';
1124           }
1125       }
1126   }
1127   // END OF LANG_hu section
1128   return ns;
1129 }
1130 #endif
1131
1132 int Hunspell::stem(char*** slst, char ** desc, int n)
1133 {
1134   char result[MAXLNLEN];
1135   char result2[MAXLNLEN];
1136   *slst = NULL;
1137   if (n == 0) return 0;
1138   *result2 = '\0';
1139   for (int i = 0; i < n; i++) {
1140     *result = '\0';
1141     // add compound word parts (except the last one)
1142     char * s = (char *) desc[i];
1143     char * part = strstr(s, MORPH_PART);
1144     if (part) {
1145         char * nextpart = strstr(part + 1, MORPH_PART);
1146         while (nextpart) {
1147             copy_field(result + strlen(result), part, MORPH_PART);
1148             part = nextpart;
1149             nextpart = strstr(part + 1, MORPH_PART);
1150         }
1151         s = part;
1152     }
1153
1154     char **pl;
1155     char tok[MAXLNLEN];
1156     strcpy(tok, s);
1157     char * alt = strstr(tok, " | ");
1158     while (alt) {
1159         alt[1] = MSEP_ALT;
1160         alt = strstr(alt, " | ");
1161     }
1162     int pln = line_tok(tok, &pl, MSEP_ALT);
1163     for (int k = 0; k < pln; k++) {
1164         // add derivational suffixes
1165         if (strstr(pl[k], MORPH_DERI_SFX)) {
1166             // remove inflectional suffixes
1167             char * is = strstr(pl[k], MORPH_INFL_SFX);
1168             if (is) *is = '\0';
1169             char * sg = pSMgr->suggest_gen(&(pl[k]), 1, pl[k]);
1170             if (sg) {
1171                 char ** gen;
1172                 int genl = line_tok(sg, &gen, MSEP_REC);
1173                 free(sg);
1174                 for (int j = 0; j < genl; j++) {
1175                     sprintf(result2 + strlen(result2), "%c%s%s",
1176                             MSEP_REC, result, gen[j]);
1177                 }
1178                 freelist(&gen, genl);
1179             }
1180         } else {
1181             sprintf(result2 + strlen(result2), "%c%s", MSEP_REC, result);
1182             if (strstr(pl[k], MORPH_SURF_PFX)) {
1183                 copy_field(result2 + strlen(result2), pl[k], MORPH_SURF_PFX);
1184             }
1185             copy_field(result2 + strlen(result2), pl[k], MORPH_STEM);
1186         }
1187     }
1188     freelist(&pl, pln);
1189   }
1190   int sln = line_tok(result2, slst, MSEP_REC);
1191   return uniqlist(*slst, sln);
1192
1193 }
1194
1195 int Hunspell::stem(char*** slst, const char * word)
1196 {
1197   char ** pl;
1198   int pln = analyze(&pl, word);
1199   int pln2 = stem(slst, pl, pln);
1200   freelist(&pl, pln);
1201   return pln2;
1202 }
1203
1204 #ifdef HUNSPELL_EXPERIMENTAL
1205 int Hunspell::suggest_pos_stems(char*** slst, const char * word)
1206 {
1207   char cw[MAXWORDUTF8LEN];
1208   char wspace[MAXWORDUTF8LEN];
1209   if (! pSMgr || maxdic == 0) return 0;
1210   int wl = strlen(word);
1211   if (utf8) {
1212     if (wl >= MAXWORDUTF8LEN) return 0;
1213   } else {
1214     if (wl >= MAXWORDLEN) return 0;
1215   }
1216   int captype = 0;
1217   int abbv = 0;
1218   wl = cleanword(cw, word, &captype, &abbv);
1219   if (wl == 0) return 0;
1220
1221   int ns = 0; // ns=0 = normalized input
1222
1223   *slst = NULL; // HU, nsug in pSMgr->suggest
1224
1225   switch(captype) {
1226      case HUHCAP:
1227      case NOCAP:   {
1228                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1229
1230                      if ((abbv) && (ns == 0)) {
1231                          memcpy(wspace,cw,wl);
1232                          *(wspace+wl) = '.';
1233                          *(wspace+wl+1) = '\0';
1234                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1235                      }
1236
1237                      break;
1238                    }
1239
1240      case INITCAP: {
1241
1242                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1243
1244                      if (ns == 0 || ((*slst)[0][0] == '#')) {
1245                         memcpy(wspace,cw,(wl+1));
1246                         mkallsmall(wspace);
1247                         ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1248                      }
1249
1250                      break;
1251
1252                    }
1253
1254      case ALLCAP: {
1255                      ns = pSMgr->suggest_pos_stems(slst, cw, ns);
1256                      if (ns != 0) break;
1257
1258                      memcpy(wspace,cw,(wl+1));
1259                      mkallsmall(wspace);
1260                      ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1261
1262                      if (ns == 0) {
1263                          mkinitcap(wspace);
1264                          ns = pSMgr->suggest_pos_stems(slst, wspace, ns);
1265                      }
1266                      break;
1267                    }
1268   }
1269
1270   return ns;
1271 }
1272 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1273
1274 const char * Hunspell::get_wordchars()
1275 {
1276   return pAMgr->get_wordchars();
1277 }
1278
1279 unsigned short * Hunspell::get_wordchars_utf16(int * len)
1280 {
1281   return pAMgr->get_wordchars_utf16(len);
1282 }
1283
1284 void Hunspell::mkinitcap(char * p)
1285 {
1286   if (!utf8) {
1287     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1288   } else {
1289       int len;
1290       w_char u[MAXWORDLEN];
1291       len = u8_u16(u, MAXWORDLEN, p);
1292       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1293       u[0].h = (unsigned char) (i >> 8);
1294       u[0].l = (unsigned char) (i & 0x00FF);
1295       u16_u8(p, MAXWORDUTF8LEN, u, len);
1296   }
1297 }
1298
1299 int Hunspell::mkinitcap2(char * p, w_char * u, int nc)
1300 {
1301   if (!utf8) {
1302     if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
1303   } else if (nc > 0) {
1304       unsigned short i = unicodetoupper((u[0].h << 8) + u[0].l, langnum);
1305       u[0].h = (unsigned char) (i >> 8);
1306       u[0].l = (unsigned char) (i & 0x00FF);
1307       u16_u8(p, MAXWORDUTF8LEN, u, nc);
1308       return strlen(p);
1309   }
1310   return nc;
1311 }
1312
1313 int Hunspell::mkinitsmall2(char * p, w_char * u, int nc)
1314 {
1315   if (!utf8) {
1316     if (*p != '\0') *p = csconv[((unsigned char)*p)].clower;
1317   } else if (nc > 0) {
1318       unsigned short i = unicodetolower((u[0].h << 8) + u[0].l, langnum);
1319       u[0].h = (unsigned char) (i >> 8);
1320       u[0].l = (unsigned char) (i & 0x00FF);
1321       u16_u8(p, MAXWORDUTF8LEN, u, nc);
1322       return strlen(p);
1323   }
1324   return nc;
1325 }
1326
1327 int Hunspell::add(const char * word)
1328 {
1329     if (pHMgr[0]) return (pHMgr[0])->add(word);
1330     return 0;
1331 }
1332
1333 int Hunspell::add_with_affix(const char * word, const char * example)
1334 {
1335     if (pHMgr[0]) return (pHMgr[0])->add_with_affix(word, example);
1336     return 0;
1337 }
1338
1339 int Hunspell::remove(const char * word)
1340 {
1341     if (pHMgr[0]) return (pHMgr[0])->remove(word);
1342     return 0;
1343 }
1344
1345 const char * Hunspell::get_version()
1346 {
1347   return pAMgr->get_version();
1348 }
1349
1350 struct cs_info * Hunspell::get_csconv()
1351 {
1352   return csconv;
1353 }
1354
1355 void Hunspell::cat_result(char * result, char * st)
1356 {
1357     if (st) {
1358         if (*result) mystrcat(result, "\n", MAXLNLEN);
1359         mystrcat(result, st, MAXLNLEN);
1360         free(st);
1361     }
1362 }
1363
1364 int Hunspell::analyze(char*** slst, const char * word)
1365 {
1366   char cw[MAXWORDUTF8LEN];
1367   char wspace[MAXWORDUTF8LEN];
1368   w_char unicw[MAXWORDLEN];
1369   int wl2 = 0;
1370   *slst = NULL;
1371   if (! pSMgr || maxdic == 0) return 0;
1372   int nc = strlen(word);
1373   if (utf8) {
1374     if (nc >= MAXWORDUTF8LEN) return 0;
1375   } else {
1376     if (nc >= MAXWORDLEN) return 0;
1377   }
1378   int captype = 0;
1379   int abbv = 0;
1380   int wl = 0;
1381
1382   // input conversion
1383   RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
1384   if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
1385   else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
1386
1387   if (wl == 0) {
1388       if (abbv) {
1389           for (wl = 0; wl < abbv; wl++) cw[wl] = '.';
1390           cw[wl] = '\0';
1391           abbv = 0;
1392       } else return 0;
1393   }
1394
1395   char result[MAXLNLEN];
1396   char * st = NULL;
1397
1398   *result = '\0';
1399
1400   int n = 0;
1401   int n2 = 0;
1402   int n3 = 0;
1403
1404   // test numbers
1405   // LANG_hu section: set dash information for suggestions
1406   if (langnum == LANG_hu) {
1407   while ((n < wl) &&
1408         (((cw[n] <= '9') && (cw[n] >= '0')) || (((cw[n] == '.') || (cw[n] == ',')) && (n > 0)))) {
1409         n++;
1410         if ((cw[n] == '.') || (cw[n] == ',')) {
1411                 if (((n2 == 0) && (n > 3)) ||
1412                         ((n2 > 0) && ((cw[n-1] == '.') || (cw[n-1] == ',')))) break;
1413                 n2++;
1414                 n3 = n;
1415         }
1416   }
1417
1418   if ((n == wl) && (n3 > 0) && (n - n3 > 3)) return 0;
1419   if ((n == wl) || ((n>0) && ((cw[n]=='%') || (cw[n]=='\xB0')) && checkword(cw+n, NULL, NULL))) {
1420         mystrcat(result, cw, MAXLNLEN);
1421         result[n - 1] = '\0';
1422         if (n == wl) cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1423         else {
1424                 char sign = cw[n];
1425                 cw[n] = '\0';
1426                 cat_result(result, pSMgr->suggest_morph(cw + n - 1));
1427                 mystrcat(result, "+", MAXLNLEN); // XXX SPEC. MORPHCODE
1428                 cw[n] = sign;
1429                 cat_result(result, pSMgr->suggest_morph(cw + n));
1430         }
1431         return line_tok(result, slst, MSEP_REC);
1432   }
1433   }
1434   // END OF LANG_hu section
1435
1436   switch(captype) {
1437      case HUHCAP:
1438      case HUHINITCAP:
1439      case NOCAP:  {
1440                     cat_result(result, pSMgr->suggest_morph(cw));
1441                     if (abbv) {
1442                         memcpy(wspace,cw,wl);
1443                         *(wspace+wl) = '.';
1444                         *(wspace+wl+1) = '\0';
1445                         cat_result(result, pSMgr->suggest_morph(wspace));
1446                     }
1447                     break;
1448                 }
1449      case INITCAP: {
1450                      wl = mkallsmall2(cw, unicw, nc);
1451                      memcpy(wspace,cw,(wl+1));
1452                      wl2 = mkinitcap2(cw, unicw, nc);
1453                      cat_result(result, pSMgr->suggest_morph(wspace));
1454                      cat_result(result, pSMgr->suggest_morph(cw));
1455                      if (abbv) {
1456                          *(wspace+wl) = '.';
1457                          *(wspace+wl+1) = '\0';
1458                          cat_result(result, pSMgr->suggest_morph(wspace));
1459
1460                          memcpy(wspace, cw, wl2);
1461                          *(wspace+wl2) = '.';
1462                          *(wspace+wl2+1) = '\0';
1463
1464                          cat_result(result, pSMgr->suggest_morph(wspace));
1465                      }
1466                      break;
1467                    }
1468      case ALLCAP: {
1469                      cat_result(result, pSMgr->suggest_morph(cw));
1470                      if (abbv) {
1471                          memcpy(wspace,cw,wl);
1472                          *(wspace+wl) = '.';
1473                          *(wspace+wl+1) = '\0';
1474                          cat_result(result, pSMgr->suggest_morph(cw));
1475                      }
1476                      wl = mkallsmall2(cw, unicw, nc);
1477                      memcpy(wspace,cw,(wl+1));
1478                      wl2 = mkinitcap2(cw, unicw, nc);
1479
1480                      cat_result(result, pSMgr->suggest_morph(wspace));
1481                      cat_result(result, pSMgr->suggest_morph(cw));
1482                      if (abbv) {
1483                          *(wspace+wl) = '.';
1484                          *(wspace+wl+1) = '\0';
1485                          cat_result(result, pSMgr->suggest_morph(wspace));
1486
1487                          memcpy(wspace, cw, wl2);
1488                          *(wspace+wl2) = '.';
1489                          *(wspace+wl2+1) = '\0';
1490
1491                          cat_result(result, pSMgr->suggest_morph(wspace));
1492                      }
1493                      break;
1494                    }
1495   }
1496
1497   if (*result) {
1498     // word reversing wrapper for complex prefixes
1499     if (complexprefixes) {
1500       if (utf8) reverseword_utf(result); else reverseword(result);
1501     }
1502     return line_tok(result, slst, MSEP_REC);
1503   }
1504
1505   // compound word with dash (HU) I18n
1506   char * dash = NULL;
1507   int nresult = 0;
1508   // LANG_hu section: set dash information for suggestions
1509   if (langnum == LANG_hu) dash = (char *) strchr(cw,'-');
1510   if ((langnum == LANG_hu) && dash) {
1511       *dash='\0';
1512       // examine 2 sides of the dash
1513       if (dash[1] == '\0') { // base word ending with dash
1514         if (spell(cw)) {
1515                 char * p = pSMgr->suggest_morph(cw);
1516                 if (p) {
1517                     int ret = line_tok(p, slst, MSEP_REC);
1518                     free(p);
1519                     return ret;
1520                 }
1521
1522         }
1523       } else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
1524         if (spell(cw) && (spell("-e"))) {
1525                         st = pSMgr->suggest_morph(cw);
1526                         if (st) {
1527                                 mystrcat(result, st, MAXLNLEN);
1528                                 free(st);
1529                         }
1530                         mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
1531                         st = pSMgr->suggest_morph("-e");
1532                         if (st) {
1533                                 mystrcat(result, st, MAXLNLEN);
1534                                 free(st);
1535                         }
1536                         return line_tok(result, slst, MSEP_REC);
1537                 }
1538       } else {
1539       // first word ending with dash: word- XXX ???
1540         char r2 = *(dash + 1);
1541         dash[0]='-';
1542         dash[1]='\0';
1543         nresult = spell(cw);
1544         dash[1] = r2;
1545         dash[0]='\0';
1546         if (nresult && spell(dash+1) && ((strlen(dash+1) > 1) ||
1547                 ((dash[1] > '0') && (dash[1] < '9')))) {
1548                             st = pSMgr->suggest_morph(cw);
1549                             if (st) {
1550                                 mystrcat(result, st, MAXLNLEN);
1551                                     free(st);
1552                                 mystrcat(result,"+", MAXLNLEN); // XXX spec. separator in MORPHCODE
1553                             }
1554                             st = pSMgr->suggest_morph(dash+1);
1555                             if (st) {
1556                                     mystrcat(result, st, MAXLNLEN);
1557                                     free(st);
1558                             }
1559                             return line_tok(result, slst, MSEP_REC);
1560                         }
1561       }
1562       // affixed number in correct word
1563      if (nresult && (dash > cw) && (((*(dash-1)<='9') &&
1564                         (*(dash-1)>='0')) || (*(dash-1)=='.'))) {
1565          *dash='-';
1566          n = 1;
1567          if (*(dash - n) == '.') n++;
1568          // search first not a number character to left from dash
1569          while (((dash - n)>=cw) && ((*(dash - n)=='0') || (n < 3)) && (n < 6)) {
1570             n++;
1571          }
1572          if ((dash - n) < cw) n--;
1573          // numbers: valami1000000-hoz
1574          // examine 100000-hoz, 10000-hoz 1000-hoz, 10-hoz,
1575          // 56-hoz, 6-hoz
1576          for(; n >= 1; n--) {
1577             if ((*(dash - n) >= '0') && (*(dash - n) <= '9') && checkword(dash - n, NULL, NULL)) {
1578                     mystrcat(result, cw, MAXLNLEN);
1579                     result[dash - cw - n] = '\0';
1580                         st = pSMgr->suggest_morph(dash - n);
1581                         if (st) {
1582                         mystrcat(result, st, MAXLNLEN);
1583                                 free(st);
1584                         }
1585                         return line_tok(result, slst, MSEP_REC);
1586             }
1587          }
1588      }
1589   }
1590   return 0;
1591 }
1592
1593 int Hunspell::generate(char*** slst, const char * word, char ** pl, int pln)
1594 {
1595   *slst = NULL;
1596   if (!pSMgr || !pln) return 0;
1597   char **pl2;
1598   int pl2n = analyze(&pl2, word);
1599   int captype = 0;
1600   int abbv = 0;
1601   char cw[MAXWORDUTF8LEN];
1602   cleanword(cw, word, &captype, &abbv);
1603   char result[MAXLNLEN];
1604   *result = '\0';
1605
1606   for (int i = 0; i < pln; i++) {
1607     cat_result(result, pSMgr->suggest_gen(pl2, pl2n, pl[i]));
1608   }
1609   freelist(&pl2, pl2n);
1610
1611   if (*result) {
1612     // allcap
1613     if (captype == ALLCAP) mkallcap(result);
1614
1615     // line split
1616     int linenum = line_tok(result, slst, MSEP_REC);
1617
1618     // capitalize
1619     if (captype == INITCAP || captype == HUHINITCAP) {
1620         for (int j=0; j < linenum; j++) mkinitcap((*slst)[j]);
1621     }
1622
1623     // temporary filtering of prefix related errors (eg.
1624     // generate("undrinkable", "eats") --> "undrinkables" and "*undrinks")
1625
1626     int r = 0;
1627     for (int j=0; j < linenum; j++) {
1628         if (!spell((*slst)[j])) {
1629             free((*slst)[j]);
1630             (*slst)[j] = NULL;
1631         } else {
1632             if (r < j) (*slst)[r] = (*slst)[j];
1633             r++;
1634         }
1635     }
1636     if (r > 0) return r;
1637     free(*slst);
1638     *slst = NULL;
1639   }
1640   return 0;
1641 }
1642
1643 int Hunspell::generate(char*** slst, const char * word, const char * pattern)
1644 {
1645   char **pl;
1646   int pln = analyze(&pl, pattern);
1647   int n = generate(slst, word, pl, pln);
1648   freelist(&pl, pln);
1649   return uniqlist(*slst, n);
1650 }
1651
1652 // minimal XML parser functions
1653 int Hunspell::get_xml_par(char * dest, const char * par, int max)
1654 {
1655    char * d = dest;
1656    if (!par) return 0;
1657    char end = *par;
1658    char * dmax = dest + max;
1659    if (end == '>') end = '<';
1660    else if (end != '\'' && end != '"') return 0; // bad XML
1661    for (par++; d < dmax && *par != '\0' && *par != end; par++, d++) *d = *par;
1662    *d = '\0';
1663    mystrrep(dest, "&lt;", "<");
1664    mystrrep(dest, "&amp;", "&");
1665    return (int)(d - dest);
1666 }
1667
1668 int Hunspell::get_langnum() const
1669 {
1670    return langnum;
1671 }
1672
1673 // return the beginning of the element (attr == NULL) or the attribute
1674 const char * Hunspell::get_xml_pos(const char * s, const char * attr)
1675 {
1676   const char * end = strchr(s, '>');
1677   const char * p = s;
1678   if (attr == NULL) return end;
1679   do {
1680     p = strstr(p, attr);
1681     if (!p || p >= end) return 0;
1682   } while (*(p-1) != ' ' &&  *(p-1) != '\n');
1683   return p + strlen(attr);
1684 }
1685
1686 int Hunspell::check_xml_par(const char * q, const char * attr, const char * value) {
1687   char cw[MAXWORDUTF8LEN];
1688   if (get_xml_par(cw, get_xml_pos(q, attr), MAXWORDUTF8LEN - 1) &&
1689     strcmp(cw, value) == 0) return 1;
1690   return 0;
1691 }
1692
1693 int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
1694     int n = 0;
1695     char * p;
1696     if (!list) return 0;
1697     for (p = list; (p = strstr(p, tag)); p++) n++;
1698     if (n == 0) return 0;
1699     *slst = (char **) malloc(sizeof(char *) * n);
1700     if (!*slst) return 0;
1701     for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
1702         int l = strlen(p);
1703         (*slst)[n] = (char *) malloc(l + 1);
1704         if (!(*slst)[n]) return n;
1705         if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
1706             free((*slst)[n]);
1707             break;
1708         }
1709     }
1710     return n;
1711 }
1712
1713 int Hunspell::spellml(char*** slst, const char * word)
1714 {
1715   char *q, *q2;
1716   char cw[MAXWORDUTF8LEN], cw2[MAXWORDUTF8LEN];
1717   q = (char *) strstr(word, "<query");
1718   if (!q) return 0; // bad XML input
1719   q2 = strchr(q, '>');
1720   if (!q2) return 0; // bad XML input
1721   q2 = strstr(q2, "<word");
1722   if (!q2) return 0; // bad XML input
1723   if (check_xml_par(q, "type=", "analyze")) {
1724       int n = 0, s = 0;
1725       if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
1726       if (n == 0) return 0;
1727       // convert the result to <code><a>ana1</a><a>ana2</a></code> format
1728       for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
1729       char * r = (char *) malloc(6 + 5 * s + 7 * n + 7 + 1); // XXX 5*s->&->&amp;
1730       if (!r) return 0;
1731       strcpy(r, "<code>");
1732       for (int i = 0; i < n; i++) {
1733         int l = strlen(r);
1734         strcpy(r + l, "<a>");
1735         strcpy(r + l + 3, (*slst)[i]);
1736         mystrrep(r + l + 3, "\t", " ");
1737         mystrrep(r + l + 3, "<", "&lt;");
1738         mystrrep(r + l + 3, "&", "&amp;");
1739         strcat(r, "</a>");
1740         free((*slst)[i]);
1741       }
1742       strcat(r, "</code>");
1743       (*slst)[0] = r;
1744       return 1;
1745   } else if (check_xml_par(q, "type=", "stem")) {
1746       if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
1747   } else if (check_xml_par(q, "type=", "generate")) {
1748       int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
1749       if (n == 0) return 0;
1750       char * q3 = strstr(q2 + 1, "<word");
1751       if (q3) {
1752         if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
1753             return generate(slst, cw, cw2);
1754         }
1755       } else {
1756         if ((q2 = strstr(q2 + 1, "<code"))) {
1757           char ** slst2;
1758           if ((n = get_xml_list(&slst2, strchr(q2, '>'), "<a>"))) {
1759             int n2 = generate(slst, cw, slst2, n);
1760             freelist(&slst2, n);
1761             return uniqlist(*slst, n2);
1762           }
1763           freelist(&slst2, n);
1764         }
1765       }
1766   }
1767   return 0;
1768 }
1769
1770
1771 #ifdef HUNSPELL_EXPERIMENTAL
1772 // XXX need UTF-8 support
1773 char * Hunspell::morph_with_correction(const char * word)
1774 {
1775   char cw[MAXWORDUTF8LEN];
1776   char wspace[MAXWORDUTF8LEN];
1777   if (! pSMgr || maxdic == 0) return NULL;
1778   int wl = strlen(word);
1779   if (utf8) {
1780     if (wl >= MAXWORDUTF8LEN) return NULL;
1781   } else {
1782     if (wl >= MAXWORDLEN) return NULL;
1783   }
1784   int captype = 0;
1785   int abbv = 0;
1786   wl = cleanword(cw, word, &captype, &abbv);
1787   if (wl == 0) return NULL;
1788
1789   char result[MAXLNLEN];
1790   char * st = NULL;
1791
1792   *result = '\0';
1793
1794
1795   switch(captype) {
1796      case NOCAP:   {
1797                      st = pSMgr->suggest_morph_for_spelling_error(cw);
1798                      if (st) {
1799                         mystrcat(result, st, MAXLNLEN);
1800                         free(st);
1801                      }
1802                      if (abbv) {
1803                          memcpy(wspace,cw,wl);
1804                          *(wspace+wl) = '.';
1805                          *(wspace+wl+1) = '\0';
1806                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1807                          if (st) {
1808                             if (*result) mystrcat(result, "\n", MAXLNLEN);
1809                             mystrcat(result, st, MAXLNLEN);
1810                             free(st);
1811                                                  }
1812                      }
1813                                          break;
1814                    }
1815      case INITCAP: {
1816                      memcpy(wspace,cw,(wl+1));
1817                      mkallsmall(wspace);
1818                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1819                      if (st) {
1820                         mystrcat(result, st, MAXLNLEN);
1821                         free(st);
1822                      }
1823                      st = pSMgr->suggest_morph_for_spelling_error(cw);
1824                      if (st) {
1825                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1826                         mystrcat(result, st, MAXLNLEN);
1827                         free(st);
1828                      }
1829                      if (abbv) {
1830                          memcpy(wspace,cw,wl);
1831                          *(wspace+wl) = '.';
1832                          *(wspace+wl+1) = '\0';
1833                          mkallsmall(wspace);
1834                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1835                          if (st) {
1836                             if (*result) mystrcat(result, "\n", MAXLNLEN);
1837                             mystrcat(result, st, MAXLNLEN);
1838                             free(st);
1839                          }
1840                          mkinitcap(wspace);
1841                          st = pSMgr->suggest_morph_for_spelling_error(wspace);
1842                          if (st) {
1843                             if (*result) mystrcat(result, "\n", MAXLNLEN);
1844                             mystrcat(result, st, MAXLNLEN);
1845                             free(st);
1846                          }
1847                      }
1848                      break;
1849                    }
1850      case HUHCAP: {
1851                      st = pSMgr->suggest_morph_for_spelling_error(cw);
1852                      if (st) {
1853                         mystrcat(result, st, MAXLNLEN);
1854                         free(st);
1855                      }
1856                      memcpy(wspace,cw,(wl+1));
1857                      mkallsmall(wspace);
1858                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1859                      if (st) {
1860                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1861                         mystrcat(result, st, MAXLNLEN);
1862                         free(st);
1863                      }
1864                      break;
1865                  }
1866      case ALLCAP: {
1867                      memcpy(wspace,cw,(wl+1));
1868                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1869                      if (st) {
1870                         mystrcat(result, st, MAXLNLEN);
1871                         free(st);
1872                      }
1873                      mkallsmall(wspace);
1874                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1875                      if (st) {
1876                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1877                         mystrcat(result, st, MAXLNLEN);
1878                         free(st);
1879                      }
1880                      mkinitcap(wspace);
1881                      st = pSMgr->suggest_morph_for_spelling_error(wspace);
1882                      if (st) {
1883                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1884                         mystrcat(result, st, MAXLNLEN);
1885                         free(st);
1886                      }
1887                      if (abbv) {
1888                         memcpy(wspace,cw,(wl+1));
1889                         *(wspace+wl) = '.';
1890                         *(wspace+wl+1) = '\0';
1891                         if (*result) mystrcat(result, "\n", MAXLNLEN);
1892                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
1893                         if (st) {
1894                             mystrcat(result, st, MAXLNLEN);
1895                             free(st);
1896                         }
1897                         mkallsmall(wspace);
1898                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
1899                         if (st) {
1900                           if (*result) mystrcat(result, "\n", MAXLNLEN);
1901                           mystrcat(result, st, MAXLNLEN);
1902                           free(st);
1903                         }
1904                         mkinitcap(wspace);
1905                         st = pSMgr->suggest_morph_for_spelling_error(wspace);
1906                         if (st) {
1907                           if (*result) mystrcat(result, "\n", MAXLNLEN);
1908                           mystrcat(result, st, MAXLNLEN);
1909                           free(st);
1910                         }
1911                      }
1912                      break;
1913                    }
1914   }
1915
1916   if (*result) return mystrdup(result);
1917   return NULL;
1918 }
1919
1920 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1921
1922 Hunhandle *Hunspell_create(const char * affpath, const char * dpath)
1923 {
1924         return (Hunhandle*)(new Hunspell(affpath, dpath));
1925 }
1926
1927 Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
1928     const char * key)
1929 {
1930         return (Hunhandle*)(new Hunspell(affpath, dpath, key));
1931 }
1932
1933 void Hunspell_destroy(Hunhandle *pHunspell)
1934 {
1935         delete (Hunspell*)(pHunspell);
1936 }
1937
1938 int Hunspell_spell(Hunhandle *pHunspell, const char *word)
1939 {
1940         return ((Hunspell*)pHunspell)->spell(word);
1941 }
1942
1943 char *Hunspell_get_dic_encoding(Hunhandle *pHunspell)
1944 {
1945         return ((Hunspell*)pHunspell)->get_dic_encoding();
1946 }
1947
1948 int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word)
1949 {
1950         return ((Hunspell*)pHunspell)->suggest(slst, word);
1951 }
1952
1953 int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word)
1954 {
1955         return ((Hunspell*)pHunspell)->analyze(slst, word);
1956 }
1957
1958 int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
1959 {
1960         return ((Hunspell*)pHunspell)->stem(slst, word);
1961 }
1962
1963 int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
1964 {
1965         return ((Hunspell*)pHunspell)->stem(slst, desc, n);
1966 }
1967
1968 int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
1969     const char * word2)
1970 {
1971         return ((Hunspell*)pHunspell)->generate(slst, word, word2);
1972 }
1973
1974 int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
1975     char** desc, int n)
1976 {
1977         return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
1978 }
1979
1980   /* functions for run-time modification of the dictionary */
1981
1982   /* add word to the run-time dictionary */
1983
1984 int Hunspell_add(Hunhandle *pHunspell, const char * word) {
1985         return ((Hunspell*)pHunspell)->add(word);
1986 }
1987
1988   /* add word to the run-time dictionary with affix flags of
1989    * the example (a dictionary word): Hunspell will recognize
1990    * affixed forms of the new word, too.
1991    */
1992
1993 int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word,
1994         const char * example) {
1995         return ((Hunspell*)pHunspell)->add_with_affix(word, example);
1996 }
1997
1998   /* remove word from the run-time dictionary */
1999
2000 int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
2001         return ((Hunspell*)pHunspell)->remove(word);
2002 }
2003
2004 void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
2005         freelist(slst, n);
2006 }