ext/hunspell/affixmgr.cxx

   1 #include "license.hunspell"
   2 #include "license.myspell"
   3
   4 #ifndef MOZILLA_CLIENT
   5 #include <cstdlib>
   6 #include <cstring>
   7 #include <cctype>
   8 #include <cstdio>
   9 #else
  10 #include <stdlib.h>
  11 #include <string.h>
  12 #include <stdio.h>
  13 #include <ctype.h>
  14 #endif
  15
  16 #include "affixmgr.hxx"
  17 #include "affentry.hxx"
  18 #include "langnum.hxx"
  19
  20 #include "csutil.hxx"
  21
  22 #ifndef MOZILLA_CLIENT
  23 #ifndef W32
  24 using namespace std;
  25 #endif
  26 #endif
  27
  28 AffixMgr::AffixMgr(const char * affpath, HashMgr* ptr)
  29 {
  30   // register hash manager and load affix data from aff file
  31   pHMgr = ptr;
  32   trystring = NULL;
  33   encoding=NULL;
  34   utf8 = 0;
  35   complexprefixes = 0;
  36   maptable = NULL;
  37   nummap = 0;
  38   breaktable = NULL;
  39   numbreak = 0;
  40   reptable = NULL;
  41   numrep = 0;
  42   checkcpdtable = NULL;
  43   numcheckcpd = 0;
  44   defcpdtable = NULL;
  45   numdefcpd = 0;
  46   compoundflag = FLAG_NULL; // permits word in compound forms
  47   compoundbegin = FLAG_NULL; // may be first word in compound forms
  48   compoundmiddle = FLAG_NULL; // may be middle word in compound forms
  49   compoundend = FLAG_NULL; // may be last word in compound forms
  50   compoundroot = FLAG_NULL; // compound word signing flag
  51   compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
  52   compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
  53   checkcompounddup = 0; // forbid double words in compounds
  54   checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
  55   checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
  56   checkcompoundtriple = 0; // forbid compounds with triple letters
  57   forbiddenword = FLAG_NULL; // forbidden word signing flag
  58   nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag
  59   lang = NULL; // language
  60   langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
  61   pseudoroot = FLAG_NULL; // forbidden root, allowed only with suffixes
  62   cpdwordmax = -1; // default: unlimited wordcount in compound words
  63   cpdmin = -1;  // undefined
  64   cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
  65   cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
  66   cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
  67   cpdvowels_utf16_len=0; // vowels
  68   pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG
  69   sfxappnd=NULL; // previous suffix for counting a special syllables BUG
  70   cpdsyllablenum=NULL; // syllable count incrementing flag
  71   checknum=0; // checking numbers, and word with numbers
  72   wordchars=NULL; // letters + spec. word characters
  73   wordchars_utf16=NULL; // letters + spec. word characters
  74   wordchars_utf16_len=0; // letters + spec. word characters
  75   ignorechars=NULL; // letters + spec. word characters
  76   ignorechars_utf16=NULL; // letters + spec. word characters
  77   ignorechars_utf16_len=0; // letters + spec. word characters
  78   version=NULL; // affix and dictionary file version string
  79   havecontclass=0; // flags of possible continuing classes (double affix)
  80   // LEMMA_PRESENT: not put root into the morphological output. Lemma presents
  81   // in morhological description in dictionary file. It's often combined with PSEUDOROOT.
  82   lemma_present = FLAG_NULL;
  83   circumfix = FLAG_NULL;
  84   onlyincompound = FLAG_NULL;
  85   flag_mode = FLAG_CHAR; // default one-character flags in affix and dic file
  86   maxngramsugs = -1; // undefined
  87   nosplitsugs = 0;
  88   sugswithdots = 0;
  89   keepcase = 0;
  90   checksharps = 0;
  91
  92   derived = NULL; // XXX not threadsafe variable for experimental stemming
  93   sfx = NULL;
  94   pfx = NULL;
  95
  96   for (int i=0; i < SETSIZE; i++) {
  97      pStart[i] = NULL;
  98      sStart[i] = NULL;
  99      pFlag[i] = NULL;
 100      sFlag[i] = NULL;
 101   }
 102
 103   for (int j=0; j < CONTSIZE; j++) {
 104     contclasses[j] = 0;
 105   }
 106
 107   if (parse_file(affpath)) {
 108      HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
 109      wordchars = mystrdup("qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM");
 110   }
 111
 112   if (cpdmin == -1) cpdmin = MINCPDLEN;
 113
 114 }
 115
 116
 117 AffixMgr::~AffixMgr()
 118 {
 119
 120   // pass through linked prefix entries and clean up
 121   for (int i=0; i < SETSIZE ;i++) {
 122        pFlag[i] = NULL;
 123        PfxEntry * ptr = (PfxEntry *)pStart[i];
 124        PfxEntry * nptr = NULL;
 125        while (ptr) {
 126             nptr = ptr->getNext();
 127             delete(ptr);
 128             ptr = nptr;
 129             nptr = NULL;
 130        }
 131   }
 132
 133   // pass through linked suffix entries and clean up
 134   for (int j=0; j < SETSIZE ; j++) {
 135        sFlag[j] = NULL;
 136        SfxEntry * ptr = (SfxEntry *)sStart[j];
 137        SfxEntry * nptr = NULL;
 138        while (ptr) {
 139             nptr = ptr->getNext();
 140             delete(ptr);
 141             ptr = nptr;
 142             nptr = NULL;
 143        }
 144        sStart[j] = NULL;
 145   }
 146
 147   if (trystring) free(trystring);
 148   trystring=NULL;
 149   if (encoding) free(encoding);
 150   encoding=NULL;
 151   if (maptable) {
 152      for (int j=0; j < nummap; j++) {
 153         if (maptable[j].set) free(maptable[j].set);
 154         if (maptable[j].set_utf16) free(maptable[j].set_utf16);
 155         maptable[j].set = NULL;
 156         maptable[j].len = 0;
 157      }
 158      free(maptable);
 159      maptable = NULL;
 160   }
 161   nummap = 0;
 162   if (breaktable) {
 163      for (int j=0; j < numbreak; j++) {
 164         if (breaktable[j]) free(breaktable[j]);
 165         breaktable[j] = NULL;
 166      }
 167      free(breaktable);
 168      breaktable = NULL;
 169   }
 170   numbreak = 0;
 171   if (reptable) {
 172      for (int j=0; j < numrep; j++) {
 173         free(reptable[j].pattern);
 174         free(reptable[j].pattern2);
 175         reptable[j].pattern = NULL;
 176         reptable[j].pattern2 = NULL;
 177      }
 178      free(reptable);
 179      reptable = NULL;
 180   }
 181   if (defcpdtable) {
 182      for (int j=0; j < numdefcpd; j++) {
 183         free(defcpdtable[j].def);
 184         defcpdtable[j].def = NULL;
 185      }
 186      free(defcpdtable);
 187      defcpdtable = NULL;
 188   }
 189   numrep = 0;
 190   if (checkcpdtable) {
 191      for (int j=0; j < numcheckcpd; j++) {
 192         free(checkcpdtable[j].pattern);
 193         free(checkcpdtable[j].pattern2);
 194         checkcpdtable[j].pattern = NULL;
 195         checkcpdtable[j].pattern2 = NULL;
 196      }
 197      free(checkcpdtable);
 198      checkcpdtable = NULL;
 199   }
 200   numcheckcpd = 0;
 201   FREE_FLAG(compoundflag);
 202   FREE_FLAG(compoundbegin);
 203   FREE_FLAG(compoundmiddle);
 204   FREE_FLAG(compoundend);
 205   FREE_FLAG(compoundpermitflag);
 206   FREE_FLAG(compoundforbidflag);
 207   FREE_FLAG(compoundroot);
 208   FREE_FLAG(forbiddenword);
 209   FREE_FLAG(nosuggest);
 210   FREE_FLAG(pseudoroot);
 211   FREE_FLAG(lemma_present);
 212   FREE_FLAG(circumfix);
 213   FREE_FLAG(onlyincompound);
 214
 215   cpdwordmax = 0;
 216   pHMgr = NULL;
 217   cpdmin = 0;
 218   cpdmaxsyllable = 0;
 219   if (cpdvowels) free(cpdvowels);
 220   if (cpdvowels_utf16) free(cpdvowels_utf16);
 221   if (cpdsyllablenum) free(cpdsyllablenum);
 222   free_utf_tbl();
 223   if (lang) free(lang);
 224   if (wordchars) free(wordchars);
 225   if (wordchars_utf16) free(wordchars_utf16);
 226   if (ignorechars) free(ignorechars);
 227   if (ignorechars_utf16) free(ignorechars_utf16);
 228   if (version) free(version);
 229   if (derived) free(derived);
 230   checknum=0;
 231 }
 232
 233
 234 // read in aff file and build up prefix and suffix entry objects
 235 int  AffixMgr::parse_file(const char * affpath)
 236 {
 237
 238   // io buffers
 239   char line[MAXLNLEN+1];
 240
 241   // affix type
 242   char ft;
 243
 244   // checking flag duplication
 245   char dupflags[CONTSIZE];
 246   char dupflags_ini = 1;
 247
 248   // first line indicator for removing byte order mark
 249   int firstline = 1;
 250
 251   // open the affix file
 252   FILE * afflst;
 253   afflst = fopen(affpath,"r");
 254   if (!afflst) {
 255     HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath);
 256     return 1;
 257   }
 258
 259   // step one is to parse the affix file building up the internal
 260   // affix data structures
 261
 262
 263     // read in each line ignoring any that do not
 264     // start with a known line type indicator
 265     while (fgets(line,MAXLNLEN,afflst)) {
 266        mychomp(line);
 267
 268        /* remove byte order mark */
 269        if (firstline) {
 270          firstline = 0;
 271          if (strncmp(line,"",3) == 0) {
 272             memmove(line, line+3, strlen(line+3)+1);
 273             HUNSPELL_WARNING(stderr, "warning: affix file begins with byte order mark: possible incompatibility with old Hunspell versions\n");
 274          }
 275        }
 276
 277        /* parse in the try string */
 278        if (strncmp(line,"TRY",3) == 0) {
 279           if (parse_string(line, &trystring, "TRY")) {
 280              fclose(afflst);
 281              return 1;
 282           }
 283        }
 284
 285        /* parse in the name of the character set used by the .dict and .aff */
 286        if (strncmp(line,"SET",3) == 0) {
 287           if (parse_string(line, &encoding, "SET")) {
 288              fclose(afflst);
 289              return 1;
 290           }
 291           if (strcmp(encoding, "UTF-8") == 0) {
 292              utf8 = 1;
 293 #ifndef OPENOFFICEORG
 294 #ifndef MOZILLA_CLIENT
 295              if (initialize_utf_tbl()) return 1;
 296 #endif
 297 #endif
 298           }
 299        }
 300
 301        /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left writing system */
 302        if (strncmp(line,"COMPLEXPREFIXES",15) == 0)
 303                    complexprefixes = 1;
 304
 305        /* parse in the flag used by the controlled compound words */
 306        if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
 307           if (parse_flag(line, &compoundflag, "COMPOUNDFLAG")) {
 308              fclose(afflst);
 309              return 1;
 310           }
 311        }
 312
 313        /* parse in the flag used by compound words */
 314        if (strncmp(line,"COMPOUNDBEGIN",13) == 0) {
 315           if (complexprefixes) {
 316             if (parse_flag(line, &compoundend, "COMPOUNDBEGIN")) {
 317               fclose(afflst);
 318               return 1;
 319             }
 320           } else {
 321             if (parse_flag(line, &compoundbegin, "COMPOUNDBEGIN")) {
 322               fclose(afflst);
 323               return 1;
 324             }
 325           }
 326        }
 327
 328        /* parse in the flag used by compound words */
 329        if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) {
 330           if (parse_flag(line, &compoundmiddle, "COMPOUNDMIDDLE")) {
 331              fclose(afflst);
 332              return 1;
 333           }
 334        }
 335        /* parse in the flag used by compound words */
 336        if (strncmp(line,"COMPOUNDEND",11) == 0) {
 337           if (complexprefixes) {
 338             if (parse_flag(line, &compoundbegin, "COMPOUNDEND")) {
 339               fclose(afflst);
 340               return 1;
 341             }
 342           } else {
 343             if (parse_flag(line, &compoundend, "COMPOUNDEND")) {
 344               fclose(afflst);
 345               return 1;
 346             }
 347           }
 348        }
 349
 350        /* parse in the data used by compound_check() method */
 351        if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) {
 352           if (parse_num(line, &cpdwordmax, "COMPOUNDWORDMAX")) {
 353              fclose(afflst);
 354              return 1;
 355           }
 356        }
 357
 358        /* parse in the flag sign compounds in dictionary */
 359        if (strncmp(line,"COMPOUNDROOT",12) == 0) {
 360           if (parse_flag(line, &compoundroot, "COMPOUNDROOT")) {
 361              fclose(afflst);
 362              return 1;
 363           }
 364        }
 365
 366        /* parse in the flag used by compound_check() method */
 367        if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) {
 368           if (parse_flag(line, &compoundpermitflag, "COMPOUNDPERMITFLAG")) {
 369              fclose(afflst);
 370              return 1;
 371           }
 372        }
 373
 374        /* parse in the flag used by compound_check() method */
 375        if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) {
 376           if (parse_flag(line, &compoundforbidflag, "COMPOUNDFORBIDFLAG")) {
 377              fclose(afflst);
 378              return 1;
 379           }
 380        }
 381
 382        if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) {
 383                    checkcompounddup = 1;
 384        }
 385
 386        if (strncmp(line,"CHECKCOMPOUNDREP",16) == 0) {
 387                    checkcompoundrep = 1;
 388        }
 389
 390        if (strncmp(line,"CHECKCOMPOUNDTRIPLE",19) == 0) {
 391                    checkcompoundtriple = 1;
 392        }
 393
 394        if (strncmp(line,"CHECKCOMPOUNDCASE",17) == 0) {
 395                    checkcompoundcase = 1;
 396        }
 397
 398        if (strncmp(line,"NOSUGGEST",9) == 0) {
 399           if (parse_flag(line, &nosuggest, "NOSUGGEST")) {
 400              fclose(afflst);
 401              return 1;
 402           }
 403        }
 404
 405        /* parse in the flag used by forbidden words */
 406        if (strncmp(line,"FORBIDDENWORD",13) == 0) {
 407           if (parse_flag(line, &forbiddenword, "FORBIDDENWORD")) {
 408              fclose(afflst);
 409              return 1;
 410           }
 411        }
 412
 413        /* parse in the flag used by forbidden words */
 414        if (strncmp(line,"LEMMA_PRESENT",13) == 0) {
 415           if (parse_flag(line, &lemma_present, "LEMMA_PRESENT")) {
 416              fclose(afflst);
 417              return 1;
 418           }
 419        }
 420
 421        /* parse in the flag used by circumfixes */
 422        if (strncmp(line,"CIRCUMFIX",9) == 0) {
 423           if (parse_flag(line, &circumfix, "CIRCUMFIX")) {
 424              fclose(afflst);
 425              return 1;
 426           }
 427        }
 428
 429        /* parse in the flag used by fogemorphemes */
 430        if (strncmp(line,"ONLYINCOMPOUND",14) == 0) {
 431           if (parse_flag(line, &onlyincompound, "ONLYINCOMPOUND")) {
 432              fclose(afflst);
 433              return 1;
 434           }
 435        }
 436
 437        /* parse in the flag used by `pseudoroots' */
 438        if (strncmp(line,"PSEUDOROOT",10) == 0) {
 439           if (parse_flag(line, &pseudoroot, "PSEUDOROOT")) {
 440              fclose(afflst);
 441              return 1;
 442           }
 443        }
 444
 445        /* parse in the flag used by `pseudoroots' */
 446        if (strncmp(line,"NEEDAFFIX",9) == 0) {
 447           if (parse_flag(line, &pseudoroot, "NEEDAFFIX")) {
 448              fclose(afflst);
 449              return 1;
 450           }
 451        }
 452
 453        /* parse in the minimal length for words in compounds */
 454        if (strncmp(line,"COMPOUNDMIN",11) == 0) {
 455           if (parse_num(line, &cpdmin, "COMPOUNDMIN")) {
 456              fclose(afflst);
 457              return 1;
 458           }
 459           if (cpdmin < 1) cpdmin = 1;
 460        }
 461
 462        /* parse in the max. words and syllables in compounds */
 463        if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) {
 464           if (parse_cpdsyllable(line)) {
 465              fclose(afflst);
 466              return 1;
 467           }
 468        }
 469
 470        /* parse in the flag used by compound_check() method */
 471        if (strncmp(line,"SYLLABLENUM",11) == 0) {
 472           if (parse_string(line, &cpdsyllablenum, "SYLLABLENUM")) {
 473              fclose(afflst);
 474              return 1;
 475           }
 476        }
 477
 478        /* parse in the flag used by the controlled compound words */
 479        if (strncmp(line,"CHECKNUM",8) == 0) {
 480            checknum=1;
 481        }
 482
 483        /* parse in the extra word characters */
 484        if (strncmp(line,"WORDCHARS",9) == 0) {
 485           if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, "WORDCHARS", utf8)) {
 486              fclose(afflst);
 487              return 1;
 488           }
 489        }
 490
 491        /* parse in the ignored characters (for example, Arabic optional diacretics charachters */
 492        if (strncmp(line,"IGNORE",6) == 0) {
 493           if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, "IGNORE", utf8)) {
 494              fclose(afflst);
 495              return 1;
 496           }
 497        }
 498
 499        /* parse in the typical fault correcting table */
 500        if (strncmp(line,"REP",3) == 0) {
 501           if (parse_reptable(line, afflst)) {
 502              fclose(afflst);
 503              return 1;
 504           }
 505        }
 506
 507        /* parse in the checkcompoundpattern table */
 508        if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
 509           if (parse_checkcpdtable(line, afflst)) {
 510              fclose(afflst);
 511              return 1;
 512           }
 513        }
 514
 515        /* parse in the defcompound table */
 516        if (strncmp(line,"COMPOUNDRULE",12) == 0) {
 517           if (parse_defcpdtable(line, afflst)) {
 518              fclose(afflst);
 519              return 1;
 520           }
 521        }
 522
 523        /* parse in the related character map table */
 524        if (strncmp(line,"MAP",3) == 0) {
 525           if (parse_maptable(line, afflst)) {
 526              fclose(afflst);
 527              return 1;
 528           }
 529        }
 530
 531        /* parse in the word breakpoints table */
 532        if (strncmp(line,"BREAK",5) == 0) {
 533           if (parse_breaktable(line, afflst)) {
 534              fclose(afflst);
 535              return 1;
 536           }
 537        }
 538
 539        /* parse in the language for language specific codes */
 540        if (strncmp(line,"LANG",4) == 0) {
 541           if (parse_string(line, &lang, "LANG")) {
 542              fclose(afflst);
 543              return 1;
 544           }
 545           langnum = get_lang_num(lang);
 546        }
 547
 548        if (strncmp(line,"VERSION",7) == 0) {
 549           if (parse_string(line, &version, "VERSION")) {
 550              fclose(afflst);
 551              return 1;
 552           }
 553        }
 554
 555        if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
 556           if (parse_num(line, &maxngramsugs, "MAXNGRAMSUGS")) {
 557              fclose(afflst);
 558              return 1;
 559           }
 560        }
 561
 562        if (strncmp(line,"NOSPLITSUGS",11) == 0) {
 563                    nosplitsugs=1;
 564        }
 565
 566        if (strncmp(line,"SUGSWITHDOTS",12) == 0) {
 567                    sugswithdots=1;
 568        }
 569
 570        /* parse in the flag used by forbidden words */
 571        if (strncmp(line,"KEEPCASE",8) == 0) {
 572           if (parse_flag(line, &keepcase, "KEEPCASE")) {
 573              fclose(afflst);
 574              return 1;
 575           }
 576        }
 577
 578        if (strncmp(line,"CHECKSHARPS",11) == 0) {
 579                    checksharps=1;
 580        }
 581
 582        /* parse this affix: P - prefix, S - suffix */
 583        ft = ' ';
 584        if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
 585        if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';
 586        if (ft != ' ') {
 587           if (dupflags_ini) {
 588             for (int i = 0; i < CONTSIZE; i++) dupflags[i] = 0;
 589             dupflags_ini = 0;
 590           }
 591           if (parse_affix(line, ft, afflst, dupflags)) {
 592              fclose(afflst);
 593              process_pfx_tree_to_list();
 594              process_sfx_tree_to_list();
 595              return 1;
 596           }
 597        }
 598
 599     }
 600     fclose(afflst);
 601
 602     // convert affix trees to sorted list
 603     process_pfx_tree_to_list();
 604     process_sfx_tree_to_list();
 605
 606     // now we can speed up performance greatly taking advantage of the
 607     // relationship between the affixes and the idea of "subsets".
 608
 609     // View each prefix as a potential leading subset of another and view
 610     // each suffix (reversed) as a potential trailing subset of another.
 611
 612     // To illustrate this relationship if we know the prefix "ab" is found in the
 613     // word to examine, only prefixes that "ab" is a leading subset of need be examined.
 614     // Furthermore is "ab" is not present then none of the prefixes that "ab" is
 615     // is a subset need be examined.
 616     // The same argument goes for suffix string that are reversed.
 617
 618     // Then to top this off why not examine the first char of the word to quickly
 619     // limit the set of prefixes to examine (i.e. the prefixes to examine must
 620     // be leading supersets of the first character of the word (if they exist)
 621
 622     // To take advantage of this "subset" relationship, we need to add two links
 623     // from entry.  One to take next if the current prefix is found (call it nexteq)
 624     // and one to take next if the current prefix is not found (call it nextne).
 625
 626     // Since we have built ordered lists, all that remains is to properly intialize
 627     // the nextne and nexteq pointers that relate them
 628
 629     process_pfx_order();
 630     process_sfx_order();
 631
 632     // expand wordchars string, based on csutil (for external tokenization)
 633
 634     char * enc = get_encoding();
 635     csconv = get_current_cs(enc);
 636     free(enc);
 637     enc = NULL;
 638
 639     char expw[MAXLNLEN];
 640     if (wordchars) {
 641         strcpy(expw, wordchars);
 642         free(wordchars);
 643     } else *expw = '\0';
 644
 645     for (int i = 0; i <= 255; i++) {
 646         if ( (csconv[i].cupper != csconv[i].clower) &&
 647             (! strchr(expw, (char) i))) {
 648                 *(expw + strlen(expw) + 1) = '\0';
 649                 *(expw + strlen(expw)) = (char) i;
 650         }
 651     }
 652
 653     wordchars = mystrdup(expw);
 654
 655     // temporary BREAK definition for German dash handling (OOo issue 64400)
 656     if ((langnum == LANG_de) && (!breaktable)) {
 657         breaktable = (char **) malloc(sizeof(char *));
 658         if (!breaktable) return 1;
 659         breaktable[0] = mystrdup("-");
 660         numbreak = 1;
 661     }
 662     return 0;
 663 }
 664
 665
 666 // we want to be able to quickly access prefix information
 667 // both by prefix flag, and sorted by prefix string itself
 668 // so we need to set up two indexes
 669
 670 int AffixMgr::build_pfxtree(AffEntry* pfxptr)
 671 {
 672   PfxEntry * ptr;
 673   PfxEntry * pptr;
 674   PfxEntry * ep = (PfxEntry*) pfxptr;
 675
 676   // get the right starting points
 677   const char * key = ep->getKey();
 678   const unsigned char flg = (unsigned char) (ep->getFlag() & 0x00FF);
 679
 680   // first index by flag which must exist
 681   ptr = (PfxEntry*)pFlag[flg];
 682   ep->setFlgNxt(ptr);
 683   pFlag[flg] = (AffEntry *) ep;
 684
 685
 686   // handle the special case of null affix string
 687   if (strlen(key) == 0) {
 688     // always inset them at head of list at element 0
 689      ptr = (PfxEntry*)pStart[0];
 690      ep->setNext(ptr);
 691      pStart[0] = (AffEntry*)ep;
 692      return 0;
 693   }
 694
 695   // now handle the normal case
 696   ep->setNextEQ(NULL);
 697   ep->setNextNE(NULL);
 698
 699   unsigned char sp = *((const unsigned char *)key);
 700   ptr = (PfxEntry*)pStart[sp];
 701
 702   // handle the first insert
 703   if (!ptr) {
 704      pStart[sp] = (AffEntry*)ep;
 705      return 0;
 706   }
 707
 708
 709   // otherwise use binary tree insertion so that a sorted
 710   // list can easily be generated later
 711   pptr = NULL;
 712   for (;;) {
 713     pptr = ptr;
 714     if (strcmp(ep->getKey(), ptr->getKey() ) <= 0) {
 715        ptr = ptr->getNextEQ();
 716        if (!ptr) {
 717           pptr->setNextEQ(ep);
 718           break;
 719        }
 720     } else {
 721        ptr = ptr->getNextNE();
 722        if (!ptr) {
 723           pptr->setNextNE(ep);
 724           break;
 725        }
 726     }
 727   }
 728   return 0;
 729 }
 730
 731 // we want to be able to quickly access suffix information
 732 // both by suffix flag, and sorted by the reverse of the
 733 // suffix string itself; so we need to set up two indexes
 734 int AffixMgr::build_sfxtree(AffEntry* sfxptr)
 735 {
 736   SfxEntry * ptr;
 737   SfxEntry * pptr;
 738   SfxEntry * ep = (SfxEntry *) sfxptr;
 739
 740   /* get the right starting point */
 741   const char * key = ep->getKey();
 742   const unsigned char flg = (unsigned char) (ep->getFlag() & 0x00FF);
 743
 744   // first index by flag which must exist
 745   ptr = (SfxEntry*)sFlag[flg];
 746   ep->setFlgNxt(ptr);
 747   sFlag[flg] = (AffEntry *) ep;
 748
 749   // next index by affix string
 750
 751   // handle the special case of null affix string
 752   if (strlen(key) == 0) {
 753     // always inset them at head of list at element 0
 754      ptr = (SfxEntry*)sStart[0];
 755      ep->setNext(ptr);
 756      sStart[0] = (AffEntry*)ep;
 757      return 0;
 758   }
 759
 760   // now handle the normal case
 761   ep->setNextEQ(NULL);
 762   ep->setNextNE(NULL);
 763
 764   unsigned char sp = *((const unsigned char *)key);
 765   ptr = (SfxEntry*)sStart[sp];
 766
 767   // handle the first insert
 768   if (!ptr) {
 769      sStart[sp] = (AffEntry*)ep;
 770      return 0;
 771   }
 772
 773   // otherwise use binary tree insertion so that a sorted
 774   // list can easily be generated later
 775   pptr = NULL;
 776   for (;;) {
 777     pptr = ptr;
 778     if (strcmp(ep->getKey(), ptr->getKey() ) <= 0) {
 779        ptr = ptr->getNextEQ();
 780        if (!ptr) {
 781           pptr->setNextEQ(ep);
 782           break;
 783        }
 784     } else {
 785        ptr = ptr->getNextNE();
 786        if (!ptr) {
 787           pptr->setNextNE(ep);
 788           break;
 789        }
 790     }
 791   }
 792   return 0;
 793 }
 794
 795 // convert from binary tree to sorted list
 796 int AffixMgr::process_pfx_tree_to_list()
 797 {
 798   for (int i=1; i< SETSIZE; i++) {
 799     pStart[i] = process_pfx_in_order(pStart[i],NULL);
 800   }
 801   return 0;
 802 }
 803
 804
 805 AffEntry* AffixMgr::process_pfx_in_order(AffEntry* ptr, AffEntry* nptr)
 806 {
 807   if (ptr) {
 808     nptr = process_pfx_in_order(((PfxEntry*) ptr)->getNextNE(), nptr);
 809     ((PfxEntry*) ptr)->setNext((PfxEntry*) nptr);
 810     nptr = process_pfx_in_order(((PfxEntry*) ptr)->getNextEQ(), ptr);
 811   }
 812   return nptr;
 813 }
 814
 815
 816 // convert from binary tree to sorted list
 817 int AffixMgr:: process_sfx_tree_to_list()
 818 {
 819   for (int i=1; i< SETSIZE; i++) {
 820     sStart[i] = process_sfx_in_order(sStart[i],NULL);
 821   }
 822   return 0;
 823 }
 824
 825 AffEntry* AffixMgr::process_sfx_in_order(AffEntry* ptr, AffEntry* nptr)
 826 {
 827   if (ptr) {
 828     nptr = process_sfx_in_order(((SfxEntry*) ptr)->getNextNE(), nptr);
 829     ((SfxEntry*) ptr)->setNext((SfxEntry*) nptr);
 830     nptr = process_sfx_in_order(((SfxEntry*) ptr)->getNextEQ(), ptr);
 831   }
 832   return nptr;
 833 }
 834
 835
 836 // reinitialize the PfxEntry links NextEQ and NextNE to speed searching
 837 // using the idea of leading subsets this time
 838 int AffixMgr::process_pfx_order()
 839 {
 840     PfxEntry* ptr;
 841
 842     // loop through each prefix list starting point
 843     for (int i=1; i < SETSIZE; i++) {
 844
 845          ptr = (PfxEntry*)pStart[i];
 846
 847          // look through the remainder of the list
 848          //  and find next entry with affix that
 849          // the current one is not a subset of
 850          // mark that as destination for NextNE
 851          // use next in list that you are a subset
 852          // of as NextEQ
 853
 854          for (; ptr != NULL; ptr = ptr->getNext()) {
 855
 856              PfxEntry * nptr = ptr->getNext();
 857              for (; nptr != NULL; nptr = nptr->getNext()) {
 858                  if (! isSubset( ptr->getKey() , nptr->getKey() )) break;
 859              }
 860              ptr->setNextNE(nptr);
 861              ptr->setNextEQ(NULL);
 862              if ((ptr->getNext()) && isSubset(ptr->getKey() , (ptr->getNext())->getKey()))
 863                  ptr->setNextEQ(ptr->getNext());
 864          }
 865
 866          // now clean up by adding smart search termination strings:
 867          // if you are already a superset of the previous prefix
 868          // but not a subset of the next, search can end here
 869          // so set NextNE properly
 870
 871          ptr = (PfxEntry *) pStart[i];
 872          for (; ptr != NULL; ptr = ptr->getNext()) {
 873              PfxEntry * nptr = ptr->getNext();
 874              PfxEntry * mptr = NULL;
 875              for (; nptr != NULL; nptr = nptr->getNext()) {
 876                  if (! isSubset(ptr->getKey(),nptr->getKey())) break;
 877                  mptr = nptr;
 878              }
 879              if (mptr) mptr->setNextNE(NULL);
 880          }
 881     }
 882     return 0;
 883 }
 884
 885 // initialize the SfxEntry links NextEQ and NextNE to speed searching
 886 // using the idea of leading subsets this time
 887 int AffixMgr::process_sfx_order()
 888 {
 889     SfxEntry* ptr;
 890
 891     // loop through each prefix list starting point
 892     for (int i=1; i < SETSIZE; i++) {
 893
 894          ptr = (SfxEntry *) sStart[i];
 895
 896          // look through the remainder of the list
 897          //  and find next entry with affix that
 898          // the current one is not a subset of
 899          // mark that as destination for NextNE
 900          // use next in list that you are a subset
 901          // of as NextEQ
 902
 903          for (; ptr != NULL; ptr = ptr->getNext()) {
 904              SfxEntry * nptr = ptr->getNext();
 905              for (; nptr != NULL; nptr = nptr->getNext()) {
 906                  if (! isSubset(ptr->getKey(),nptr->getKey())) break;
 907              }
 908              ptr->setNextNE(nptr);
 909              ptr->setNextEQ(NULL);
 910              if ((ptr->getNext()) && isSubset(ptr->getKey(),(ptr->getNext())->getKey()))
 911                  ptr->setNextEQ(ptr->getNext());
 912          }
 913
 914
 915          // now clean up by adding smart search termination strings:
 916          // if you are already a superset of the previous suffix
 917          // but not a subset of the next, search can end here
 918          // so set NextNE properly
 919
 920          ptr = (SfxEntry *) sStart[i];
 921          for (; ptr != NULL; ptr = ptr->getNext()) {
 922              SfxEntry * nptr = ptr->getNext();
 923              SfxEntry * mptr = NULL;
 924              for (; nptr != NULL; nptr = nptr->getNext()) {
 925                  if (! isSubset(ptr->getKey(),nptr->getKey())) break;
 926                  mptr = nptr;
 927              }
 928              if (mptr) mptr->setNextNE(NULL);
 929          }
 930     }
 931     return 0;
 932 }
 933
 934
 935
 936 // takes aff file condition string and creates the
 937 // conds array - please see the appendix at the end of the
 938 // file affentry.cxx which describes what is going on here
 939 // in much more detail
 940
 941 int AffixMgr::encodeit(struct affentry * ptr, char * cs)
 942 {
 943   unsigned char c;
 944   int i, j, k;
 945   unsigned char mbr[MAXLNLEN];
 946   w_char wmbr[MAXLNLEN];
 947   w_char * wpos = wmbr;
 948
 949   // now clear the conditions array */
 950   for (i=0;i<SETSIZE;i++) ptr->conds.base[i] = (unsigned char) 0;
 951
 952   // now parse the string to create the conds array */
 953   int nc = strlen(cs);
 954   unsigned char neg = 0;   // complement indicator
 955   int grp = 0;   // group indicator
 956   unsigned char n = 0;     // number of conditions
 957   int ec = 0;    // end condition indicator
 958   int nm = 0;    // number of member in group
 959
 960   // if no condition just return
 961   if (strcmp(cs,".")==0) {
 962     ptr->numconds = 0;
 963     return 0;
 964   }
 965
 966   i = 0;
 967   while (i < nc) {
 968     c = *((unsigned char *)(cs + i));
 969
 970     // start group indicator
 971     if (c == '[') {
 972        grp = 1;
 973        c = 0;
 974     }
 975
 976     // complement flag
 977     if ((grp == 1) && (c == '^')) {
 978        neg = 1;
 979        c = 0;
 980     }
 981
 982     // end goup indicator
 983     if (c == ']') {
 984        ec = 1;
 985        c = 0;
 986     }
 987
 988     // add character of group to list
 989     if ((grp == 1) && (c != 0)) {
 990       *(mbr + nm) = c;
 991       nm++;
 992       c = 0;
 993     }
 994
 995     // end of condition
 996     if (c != 0) {
 997        ec = 1;
 998     }
 999
1000   if (ec) {
1001     if (!utf8) {
1002       if (grp == 1) {
1003         if (neg == 0) {
1004           // set the proper bits in the condition array vals for those chars
1005           for (j=0;j<nm;j++) {
1006              k = (unsigned int) mbr[j];
1007              ptr->conds.base[k] = ptr->conds.base[k] | ((unsigned char)1 << n);
1008           }
1009         } else {
1010           // complement so set all of them and then unset indicated ones
1011            for (j=0;j<SETSIZE;j++) ptr->conds.base[j] = ptr->conds.base[j] | ((unsigned char)1 << n);
1012            for (j=0;j<nm;j++) {
1013              k = (unsigned int) mbr[j];
1014              ptr->conds.base[k] = ptr->conds.base[k] & ~((unsigned char)1 << n);
1015            }
1016         }
1017         neg = 0;
1018         grp = 0;
1019         nm = 0;
1020       } else {
1021          // not a group so just set the proper bit for this char
1022          // but first handle special case of . inside condition
1023          if (c == '.') {
1024             // wild card character so set them all
1025             for (j=0;j<SETSIZE;j++) ptr->conds.base[j] = ptr->conds.base[j] | ((unsigned char)1 << n);
1026          } else {
1027             ptr->conds.base[(unsigned int) c] = ptr->conds.base[(unsigned int)c] | ((unsigned char)1 << n);
1028          }
1029       }
1030       n++;
1031       ec = 0;
1032     } else { // UTF-8 character set
1033       if (grp == 1) {
1034         ptr->conds.utf8.neg[n] = neg;
1035         if (neg == 0) {
1036           // set the proper bits in the condition array vals for those chars
1037           for (j=0;j<nm;j++) {
1038              k = (unsigned int) mbr[j];
1039              if (k >> 7) {
1040                 u8_u16(wpos, 1, (char *) mbr + j);
1041                 wpos++;
1042                 if ((k & 0xe0) == 0xe0) j+=2; else j++; // 3-byte UTF-8 character
1043              } else {
1044                 ptr->conds.utf8.ascii[k] = ptr->conds.utf8.ascii[k] | ((unsigned char)1 << n);
1045              }
1046           }
1047         } else { // neg == 1
1048           // complement so set all of them and then unset indicated ones
1049            for (j=0;j<(SETSIZE/2);j++) ptr->conds.utf8.ascii[j] = ptr->conds.utf8.ascii[j] | ((unsigned char)1 << n);
1050            for (j=0;j<nm;j++) {
1051              k = (unsigned int) mbr[j];
1052              if (k >> 7) {
1053                 u8_u16(wpos, 1, (char *) mbr + j);
1054                 wpos++;
1055                 if ((k & 0xe0) == 0xe0) j+=2; else j++; // 3-byte UTF-8 character
1056              } else {
1057                 ptr->conds.utf8.ascii[k] = ptr->conds.utf8.ascii[k] & ~((unsigned char)1 << n);
1058              }
1059            }
1060         }
1061         neg = 0;
1062         grp = 0;
1063         nm = 0;
1064         ptr->conds.utf8.wlen[n] = wpos - wmbr;
1065         if ((wpos - wmbr) != 0) {
1066             ptr->conds.utf8.wchars[n] = (w_char *) malloc(sizeof(w_char) * (wpos - wmbr));
1067             if (!ptr->conds.utf8.wchars[n]) return 1;
1068             memcpy(ptr->conds.utf8.wchars[n], wmbr, sizeof(w_char) * (wpos - wmbr));
1069             flag_qsort((unsigned short *) ptr->conds.utf8.wchars[n], 0, ptr->conds.utf8.wlen[n]);
1070             wpos = wmbr;
1071         }
1072       } else { // grp == 0
1073          // is UTF-8 character?
1074          if (c >> 7) {
1075             ptr->conds.utf8.wchars[n] = (w_char *) malloc(sizeof(w_char));
1076             if (!ptr->conds.utf8.wchars[n]) return 1;
1077             ptr->conds.utf8.wlen[n] = 1;
1078             u8_u16(ptr->conds.utf8.wchars[n], 1, cs + i);
1079             if ((c & 0xe0) == 0xe0) i+=2; else i++; // 3-byte UFT-8 character
1080          } else {
1081             ptr->conds.utf8.wchars[n] = NULL;
1082             // not a group so just set the proper bit for this char
1083             // but first handle special case of . inside condition
1084             if (c == '.') {
1085                 ptr->conds.utf8.all[n] = 1;
1086                 // wild card character so set them all
1087                 for (j=0;j<(SETSIZE/2);j++) ptr->conds.utf8.ascii[j] = ptr->conds.utf8.ascii[j] | ((unsigned char)1 << n);
1088             } else {
1089                 ptr->conds.utf8.all[n] = 0;
1090                 ptr->conds.utf8.ascii[(unsigned int) c] = ptr->conds.utf8.ascii[(unsigned int)c] | ((unsigned char)1 << n);
1091             }
1092          }
1093          neg = 0;
1094       }
1095       n++;
1096       ec = 0;
1097       neg = 0;
1098     }
1099   }
1100
1101     i++;
1102   }
1103   ptr->numconds = n;
1104   return 0;
1105 }
1106
1107  // return 1 if s1 is a leading subset of s2
1108 /* inline int AffixMgr::isSubset(const char * s1, const char * s2)
1109  {
1110     while ((*s1 == *s2) && *s1) {
1111         s1++;
1112         s2++;
1113     }
1114     return (*s1 == '\0');
1115  }
1116 */
1117
1118  // return 1 if s1 is a leading subset of s2 (dots are for infixes)
1119 inline int AffixMgr::isSubset(const char * s1, const char * s2)
1120  {
1121     while (((*s1 == *s2) || (*s1 == '.')) && (*s1 != '\0')) {
1122         s1++;
1123         s2++;
1124     }
1125     return (*s1 == '\0');
1126  }
1127
1128
1129 // check word for prefixes
1130 struct hentry * AffixMgr::prefix_check(const char * word, int len, char in_compound,
1131     const FLAG needflag)
1132 {
1133     struct hentry * rv= NULL;
1134
1135     pfx = NULL;
1136     pfxappnd = NULL;
1137     sfxappnd = NULL;
1138
1139     // first handle the special case of 0 length prefixes
1140     PfxEntry * pe = (PfxEntry *) pStart[0];
1141     while (pe) {
1142         if (
1143             // fogemorpheme
1144               ((in_compound != IN_CPD_NOT) || !(pe->getCont() &&
1145                   (TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())))) &&
1146             // permit prefixes in compounds
1147               ((in_compound != IN_CPD_END) || (pe->getCont() &&
1148                   (TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen()))))
1149               ) {
1150                     // check prefix
1151                     rv = pe->checkword(word, len, in_compound, needflag);
1152                     if (rv) {
1153                         pfx=(AffEntry *)pe; // BUG: pfx not stateless
1154                         return rv;
1155                     }
1156              }
1157        pe = pe->getNext();
1158     }
1159
1160     // now handle the general case
1161     unsigned char sp = *((const unsigned char *)word);
1162     PfxEntry * pptr = (PfxEntry *)pStart[sp];
1163
1164     while (pptr) {
1165         if (isSubset(pptr->getKey(),word)) {
1166              if (
1167             // fogemorpheme
1168               ((in_compound != IN_CPD_NOT) || !(pptr->getCont() &&
1169                   (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen())))) &&
1170             // permit prefixes in compounds
1171               ((in_compound != IN_CPD_END) || (pptr->getCont() &&
1172                   (TESTAFF(pptr->getCont(), compoundpermitflag, pptr->getContLen()))))
1173               ) {
1174             // check prefix
1175                   rv = pptr->checkword(word, len, in_compound, needflag);
1176                   if (rv) {
1177                     pfx=(AffEntry *)pptr; // BUG: pfx not stateless
1178                     return rv;
1179                   }
1180              }
1181              pptr = pptr->getNextEQ();
1182         } else {
1183              pptr = pptr->getNextNE();
1184         }
1185     }
1186
1187     return NULL;
1188 }
1189
1190 // check word for prefixes
1191 struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len,
1192     char in_compound, const FLAG needflag)
1193 {
1194     struct hentry * rv= NULL;
1195
1196     pfx = NULL;
1197     sfxappnd = NULL;
1198
1199     // first handle the special case of 0 length prefixes
1200     PfxEntry * pe = (PfxEntry *) pStart[0];
1201
1202     while (pe) {
1203         rv = pe->check_twosfx(word, len, in_compound, needflag);
1204         if (rv) return rv;
1205         pe = pe->getNext();
1206     }
1207
1208     // now handle the general case
1209     unsigned char sp = *((const unsigned char *)word);
1210     PfxEntry * pptr = (PfxEntry *)pStart[sp];
1211
1212     while (pptr) {
1213         if (isSubset(pptr->getKey(),word)) {
1214             rv = pptr->check_twosfx(word, len, in_compound, needflag);
1215             if (rv) {
1216                 pfx = (AffEntry *)pptr;
1217                 return rv;
1218             }
1219             pptr = pptr->getNextEQ();
1220         } else {
1221              pptr = pptr->getNextNE();
1222         }
1223     }
1224
1225     return NULL;
1226 }
1227
1228 #ifdef HUNSPELL_EXPERIMENTAL
1229 // check word for prefixes
1230 char * AffixMgr::prefix_check_morph(const char * word, int len, char in_compound,
1231     const FLAG needflag)
1232 {
1233     char * st;
1234
1235     char result[MAXLNLEN];
1236     result[0] = '\0';
1237
1238     pfx = NULL;
1239     sfxappnd = NULL;
1240
1241     // first handle the special case of 0 length prefixes
1242     PfxEntry * pe = (PfxEntry *) pStart[0];
1243     while (pe) {
1244        st = pe->check_morph(word,len,in_compound, needflag);
1245        if (st) {
1246             strcat(result, st);
1247             free(st);
1248        }
1249        // if (rv) return rv;
1250        pe = pe->getNext();
1251     }
1252
1253     // now handle the general case
1254     unsigned char sp = *((const unsigned char *)word);
1255     PfxEntry * pptr = (PfxEntry *)pStart[sp];
1256
1257     while (pptr) {
1258         if (isSubset(pptr->getKey(),word)) {
1259             st = pptr->check_morph(word,len,in_compound, needflag);
1260             if (st) {
1261               // fogemorpheme
1262               if ((in_compound != IN_CPD_NOT) || !((pptr->getCont() &&
1263                         (TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen()))))) {
1264                     strcat(result, st);
1265                     pfx = (AffEntry *)pptr;
1266                 }
1267                 free(st);
1268             }
1269             pptr = pptr->getNextEQ();
1270         } else {
1271             pptr = pptr->getNextNE();
1272         }
1273     }
1274
1275     if (*result) return mystrdup(result);
1276     return NULL;
1277 }
1278
1279
1280 // check word for prefixes
1281 char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
1282     char in_compound, const FLAG needflag)
1283 {
1284     char * st;
1285
1286     char result[MAXLNLEN];
1287     result[0] = '\0';
1288
1289     pfx = NULL;
1290     sfxappnd = NULL;
1291
1292     // first handle the special case of 0 length prefixes
1293     PfxEntry * pe = (PfxEntry *) pStart[0];
1294     while (pe) {
1295         st = pe->check_twosfx_morph(word,len,in_compound, needflag);
1296         if (st) {
1297             strcat(result, st);
1298             free(st);
1299         }
1300         pe = pe->getNext();
1301     }
1302
1303     // now handle the general case
1304     unsigned char sp = *((const unsigned char *)word);
1305     PfxEntry * pptr = (PfxEntry *)pStart[sp];
1306
1307     while (pptr) {
1308         if (isSubset(pptr->getKey(),word)) {
1309             st = pptr->check_twosfx_morph(word, len, in_compound, needflag);
1310             if (st) {
1311                 strcat(result, st);
1312                 free(st);
1313                 pfx = (AffEntry *)pptr;
1314             }
1315             pptr = pptr->getNextEQ();
1316         } else {
1317             pptr = pptr->getNextNE();
1318         }
1319     }
1320
1321     if (*result) return mystrdup(result);
1322     return NULL;
1323 }
1324 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1325
1326
1327 // Is word a non compound with a REP substitution (see checkcompoundrep)?
1328 int AffixMgr::cpdrep_check(const char * word, int wl)
1329 {
1330   char candidate[MAXLNLEN];
1331   const char * r;
1332   int lenr, lenp;
1333
1334   if ((wl < 2) || !numrep) return 0;
1335
1336   for (int i=0; i < numrep; i++ ) {
1337       r = word;
1338       lenr = strlen(reptable[i].pattern2);
1339       lenp = strlen(reptable[i].pattern);
1340       // search every occurence of the pattern in the word
1341       while ((r=strstr(r, reptable[i].pattern)) != NULL) {
1342           strcpy(candidate, word);
1343           if (r-word + lenr + strlen(r+lenp) >= MAXLNLEN) break;
1344           strcpy(candidate+(r-word),reptable[i].pattern2);
1345           strcpy(candidate+(r-word)+lenr, r+lenp);
1346           if (candidate_check(candidate,strlen(candidate))) return 1;
1347           r++; // search for the next letter
1348       }
1349    }
1350    return 0;
1351 }
1352
1353 // forbid compoundings when there are special patterns at word bound
1354 int AffixMgr::cpdpat_check(const char * word, int pos)
1355 {
1356   int len;
1357   for (int i = 0; i < numcheckcpd; i++) {
1358       if (isSubset(checkcpdtable[i].pattern2, word + pos) &&
1359         (len = strlen(checkcpdtable[i].pattern)) && (pos > len) &&
1360         (strncmp(word + pos - len, checkcpdtable[i].pattern, len) == 0)) return 1;
1361   }
1362   return 0;
1363 }
1364
1365 // forbid compounding with neighbouring upper and lower case characters at word bounds
1366 int AffixMgr::cpdcase_check(const char * word, int pos)
1367 {
1368   if (utf8) {
1369       w_char u, w;
1370       const char * p;
1371       u8_u16(&u, 1, word + pos);
1372       for (p = word + pos - 1; (*p & 0xc0) == 0x80; p--);
1373       u8_u16(&w, 1, p);
1374       unsigned short a = (u.h << 8) + u.l;
1375       unsigned short b = (w.h << 8) + w.l;
1376       if (((unicodetoupper(a, langnum) == a) || (unicodetoupper(b, langnum) == b))) return 1;
1377   } else {
1378       unsigned char a = *(word + pos - 1);
1379       unsigned char b = *(word + pos);
1380       if ((csconv[a].ccase || csconv[b].ccase) && (a != '-') && (b != '-')) return 1;
1381   }
1382   return 0;
1383 }
1384
1385 // check compound patterns
1386 int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** def, char all)
1387 {
1388   signed short btpp[MAXWORDLEN]; // metacharacter (*, ?) positions for backtracking
1389   signed short btwp[MAXWORDLEN]; // word positions for metacharacters
1390   int btnum[MAXWORDLEN]; // number of matched characters in metacharacter positions
1391   short bt = 0;
1392   int i;
1393   int ok;
1394   int w = 0;
1395   if (!*words) {
1396     w = 1;
1397     *words = def;
1398   }
1399   (*words)[wnum] = rv;
1400
1401   for (i = 0; i < numdefcpd; i++) {
1402     signed short pp = 0; // pattern position
1403     signed short wp = 0; // "words" position
1404     int ok2;
1405     ok = 1;
1406     ok2 = 1;
1407     do {
1408       while ((pp < defcpdtable[i].len) && (wp <= wnum)) {
1409         if (((pp+1) < defcpdtable[i].len) &&
1410           ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) {
1411             int wend = (defcpdtable[i].def[pp+1] == '?') ? wp : wnum;
1412             ok2 = 1;
1413             pp+=2;
1414             btpp[bt] = pp;
1415             btwp[bt] = wp;
1416             while (wp <= wend) {
1417                 if (!(*words)[wp]->alen ||
1418                   !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp-2], (*words)[wp]->alen)) {
1419                     ok2 = 0;
1420                     break;
1421                 }
1422                 wp++;
1423             }
1424             if (wp <= wnum) ok2 = 0;
1425             btnum[bt] = wp - btwp[bt];
1426             if (btnum[bt] > 0) bt++;
1427             if (ok2) break;
1428         } else {
1429             ok2 = 1;
1430             if (!(*words)[wp] || !(*words)[wp]->alen ||
1431               !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp], (*words)[wp]->alen)) {
1432                 ok = 0;
1433                 break;
1434             }
1435             pp++;
1436             wp++;
1437             if ((defcpdtable[i].len == pp) && !(wp > wnum)) ok = 0;
1438         }
1439       }
1440     if (ok && ok2) {
1441         int r = pp;
1442         while ((defcpdtable[i].len > r) && ((r+1) < defcpdtable[i].len) &&
1443             ((defcpdtable[i].def[r+1] == '*') || (defcpdtable[i].def[r+1] == '?'))) r+=2;
1444         if (defcpdtable[i].len <= r) return 1;
1445     }
1446     // backtrack
1447     if (bt) do {
1448         ok = 1;
1449         btnum[bt - 1]--;
1450         pp = btpp[bt - 1];
1451         wp = btwp[bt - 1] + btnum[bt - 1];
1452     } while ((btnum[bt - 1] < 0) && --bt);
1453   } while (bt);
1454
1455   if (ok && ok2 && (!all || (defcpdtable[i].len <= pp))) return 1;
1456   // check zero ending
1457   while (ok && ok2 && (defcpdtable[i].len > pp) && ((pp+1) < defcpdtable[i].len) &&
1458     ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) pp+=2;
1459   if (ok && ok2 && (defcpdtable[i].len <= pp)) return 1;
1460   }
1461   (*words)[wnum] = NULL;
1462   if (w) *words = NULL;
1463   return 0;
1464 }
1465
1466 inline int AffixMgr::candidate_check(const char * word, int len)
1467 {
1468   struct hentry * rv=NULL;
1469
1470   rv = lookup(word);
1471   if (rv) return 1;
1472
1473 //  rv = prefix_check(word,len,1);
1474 //  if (rv) return 1;
1475
1476   rv = affix_check(word,len);
1477   if (rv) return 1;
1478   return 0;
1479 }
1480
1481 // calculate number of syllable for compound-checking
1482 short AffixMgr::get_syllable(const char * word, int wlen)
1483 {
1484     if (cpdmaxsyllable==0) return 0;
1485
1486     short num=0;
1487
1488     if (!utf8) {
1489         for (int i=0; i<wlen; i++) {
1490             if (strchr(cpdvowels, word[i])) num++;
1491         }
1492     } else if (cpdvowels_utf16) {
1493         w_char w[MAXWORDUTF8LEN];
1494         int i = u8_u16(w, MAXWORDUTF8LEN, word);
1495         for (; i; i--) {
1496             if (flag_bsearch((unsigned short *) cpdvowels_utf16,
1497                 ((unsigned short *) w)[i - 1], cpdvowels_utf16_len)) num++;
1498         }
1499     }
1500     return num;
1501 }
1502
1503 // check if compound word is correctly spelled
1504 // hu_mov_rule = spec. Hungarian rule (XXX)
1505 struct hentry * AffixMgr::compound_check(const char * word, int len,
1506     short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words = NULL,
1507     char hu_mov_rule = 0, int * cmpdstemnum = NULL, int * cmpdstem = NULL, char is_sug = 0)
1508 {
1509     int i;
1510     short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
1511     int oldcmpdstemnum = 0;
1512     struct hentry * rv = NULL;
1513     struct hentry * rv_first;
1514     struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
1515     char st [MAXWORDUTF8LEN + 4];
1516     char ch;
1517     int cmin;
1518     int cmax;
1519
1520     int checked_prefix;
1521
1522 #ifdef HUNSTEM
1523     if (cmpdstemnum) {
1524         if (wordnum == 0) {
1525             *cmpdstemnum = 1;
1526         } else {
1527             (*cmpdstemnum)++;
1528         }
1529     }
1530 #endif
1531     if (utf8) {
1532         for (cmin = 0, i = 0; (i < cpdmin) && word[cmin]; i++) {
1533           cmin++;
1534           for (; (word[cmin] & 0xc0) == 0x80; cmin++);
1535         }
1536         for (cmax = len, i = 0; (i < (cpdmin - 1)) && cmax; i++) {
1537           cmax--;
1538           for (; (word[cmax] & 0xc0) == 0x80; cmax--);
1539         }
1540     } else {
1541         cmin = cpdmin;
1542         cmax = len - cpdmin + 1;
1543     }
1544
1545     strcpy(st, word);
1546
1547     for (i = cmin; i < cmax; i++) {
1548
1549         oldnumsyllable = numsyllable;
1550         oldwordnum = wordnum;
1551         checked_prefix = 0;
1552
1553         // go to end of the UTF-8 character
1554         if (utf8) {
1555             for (; (st[i] & 0xc0) == 0x80; i++);
1556             if (i >= cmax) return NULL;
1557         }
1558
1559
1560         ch = st[i];
1561         st[i] = '\0';
1562
1563         sfx = NULL;
1564         pfx = NULL;
1565
1566         // FIRST WORD
1567
1568         rv = lookup(st); // perhaps without prefix
1569
1570         // search homonym with compound flag
1571         while ((rv) && !hu_mov_rule &&
1572             ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
1573                 !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1574                   (compoundbegin && !wordnum &&
1575                         TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
1576                   (compoundmiddle && wordnum && !words &&
1577                     TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
1578                   (numdefcpd &&
1579                     ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
1580                     (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))
1581                   ))) {
1582             rv = rv->next_homonym;
1583         }
1584
1585         if (!rv) {
1586             if (compoundflag &&
1587              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
1588                 if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
1589                         FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
1590                     ((SfxEntry*)sfx)->getCont() &&
1591                         ((compoundforbidflag && TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
1592                             ((SfxEntry*)sfx)->getContLen())) || (compoundend &&
1593                         TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend,
1594                             ((SfxEntry*)sfx)->getContLen())))) {
1595                         rv = NULL;
1596                 }
1597             }
1598             if (rv ||
1599               (((wordnum == 0) && compoundbegin &&
1600                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
1601                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
1602               ((wordnum > 0) && compoundmiddle &&
1603                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
1604                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
1605               ) checked_prefix = 1;
1606         // else check forbiddenwords and pseudoroot
1607         } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1608             TESTAFF(rv->astr, pseudoroot, rv->alen) ||
1609             (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen))
1610              )) {
1611                 st[i] = ch;
1612                 continue;
1613         }
1614
1615             // check non_compound flag in suffix and prefix
1616             if ((rv) && !hu_mov_rule &&
1617                 ((pfx && ((PfxEntry*)pfx)->getCont() &&
1618                     TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag,
1619                         ((PfxEntry*)pfx)->getContLen())) ||
1620                 (sfx && ((SfxEntry*)sfx)->getCont() &&
1621                     TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
1622                         ((SfxEntry*)sfx)->getContLen())))) {
1623                     rv = NULL;
1624             }
1625
1626             // check compoundend flag in suffix and prefix
1627             if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
1628                 ((pfx && ((PfxEntry*)pfx)->getCont() &&
1629                     TESTAFF(((PfxEntry*)pfx)->getCont(), compoundend,
1630                         ((PfxEntry*)pfx)->getContLen())) ||
1631                 (sfx && ((SfxEntry*)sfx)->getCont() &&
1632                     TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend,
1633                         ((SfxEntry*)sfx)->getContLen())))) {
1634                     rv = NULL;
1635             }
1636
1637             // check compoundmiddle flag in suffix and prefix
1638             if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
1639                 ((pfx && ((PfxEntry*)pfx)->getCont() &&
1640                     TESTAFF(((PfxEntry*)pfx)->getCont(), compoundmiddle,
1641                         ((PfxEntry*)pfx)->getContLen())) ||
1642                 (sfx && ((SfxEntry*)sfx)->getCont() &&
1643                     TESTAFF(((SfxEntry*)sfx)->getCont(), compoundmiddle,
1644                         ((SfxEntry*)sfx)->getContLen())))) {
1645                     rv = NULL;
1646             }
1647
1648         // check forbiddenwords
1649         if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1650             (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) {
1651                 return NULL;
1652             }
1653
1654         // increment word number, if the second root has a compoundroot flag
1655         if ((rv) && compoundroot &&
1656             (TESTAFF(rv->astr, compoundroot, rv->alen))) {
1657                 wordnum++;
1658         }
1659
1660         // first word is acceptable in compound words?
1661         if (((rv) &&
1662           ( checked_prefix || (words && words[wnum]) ||
1663             (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1664             ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
1665             ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen))// ||
1666 //            (numdefcpd && )
1667
1668 // LANG_hu section: spec. Hungarian rule
1669             || ((langnum == LANG_hu) && hu_mov_rule && (
1670                     TESTAFF(rv->astr, 'F', rv->alen) || // XXX hardwired Hungarian dictionary codes
1671                     TESTAFF(rv->astr, 'G', rv->alen) ||
1672                     TESTAFF(rv->astr, 'H', rv->alen)
1673                 )
1674               )
1675 // END of LANG_hu section
1676           )
1677           && ! (( checkcompoundtriple && // test triple letters
1678                    (word[i-1]==word[i]) && (
1679                       ((i>1) && (word[i-1]==word[i-2])) ||
1680                       ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
1681                    )
1682                ) ||
1683                (
1684                  // test CHECKCOMPOUNDPATTERN
1685                  numcheckcpd && cpdpat_check(word, i)
1686                ) ||
1687                (
1688                  checkcompoundcase && cpdcase_check(word, i)
1689                ))
1690          )
1691 // LANG_hu section: spec. Hungarian rule
1692          || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
1693               (sfx && ((SfxEntry*)sfx)->getCont() && ( // XXX hardwired Hungarian dic. codes
1694                         TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) 'x', ((SfxEntry*)sfx)->getContLen()) ||
1695                         TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) '%', ((SfxEntry*)sfx)->getContLen())
1696                     )
1697                )
1698              )
1699 // END of LANG_hu section
1700          ) {
1701
1702 // LANG_hu section: spec. Hungarian rule
1703             if (langnum == LANG_hu) {
1704                 // calculate syllable number of the word
1705                 numsyllable += get_syllable(st, i);
1706
1707                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
1708                 if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
1709             }
1710 // END of LANG_hu section
1711
1712 #ifdef HUNSTEM
1713             if (cmpdstem) cmpdstem[*cmpdstemnum - 1] = i;
1714 #endif
1715
1716             // NEXT WORD(S)
1717             rv_first = rv;
1718             rv = lookup((word+i)); // perhaps without prefix
1719
1720         // search homonym with compound flag
1721         while ((rv) && ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
1722                         !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1723                           (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
1724                            (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
1725             rv = rv->next_homonym;
1726         }
1727
1728             if (rv && words && words[wnum + 1]) return rv;
1729
1730             oldnumsyllable2 = numsyllable;
1731             oldwordnum2 = wordnum;
1732
1733 // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary code
1734             if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
1735                 numsyllable--;
1736             }
1737 // END of LANG_hu section
1738
1739             // increment word number, if the second root has a compoundroot flag
1740             if ((rv) && (compoundroot) &&
1741                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
1742                     wordnum++;
1743             }
1744
1745             // check forbiddenwords
1746             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1747                (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
1748
1749             // second word is acceptable, as a root?
1750             // hungarian conventions: compounding is acceptable,
1751             // when compound forms consist of 2 words, or if more,
1752             // then the syllable number of root words must be 6, or lesser.
1753
1754             if ((rv) && (
1755                       (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1756                       (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
1757                     )
1758                 && (
1759                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) ||
1760                       ((cpdmaxsyllable==0) ||
1761                           (numsyllable + get_syllable(rv->word,rv->wlen)<=cpdmaxsyllable))
1762                     )
1763                 && (
1764                      (!checkcompounddup || (rv != rv_first))
1765                    )
1766                 )
1767                  {
1768                       // forbid compound word, if it is a non compound word with typical fault
1769                       if (checkcompoundrep && cpdrep_check(word,len)) return NULL;
1770                       return rv;
1771             }
1772
1773             numsyllable = oldnumsyllable2 ;
1774             wordnum = oldwordnum2;
1775
1776             // perhaps second word has prefix or/and suffix
1777             sfx = NULL;
1778             sfxflag = FLAG_NULL;
1779             rv = (compoundflag) ? affix_check((word+i),strlen(word+i), compoundflag, IN_CPD_END) : NULL;
1780             if (!rv && compoundend) {
1781                 sfx = NULL;
1782                 pfx = NULL;
1783                 rv = affix_check((word+i),strlen(word+i), compoundend, IN_CPD_END);
1784             }
1785
1786             if (!rv && numdefcpd && words) {
1787                 rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
1788                 if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1)) return rv;
1789             }
1790
1791             // check non_compound flag in suffix and prefix
1792             if ((rv) &&
1793                 ((pfx && ((PfxEntry*)pfx)->getCont() &&
1794                     TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag,
1795                         ((PfxEntry*)pfx)->getContLen())) ||
1796                 (sfx && ((SfxEntry*)sfx)->getCont() &&
1797                     TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
1798                         ((SfxEntry*)sfx)->getContLen())))) {
1799                     rv = NULL;
1800             }
1801
1802             // check forbiddenwords
1803             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1804                (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
1805
1806             // pfxappnd = prefix of word+i, or NULL
1807             // calculate syllable number of prefix.
1808             // hungarian convention: when syllable number of prefix is more,
1809             // than 1, the prefix+word counts as two words.
1810
1811             if (langnum == LANG_hu) {
1812                 // calculate syllable number of the word
1813                 numsyllable += get_syllable(word + i, strlen(word + i));
1814
1815                 // - affix syllable num.
1816                 // XXX only second suffix (inflections, not derivations)
1817                 if (sfxappnd) {
1818                     char * tmp = myrevstrdup(sfxappnd);
1819                     numsyllable -= get_syllable(tmp, strlen(tmp));
1820                     free(tmp);
1821                 }
1822
1823                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
1824                 if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
1825
1826                 // increment syllable num, if last word has a SYLLABLENUM flag
1827                 // and the suffix is beginning `s'
1828
1829                 if (cpdsyllablenum) {
1830                     switch (sfxflag) {
1831                         case 'c': { numsyllable+=2; break; }
1832                         case 'J': { numsyllable += 1; break; }
1833                         case 'I': { if (TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
1834                     }
1835                 }
1836             }
1837
1838             // increment word number, if the second word has a compoundroot flag
1839             if ((rv) && (compoundroot) &&
1840                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
1841                     wordnum++;
1842             }
1843
1844             // second word is acceptable, as a word with prefix or/and suffix?
1845             // hungarian conventions: compounding is acceptable,
1846             // when compound forms consist 2 word, otherwise
1847             // the syllable number of root words is 6, or lesser.
1848             if ((rv) &&
1849                     (
1850                       ((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
1851                       ((cpdmaxsyllable == 0) ||
1852                           (numsyllable <= cpdmaxsyllable))
1853                     )
1854                 && (
1855                    (!checkcompounddup || (rv != rv_first))
1856                    )) {
1857                     // forbid compound word, if it is a non compound word with typical fault
1858                     if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
1859                     return rv;
1860             }
1861
1862             numsyllable = oldnumsyllable2;
1863             wordnum = oldwordnum2;
1864 #ifdef HUNSTEM
1865             if (cmpdstemnum) oldcmpdstemnum = *cmpdstemnum;
1866 #endif
1867             // perhaps second word is a compound word (recursive call)
1868             if (wordnum < maxwordnum) {
1869                 rv = compound_check((word+i),strlen(word+i), wordnum+1,
1870                      numsyllable, maxwordnum, wnum + 1, words,
1871                      0, cmpdstemnum, cmpdstem, is_sug);
1872             } else {
1873                 rv=NULL;
1874             }
1875             if (rv) {
1876                 // forbid compound word, if it is a non compound word with typical fault
1877                 if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
1878                 return rv;
1879             } else {
1880 #ifdef HUNSTEM
1881             if (cmpdstemnum) *cmpdstemnum = oldcmpdstemnum;
1882 #endif
1883             }
1884         }
1885         st[i] = ch;
1886         wordnum = oldwordnum;
1887         numsyllable = oldnumsyllable;
1888     }
1889
1890     return NULL;
1891 }
1892
1893 #ifdef HUNSPELL_EXPERIMENTAL
1894 // check if compound word is correctly spelled (morphological analysis variant)
1895 // hu_mov_rule = spec. Hungarian rule (XXX)
     //
     // For every split position i in [cmin, cmax) the part before i is checked as
     // the first member of a compound and the rest as the following member(s):
     // as a dictionary root, as an affixed form, or (recursively) as a compound.
     // Each accepted analysis is appended to *result as one '\n'-terminated line
     // that starts with partresult (the text collected by the calling level).
     //
     //   word, len      word to analyse and its length (indexed byte-wise)
     //   wordnum        words counted so far (tested against cpdwordmax/maxwordnum)
     //   numsyllable    syllables counted so far (tested against cpdmaxsyllable)
     //   maxwordnum     the recursive call is made only while wordnum < maxwordnum
     //   wnum, words    position and word list handed to defcpd_check()
     //   hu_mov_rule    enables the special Hungarian rule; the recursion passes 0
     //   result         output text, appended with strcat() and never bounds-checked
     //                  here. NOTE(review): it is dereferenced as soon as an analysis
     //                  is found, so callers must not rely on the NULL default.
     //   partresult     optional text copied to the front of every analysis
     //
     // Always returns 0.
1896 int AffixMgr::compound_check_morph(const char * word, int len,
1897     short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
1898     char hu_mov_rule = 0, char ** result = NULL, char * partresult = NULL)
1899 {
1900     int i;
1901     short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
1902     int ok = 0;
1903
1904     struct hentry * rv = NULL;
1905     struct hentry * rv_first;
1906     struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
1907     char st [MAXWORDUTF8LEN + 4];
1908     char ch;
1909
1910     int checked_prefix;
1911     char presult[MAXLNLEN];
1912
1913     int cmin;
1914     int cmax;
1915
         // cmin/cmax: first and one-past-last split position; in UTF-8 mode they are
         // advanced over continuation bytes (10xxxxxx) so cpdmin counts characters
1916     if (utf8) {
1917         for (cmin = 0, i = 0; (i < cpdmin) && word[cmin]; i++) {
1918           cmin++;
1919           for (; (word[cmin] & 0xc0) == 0x80; cmin++);
1920         }
1921         for (cmax = len, i = 0; (i < (cpdmin - 1)) && cmax; i++) {
1922           cmax--;
1923           for (; (word[cmax] & 0xc0) == 0x80; cmax--);
1924         }
1925     } else {
1926         cmin = cpdmin;
1927         cmax = len - cpdmin + 1;
1928     }
1929
         // st is a scratch copy of word; st[i] is temporarily replaced by '\0' to
         // isolate the first part and must be put back before the next split
1930     strcpy(st, word);
1931
1932     for (i = cmin; i < cmax; i++) {
1933         oldnumsyllable = numsyllable;
1934         oldwordnum = wordnum;
1935         checked_prefix = 0;
1936
1937         // go to end of the UTF-8 character
1938         if (utf8) {
1939             for (; (st[i] & 0xc0) == 0x80; i++);
1940             if (i >= cmax) return 0;
1941         }
1942
1943         ch = st[i];
1944         st[i] = '\0';
1945         sfx = NULL;
1946
1947         // FIRST WORD
1948         *presult = '\0';
1949         if (partresult) strcat(presult, partresult);
1950
1951         rv = lookup(st); // perhaps without prefix
1952
1953         // search homonym with compound flag
1954         while ((rv) && !hu_mov_rule &&
1955             ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
1956                 !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1957                 (compoundbegin && !wordnum &&
1958                         TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
1959                 (compoundmiddle && wordnum && !words &&
1960                     TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
1961                   (numdefcpd &&
1962                     ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
1963                     (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))
1964                   ))) {
1965             rv = rv->next_homonym;
1966         }
1967
1968         if (rv)  {
1969             if (rv->description) {
1970                 if ((!rv->astr) || !TESTAFF(rv->astr, lemma_present, rv->alen))
1971                                         strcat(presult, st);
1972                 strcat(presult, rv->description);
1973             }
1974         }
1975
1976         if (!rv) {
1977             if (compoundflag &&
1978              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
1979                 if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
1980                         FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
1981                     ((SfxEntry*)sfx)->getCont() &&
1982                         ((compoundforbidflag && TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
1983                             ((SfxEntry*)sfx)->getContLen())) || (compoundend &&
1984                         TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend,
1985                             ((SfxEntry*)sfx)->getContLen())))) {
1986                         rv = NULL;
1987                 }
1988             }
1989
1990             if (rv ||
1991               (((wordnum == 0) && compoundbegin &&
1992                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
1993                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
1994               ((wordnum > 0) && compoundmiddle &&
1995                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
1996                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
1997               ) {
1998                 //char * p = prefix_check_morph(st, i, 0, compound);
1999                 char * p = NULL;
2000                 if (compoundflag) p = affix_check_morph(st, i, compoundflag);
2001                 if (!p || (*p == '\0')) {
2002                    if ((wordnum == 0) && compoundbegin) {
                          free(p); // release the empty analysis before replacing it
2003                      p = affix_check_morph(st, i, compoundbegin);
2004                    } else if ((wordnum > 0) && compoundmiddle) {
                          free(p); // release the empty analysis before replacing it
2005                      p = affix_check_morph(st, i, compoundmiddle);
2006                    }
2007                 }
                     // p may still be NULL here (no flag applied or no analysis returned)
2008                 if (p && (*p != '\0')) {
2009                     line_uniq(p);
2010                     if (strchr(p, '\n')) {
2011                         strcat(presult, "(");
2012                         strcat(presult, line_join(p, '|'));
2013                         strcat(presult, ")");
2014                       } else {
2015                         strcat(presult, p);
2016                       }
2017                 }
                     // the analysis text has been copied into presult; release it the
                     // same way the second-word analyses (m) are released below
                     free(p);
                     // strip one trailing newline, taking care of an empty presult
                     size_t plen = strlen(presult);
2018                 if ((plen > 0) && (presult[plen - 1] == '\n')) {
2019                     presult[plen - 1] = '\0';
2020                 }
2021                 checked_prefix = 1;
2022                 //strcat(presult, "+");
2023             }
2024         // else check forbiddenwords
2025         } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
2026             TESTAFF(rv->astr, pseudoroot, rv->alen))) {
2027                 st[i] = ch;
2028                 continue;
2029         }
2030
2031             // check non_compound flag in suffix and prefix
2032             if ((rv) && !hu_mov_rule &&
2033                 ((pfx && ((PfxEntry*)pfx)->getCont() &&
2034                     TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag,
2035                         ((PfxEntry*)pfx)->getContLen())) ||
2036                 (sfx && ((SfxEntry*)sfx)->getCont() &&
2037                     TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
2038                         ((SfxEntry*)sfx)->getContLen())))) {
                         // put the split character back, or later splits see a truncated st
                         st[i] = ch;
2039                     continue;
2040             }
2041
2042             // check compoundend flag in suffix and prefix
2043             if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
2044                 ((pfx && ((PfxEntry*)pfx)->getCont() &&
2045                     TESTAFF(((PfxEntry*)pfx)->getCont(), compoundend,
2046                         ((PfxEntry*)pfx)->getContLen())) ||
2047                 (sfx && ((SfxEntry*)sfx)->getCont() &&
2048                     TESTAFF(((SfxEntry*)sfx)->getCont(), compoundend,
2049                         ((SfxEntry*)sfx)->getContLen())))) {
                         // put the split character back, or later splits see a truncated st
                         st[i] = ch;
2050                     continue;
2051             }
2052
2053             // check compoundmiddle flag in suffix and prefix
2054             if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
2055                 ((pfx && ((PfxEntry*)pfx)->getCont() &&
2056                     TESTAFF(((PfxEntry*)pfx)->getCont(), compoundmiddle,
2057                         ((PfxEntry*)pfx)->getContLen())) ||
2058                 (sfx && ((SfxEntry*)sfx)->getCont() &&
2059                     TESTAFF(((SfxEntry*)sfx)->getCont(), compoundmiddle,
2060                         ((SfxEntry*)sfx)->getContLen())))) {
2061                     rv = NULL;
2062             }
2063
2064         // check forbiddenwords
2065         if ((rv) && (rv->astr) && TESTAFF(rv->astr, forbiddenword, rv->alen)) {
                 // put the split character back, or later splits see a truncated st
                 st[i] = ch;
                 continue;
             }
2066
2067         // increment word number, if the second root has a compoundroot flag
2068         if ((rv) && (compoundroot) &&
2069             (TESTAFF(rv->astr, compoundroot, rv->alen))) {
2070                 wordnum++;
2071         }
2072
2073         // first word is acceptable in compound words?
2074         if (((rv) &&
2075           ( checked_prefix || (words && words[wnum]) ||
2076             (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
2077             ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
2078             ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen))
2079 // LANG_hu section: spec. Hungarian rule
2080             || ((langnum == LANG_hu) && // hu_mov_rule
2081                 hu_mov_rule && (
2082                     TESTAFF(rv->astr, 'F', rv->alen) ||
2083                     TESTAFF(rv->astr, 'G', rv->alen) ||
2084                     TESTAFF(rv->astr, 'H', rv->alen)
2085                 )
2086               )
2087 // END of LANG_hu section
2088           )
2089           && ! (( checkcompoundtriple && // test triple letters
2090                    (word[i-1]==word[i]) && (
2091                       ((i>1) && (word[i-1]==word[i-2])) ||
2092                       ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
2093                    )
2094                ) ||
2095                (
2096                    // test CHECKCOMPOUNDPATTERN
2097                    numcheckcpd && cpdpat_check(word, i)
2098                ) ||
2099                (
2100                  checkcompoundcase && cpdcase_check(word, i)
2101                ))
2102          )
2103 // LANG_hu section: spec. Hungarian rule
2104          || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
2105               (sfx && ((SfxEntry*)sfx)->getCont() && (
2106                         TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) 'x', ((SfxEntry*)sfx)->getContLen()) ||
2107                         TESTAFF(((SfxEntry*)sfx)->getCont(), (unsigned short) '%', ((SfxEntry*)sfx)->getContLen())
2108                     )
2109                )
2110              )
2111 // END of LANG_hu section
2112          ) {
2113
2114 // LANG_hu section: spec. Hungarian rule
2115             if (langnum == LANG_hu) {
2116                 // calculate syllable number of the word
2117                 numsyllable += get_syllable(st, i);
2118
2119                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
2120                 if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
2121             }
2122 // END of LANG_hu section
2123
2124             // NEXT WORD(S)
2125             rv_first = rv;
2126             rv = lookup((word+i)); // perhaps without prefix
2127
2128         // search homonym with compound flag
2129         while ((rv) && ((pseudoroot && TESTAFF(rv->astr, pseudoroot, rv->alen)) ||
2130                         !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
2131                           (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
2132                            (numdefcpd && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
2133             rv = rv->next_homonym;
2134         }
2135
2136             if (rv && words && words[wnum + 1]) {
2137                   strcat(*result, presult);
2138                   if (complexprefixes && rv->description) strcat(*result, rv->description);
2139                   if (rv->description && ((!rv->astr) ||
2140                      !TESTAFF(rv->astr, lemma_present, rv->alen)))
2141                         strcat(*result, rv->word);
2142                   if (!complexprefixes && rv->description) strcat(*result, rv->description);
2143                   strcat(*result, "\n");
2144                   ok = 1;
2145                   return 0;
2146             }
2147
2148             oldnumsyllable2 = numsyllable;
2149             oldwordnum2 = wordnum;
2150
2151 // LANG_hu section: spec. Hungarian rule
2152             if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
2153                 numsyllable--;
2154             }
2155 // END of LANG_hu section
2156             // increment word number, if the second root has a compoundroot flag
2157             if ((rv) && (compoundroot) &&
2158                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
2159                     wordnum++;
2160             }
2161
2162             // check forbiddenwords
2163             if ((rv) && (rv->astr) && TESTAFF(rv->astr, forbiddenword, rv->alen)) {
2164                 st[i] = ch;
                     // the loop tail is skipped by continue: undo this split's counter
                     // changes so they do not leak into the next split position
                     wordnum = oldwordnum;
                     numsyllable = oldnumsyllable;
2165                 continue;
2166             }
2167
2168             // second word is acceptable, as a root?
2169             // hungarian conventions: compounding is acceptable,
2170             // when compound forms consist of 2 words, or if more,
2171             // then the syllable number of root words must be 6, or lesser.
2172             if ((rv) && (
2173                       (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
2174                       (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
2175                     )
2176                 && (
2177                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) ||
2178                       ((cpdmaxsyllable==0) ||
2179                           (numsyllable+get_syllable(rv->word,rv->wlen)<=cpdmaxsyllable))
2180                     )
2181                 && (
2182                      (!checkcompounddup || (rv != rv_first))
2183                    )
2184                 )
2185                  {
2186                       // bad compound word
2187                       strcat(*result, presult);
2188
2189                       if (rv->description) {
2190                         if (complexprefixes) strcat(*result, rv->description);
2191                         if ((!rv->astr) || !TESTAFF(rv->astr, lemma_present, rv->alen))
2192                                                strcat(*result, rv->word);
2193                         if (!complexprefixes) strcat(*result, rv->description);
2194                       }
2195                       strcat(*result, "\n");
2196                               ok = 1;
2197             }
2198
2199             numsyllable = oldnumsyllable2 ;
2200             wordnum = oldwordnum2;
2201
2202             // perhaps second word has prefix or/and suffix
2203             sfx = NULL;
2204             sfxflag = FLAG_NULL;
2205
2206             if (compoundflag) rv = affix_check((word+i),strlen(word+i), compoundflag); else rv = NULL;
2207
2208             if (!rv && compoundend) {
2209                 sfx = NULL;
2210                 pfx = NULL;
2211                 rv = affix_check((word+i),strlen(word+i), compoundend);
2212             }
2213
2214             if (!rv && numdefcpd && words) {
2215                 rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
2216                 if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) {
2217                       char * m = NULL;
2218                       if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
2219                       if ((!m || *m == '\0') && compoundend) {
                                 free(m); // release the empty analysis before replacing it
2220                             m = affix_check_morph((word+i),strlen(word+i), compoundend);
                           }
2221                       strcat(*result, presult);
2222                       if (m) {
2223                         line_uniq(m);
2224                         if (strchr(m, '\n')) {
2225                             strcat(*result, "(");
2226                             strcat(*result, line_join(m, '|'));
2227                             strcat(*result, ")");
2228                         } else {
2229                             strcat(*result, m);
2230                         }
2231                         free(m);
2232                       }
2233                       strcat(*result, "\n");
2234                       ok = 1;
2235                 }
2236             }
2237
2238             // check non_compound flag in suffix and prefix
2239             if ((rv) &&
2240                 ((pfx && ((PfxEntry*)pfx)->getCont() &&
2241                     TESTAFF(((PfxEntry*)pfx)->getCont(), compoundforbidflag,
2242                         ((PfxEntry*)pfx)->getContLen())) ||
2243                 (sfx && ((SfxEntry*)sfx)->getCont() &&
2244                     TESTAFF(((SfxEntry*)sfx)->getCont(), compoundforbidflag,
2245                         ((SfxEntry*)sfx)->getContLen())))) {
2246                     rv = NULL;
2247             }
2248
2249             // check forbiddenwords
2250             if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen))
2251                     && (! TESTAFF(rv->astr, pseudoroot, rv->alen))) {
2252                         st[i] = ch;
                             // the loop tail is skipped by continue: undo this split's
                             // counter changes so they do not leak into the next split
                             wordnum = oldwordnum;
                             numsyllable = oldnumsyllable;
2253                         continue;
2254                     }
2255
2256             if (langnum == LANG_hu) {
2257                 // calculate syllable number of the word
2258                 numsyllable += get_syllable(word + i, strlen(word + i));
2259
2260                 // - affix syllable num.
2261                 // XXX only second suffix (inflections, not derivations)
2262                 if (sfxappnd) {
2263                     char * tmp = myrevstrdup(sfxappnd);
2264                     numsyllable -= get_syllable(tmp, strlen(tmp));
2265                     free(tmp);
2266                 }
2267
2268                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
2269                 if (pfx && (get_syllable(((PfxEntry *)pfx)->getKey(),strlen(((PfxEntry *)pfx)->getKey())) > 1)) wordnum++;
2270
2271                 // increment syllable num, if last word has a SYLLABLENUM flag
2272                 // and the suffix is beginning `s'
2273
2274                 if (cpdsyllablenum) {
2275                     switch (sfxflag) {
2276                         case 'c': { numsyllable+=2; break; }
2277                         case 'J': { numsyllable += 1; break; }
2278                         case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
2279                     }
2280                 }
2281             }
2282
2283             // increment word number, if the second word has a compoundroot flag
2284             if ((rv) && (compoundroot) &&
2285                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
2286                     wordnum++;
2287             }
2288             // second word is acceptable, as a word with prefix or/and suffix?
2289             // hungarian conventions: compounding is acceptable,
2290             // when compound forms consist 2 word, otherwise
2291             // the syllable number of root words is 6, or lesser.
2292             if ((rv) &&
2293                     (
2294                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) ||
2295                       ((cpdmaxsyllable==0) ||
2296                           (numsyllable <= cpdmaxsyllable))
2297                     )
2298                 && (
2299                    (!checkcompounddup || (rv != rv_first))
2300                    )) {
2301                       char * m = NULL;
2302                       if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
2303                       if ((!m || *m == '\0') && compoundend) {
                                 free(m); // release the empty analysis before replacing it
2304                             m = affix_check_morph((word+i),strlen(word+i), compoundend);
                           }
2305                       strcat(*result, presult);
2306                       if (m) {
2307                         line_uniq(m);
2308                         if (strchr(m, '\n')) {
2309                             strcat(*result, "(");
2310                             strcat(*result, line_join(m, '|'));
2311                             strcat(*result, ")");
2312                         } else {
2313                             strcat(*result, m);
2314                         }
2315                         free(m);
2316                       }
2317                       strcat(*result, "\n");
2318                       ok = 1;
2319             }
2320
2321             numsyllable = oldnumsyllable2;
2322             wordnum = oldwordnum2;
2323
2324             // perhaps second word is a compound word (recursive call)
                 // presult becomes the partresult of the nested level
2325             if ((wordnum < maxwordnum) && (ok == 0)) {
2326                         compound_check_morph((word+i),strlen(word+i), wordnum+1,
2327                              numsyllable, maxwordnum, wnum + 1, words, 0, result, presult);
2328             } else {
2329                 rv=NULL;
2330             }
2331         }
             // restore the scratch word and the counters for the next split position
2332         st[i] = ch;
2333         wordnum = oldwordnum;
2334         numsyllable = oldnumsyllable;
2335     }
2336     return 0;
2337 }
2338 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
2339
2340  // return 1 if s1 (reversed) is a leading subset of end of s2
2341 /* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
2342  {
2343     while ((len > 0) && *s1 && (*s1 == *end_of_s2)) {
2344         s1++;
2345         end_of_s2--;
2346         len--;
2347     }
2348     return (*s1 == '\0');
2349  }
2350  */
2351
2352 inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
2353  {
2354     while ((len > 0) && (*s1 != '\0') && ((*s1 == *end_of_s2) || (*s1 == '.'))) {
2355         s1++;
2356         end_of_s2--;
2357         len--;
2358     }
2359     return (*s1 == '\0');
2360  }
2361
2362 // check word for suffixes
     //
     // Tries the zero-length suffixes (sStart[0]) first, then the suffix entries
     // indexed by the last byte of word. An entry is passed to checkword() only
     // if the compound-position, circumfix, onlyincompound and pseudoroot
     // conditions below hold.
     //
     //   word, len          candidate word and its length
     //   sfxopts            options forwarded to checkword()
     //   ppfx               prefix entry (used as a PfxEntry), or NULL
     //   wlst, maxSug, ns   forwarded unchanged to checkword()
     //   cclass             continuation class flag; 0 if none is required
     //   needflag           forwarded to checkword()
     //   in_compound        position inside a compound (e.g. IN_CPD_BEGIN), 0 outside
     //
     // Returns the entry found by checkword(), or NULL. On success the members
     // sfx (both passes) and sfxflag, sfxappnd, derived (second pass only) are
     // updated, so the function is not stateless.
2363
2364 struct hentry * AffixMgr::suffix_check (const char * word, int len,
2365        int sfxopts, AffEntry * ppfx, char ** wlst, int maxSug, int * ns,
2366        const FLAG cclass, const FLAG needflag, char in_compound)
2367 {
2368     struct hentry * rv = NULL;
2370
2371     PfxEntry* ep = (PfxEntry *) ppfx;
2372
2373     // first handle the special case of 0 length suffixes
2374     SfxEntry * se = (SfxEntry *) sStart[0];
2375
2376     while (se) {
2377         if (!cclass || se->getCont()) {
2378             // suffixes are not allowed in beginning of compounds
2379             if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2380              // except when signed with compoundpermitflag flag
2381              (se->getCont() && compoundpermitflag &&
2382                 TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
2383               // no circumfix flag in prefix and suffix
2384               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2385                    circumfix, ep->getContLen())) &&
2386                (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
2387               // circumfix flag in prefix AND suffix
2388               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2389                    circumfix, ep->getContLen())) &&
2390                (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
2391             // fogemorpheme
2392               (in_compound ||
2393                  !((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
2394             // pseudoroot on prefix or first suffix
2395               (cclass ||
2396                    !(se->getCont() && TESTAFF(se->getCont(), pseudoroot, se->getContLen())) ||
2397                    (ppfx && !((ep->getCont()) &&
2398                      TESTAFF(ep->getCont(), pseudoroot,
2399                        ep->getContLen())))
2400               )
2401             ) {
2402                 rv = se->checkword(word,len, sfxopts, ppfx, wlst, maxSug, ns, (FLAG) cclass,
2403                     needflag, (in_compound ? 0 : onlyincompound));
2404                 if (rv) {
2405                     sfx=(AffEntry *)se; // BUG: sfx not stateless
2406                     return rv;
2407                 }
2408             }
2409         }
2410        se = se->getNext();
2411     }
2412
         // an empty word has no last character to index the suffix table with;
         // reading word[len - 1] would be out of bounds
         if (len <= 0) return NULL;

2413     // now handle the general case
2414     unsigned char sp = *((const unsigned char *)(word + len - 1));
2415     SfxEntry * sptr = (SfxEntry *) sStart[sp];
2416
2417     while (sptr) {
2418         if (isRevSubset(sptr->getKey(), word + len - 1, len)
2419         ) {
2420             // suffixes are not allowed in beginning of compounds
2421             if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2422              // except when signed with compoundpermitflag flag
2423              (sptr->getCont() && compoundpermitflag &&
2424                 TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
2425               // no circumfix flag in prefix and suffix
2426               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2427                    circumfix, ep->getContLen())) &&
2428                (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
2429               // circumfix flag in prefix AND suffix
2430               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2431                    circumfix, ep->getContLen())) &&
2432                (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
2433             // fogemorpheme
2434               (in_compound ||
2435                  !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
2436             // pseudoroot on prefix or first suffix
2437               (cclass ||
2438                   !(sptr->getCont() && TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())) ||
2439                   (ppfx && !((ep->getCont()) &&
2440                      TESTAFF(ep->getCont(), pseudoroot,
2441                        ep->getContLen())))
2442               )
2443             ) {
2444                 rv = sptr->checkword(word,len, sfxopts, ppfx, wlst,
2445                     maxSug, ns, cclass, needflag, (in_compound ? 0 : onlyincompound));
2446                 if (rv) {
2447                     sfx=(AffEntry *)sptr; // BUG: sfx not stateless
2448                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2449                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
2450                     if (cclass || sptr->getCont()) {
2451                                 if (!derived) {
2452                                         derived = mystrdup(word);
2453                                 } else {
                                             // append "\n" + word to derived in a buffer sized
                                             // to fit, instead of a fixed MAXLNLEN stack array
                                             // that an ever-growing list could overflow
                                             size_t dlen = strlen(derived);
                                             size_t wlen = strlen(word);
                                             char * joined = (char *) malloc(dlen + wlen + 2);
                                             if (joined) {
                                                 memcpy(joined, derived, dlen);
                                                 joined[dlen] = '\n';
                                                 memcpy(joined + dlen + 1, word, wlen + 1); // copies the '\0' too
                                                 free(derived);
                                                 derived = joined;
                                             }
                                             // on allocation failure the previous list is kept
2459                                 }
2460                     }
2461                     return rv;
2462                 }
2463              }
2464              sptr = sptr->getNextEQ();
2465         } else {
2466              sptr = sptr->getNextNE();
2467         }
2468     }
2469
2470     return NULL;
2471 }
2472
2473 // check word for two-level suffixes
2474
2475 struct hentry * AffixMgr::suffix_check_twosfx(const char * word, int len,
2476        int sfxopts, AffEntry * ppfx, const FLAG needflag)
2477 {
2478     struct hentry * rv = NULL;
2479
2480     // first handle the special case of 0 length suffixes
2481     SfxEntry * se = (SfxEntry *) sStart[0];
2482     while (se) {
2483         if (contclasses[se->getFlag()])
2484         {
2485             rv = se->check_twosfx(word,len, sfxopts, ppfx, needflag);
2486             if (rv) return rv;
2487         }
2488         se = se->getNext();
2489     }
2490
2491     // now handle the general case
2492     unsigned char sp = *((const unsigned char *)(word + len - 1));
2493     SfxEntry * sptr = (SfxEntry *) sStart[sp];
2494
2495     while (sptr) {
2496         if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
2497             if (contclasses[sptr->getFlag()])
2498             {
2499                 rv = sptr->check_twosfx(word,len, sfxopts, ppfx, needflag);
2500                 if (rv) {
2501                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2502                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
2503                     return rv;
2504                 }
2505             }
2506             sptr = sptr->getNextEQ();
2507         } else {
2508              sptr = sptr->getNextNE();
2509         }
2510     }
2511
2512     return NULL;
2513 }
2514
2515 #ifdef HUNSPELL_EXPERIMENTAL
2516 char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len,
2517        int sfxopts, AffEntry * ppfx, const FLAG needflag)
2518 {
2519     char result[MAXLNLEN];
2520     char result2[MAXLNLEN];
2521     char result3[MAXLNLEN];
2522
2523     char * st;
2524
2525     result[0] = '\0';
2526     result2[0] = '\0';
2527     result3[0] = '\0';
2528
2529     // first handle the special case of 0 length suffixes
2530     SfxEntry * se = (SfxEntry *) sStart[0];
2531     while (se) {
2532         if (contclasses[se->getFlag()])
2533         {
2534             st = se->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
2535             if (st) {
2536                 if (ppfx) {
2537                     if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
2538                 }
2539                 strcat(result, st);
2540                 free(st);
2541                 if (se->getMorph()) strcat(result, se->getMorph());
2542                 strcat(result, "\n");
2543             }
2544         }
2545         se = se->getNext();
2546     }
2547
2548     // now handle the general case
2549     unsigned char sp = *((const unsigned char *)(word + len - 1));
2550     SfxEntry * sptr = (SfxEntry *) sStart[sp];
2551
2552     while (sptr) {
2553         if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
2554             if (contclasses[sptr->getFlag()])
2555             {
2556                 st = sptr->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
2557                 if (st) {
2558                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2559                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
2560                     strcpy(result2, st);
2561                     free(st);
2562
2563                 result3[0] = '\0';
2564 #ifdef DEBUG
2565                 unsigned short flag = sptr->getFlag();
2566                 if (flag_mode == FLAG_NUM) {
2567                     sprintf(result3, "<%d>", sptr->getKey());
2568                 } else if (flag_mode == FLAG_LONG) {
2569                     sprintf(result3, "<%c%c>", flag >> 8, (flag << 8) >>8);
2570                 } else sprintf(result3, "<%c>", flag);
2571                 strcat(result3, ":");
2572 #endif
2573                 if (sptr->getMorph()) strcat(result3, sptr->getMorph());
2574                 strlinecat(result2, result3);
2575                 strcat(result2, "\n");
2576                 strcat(result,  result2);
2577                 }
2578             }
2579             sptr = sptr->getNextEQ();
2580         } else {
2581              sptr = sptr->getNextNE();
2582         }
2583     }
2584     if (result) return mystrdup(result);
2585     return NULL;
2586 }
2587
2588 char * AffixMgr::suffix_check_morph(const char * word, int len,
2589        int sfxopts, AffEntry * ppfx, const FLAG cclass, const FLAG needflag, char in_compound)
2590 {
2591     char result[MAXLNLEN];
2592
2593     struct hentry * rv = NULL;
2594
2595     result[0] = '\0';
2596
2597     PfxEntry* ep = (PfxEntry *) ppfx;
2598
2599     // first handle the special case of 0 length suffixes
2600     SfxEntry * se = (SfxEntry *) sStart[0];
2601     while (se) {
2602         if (!cclass || se->getCont()) {
2603             // suffixes are not allowed in beginning of compounds
2604             if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2605              // except when signed with compoundpermitflag flag
2606              (se->getCont() && compoundpermitflag &&
2607                 TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
2608               // no circumfix flag in prefix and suffix
2609               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2610                    circumfix, ep->getContLen())) &&
2611                (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
2612               // circumfix flag in prefix AND suffix
2613               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2614                    circumfix, ep->getContLen())) &&
2615                (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
2616             // fogemorpheme
2617               (in_compound ||
2618                  !((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
2619             // pseudoroot on prefix or first suffix
2620               (cclass ||
2621                    !(se->getCont() && TESTAFF(se->getCont(), pseudoroot, se->getContLen())) ||
2622                    (ppfx && !((ep->getCont()) &&
2623                      TESTAFF(ep->getCont(), pseudoroot,
2624                        ep->getContLen())))
2625               )
2626             ))
2627             rv = se->checkword(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
2628          while (rv) {
2629            if (ppfx) {
2630                 if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
2631             }
2632             if (complexprefixes && rv->description) strcat(result, rv->description);
2633             if (rv->description && ((!rv->astr) ||
2634                                         !TESTAFF(rv->astr, lemma_present, rv->alen)))
2635                                                strcat(result, rv->word);
2636             if (!complexprefixes && rv->description) strcat(result, rv->description);
2637             if (se->getMorph()) strcat(result, se->getMorph());
2638             strcat(result, "\n");
2639             rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
2640          }
2641        }
2642        se = se->getNext();
2643     }
2644
2645     // now handle the general case
2646     unsigned char sp = *((const unsigned char *)(word + len - 1));
2647     SfxEntry * sptr = (SfxEntry *) sStart[sp];
2648
2649     while (sptr) {
2650         if (isRevSubset(sptr->getKey(), word + len - 1, len)
2651         ) {
2652             // suffixes are not allowed in beginning of compounds
2653             if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2654              // except when signed with compoundpermitflag flag
2655              (sptr->getCont() && compoundpermitflag &&
2656                 TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
2657               // no circumfix flag in prefix and suffix
2658               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2659                    circumfix, ep->getContLen())) &&
2660                (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
2661               // circumfix flag in prefix AND suffix
2662               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2663                    circumfix, ep->getContLen())) &&
2664                (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
2665             // fogemorpheme
2666               (in_compound ||
2667                  !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
2668             // pseudoroot on first suffix
2669               (cclass || !(sptr->getCont() &&
2670                    TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())))
2671             )) rv = sptr->checkword(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
2672             while (rv) {
2673                     if (ppfx) {
2674                         if (((PfxEntry *) ppfx)->getMorph()) strcat(result, ((PfxEntry *) ppfx)->getMorph());
2675                     }
2676                     if (complexprefixes && rv->description) strcat(result, rv->description);
2677                     if (rv->description && ((!rv->astr) ||
2678                         !TESTAFF(rv->astr, lemma_present, rv->alen))) strcat(result, rv->word);
2679                     if (!complexprefixes && rv->description) strcat(result, rv->description);
2680 #ifdef DEBUG
2681                 unsigned short flag = sptr->getFlag();
2682                 if (flag_mode == FLAG_NUM) {
2683                     sprintf(result, "<%d>", sptr->getKey());
2684                 } else if (flag_mode == FLAG_LONG) {
2685                     sprintf(result, "<%c%c>", flag >> 8, (flag << 8) >>8);
2686                 } else sprintf(result, "<%c>", flag);
2687                 strcat(result, ":");
2688 #endif
2689
2690                 if (sptr->getMorph()) strcat(result, sptr->getMorph());
2691                 strcat(result, "\n");
2692                 rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
2693             }
2694              sptr = sptr->getNextEQ();
2695         } else {
2696              sptr = sptr->getNextNE();
2697         }
2698     }
2699
2700     if (*result) return mystrdup(result);
2701     return NULL;
2702 }
2703 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
2704
2705
2706 // check if word with affixes is correctly spelled
2707 struct hentry * AffixMgr::affix_check (const char * word, int len, const FLAG needflag, char in_compound)
2708 {
2709     struct hentry * rv= NULL;
2710     if (derived) free(derived);
2711     derived =  NULL;
2712
2713     // check all prefixes (also crossed with suffixes if allowed)
2714     rv = prefix_check(word, len, in_compound, needflag);
2715     if (rv) return rv;
2716
2717     // if still not found check all suffixes
2718     rv = suffix_check(word, len, 0, NULL, NULL, 0, NULL, FLAG_NULL, needflag, in_compound);
2719
2720     if (havecontclass) {
2721         sfx = NULL;
2722         pfx = NULL;
2723         if (rv) return rv;
2724         // if still not found check all two-level suffixes
2725         rv = suffix_check_twosfx(word, len, 0, NULL, needflag);
2726         if (rv) return rv;
2727         // if still not found check all two-level suffixes
2728         rv = prefix_check_twosfx(word, len, IN_CPD_NOT, needflag);
2729     }
2730     return rv;
2731 }
2732
2733 #ifdef HUNSPELL_EXPERIMENTAL
2734 // check if word with affixes is correctly spelled
2735 char * AffixMgr::affix_check_morph(const char * word, int len, const FLAG needflag, char in_compound)
2736 {
2737     char result[MAXLNLEN];
2738     char * st = NULL;
2739
2740     *result = '\0';
2741
2742     // check all prefixes (also crossed with suffixes if allowed)
2743     st = prefix_check_morph(word, len, in_compound);
2744     if (st) {
2745         strcat(result, st);
2746         free(st);
2747     }
2748
2749     // if still not found check all suffixes
2750     st = suffix_check_morph(word, len, 0, NULL, '\0', needflag, in_compound);
2751     if (st) {
2752         strcat(result, st);
2753         free(st);
2754     }
2755
2756     if (havecontclass) {
2757         sfx = NULL;
2758         pfx = NULL;
2759         // if still not found check all two-level suffixes
2760         st = suffix_check_twosfx_morph(word, len, 0, NULL, needflag);
2761         if (st) {
2762             strcat(result, st);
2763             free(st);
2764         }
2765
2766         // if still not found check all two-level suffixes
2767         st = prefix_check_twosfx_morph(word, len, IN_CPD_NOT, needflag);
2768         if (st) {
2769             strcat(result, st);
2770             free(st);
2771         }
2772     }
2773
2774     return mystrdup(result);
2775 }
2776 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
2777
2778
2779 int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts,
2780     int wl, const unsigned short * ap, unsigned short al, char * bad, int badl)
2781 {
2782
2783     int nh=0;
2784
2785     // first add root word to list
2786     if ((nh < maxn) && !(al && ((pseudoroot && TESTAFF(ap, pseudoroot, al)) ||
2787          (onlyincompound && TESTAFF(ap, onlyincompound, al))))) {
2788        wlst[nh].word = mystrdup(ts);
2789        wlst[nh].allow = (1 == 0);
2790        nh++;
2791     }
2792
2793     // handle suffixes
2794     for (int i = 0; i < al; i++) {
2795        unsigned short c = (unsigned short) ap[i];
2796        SfxEntry * sptr = (SfxEntry *)sFlag[c];
2797        while (sptr) {
2798          if (!sptr->getKeyLen() || ((badl > sptr->getKeyLen()) &&
2799                 (strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0)) &&
2800                 // check pseudoroot flag
2801                 !(sptr->getCont() && ((pseudoroot &&
2802                       TESTAFF(sptr->getCont(), pseudoroot, sptr->getContLen())) ||
2803                   (circumfix &&
2804                       TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) ||
2805                   (onlyincompound &&
2806                       TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))
2807                 ) {
2808             char * newword = sptr->add(ts, wl);
2809             if (newword) {
2810                 if (nh < maxn) {
2811                     wlst[nh].word = newword;
2812                     wlst[nh].allow = sptr->allowCross();
2813                 nh++;
2814                 } else {
2815                     free(newword);
2816                 }
2817             }
2818          }
2819          sptr = (SfxEntry *)sptr ->getFlgNxt();
2820        }
2821     }
2822
2823     int n = nh;
2824
2825     // handle cross products of prefixes and suffixes
2826     for (int j=1;j<n ;j++)
2827        if (wlst[j].allow) {
2828           for (int k = 0; k < al; k++) {
2829              unsigned short c = (unsigned short) ap[k];
2830              PfxEntry * cptr = (PfxEntry *) pFlag[c];
2831              while (cptr) {
2832                 if (cptr->allowCross() && (!cptr->getKeyLen() || ((badl > cptr->getKeyLen()) &&
2833                         (strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) {
2834                     int l1 = strlen(wlst[j].word);
2835                     char * newword = cptr->add(wlst[j].word, l1);
2836                     if (newword) {
2837                        if (nh < maxn) {
2838                           wlst[nh].word = newword;
2839                           wlst[nh].allow = cptr->allowCross();
2840                           nh++;
2841                        } else {
2842                           free(newword);
2843                        }
2844                     }
2845                 }
2846                 cptr = (PfxEntry *)cptr ->getFlgNxt();
2847              }
2848           }
2849        }
2850
2851
2852     // now handle pure prefixes
2853     for (int m = 0; m < al; m ++) {
2854        unsigned short c = (unsigned short) ap[m];
2855        PfxEntry * ptr = (PfxEntry *) pFlag[c];
2856        while (ptr) {
2857          if (!ptr->getKeyLen() || ((badl > ptr->getKeyLen()) &&
2858                 (strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0)) &&
2859                 // check pseudoroot flag
2860                 !(ptr->getCont() && ((pseudoroot &&
2861                       TESTAFF(ptr->getCont(), pseudoroot, ptr->getContLen())) ||
2862                      (circumfix &&
2863                       TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) ||
2864                   (onlyincompound &&
2865                       TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen()))))
2866                 ) {
2867             char * newword = ptr->add(ts, wl);
2868             if (newword) {
2869                 if (nh < maxn) {
2870                     wlst[nh].word = newword;
2871                     wlst[nh].allow = ptr->allowCross();
2872                     nh++;
2873                 } else {
2874                     free(newword);
2875                 }
2876             }
2877          }
2878          ptr = (PfxEntry *)ptr ->getFlgNxt();
2879        }
2880     }
2881
2882     return nh;
2883 }
2884
2885
2886
2887 // return length of replacing table
2888 int AffixMgr::get_numrep()
2889 {
2890   return numrep;
2891 }
2892
2893 // return replacing table
2894 struct replentry * AffixMgr::get_reptable()
2895 {
2896   if (! reptable ) return NULL;
2897   return reptable;
2898 }
2899
2900 // return length of character map table
2901 int AffixMgr::get_nummap()
2902 {
2903   return nummap;
2904 }
2905
2906 // return character map table
2907 struct mapentry * AffixMgr::get_maptable()
2908 {
2909   if (! maptable ) return NULL;
2910   return maptable;
2911 }
2912
2913 // return length of word break table
2914 int AffixMgr::get_numbreak()
2915 {
2916   return numbreak;
2917 }
2918
2919 // return character map table
2920 char ** AffixMgr::get_breaktable()
2921 {
2922   if (! breaktable ) return NULL;
2923   return breaktable;
2924 }
2925
2926 // return text encoding of dictionary
2927 char * AffixMgr::get_encoding()
2928 {
2929   if (! encoding ) {
2930       encoding = mystrdup("ISO8859-1");
2931   }
2932   return mystrdup(encoding);
2933 }
2934
2935 // return text encoding of dictionary
2936 int AffixMgr::get_langnum()
2937 {
2938   return langnum;
2939 }
2940
2941 // return double prefix option
2942 int AffixMgr::get_complexprefixes()
2943 {
2944   return complexprefixes;
2945 }
2946
2947 FLAG AffixMgr::get_keepcase()
2948 {
2949   return keepcase;
2950 }
2951
2952 int AffixMgr::get_checksharps()
2953 {
2954   return checksharps;
2955 }
2956
2957 // return the preferred ignore string for suggestions
2958 char * AffixMgr::get_ignore()
2959 {
2960   if (!ignorechars) return NULL;
2961   return mystrdup(ignorechars);
2962 }
2963
2964 // return the preferred ignore string for suggestions
2965 unsigned short * AffixMgr::get_ignore_utf16(int * len)
2966 {
2967   *len = ignorechars_utf16_len;
2968   return ignorechars_utf16;
2969 }
2970
2971 // return the preferred try string for suggestions
2972 char * AffixMgr::get_try_string()
2973 {
2974   if (! trystring ) return NULL;
2975   return mystrdup(trystring);
2976 }
2977
2978 // return the preferred try string for suggestions
2979 const char * AffixMgr::get_wordchars()
2980 {
2981   return wordchars;
2982 }
2983
2984 unsigned short * AffixMgr::get_wordchars_utf16(int * len)
2985 {
2986   *len = wordchars_utf16_len;
2987   return wordchars_utf16;
2988 }
2989
2990 // is there compounding?
2991 int AffixMgr::get_compound()
2992 {
2993   return compoundflag || compoundbegin || numdefcpd;
2994 }
2995
2996 // return the compound words control flag
2997 FLAG AffixMgr::get_compoundflag()
2998 {
2999   return compoundflag;
3000 }
3001
3002 // return the forbidden words control flag
3003 FLAG AffixMgr::get_forbiddenword()
3004 {
3005   return forbiddenword;
3006 }
3007
3008 // return the forbidden words control flag
3009 FLAG AffixMgr::get_nosuggest()
3010 {
3011   return nosuggest;
3012 }
3013
3014 // return the forbidden words flag modify flag
3015 FLAG AffixMgr::get_pseudoroot()
3016 {
3017   return pseudoroot;
3018 }
3019
3020 // return the onlyincompound flag
3021 FLAG AffixMgr::get_onlyincompound()
3022 {
3023   return onlyincompound;
3024 }
3025
3026 // return the compound word signal flag
3027 FLAG AffixMgr::get_compoundroot()
3028 {
3029   return compoundroot;
3030 }
3031
3032 // return the compound begin signal flag
3033 FLAG AffixMgr::get_compoundbegin()
3034 {
3035   return compoundbegin;
3036 }
3037
3038 // return the value of checknum
3039 int AffixMgr::get_checknum()
3040 {
3041   return checknum;
3042 }
3043
3044 // return the value of prefix
3045 const char * AffixMgr::get_prefix()
3046 {
3047   if (pfx) return ((PfxEntry *)pfx)->getKey();
3048   return NULL;
3049 }
3050
3051 // return the value of suffix
3052 const char * AffixMgr::get_suffix()
3053 {
3054   return sfxappnd;
3055 }
3056
3057 // return the value of derived form (base word with first suffix).
3058 const char * AffixMgr::get_derived()
3059 {
3060   return derived;
3061 }
3062
3063 // return the value of suffix
3064 const char * AffixMgr::get_version()
3065 {
3066   return version;
3067 }
3068
3069 // return lemma_present flag
3070 FLAG AffixMgr::get_lemma_present()
3071 {
3072   return lemma_present;
3073 }
3074
3075 // utility method to look up root words in hash table
3076 struct hentry * AffixMgr::lookup(const char * word)
3077 {
3078   if (! pHMgr) return NULL;
3079   return pHMgr->lookup(word);
3080 }
3081
3082 // return the value of suffix
3083 const int AffixMgr::have_contclass()
3084 {
3085   return havecontclass;
3086 }
3087
3088 // return utf8
3089 int AffixMgr::get_utf8()
3090 {
3091   return utf8;
3092 }
3093
3094 // return nosplitsugs
3095 int AffixMgr::get_maxngramsugs(void)
3096 {
3097   return maxngramsugs;
3098 }
3099
3100 // return nosplitsugs
3101 int AffixMgr::get_nosplitsugs(void)
3102 {
3103   return nosplitsugs;
3104 }
3105
3106 // return sugswithdots
3107 int AffixMgr::get_sugswithdots(void)
3108 {
3109   return sugswithdots;
3110 }
3111
3112 /* parse flag */
3113 int AffixMgr::parse_flag(char * line, unsigned short * out, const char * name) {
3114    char * s = NULL;
3115    if (*out != FLAG_NULL) {
3116       HUNSPELL_WARNING(stderr, "error: duplicate %s line\n", name);
3117       return 1;
3118    }
3119    if (parse_string(line, &s, name)) return 1;
3120    *out = pHMgr->decode_flag(s);
3121    free(s);
3122    return 0;
3123 }
3124
3125 /* parse num */
3126 int AffixMgr::parse_num(char * line, int * out, const char * name) {
3127    char * s = NULL;
3128    if (*out != -1) {
3129       HUNSPELL_WARNING(stderr, "error: duplicate %s line\n", name);
3130       return 1;
3131    }
3132    if (parse_string(line, &s, name)) return 1;
3133    *out = atoi(s);
3134    free(s);
3135    return 0;
3136 }
3137
3138 /* parse in the max syllablecount of compound words and  */
3139 int  AffixMgr::parse_cpdsyllable(char * line)
3140 {
3141    char * tp = line;
3142    char * piece;
3143    int i = 0;
3144    int np = 0;
3145    w_char w[MAXWORDLEN];
3146    piece = mystrsep(&tp, 0);
3147    while (piece) {
3148       if (*piece != '\0') {
3149           switch(i) {
3150              case 0: { np++; break; }
3151              case 1: { cpdmaxsyllable = atoi(piece); np++; break; }
3152              case 2: {
3153                 if (!utf8) {
3154                     cpdvowels = mystrdup(piece);
3155                 } else {
3156                     int n = u8_u16(w, MAXWORDLEN, piece);
3157                     if (n > 0) {
3158                         flag_qsort((unsigned short *) w, 0, n);
3159                         cpdvowels_utf16 = (w_char *) malloc(n * sizeof(w_char));
3160                         if (!cpdvowels_utf16) return 1;
3161                         memcpy(cpdvowels_utf16, w, n * sizeof(w_char));
3162                     }
3163                     cpdvowels_utf16_len = n;
3164                 }
3165                 np++;
3166                 break;
3167              }
3168              default: break;
3169           }
3170           i++;
3171       }
3172       free(piece);
3173       piece = mystrsep(&tp, 0);
3174    }
3175    if (np < 2) {
3176       HUNSPELL_WARNING(stderr, "error: missing compoundsyllable information\n");
3177       return 1;
3178    }
3179    if (np == 2) cpdvowels = mystrdup("aeiouAEIOU");
3180    return 0;
3181 }
3182
3183 /* parse in the typical fault correcting table */
3184 int  AffixMgr::parse_reptable(char * line, FILE * af)
3185 {
3186    if (numrep != 0) {
3187       HUNSPELL_WARNING(stderr, "error: duplicate REP tables used\n");
3188       return 1;
3189    }
3190    char * tp = line;
3191    char * piece;
3192    int i = 0;
3193    int np = 0;
3194    piece = mystrsep(&tp, 0);
3195    while (piece) {
3196        if (*piece != '\0') {
3197           switch(i) {
3198              case 0: { np++; break; }
3199              case 1: {
3200                        numrep = atoi(piece);
3201                        if (numrep < 1) {
3202                           HUNSPELL_WARNING(stderr, "incorrect number of entries in replacement table\n");
3203                           free(piece);
3204                           return 1;
3205                        }
3206                        reptable = (replentry *) malloc(numrep * sizeof(struct replentry));
3207                        if (!reptable) return 1;
3208                        np++;
3209                        break;
3210                      }
3211              default: break;
3212           }
3213           i++;
3214        }
3215        free(piece);
3216        piece = mystrsep(&tp, 0);
3217    }
3218    if (np != 2) {
3219       HUNSPELL_WARNING(stderr, "error: missing replacement table information\n");
3220       return 1;
3221    }
3222
3223    /* now parse the numrep lines to read in the remainder of the table */
3224    char * nl = line;
3225    for (int j=0; j < numrep; j++) {
3226         if (!fgets(nl,MAXLNLEN,af)) return 1;
3227         mychomp(nl);
3228         tp = nl;
3229         i = 0;
3230         reptable[j].pattern = NULL;
3231         reptable[j].pattern2 = NULL;
3232         piece = mystrsep(&tp, 0);
3233         while (piece) {
3234            if (*piece != '\0') {
3235                switch(i) {
3236                   case 0: {
3237                              if (strncmp(piece,"REP",3) != 0) {
3238                                  HUNSPELL_WARNING(stderr, "error: replacement table is corrupt\n");
3239                                  free(piece);
3240                                  return 1;
3241                              }
3242                              break;
3243                           }
3244                   case 1: { reptable[j].pattern = mystrrep(mystrdup(piece),"_"," "); break; }
3245                   case 2: { reptable[j].pattern2 = mystrrep(mystrdup(piece),"_"," "); break; }
3246                   default: break;
3247                }
3248                i++;
3249            }
3250            free(piece);
3251            piece = mystrsep(&tp, 0);
3252         }
3253         if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) {
3254              HUNSPELL_WARNING(stderr, "error: replacement table is corrupt\n");
3255              return 1;
3256         }
3257    }
3258    return 0;
3259 }
3260
3261 /* parse in the checkcompoundpattern table */
3262 int  AffixMgr::parse_checkcpdtable(char * line, FILE * af)
3263 {
3264    if (numcheckcpd != 0) {
3265       HUNSPELL_WARNING(stderr, "error: duplicate compound pattern tables used\n");
3266       return 1;
3267    }
3268    char * tp = line;
3269    char * piece;
3270    int i = 0;
3271    int np = 0;
3272    piece = mystrsep(&tp, 0);
3273    while (piece) {
3274        if (*piece != '\0') {
3275           switch(i) {
3276              case 0: { np++; break; }
3277              case 1: {
3278                        numcheckcpd = atoi(piece);
3279                        if (numcheckcpd < 1) {
3280                           HUNSPELL_WARNING(stderr, "incorrect number of entries in compound pattern table\n");
3281                           free(piece);
3282                           return 1;
3283                        }
3284                        checkcpdtable = (replentry *) malloc(numcheckcpd * sizeof(struct replentry));
3285                        if (!checkcpdtable) return 1;
3286                        np++;
3287                        break;
3288                      }
3289              default: break;
3290           }
3291           i++;
3292        }
3293        free(piece);
3294        piece = mystrsep(&tp, 0);
3295    }
3296    if (np != 2) {
3297       HUNSPELL_WARNING(stderr, "error: missing compound pattern table information\n");
3298       return 1;
3299    }
3300
3301    /* now parse the numcheckcpd lines to read in the remainder of the table */
3302    char * nl = line;
3303    for (int j=0; j < numcheckcpd; j++) {
3304         if (!fgets(nl,MAXLNLEN,af)) return 1;
3305         mychomp(nl);
3306         tp = nl;
3307         i = 0;
3308         checkcpdtable[j].pattern = NULL;
3309         checkcpdtable[j].pattern2 = NULL;
3310         piece = mystrsep(&tp, 0);
3311         while (piece) {
3312            if (*piece != '\0') {
3313                switch(i) {
3314                   case 0: {
3315                              if (strncmp(piece,"CHECKCOMPOUNDPATTERN",20) != 0) {
3316                                  HUNSPELL_WARNING(stderr, "error: compound pattern table is corrupt\n");
3317                                  free(piece);
3318                                  return 1;
3319                              }
3320                              break;
3321                           }
3322                   case 1: { checkcpdtable[j].pattern = mystrdup(piece); break; }
3323                   case 2: { checkcpdtable[j].pattern2 = mystrdup(piece); break; }
3324                   default: break;
3325                }
3326                i++;
3327            }
3328            free(piece);
3329            piece = mystrsep(&tp, 0);
3330         }
3331         if ((!(checkcpdtable[j].pattern)) || (!(checkcpdtable[j].pattern2))) {
3332              HUNSPELL_WARNING(stderr, "error: compound pattern table is corrupt\n");
3333              return 1;
3334         }
3335    }
3336    return 0;
3337 }
3338
3339 /* parse in the compound rule table */
3340 int  AffixMgr::parse_defcpdtable(char * line, FILE * af)
3341 {
3342    if (numdefcpd != 0) {
3343       HUNSPELL_WARNING(stderr, "error: duplicate compound rule tables used\n");
3344       return 1;
3345    }
3346    char * tp = line;
3347    char * piece;
3348    int i = 0;
3349    int np = 0;
3350    piece = mystrsep(&tp, 0);
3351    while (piece) {
3352        if (*piece != '\0') {
3353           switch(i) {
3354              case 0: { np++; break; }
3355              case 1: {
3356                        numdefcpd = atoi(piece);
3357                        if (numdefcpd < 1) {
3358                           HUNSPELL_WARNING(stderr, "incorrect number of entries in compound rule table\n");
3359                           free(piece);
3360                           return 1;
3361                        }
3362                        defcpdtable = (flagentry *) malloc(numdefcpd * sizeof(flagentry));
3363                        if (!defcpdtable) return 1;
3364                        np++;
3365                        break;
3366                      }
3367              default: break;
3368           }
3369           i++;
3370        }
3371        free(piece);
3372        piece = mystrsep(&tp, 0);
3373    }
3374    if (np != 2) {
3375       HUNSPELL_WARNING(stderr, "error: missing compound rule table information\n");
3376       return 1;
3377    }
3378
3379    /* now parse the numdefcpd lines to read in the remainder of the table */
3380    char * nl = line;
3381    for (int j=0; j < numdefcpd; j++) {
3382         if (!fgets(nl,MAXLNLEN,af)) return 1;
3383         mychomp(nl);
3384         tp = nl;
3385         i = 0;
3386         defcpdtable[j].def = NULL;
3387         piece = mystrsep(&tp, 0);
3388         while (piece) {
3389            if (*piece != '\0') {
3390                switch(i) {
3391                   case 0: {
3392                              if (strncmp(piece, "COMPOUNDRULE", 12) != 0) {
3393                                  HUNSPELL_WARNING(stderr, "error: compound rule table is corrupt\n");
3394                                  free(piece);
3395                                  return 1;
3396                              }
3397                              break;
3398                           }
3399                   case 1: {
3400                             defcpdtable[j].len =
3401                                 pHMgr->decode_flags(&(defcpdtable[j].def), piece);
3402                             break;
3403                            }
3404                   default: break;
3405                }
3406                i++;
3407            }
3408            free(piece);
3409            piece = mystrsep(&tp, 0);
3410         }
3411         if (!defcpdtable[j].len) {
3412              HUNSPELL_WARNING(stderr, "error: compound rule table is corrupt\n");
3413              return 1;
3414         }
3415    }
3416    return 0;
3417 }
3418
3419
3420 /* parse in the character map table */
3421 int  AffixMgr::parse_maptable(char * line, FILE * af)
3422 {
3423    if (nummap != 0) {
3424       HUNSPELL_WARNING(stderr, "error: duplicate MAP tables used\n");
3425       return 1;
3426    }
3427    char * tp = line;
3428    char * piece;
3429    int i = 0;
3430    int np = 0;
3431    piece = mystrsep(&tp, 0);
3432    while (piece) {
3433        if (*piece != '\0') {
3434           switch(i) {
3435              case 0: { np++; break; }
3436              case 1: {
3437                        nummap = atoi(piece);
3438                        if (nummap < 1) {
3439                           HUNSPELL_WARNING(stderr, "incorrect number of entries in map table\n");
3440                           free(piece);
3441                           return 1;
3442                        }
3443                        maptable = (mapentry *) malloc(nummap * sizeof(struct mapentry));
3444                        if (!maptable) return 1;
3445                        np++;
3446                        break;
3447                      }
3448              default: break;
3449           }
3450           i++;
3451        }
3452        free(piece);
3453        piece = mystrsep(&tp, 0);
3454    }
3455    if (np != 2) {
3456       HUNSPELL_WARNING(stderr, "error: missing map table information\n");
3457       return 1;
3458    }
3459
3460    /* now parse the nummap lines to read in the remainder of the table */
3461    char * nl = line;
3462    for (int j=0; j < nummap; j++) {
3463         if (!fgets(nl,MAXLNLEN,af)) return 1;
3464         mychomp(nl);
3465         tp = nl;
3466         i = 0;
3467         maptable[j].set = NULL;
3468         maptable[j].len = 0;
3469         piece = mystrsep(&tp, 0);
3470         while (piece) {
3471            if (*piece != '\0') {
3472                switch(i) {
3473                   case 0: {
3474                              if (strncmp(piece,"MAP",3) != 0) {
3475                                  HUNSPELL_WARNING(stderr, "error: map table is corrupt\n");
3476                                  free(piece);
3477                                  return 1;
3478                              }
3479                              break;
3480                           }
3481                   case 1: {
3482                             maptable[j].len = 0;
3483                             maptable[j].set = NULL;
3484                             maptable[j].set_utf16 = NULL;
3485                             if (!utf8) {
3486                                 maptable[j].set = mystrdup(piece);
3487                                 maptable[j].len = strlen(maptable[j].set);
3488                             } else {
3489                                 w_char w[MAXWORDLEN];
3490                                 int n = u8_u16(w, MAXWORDLEN, piece);
3491                                 if (n > 0) {
3492                                     flag_qsort((unsigned short *) w, 0, n);
3493                                     maptable[j].set_utf16 = (w_char *) malloc(n * sizeof(w_char));
3494                                     if (!maptable[j].set_utf16) return 1;
3495                                     memcpy(maptable[j].set_utf16, w, n * sizeof(w_char));
3496                                 }
3497                                 maptable[j].len = n;
3498                             }
3499                             break; }
3500                   default: break;
3501                }
3502                i++;
3503            }
3504            free(piece);
3505            piece = mystrsep(&tp, 0);
3506         }
3507         if ((!(maptable[j].set || maptable[j].set_utf16)) || (!(maptable[j].len))) {
3508              HUNSPELL_WARNING(stderr, "error: map table is corrupt\n");
3509              return 1;
3510         }
3511    }
3512    return 0;
3513 }
3514
3515 /* parse in the word breakpoint table */
3516 int  AffixMgr::parse_breaktable(char * line, FILE * af)
3517 {
3518    if (numbreak != 0) {
3519       HUNSPELL_WARNING(stderr, "error: duplicate word breakpoint tables used\n");
3520       return 1;
3521    }
3522    char * tp = line;
3523    char * piece;
3524    int i = 0;
3525    int np = 0;
3526    piece = mystrsep(&tp, 0);
3527    while (piece) {
3528        if (*piece != '\0') {
3529           switch(i) {
3530              case 0: { np++; break; }
3531              case 1: {
3532                        numbreak = atoi(piece);
3533                        if (numbreak < 1) {
3534                           HUNSPELL_WARNING(stderr, "incorrect number of entries in BREAK table\n");
3535                           free(piece);
3536                           return 1;
3537                        }
3538                        breaktable = (char **) malloc(numbreak * sizeof(char *));
3539                        if (!breaktable) return 1;
3540                        np++;
3541                        break;
3542                      }
3543              default: break;
3544           }
3545           i++;
3546        }
3547        free(piece);
3548        piece = mystrsep(&tp, 0);
3549    }
3550    if (np != 2) {
3551       HUNSPELL_WARNING(stderr, "error: missing word breakpoint table information\n");
3552       return 1;
3553    }
3554
3555    /* now parse the numbreak lines to read in the remainder of the table */
3556    char * nl = line;
3557    for (int j=0; j < numbreak; j++) {
3558         if (!fgets(nl,MAXLNLEN,af)) return 1;
3559         mychomp(nl);
3560         tp = nl;
3561         i = 0;
3562         piece = mystrsep(&tp, 0);
3563         while (piece) {
3564            if (*piece != '\0') {
3565                switch(i) {
3566                   case 0: {
3567                              if (strncmp(piece,"BREAK",5) != 0) {
3568                                  HUNSPELL_WARNING(stderr, "error: BREAK table is corrupt\n");
3569                                  free(piece);
3570                                  return 1;
3571                              }
3572                              break;
3573                           }
3574                   case 1: {
3575                             breaktable[j] = mystrdup(piece);
3576                             break;
3577                           }
3578                   default: break;
3579                }
3580                i++;
3581            }
3582            free(piece);
3583            piece = mystrsep(&tp, 0);
3584         }
3585         if (!breaktable) {
3586              HUNSPELL_WARNING(stderr, "error: BREAK table is corrupt\n");
3587              return 1;
3588         }
3589    }
3590    return 0;
3591 }
3592
3593 int  AffixMgr::parse_affix(char * line, const char at, FILE * af, char * dupflags)
3594 {
3595    int numents = 0;      // number of affentry structures to parse
3596
3597    unsigned short aflag = 0;      // affix char identifier
3598
3599    char ff=0;
3600    struct affentry * ptr= NULL;
3601    struct affentry * nptr= NULL;
3602
3603    char * tp = line;
3604    char * nl = line;
3605    char * piece;
3606    int i = 0;
3607
3608    // checking lines with bad syntax
3609 #ifdef DEBUG
3610    int basefieldnum = 0;
3611 #endif
3612
3613    // split affix header line into pieces
3614
3615    int np = 0;
3616    piece = mystrsep(&tp, 0);
3617    while (piece) {
3618       if (*piece != '\0') {
3619           switch(i) {
3620              // piece 1 - is type of affix
3621              case 0: { np++; break; }
3622
3623              // piece 2 - is affix char
3624              case 1: {
3625                     np++;
3626                     aflag = pHMgr->decode_flag(piece);
3627                     if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||
3628                         ((at == 'P') && (dupflags[aflag] & dupPFX))) {
3629                         HUNSPELL_WARNING(stderr, "error: duplicate affix flag %s in line %s\n", piece, nl);
3630                         // return 1; XXX permissive mode for bad dictionaries
3631                     }
3632                     dupflags[aflag] += ((at == 'S') ? dupSFX : dupPFX);
3633                     break;
3634                     }
3635              // piece 3 - is cross product indicator
3636              case 2: { np++; if (*piece == 'Y') ff = aeXPRODUCT; break; }
3637
3638              // piece 4 - is number of affentries
3639              case 3: {
3640                        np++;
3641                        numents = atoi(piece);
3642                        if (numents == 0) {
3643                            char * err = pHMgr->encode_flag(aflag);
3644                            HUNSPELL_WARNING(stderr, "error: affix %s header has incorrect entry count in line %s\n",
3645                                    err, nl);
3646                            free(err);
3647                            return 1;
3648                        }
3649                        ptr = (struct affentry *) malloc(numents * sizeof(struct affentry));
3650                        if (!ptr) return 1;
3651                        ptr->opts = ff;
3652                        if (utf8) ptr->opts += aeUTF8;
3653                        if (pHMgr->is_aliasf()) ptr->opts += aeALIASF;
3654 #ifdef HUNSPELL_EXPERIMENTAL
3655                        if (pHMgr->is_aliasm()) ptr->opts += aeALIASM;
3656 #endif
3657                        ptr->aflag = aflag;
3658                      }
3659
3660              default: break;
3661           }
3662           i++;
3663       }
3664       free(piece);
3665       piece = mystrsep(&tp, 0);
3666    }
3667    // check to make sure we parsed enough pieces
3668    if (np != 4) {
3669        char * err = pHMgr->encode_flag(aflag);
3670        HUNSPELL_WARNING(stderr, "error: affix %s header has insufficient data in line %s\n", err, nl);
3671        free(err);
3672        free(ptr);
3673        return 1;
3674    }
3675
3676    // store away ptr to first affentry
3677    nptr = ptr;
3678
3679    // now parse numents affentries for this affix
3680    for (int j=0; j < numents; j++) {
3681       if (!fgets(nl,MAXLNLEN,af)) return 1;
3682       mychomp(nl);
3683       tp = nl;
3684       i = 0;
3685       np = 0;
3686
3687       // split line into pieces
3688       piece = mystrsep(&tp, 0);
3689       while (piece) {
3690          if (*piece != '\0') {
3691              switch(i) {
3692                 // piece 1 - is type
3693                 case 0: {
3694                           np++;
3695                           if (nptr != ptr) nptr->opts = ptr->opts;
3696                           break;
3697                         }
3698
3699                 // piece 2 - is affix char
3700                 case 1: {
3701                           np++;
3702                           if (pHMgr->decode_flag(piece) != aflag) {
3703                               char * err = pHMgr->encode_flag(aflag);
3704                               HUNSPELL_WARNING(stderr, "error: affix %s is corrupt near line %s\n", err, nl);
3705                               HUNSPELL_WARNING(stderr, "error: possible incorrect count\n");
3706                               free(err);
3707                               free(piece);
3708                               return 1;
3709                           }
3710
3711                           if (nptr != ptr) nptr->aflag = ptr->aflag;
3712                           break;
3713                         }
3714
3715                 // piece 3 - is string to strip or 0 for null
3716                 case 2: {
3717                           np++;
3718                           if (complexprefixes) {
3719                             if (utf8) reverseword_utf(piece); else reverseword(piece);
3720                           }
3721                           nptr->strip = mystrdup(piece);
3722                           nptr->stripl = (unsigned char) strlen(nptr->strip);
3723                           if (strcmp(nptr->strip,"0") == 0) {
3724                               free(nptr->strip);
3725                               nptr->strip=mystrdup("");
3726                               nptr->stripl = 0;
3727                           }
3728                           break;
3729                         }
3730
3731                 // piece 4 - is affix string or 0 for null
3732                 case 3: {
3733                           char * dash;
3734 #ifdef HUNSPELL_EXPERIMENTAL
3735                           nptr->morphcode = NULL;
3736 #endif
3737                           nptr->contclass = NULL;
3738                           nptr->contclasslen = 0;
3739                           np++;
3740                           dash = strchr(piece, '/');
3741                           if (dash) {
3742                             *dash = '\0';
3743
3744                             if (ignorechars) {
3745                               if (utf8) {
3746                                 remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
3747                               } else {
3748                                 remove_ignored_chars(piece,ignorechars);
3749                               }
3750                             }
3751
3752                             if (complexprefixes) {
3753                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
3754                             }
3755                             nptr->appnd = mystrdup(piece);
3756
3757                             if (pHMgr->is_aliasf()) {
3758                                 int index = atoi(dash + 1);
3759                                 nptr->contclasslen = (unsigned short) pHMgr->get_aliasf(index, &(nptr->contclass));
3760                             } else {
3761                                 nptr->contclasslen = (unsigned short) pHMgr->decode_flags(&(nptr->contclass), dash + 1);
3762                                 flag_qsort(nptr->contclass, 0, nptr->contclasslen);
3763                             }
3764                             *dash = '/';
3765
3766                             havecontclass = 1;
3767                             for (unsigned short _i = 0; _i < nptr->contclasslen; _i++) {
3768                               contclasses[(nptr->contclass)[_i]] = 1;
3769                             }
3770                           } else {
3771                             if (ignorechars) {
3772                               if (utf8) {
3773                                 remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
3774                               } else {
3775                                 remove_ignored_chars(piece,ignorechars);
3776                               }
3777                             }
3778
3779                             if (complexprefixes) {
3780                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
3781                             }
3782                             nptr->appnd = mystrdup(piece);
3783                           }
3784
3785                           nptr->appndl = (unsigned char) strlen(nptr->appnd);
3786                           if (strcmp(nptr->appnd,"0") == 0) {
3787                               free(nptr->appnd);
3788                               nptr->appnd=mystrdup("");
3789                               nptr->appndl = 0;
3790                           }
3791                           break;
3792                         }
3793
3794                 // piece 5 - is the conditions descriptions
3795                 case 4: {
3796                           np++;
3797                           if (complexprefixes) {
3798                             int neg = 0;
3799                             if (utf8) reverseword_utf(piece); else reverseword(piece);
3800                             // reverse condition
3801                             for (char * k = piece + strlen(piece) - 1; k >= piece; k--) {
3802                                 switch(*k) {
3803                                   case '[': {
3804                                         if (neg) *(k+1) = '['; else *k = ']';
3805                                         break;
3806                                     }
3807                                   case ']': {
3808                                         *k = '[';
3809                                         if (neg) *(k+1) = '^';
3810                                         neg = 0;
3811                                         break;
3812                                     }
3813                                   case '^': {
3814                                        if (*(k+1) == ']') neg = 1; else *(k+1) = *k;
3815                                        break;
3816                                     }
3817                                   default: {
3818                                     if (neg) *(k+1) = *k;
3819                                   }
3820                                }
3821                             }
3822                           }
3823                           if (nptr->stripl && (strcmp(piece, ".") != 0) &&
3824                             redundant_condition(at, nptr->strip, nptr->stripl, piece, nl))
3825                                 strcpy(piece, ".");
3826                           if (encodeit(nptr,piece)) return 1;
3827                          break;
3828                 }
3829
3830 #ifdef HUNSPELL_EXPERIMENTAL
3831                 case 5: {
3832                           np++;
3833                           if (pHMgr->is_aliasm()) {
3834                             int index = atoi(piece);
3835                             nptr->morphcode = pHMgr->get_aliasm(index);
3836                           } else {
3837                             if (complexprefixes) {
3838                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
3839                             }
3840                             nptr->morphcode = mystrdup(piece);
3841                           }
3842                           break;
3843                 }
3844 #endif
3845
3846                 default: break;
3847              }
3848              i++;
3849          }
3850          free(piece);
3851          piece = mystrsep(&tp, 0);
3852       }
3853       // check to make sure we parsed enough pieces
3854       if (np < 4) {
3855           char * err = pHMgr->encode_flag(aflag);
3856           HUNSPELL_WARNING(stderr, "error: affix %s is corrupt near line %s\n", err, nl);
3857           free(err);
3858           free(ptr);
3859           return 1;
3860       }
3861
3862 #ifdef DEBUG
3863 #ifdef HUNSPELL_EXPERIMENTAL
3864       // detect unnecessary fields, excepting comments
3865       if (basefieldnum) {
3866         int fieldnum = !(nptr->morphcode) ? 5 : ((*(nptr->morphcode)=='#') ? 5 : 6);
3867           if (fieldnum != basefieldnum)
3868             HUNSPELL_WARNING(stderr, "warning: bad field number:\n%s\n", nl);
3869       } else {
3870         basefieldnum = !(nptr->morphcode) ? 5 : ((*(nptr->morphcode)=='#') ? 5 : 6);
3871       }
3872 #endif
3873 #endif
3874       nptr++;
3875    }
3876
3877    // now create SfxEntry or PfxEntry objects and use links to
3878    // build an ordered (sorted by affix string) list
3879    nptr = ptr;
3880    for (int k = 0; k < numents; k++) {
3881       if (at == 'P') {
3882           PfxEntry * pfxptr = new PfxEntry(this,nptr);
3883           build_pfxtree((AffEntry *)pfxptr);
3884       } else {
3885           SfxEntry * sfxptr = new SfxEntry(this,nptr);
3886           build_sfxtree((AffEntry *)sfxptr);
3887       }
3888       nptr++;
3889    }
3890    free(ptr);
3891    return 0;
3892 }
3893
3894 int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char * cond, char * line) {
3895   int condl = strlen(cond);
3896   int i;
3897   int j;
3898   int neg;
3899   int in;
3900   if (ft == 'P') { // prefix
3901     if (strncmp(strip, cond, condl) == 0) return 1;
3902     if (utf8) {
3903     } else {
3904       for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) {
3905         if (cond[j] != '[') {
3906           if (cond[j] != strip[i]) {
3907             HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", line);
3908           }
3909         } else {
3910           neg = (cond[j+1] == '^') ? 1 : 0;
3911           in = 0;
3912           do {
3913             j++;
3914             if (strip[i] == cond[j]) in = 1;
3915           } while ((j < (condl - 1)) && (cond[j] != ']'));
3916           if (j == (condl - 1) && (cond[j] != ']')) {
3917             HUNSPELL_WARNING(stderr, "error: missing ] in condition:\n%s\n", line);
3918             return 0;
3919           }
3920           if ((!neg && !in) || (neg && in)) {
3921             HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", line);
3922             return 0;
3923           }
3924         }
3925       }
3926       if (j >= condl) return 1;
3927     }
3928   } else { // suffix
3929     if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0) return 1;
3930     if (utf8) {
3931     } else {
3932       for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) {
3933         if (cond[j] != ']') {
3934           if (cond[j] != strip[i]) {
3935             HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", line);
3936           }
3937         } else {
3938           in = 0;
3939           do {
3940             j--;
3941             if (strip[i] == cond[j]) in = 1;
3942           } while ((j > 0) && (cond[j] != '['));
3943           if ((j == 0) && (cond[j] != '[')) {
3944             HUNSPELL_WARNING(stderr, "error: missing ] in condition:\n%s\n", line);
3945             return 0;
3946           }
3947           neg = (cond[j+1] == '^') ? 1 : 0;
3948           if ((!neg && !in) || (neg && in)) {
3949             HUNSPELL_WARNING(stderr, "warning: incompatible stripping characters and condition:\n%s\n", line);
3950             return 0;
3951           }
3952         }
3953       }
3954       if (j < 0) return 1;
3955     }
3956   }
3957   return 0;
3958 }