ext/hunspell/affixmgr.cxx

   1 #include "license.hunspell"
   2 #include "license.myspell"
   3
   4 #include <stdlib.h>
   5 #include <string.h>
   6 #include <stdio.h>
   7 #include <ctype.h>
   8
   9 #include <vector>
  10
  11 #include "affixmgr.hxx"
  12 #include "affentry.hxx"
  13 #include "langnum.hxx"
  14
  15 #include "csutil.hxx"
  16
  17 AffixMgr::AffixMgr(const char * affpath, HashMgr** ptr, int * md, const char * key)
  18 {
  19   // register hash manager and load affix data from aff file
  20   pHMgr = ptr[0];
  21   alldic = ptr;
  22   maxdic = md;
  23   keystring = NULL;
  24   trystring = NULL;
  25   encoding=NULL;
  26   csconv=NULL;
  27   utf8 = 0;
  28   complexprefixes = 0;
  29   maptable = NULL;
  30   nummap = 0;
  31   breaktable = NULL;
  32   numbreak = -1;
  33   reptable = NULL;
  34   numrep = 0;
  35   iconvtable = NULL;
  36   oconvtable = NULL;
  37   checkcpdtable = NULL;
  38   // allow simplified compound forms (see 3rd field of CHECKCOMPOUNDPATTERN)
  39   simplifiedcpd = 0;
  40   numcheckcpd = 0;
  41   defcpdtable = NULL;
  42   numdefcpd = 0;
  43   phone = NULL;
  44   compoundflag = FLAG_NULL; // permits word in compound forms
  45   compoundbegin = FLAG_NULL; // may be first word in compound forms
  46   compoundmiddle = FLAG_NULL; // may be middle word in compound forms
  47   compoundend = FLAG_NULL; // may be last word in compound forms
  48   compoundroot = FLAG_NULL; // compound word signing flag
  49   compoundpermitflag = FLAG_NULL; // compound permitting flag for suffixed word
  50   compoundforbidflag = FLAG_NULL; // compound fordidden flag for suffixed word
  51   checkcompounddup = 0; // forbid double words in compounds
  52   checkcompoundrep = 0; // forbid bad compounds (may be non compound word with a REP substitution)
  53   checkcompoundcase = 0; // forbid upper and lowercase combinations at word bounds
  54   checkcompoundtriple = 0; // forbid compounds with triple letters
  55   simplifiedtriple = 0; // allow simplified triple letters in compounds (Schiff+fahrt -> Schiffahrt)
  56   forbiddenword = FORBIDDENWORD; // forbidden word signing flag
  57   nosuggest = FLAG_NULL; // don't suggest words signed with NOSUGGEST flag
  58   nongramsuggest = FLAG_NULL;
  59   lang = NULL; // language
  60   langnum = 0; // language code (see http://l10n.openoffice.org/languages.html)
  61   needaffix = FLAG_NULL; // forbidden root, allowed only with suffixes
  62   cpdwordmax = -1; // default: unlimited wordcount in compound words
  63   cpdmin = -1;  // undefined
  64   cpdmaxsyllable = 0; // default: unlimited syllablecount in compound words
  65   cpdvowels=NULL; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
  66   cpdvowels_utf16=NULL; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
  67   cpdvowels_utf16_len=0; // vowels
  68   pfxappnd=NULL; // previous prefix for counting the syllables of prefix BUG
  69   sfxappnd=NULL; // previous suffix for counting a special syllables BUG
  70   cpdsyllablenum=NULL; // syllable count incrementing flag
  71   checknum=0; // checking numbers, and word with numbers
  72   wordchars=NULL; // letters + spec. word characters
  73   wordchars_utf16=NULL; // letters + spec. word characters
  74   wordchars_utf16_len=0; // letters + spec. word characters
  75   ignorechars=NULL; // letters + spec. word characters
  76   ignorechars_utf16=NULL; // letters + spec. word characters
  77   ignorechars_utf16_len=0; // letters + spec. word characters
  78   version=NULL; // affix and dictionary file version string
  79   havecontclass=0; // flags of possible continuing classes (double affix)
  80   // LEMMA_PRESENT: not put root into the morphological output. Lemma presents
  81   // in morhological description in dictionary file. It's often combined with PSEUDOROOT.
  82   lemma_present = FLAG_NULL;
  83   circumfix = FLAG_NULL;
  84   onlyincompound = FLAG_NULL;
  85   maxngramsugs = -1; // undefined
  86   maxdiff = -1; // undefined
  87   onlymaxdiff = 0;
  88   maxcpdsugs = -1; // undefined
  89   nosplitsugs = 0;
  90   sugswithdots = 0;
  91   keepcase = 0;
  92   forceucase = 0;
  93   warn = 0;
  94   forbidwarn = 0;
  95   checksharps = 0;
  96   substandard = FLAG_NULL;
  97   fullstrip = 0;
  98
  99   sfx = NULL;
 100   pfx = NULL;
 101
 102   for (int i=0; i < SETSIZE; i++) {
 103      pStart[i] = NULL;
 104      sStart[i] = NULL;
 105      pFlag[i] = NULL;
 106      sFlag[i] = NULL;
 107   }
 108
 109   for (int j=0; j < CONTSIZE; j++) {
 110     contclasses[j] = 0;
 111   }
 112
 113   if (parse_file(affpath, key)) {
 114      HUNSPELL_WARNING(stderr, "Failure loading aff file %s\n",affpath);
 115   }
 116
 117   if (cpdmin == -1) cpdmin = MINCPDLEN;
 118
 119 }
 120
 121
 122 AffixMgr::~AffixMgr()
 123 {
 124   // pass through linked prefix entries and clean up
 125   for (int i=0; i < SETSIZE ;i++) {
 126        pFlag[i] = NULL;
 127        PfxEntry * ptr = pStart[i];
 128        PfxEntry * nptr = NULL;
 129        while (ptr) {
 130             nptr = ptr->getNext();
 131             delete(ptr);
 132             ptr = nptr;
 133             nptr = NULL;
 134        }
 135   }
 136
 137   // pass through linked suffix entries and clean up
 138   for (int j=0; j < SETSIZE ; j++) {
 139        sFlag[j] = NULL;
 140        SfxEntry * ptr = sStart[j];
 141        SfxEntry * nptr = NULL;
 142        while (ptr) {
 143             nptr = ptr->getNext();
 144             delete(ptr);
 145             ptr = nptr;
 146             nptr = NULL;
 147        }
 148        sStart[j] = NULL;
 149   }
 150
 151   if (keystring) free(keystring);
 152   keystring=NULL;
 153   if (trystring) free(trystring);
 154   trystring=NULL;
 155   if (encoding) free(encoding);
 156   encoding=NULL;
 157   if (maptable) {
 158      for (int j=0; j < nummap; j++) {
 159         for (int k=0; k < maptable[j].len; k++) {
 160            if (maptable[j].set[k]) free(maptable[j].set[k]);
 161         }
 162         free(maptable[j].set);
 163         maptable[j].set = NULL;
 164         maptable[j].len = 0;
 165      }
 166      free(maptable);
 167      maptable = NULL;
 168   }
 169   nummap = 0;
 170   if (breaktable) {
 171      for (int j=0; j < numbreak; j++) {
 172         if (breaktable[j]) free(breaktable[j]);
 173         breaktable[j] = NULL;
 174      }
 175      free(breaktable);
 176      breaktable = NULL;
 177   }
 178   numbreak = 0;
 179   if (reptable) {
 180      for (int j=0; j < numrep; j++) {
 181         free(reptable[j].pattern);
 182         free(reptable[j].pattern2);
 183      }
 184      free(reptable);
 185      reptable = NULL;
 186   }
 187   if (iconvtable) delete iconvtable;
 188   if (oconvtable) delete oconvtable;
 189   if (phone && phone->rules) {
 190      for (int j=0; j < phone->num + 1; j++) {
 191         free(phone->rules[j * 2]);
 192         free(phone->rules[j * 2 + 1]);
 193      }
 194      free(phone->rules);
 195      free(phone);
 196      phone = NULL;
 197   }
 198
 199   if (defcpdtable) {
 200      for (int j=0; j < numdefcpd; j++) {
 201         free(defcpdtable[j].def);
 202         defcpdtable[j].def = NULL;
 203      }
 204      free(defcpdtable);
 205      defcpdtable = NULL;
 206   }
 207   numrep = 0;
 208   if (checkcpdtable) {
 209      for (int j=0; j < numcheckcpd; j++) {
 210         free(checkcpdtable[j].pattern);
 211         free(checkcpdtable[j].pattern2);
 212         free(checkcpdtable[j].pattern3);
 213         checkcpdtable[j].pattern = NULL;
 214         checkcpdtable[j].pattern2 = NULL;
 215         checkcpdtable[j].pattern3 = NULL;
 216      }
 217      free(checkcpdtable);
 218      checkcpdtable = NULL;
 219   }
 220   numcheckcpd = 0;
 221   FREE_FLAG(compoundflag);
 222   FREE_FLAG(compoundbegin);
 223   FREE_FLAG(compoundmiddle);
 224   FREE_FLAG(compoundend);
 225   FREE_FLAG(compoundpermitflag);
 226   FREE_FLAG(compoundforbidflag);
 227   FREE_FLAG(compoundroot);
 228   FREE_FLAG(forbiddenword);
 229   FREE_FLAG(nosuggest);
 230   FREE_FLAG(nongramsuggest);
 231   FREE_FLAG(needaffix);
 232   FREE_FLAG(lemma_present);
 233   FREE_FLAG(circumfix);
 234   FREE_FLAG(onlyincompound);
 235
 236   cpdwordmax = 0;
 237   pHMgr = NULL;
 238   cpdmin = 0;
 239   cpdmaxsyllable = 0;
 240   if (cpdvowels) free(cpdvowels);
 241   if (cpdvowels_utf16) free(cpdvowels_utf16);
 242   if (cpdsyllablenum) free(cpdsyllablenum);
 243   free_utf_tbl();
 244   if (lang) free(lang);
 245   if (wordchars) free(wordchars);
 246   if (wordchars_utf16) free(wordchars_utf16);
 247   if (ignorechars) free(ignorechars);
 248   if (ignorechars_utf16) free(ignorechars_utf16);
 249   if (version) free(version);
 250   checknum=0;
 251 #ifdef MOZILLA_CLIENT
 252   delete [] csconv;
 253 #endif
 254 }
 255
 256
 257 // read in aff file and build up prefix and suffix entry objects
 258 int  AffixMgr::parse_file(const char * affpath, const char * key)
 259 {
 260   char * line; // io buffers
 261   char ft;     // affix type
 262
 263   // checking flag duplication
 264   char dupflags[CONTSIZE];
 265   char dupflags_ini = 1;
 266
 267   // first line indicator for removing byte order mark
 268   int firstline = 1;
 269
 270   // open the affix file
 271   FileMgr * afflst = new FileMgr(affpath, key);
 272   if (!afflst) {
 273     HUNSPELL_WARNING(stderr, "error: could not open affix description file %s\n",affpath);
 274     return 1;
 275   }
 276
 277   // step one is to parse the affix file building up the internal
 278   // affix data structures
 279
 280     // read in each line ignoring any that do not
 281     // start with a known line type indicator
 282     while ((line = afflst->getline())) {
 283        mychomp(line);
 284
 285        /* remove byte order mark */
 286        if (firstline) {
 287          firstline = 0;
 288          // Affix file begins with byte order mark: possible incompatibility with old Hunspell versions
 289          if (strncmp(line,"\xEF\xBB\xBF",3) == 0) {
 290             memmove(line, line+3, strlen(line+3)+1);
 291          }
 292        }
 293
 294        /* parse in the keyboard string */
 295        if (strncmp(line,"KEY",3) == 0) {
 296           if (parse_string(line, &keystring, afflst->getlinenum())) {
 297              delete afflst;
 298              return 1;
 299           }
 300        }
 301
 302        /* parse in the try string */
 303        if (strncmp(line,"TRY",3) == 0) {
 304           if (parse_string(line, &trystring, afflst->getlinenum())) {
 305              delete afflst;
 306              return 1;
 307           }
 308        }
 309
 310        /* parse in the name of the character set used by the .dict and .aff */
 311        if (strncmp(line,"SET",3) == 0) {
 312           if (parse_string(line, &encoding, afflst->getlinenum())) {
 313              delete afflst;
 314              return 1;
 315           }
 316           if (strcmp(encoding, "UTF-8") == 0) {
 317              utf8 = 1;
 318 #ifndef OPENOFFICEORG
 319 #ifndef MOZILLA_CLIENT
 320              if (initialize_utf_tbl()) return 1;
 321 #endif
 322 #endif
 323           }
 324        }
 325
 326        /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left writing system */
 327        if (strncmp(line,"COMPLEXPREFIXES",15) == 0)
 328                    complexprefixes = 1;
 329
 330        /* parse in the flag used by the controlled compound words */
 331        if (strncmp(line,"COMPOUNDFLAG",12) == 0) {
 332           if (parse_flag(line, &compoundflag, afflst)) {
 333              delete afflst;
 334              return 1;
 335           }
 336        }
 337
 338        /* parse in the flag used by compound words */
 339        if (strncmp(line,"COMPOUNDBEGIN",13) == 0) {
 340           if (complexprefixes) {
 341             if (parse_flag(line, &compoundend, afflst)) {
 342               delete afflst;
 343               return 1;
 344             }
 345           } else {
 346             if (parse_flag(line, &compoundbegin, afflst)) {
 347               delete afflst;
 348               return 1;
 349             }
 350           }
 351        }
 352
 353        /* parse in the flag used by compound words */
 354        if (strncmp(line,"COMPOUNDMIDDLE",14) == 0) {
 355           if (parse_flag(line, &compoundmiddle, afflst)) {
 356              delete afflst;
 357              return 1;
 358           }
 359        }
 360        /* parse in the flag used by compound words */
 361        if (strncmp(line,"COMPOUNDEND",11) == 0) {
 362           if (complexprefixes) {
 363             if (parse_flag(line, &compoundbegin, afflst)) {
 364               delete afflst;
 365               return 1;
 366             }
 367           } else {
 368             if (parse_flag(line, &compoundend, afflst)) {
 369               delete afflst;
 370               return 1;
 371             }
 372           }
 373        }
 374
 375        /* parse in the data used by compound_check() method */
 376        if (strncmp(line,"COMPOUNDWORDMAX",15) == 0) {
 377           if (parse_num(line, &cpdwordmax, afflst)) {
 378              delete afflst;
 379              return 1;
 380           }
 381        }
 382
 383        /* parse in the flag sign compounds in dictionary */
 384        if (strncmp(line,"COMPOUNDROOT",12) == 0) {
 385           if (parse_flag(line, &compoundroot, afflst)) {
 386              delete afflst;
 387              return 1;
 388           }
 389        }
 390
 391        /* parse in the flag used by compound_check() method */
 392        if (strncmp(line,"COMPOUNDPERMITFLAG",18) == 0) {
 393           if (parse_flag(line, &compoundpermitflag, afflst)) {
 394              delete afflst;
 395              return 1;
 396           }
 397        }
 398
 399        /* parse in the flag used by compound_check() method */
 400        if (strncmp(line,"COMPOUNDFORBIDFLAG",18) == 0) {
 401           if (parse_flag(line, &compoundforbidflag, afflst)) {
 402              delete afflst;
 403              return 1;
 404           }
 405        }
 406
 407        if (strncmp(line,"CHECKCOMPOUNDDUP",16) == 0) {
 408                    checkcompounddup = 1;
 409        }
 410
 411        if (strncmp(line,"CHECKCOMPOUNDREP",16) == 0) {
 412                    checkcompoundrep = 1;
 413        }
 414
 415        if (strncmp(line,"CHECKCOMPOUNDTRIPLE",19) == 0) {
 416                    checkcompoundtriple = 1;
 417        }
 418
 419        if (strncmp(line,"SIMPLIFIEDTRIPLE",16) == 0) {
 420                    simplifiedtriple = 1;
 421        }
 422
 423        if (strncmp(line,"CHECKCOMPOUNDCASE",17) == 0) {
 424                    checkcompoundcase = 1;
 425        }
 426
 427        if (strncmp(line,"NOSUGGEST",9) == 0) {
 428           if (parse_flag(line, &nosuggest, afflst)) {
 429              delete afflst;
 430              return 1;
 431           }
 432        }
 433
 434        if (strncmp(line,"NONGRAMSUGGEST",14) == 0) {
 435           if (parse_flag(line, &nongramsuggest, afflst)) {
 436              delete afflst;
 437              return 1;
 438           }
 439        }
 440
 441        /* parse in the flag used by forbidden words */
 442        if (strncmp(line,"FORBIDDENWORD",13) == 0) {
 443           if (parse_flag(line, &forbiddenword, afflst)) {
 444              delete afflst;
 445              return 1;
 446           }
 447        }
 448
 449        /* parse in the flag used by forbidden words */
 450        if (strncmp(line,"LEMMA_PRESENT",13) == 0) {
 451           if (parse_flag(line, &lemma_present, afflst)) {
 452              delete afflst;
 453              return 1;
 454           }
 455        }
 456
 457        /* parse in the flag used by circumfixes */
 458        if (strncmp(line,"CIRCUMFIX",9) == 0) {
 459           if (parse_flag(line, &circumfix, afflst)) {
 460              delete afflst;
 461              return 1;
 462           }
 463        }
 464
 465        /* parse in the flag used by fogemorphemes */
 466        if (strncmp(line,"ONLYINCOMPOUND",14) == 0) {
 467           if (parse_flag(line, &onlyincompound, afflst)) {
 468              delete afflst;
 469              return 1;
 470           }
 471        }
 472
 473        /* parse in the flag used by `needaffixs' */
 474        if (strncmp(line,"PSEUDOROOT",10) == 0) {
 475           if (parse_flag(line, &needaffix, afflst)) {
 476              delete afflst;
 477              return 1;
 478           }
 479        }
 480
 481        /* parse in the flag used by `needaffixs' */
 482        if (strncmp(line,"NEEDAFFIX",9) == 0) {
 483           if (parse_flag(line, &needaffix, afflst)) {
 484              delete afflst;
 485              return 1;
 486           }
 487        }
 488
 489        /* parse in the minimal length for words in compounds */
 490        if (strncmp(line,"COMPOUNDMIN",11) == 0) {
 491           if (parse_num(line, &cpdmin, afflst)) {
 492              delete afflst;
 493              return 1;
 494           }
 495           if (cpdmin < 1) cpdmin = 1;
 496        }
 497
 498        /* parse in the max. words and syllables in compounds */
 499        if (strncmp(line,"COMPOUNDSYLLABLE",16) == 0) {
 500           if (parse_cpdsyllable(line, afflst)) {
 501              delete afflst;
 502              return 1;
 503           }
 504        }
 505
 506        /* parse in the flag used by compound_check() method */
 507        if (strncmp(line,"SYLLABLENUM",11) == 0) {
 508           if (parse_string(line, &cpdsyllablenum, afflst->getlinenum())) {
 509              delete afflst;
 510              return 1;
 511           }
 512        }
 513
 514        /* parse in the flag used by the controlled compound words */
 515        if (strncmp(line,"CHECKNUM",8) == 0) {
 516            checknum=1;
 517        }
 518
 519        /* parse in the extra word characters */
 520        if (strncmp(line,"WORDCHARS",9) == 0) {
 521           if (parse_array(line, &wordchars, &wordchars_utf16, &wordchars_utf16_len, utf8, afflst->getlinenum())) {
 522              delete afflst;
 523              return 1;
 524           }
 525        }
 526
 527        /* parse in the ignored characters (for example, Arabic optional diacretics charachters */
 528        if (strncmp(line,"IGNORE",6) == 0) {
 529           if (parse_array(line, &ignorechars, &ignorechars_utf16, &ignorechars_utf16_len, utf8, afflst->getlinenum())) {
 530              delete afflst;
 531              return 1;
 532           }
 533        }
 534
 535        /* parse in the typical fault correcting table */
 536        if (strncmp(line,"REP",3) == 0) {
 537           if (parse_reptable(line, afflst)) {
 538              delete afflst;
 539              return 1;
 540           }
 541        }
 542
 543        /* parse in the input conversion table */
 544        if (strncmp(line,"ICONV",5) == 0) {
 545           if (parse_convtable(line, afflst, &iconvtable, "ICONV")) {
 546              delete afflst;
 547              return 1;
 548           }
 549        }
 550
 551        /* parse in the input conversion table */
 552        if (strncmp(line,"OCONV",5) == 0) {
 553           if (parse_convtable(line, afflst, &oconvtable, "OCONV")) {
 554              delete afflst;
 555              return 1;
 556           }
 557        }
 558
 559        /* parse in the phonetic translation table */
 560        if (strncmp(line,"PHONE",5) == 0) {
 561           if (parse_phonetable(line, afflst)) {
 562              delete afflst;
 563              return 1;
 564           }
 565        }
 566
 567        /* parse in the checkcompoundpattern table */
 568        if (strncmp(line,"CHECKCOMPOUNDPATTERN",20) == 0) {
 569           if (parse_checkcpdtable(line, afflst)) {
 570              delete afflst;
 571              return 1;
 572           }
 573        }
 574
 575        /* parse in the defcompound table */
 576        if (strncmp(line,"COMPOUNDRULE",12) == 0) {
 577           if (parse_defcpdtable(line, afflst)) {
 578              delete afflst;
 579              return 1;
 580           }
 581        }
 582
 583        /* parse in the related character map table */
 584        if (strncmp(line,"MAP",3) == 0) {
 585           if (parse_maptable(line, afflst)) {
 586              delete afflst;
 587              return 1;
 588           }
 589        }
 590
 591        /* parse in the word breakpoints table */
 592        if (strncmp(line,"BREAK",5) == 0) {
 593           if (parse_breaktable(line, afflst)) {
 594              delete afflst;
 595              return 1;
 596           }
 597        }
 598
 599        /* parse in the language for language specific codes */
 600        if (strncmp(line,"LANG",4) == 0) {
 601           if (parse_string(line, &lang, afflst->getlinenum())) {
 602              delete afflst;
 603              return 1;
 604           }
 605           langnum = get_lang_num(lang);
 606        }
 607
 608        if (strncmp(line,"VERSION",7) == 0) {
 609           for(line = line + 7; *line == ' ' || *line == '\t'; line++);
 610           version = mystrdup(line);
 611        }
 612
 613        if (strncmp(line,"MAXNGRAMSUGS",12) == 0) {
 614           if (parse_num(line, &maxngramsugs, afflst)) {
 615              delete afflst;
 616              return 1;
 617           }
 618        }
 619
 620        if (strncmp(line,"ONLYMAXDIFF", 11) == 0)
 621                    onlymaxdiff = 1;
 622
 623        if (strncmp(line,"MAXDIFF",7) == 0) {
 624           if (parse_num(line, &maxdiff, afflst)) {
 625              delete afflst;
 626              return 1;
 627           }
 628        }
 629
 630        if (strncmp(line,"MAXCPDSUGS",10) == 0) {
 631           if (parse_num(line, &maxcpdsugs, afflst)) {
 632              delete afflst;
 633              return 1;
 634           }
 635        }
 636
 637        if (strncmp(line,"NOSPLITSUGS",11) == 0) {
 638                    nosplitsugs=1;
 639        }
 640
 641        if (strncmp(line,"FULLSTRIP",9) == 0) {
 642                    fullstrip=1;
 643        }
 644
 645        if (strncmp(line,"SUGSWITHDOTS",12) == 0) {
 646                    sugswithdots=1;
 647        }
 648
 649        /* parse in the flag used by forbidden words */
 650        if (strncmp(line,"KEEPCASE",8) == 0) {
 651           if (parse_flag(line, &keepcase, afflst)) {
 652              delete afflst;
 653              return 1;
 654           }
 655        }
 656
 657        /* parse in the flag used by `forceucase' */
 658        if (strncmp(line,"FORCEUCASE",10) == 0) {
 659           if (parse_flag(line, &forceucase, afflst)) {
 660              delete afflst;
 661              return 1;
 662           }
 663        }
 664
 665        /* parse in the flag used by `warn' */
 666        if (strncmp(line,"WARN",4) == 0) {
 667           if (parse_flag(line, &warn, afflst)) {
 668              delete afflst;
 669              return 1;
 670           }
 671        }
 672
 673        if (strncmp(line,"FORBIDWARN",10) == 0) {
 674                    forbidwarn=1;
 675        }
 676
 677        /* parse in the flag used by the affix generator */
 678        if (strncmp(line,"SUBSTANDARD",11) == 0) {
 679           if (parse_flag(line, &substandard, afflst)) {
 680              delete afflst;
 681              return 1;
 682           }
 683        }
 684
 685        if (strncmp(line,"CHECKSHARPS",11) == 0) {
 686                    checksharps=1;
 687        }
 688
 689        /* parse this affix: P - prefix, S - suffix */
 690        ft = ' ';
 691        if (strncmp(line,"PFX",3) == 0) ft = complexprefixes ? 'S' : 'P';
 692        if (strncmp(line,"SFX",3) == 0) ft = complexprefixes ? 'P' : 'S';
 693        if (ft != ' ') {
 694           if (dupflags_ini) {
 695             memset(dupflags, 0, sizeof(dupflags));
 696             dupflags_ini = 0;
 697           }
 698           if (parse_affix(line, ft, afflst, dupflags)) {
 699              delete afflst;
 700              process_pfx_tree_to_list();
 701              process_sfx_tree_to_list();
 702              return 1;
 703           }
 704        }
 705
 706     }
 707     delete afflst;
 708
 709     // convert affix trees to sorted list
 710     process_pfx_tree_to_list();
 711     process_sfx_tree_to_list();
 712
 713     // now we can speed up performance greatly taking advantage of the
 714     // relationship between the affixes and the idea of "subsets".
 715
 716     // View each prefix as a potential leading subset of another and view
 717     // each suffix (reversed) as a potential trailing subset of another.
 718
 719     // To illustrate this relationship if we know the prefix "ab" is found in the
 720     // word to examine, only prefixes that "ab" is a leading subset of need be examined.
 721     // Furthermore is "ab" is not present then none of the prefixes that "ab" is
 722     // is a subset need be examined.
 723     // The same argument goes for suffix string that are reversed.
 724
 725     // Then to top this off why not examine the first char of the word to quickly
 726     // limit the set of prefixes to examine (i.e. the prefixes to examine must
 727     // be leading supersets of the first character of the word (if they exist)
 728
 729     // To take advantage of this "subset" relationship, we need to add two links
 730     // from entry.  One to take next if the current prefix is found (call it nexteq)
 731     // and one to take next if the current prefix is not found (call it nextne).
 732
 733     // Since we have built ordered lists, all that remains is to properly initialize
 734     // the nextne and nexteq pointers that relate them
 735
 736     process_pfx_order();
 737     process_sfx_order();
 738
 739     /* get encoding for CHECKCOMPOUNDCASE */
 740     if (!utf8) {
 741     char * enc = get_encoding();
 742     csconv = get_current_cs(enc);
 743     free(enc);
 744     enc = NULL;
 745
 746     char expw[MAXLNLEN];
 747     if (wordchars) {
 748         strcpy(expw, wordchars);
 749         free(wordchars);
 750     } else *expw = '\0';
 751
 752     for (int i = 0; i <= 255; i++) {
 753         if ( (csconv[i].cupper != csconv[i].clower) &&
 754             (! strchr(expw, (char) i))) {
 755                 *(expw + strlen(expw) + 1) = '\0';
 756                 *(expw + strlen(expw)) = (char) i;
 757         }
 758     }
 759
 760     wordchars = mystrdup(expw);
 761     }
 762
 763     // default BREAK definition
 764     if (numbreak == -1) {
 765         breaktable = (char **) malloc(sizeof(char *) * 3);
 766         if (!breaktable) return 1;
 767         breaktable[0] = mystrdup("-");
 768         breaktable[1] = mystrdup("^-");
 769         breaktable[2] = mystrdup("-$");
 770         if (breaktable[0] && breaktable[1] && breaktable[2]) numbreak = 3;
 771     }
 772     return 0;
 773 }
 774
 775
 776 // we want to be able to quickly access prefix information
 777 // both by prefix flag, and sorted by prefix string itself
 778 // so we need to set up two indexes
 779
 780 int AffixMgr::build_pfxtree(PfxEntry* pfxptr)
 781 {
 782   PfxEntry * ptr;
 783   PfxEntry * pptr;
 784   PfxEntry * ep = pfxptr;
 785
 786   // get the right starting points
 787   const char * key = ep->getKey();
 788   const unsigned char flg = (unsigned char) (ep->getFlag() & 0x00FF);
 789
 790   // first index by flag which must exist
 791   ptr = pFlag[flg];
 792   ep->setFlgNxt(ptr);
 793   pFlag[flg] = ep;
 794
 795
 796   // handle the special case of null affix string
 797   if (strlen(key) == 0) {
 798     // always inset them at head of list at element 0
 799      ptr = pStart[0];
 800      ep->setNext(ptr);
 801      pStart[0] = ep;
 802      return 0;
 803   }
 804
 805   // now handle the normal case
 806   ep->setNextEQ(NULL);
 807   ep->setNextNE(NULL);
 808
 809   unsigned char sp = *((const unsigned char *)key);
 810   ptr = pStart[sp];
 811
 812   // handle the first insert
 813   if (!ptr) {
 814      pStart[sp] = ep;
 815      return 0;
 816   }
 817
 818
 819   // otherwise use binary tree insertion so that a sorted
 820   // list can easily be generated later
 821   pptr = NULL;
 822   for (;;) {
 823     pptr = ptr;
 824     if (strcmp(ep->getKey(), ptr->getKey() ) <= 0) {
 825        ptr = ptr->getNextEQ();
 826        if (!ptr) {
 827           pptr->setNextEQ(ep);
 828           break;
 829        }
 830     } else {
 831        ptr = ptr->getNextNE();
 832        if (!ptr) {
 833           pptr->setNextNE(ep);
 834           break;
 835        }
 836     }
 837   }
 838   return 0;
 839 }
 840
 841 // we want to be able to quickly access suffix information
 842 // both by suffix flag, and sorted by the reverse of the
 843 // suffix string itself; so we need to set up two indexes
 844 int AffixMgr::build_sfxtree(SfxEntry* sfxptr)
 845 {
 846   SfxEntry * ptr;
 847   SfxEntry * pptr;
 848   SfxEntry * ep = sfxptr;
 849
 850   /* get the right starting point */
 851   const char * key = ep->getKey();
 852   const unsigned char flg = (unsigned char) (ep->getFlag() & 0x00FF);
 853
 854   // first index by flag which must exist
 855   ptr = sFlag[flg];
 856   ep->setFlgNxt(ptr);
 857   sFlag[flg] = ep;
 858
 859   // next index by affix string
 860
 861   // handle the special case of null affix string
 862   if (strlen(key) == 0) {
 863     // always inset them at head of list at element 0
 864      ptr = sStart[0];
 865      ep->setNext(ptr);
 866      sStart[0] = ep;
 867      return 0;
 868   }
 869
 870   // now handle the normal case
 871   ep->setNextEQ(NULL);
 872   ep->setNextNE(NULL);
 873
 874   unsigned char sp = *((const unsigned char *)key);
 875   ptr = sStart[sp];
 876
 877   // handle the first insert
 878   if (!ptr) {
 879      sStart[sp] = ep;
 880      return 0;
 881   }
 882
 883   // otherwise use binary tree insertion so that a sorted
 884   // list can easily be generated later
 885   pptr = NULL;
 886   for (;;) {
 887     pptr = ptr;
 888     if (strcmp(ep->getKey(), ptr->getKey() ) <= 0) {
 889        ptr = ptr->getNextEQ();
 890        if (!ptr) {
 891           pptr->setNextEQ(ep);
 892           break;
 893        }
 894     } else {
 895        ptr = ptr->getNextNE();
 896        if (!ptr) {
 897           pptr->setNextNE(ep);
 898           break;
 899        }
 900     }
 901   }
 902   return 0;
 903 }
 904
 905 // convert from binary tree to sorted list
 906 int AffixMgr::process_pfx_tree_to_list()
 907 {
 908   for (int i=1; i< SETSIZE; i++) {
 909     pStart[i] = process_pfx_in_order(pStart[i],NULL);
 910   }
 911   return 0;
 912 }
 913
 914
 915 PfxEntry* AffixMgr::process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr)
 916 {
 917   if (ptr) {
 918     nptr = process_pfx_in_order(ptr->getNextNE(), nptr);
 919     ptr->setNext(nptr);
 920     nptr = process_pfx_in_order(ptr->getNextEQ(), ptr);
 921   }
 922   return nptr;
 923 }
 924
 925
 926 // convert from binary tree to sorted list
 927 int AffixMgr:: process_sfx_tree_to_list()
 928 {
 929   for (int i=1; i< SETSIZE; i++) {
 930     sStart[i] = process_sfx_in_order(sStart[i],NULL);
 931   }
 932   return 0;
 933 }
 934
 935 SfxEntry* AffixMgr::process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr)
 936 {
 937   if (ptr) {
 938     nptr = process_sfx_in_order(ptr->getNextNE(), nptr);
 939     ptr->setNext(nptr);
 940     nptr = process_sfx_in_order(ptr->getNextEQ(), ptr);
 941   }
 942   return nptr;
 943 }
 944
 945
 946 // Recompute the NextEQ and NextNE links of the sorted prefix lists so that
 947 // a search can follow them, using the idea of leading subsets of the keys.
 948 int AffixMgr::process_pfx_order()
 949 {
 950     // slot 0 is skipped: the loop starts at slot 1
 951     for (int slot = 1; slot < SETSIZE; slot++) {
 952
 953          // Pass 1, for every entry of the list:
 954          //  - NextNE becomes the first later entry whose key the
 955          //    current key is not a leading subset of (NULL if none)
 956          //  - NextEQ becomes the direct successor if the current key
 957          //    is a leading subset of the successor's key, else NULL
 958
 959          PfxEntry * cur = pStart[slot];
 960          while (cur != NULL) {
 961              PfxEntry * scan = cur->getNext();
 962              while ((scan != NULL) && isSubset(cur->getKey(), scan->getKey())) {
 963                  scan = scan->getNext();
 964              }
 965              cur->setNextNE(scan);
 966              cur->setNextEQ(NULL);
 967
 968              PfxEntry * succ = cur->getNext();
 969              if (succ && isSubset(cur->getKey(), succ->getKey())) {
 970                  cur->setNextEQ(succ);
 971              }
 972              cur = cur->getNext();
 973          }
 974
 975          // Pass 2, smart search termination: for every entry, find the
 976          // last entry of the run of successors whose keys it is a leading
 977          // subset of, and clear the NextNE link of that last entry, so a
 978          // search that fails there ends instead of moving on.
 979
 980          for (cur = pStart[slot]; cur != NULL; cur = cur->getNext()) {
 981              PfxEntry * last = NULL;
 982              PfxEntry * scan = cur->getNext();
 983              while ((scan != NULL) && isSubset(cur->getKey(), scan->getKey())) {
 984                  last = scan;
 985                  scan = scan->getNext();
 986              }
 987              // 'last' stays NULL when the run of such successors is empty
 988              if (last) last->setNextNE(NULL);
 989          }
 990     }
 991
 992     return 0;
 993 }
 994
 995 // Recompute the NextEQ and NextNE links of the sorted suffix lists so that
 996 // a search can follow them, using the idea of leading subsets of the keys.
 997 int AffixMgr::process_sfx_order()
 998 {
 999     // walk the suffix list of every slot except slot 0
1000     for (int slot = 1; slot < SETSIZE; slot++) {
1001
1002          // Pass 1, for every entry of the list:
1003          //  - NextNE becomes the first later entry whose key the
1004          //    current key is not a leading subset of (NULL if none)
1005          //  - NextEQ becomes the direct successor if the current key
1006          //    is a leading subset of the successor's key, else NULL
1007
1008          SfxEntry * cur = sStart[slot];
1009          while (cur != NULL) {
1010              SfxEntry * scan = cur->getNext();
1011              while ((scan != NULL) && isSubset(cur->getKey(), scan->getKey())) {
1012                  scan = scan->getNext();
1013              }
1014              cur->setNextNE(scan);
1015              cur->setNextEQ(NULL);
1016
1017              SfxEntry * succ = cur->getNext();
1018              if (succ && isSubset(cur->getKey(), succ->getKey())) {
1019                  cur->setNextEQ(succ);
1020              }
1021              cur = cur->getNext();
1022          }
1023
1024          // Pass 2, smart search termination: for every entry, find the
1025          // last entry of the run of successors whose keys it is a leading
1026          // subset of, and clear the NextNE link of that last entry, so a
1027          // search that fails there ends instead of moving on.
1028
1029          for (cur = sStart[slot]; cur != NULL; cur = cur->getNext()) {
1030              SfxEntry * last = NULL;
1031              SfxEntry * scan = cur->getNext();
1032              while ((scan != NULL) && isSubset(cur->getKey(), scan->getKey())) {
1033                  last = scan;
1034                  scan = scan->getNext();
1035              }
1036              // 'last' stays NULL when the run of such successors is empty
1037              if (last) last->setNextNE(NULL);
1038          }
1039     }
1040
1041     return 0;
1042 }
1043
1044 // Append " ", MORPH_FLAG and the encoded flag to result (dictionary debugging).
1045 void AffixMgr::debugflag(char * result, unsigned short flag) {
1046     char * encoded = encode_flag(flag); // released with free() below
1047     mystrcat(result, " ", MAXLNLEN);
1048     mystrcat(result, MORPH_FLAG, MAXLNLEN);
1049     // without an encoding only the separator and the field tag are appended
1050     if (!encoded) return;
1051     mystrcat(result, encoded, MAXLNLEN);
1052     free(encoded);
1053 }
1054
1055 // Count the positions of a condition string; a [...] group counts once.
1056 int AffixMgr::condlen(char * st)
1057 {
1058   int count = 0;
1059   bool in_group = false;
1060   for (const char * p = st; *p != '\0'; ++p) {
1061     const char c = *p;
1062     if (c == '[') { in_group = true; ++count; continue; }
1063     if (c == ']') { in_group = false; continue; }
1064     if (in_group) continue; // the members of a group were counted at '['
1065     // in UTF-8 mode bytes of the form 11xxxxxx are the only ones not counted
1066     if (!utf8 || !(c & 0x80) || ((c & 0xc0) == 0x80)) ++count;
1067   }
1068   return count;
1069 }
1070 // Store the condition string cs in entry; returns 1 if mystrdup() fails, else 0.
1071 int AffixMgr::encodeit(affentry &entry, char * cs)
1072 {
1073   if (strcmp(cs, ".") == 0) {
1074     // a lone dot stands for "no condition"
1075     entry.numconds = 0;
1076     entry.c.conds[0] = '\0';
1077     return 0;
1078   }
1079   entry.numconds = (char) condlen(cs);
1080   strncpy(entry.c.conds, cs, MAXCONDLEN);
1081   // strncpy pads short input with zeros: a used last byte plus more input means "long"
1082   const bool fits = !(entry.c.conds[MAXCONDLEN - 1] && cs[MAXCONDLEN]);
1083   if (fits) return 0;
1084   entry.opts += aeLONGCOND;
1085   entry.c.l.conds2 = mystrdup(cs + MAXCONDLEN_1);
1086   return entry.c.l.conds2 ? 0 : 1;
1087 }
1088
1089 // return 1 if s1 is a leading subset of s2 (a '.' in s1 matches any byte of s2)
1090 inline int AffixMgr::isSubset(const char * s1, const char * s2)
1091  {
1092     while ((*s1 != '\0') && ((*s1 == '.') || (*s1 == *s2))) {
1093         s1++;
1094         if (*s2 != '\0') s2++; // a '.' may match the terminator: never step past it
1095     }
1096     return (*s1 == '\0');
1097  }
1098
1099
1100 // check word for prefixes; on success pfx is set to the matching entry
1101 struct hentry * AffixMgr::prefix_check(const char * word, int len, char in_compound,
1102     const FLAG needflag)
1103 {
1104     // reset the members that describe the last match
1105     pfx = NULL;
1106     pfxappnd = NULL;
1107     sfxappnd = NULL;
1108
1109     // Pass 1: the zero length prefixes of slot 0, in list order
1110     for (PfxEntry * pe = pStart[0]; pe; pe = pe->getNext()) {
1111         // fogemorpheme: outside of compounds, skip prefixes whose
1112         // continuation flags contain onlyincompound
1113         if ((in_compound == IN_CPD_NOT) && pe->getCont() &&
1114             TESTAFF(pe->getCont(), onlyincompound, pe->getContLen())) continue;
1115
1116         // at the end of a compound only prefixes whose continuation
1117         // flags contain compoundpermitflag are permitted
1118         if ((in_compound == IN_CPD_END) && !(pe->getCont() &&
1119             TESTAFF(pe->getCont(), compoundpermitflag, pe->getContLen()))) continue;
1120
1121         // check prefix
1122         struct hentry * hit = pe->checkword(word, len, in_compound, needflag);
1123         if (hit) {
1124             pfx = pe; // BUG: pfx not stateless
1125             return hit;
1126         }
1127     }
1128
1129     // Pass 2: the general case, the list selected by the first byte of word
1130     unsigned char sp = *((const unsigned char *)word);
1131     PfxEntry * pptr = pStart[sp];
1132
1133     while (pptr) {
1134         // key does not lead the word: take the NextNE link
1135         if (!isSubset(pptr->getKey(), word)) {
1136             pptr = pptr->getNextNE();
1137             continue;
1138         }
1139
1140         // the same two compound restrictions as in pass 1
1141         const bool cpd_only = (in_compound == IN_CPD_NOT) && pptr->getCont() &&
1142             TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen());
1143         const bool not_permitted = !cpd_only && (in_compound == IN_CPD_END) &&
1144             !(pptr->getCont() &&
1145               TESTAFF(pptr->getCont(), compoundpermitflag, pptr->getContLen()));
1146
1147         if (!cpd_only && !not_permitted) {
1148             // check prefix
1149             struct hentry * hit = pptr->checkword(word, len, in_compound, needflag);
1150             if (hit) {
1151                 pfx = pptr; // BUG: pfx not stateless
1152                 return hit;
1153             }
1154         }
1155         // key leads the word, but no result: take the NextEQ link
1156         pptr = pptr->getNextEQ();
1157     }
1158     return NULL;
1159 }
1160
1161 // check word for prefixes, using the check_twosfx() test of the entries
1162 struct hentry * AffixMgr::prefix_check_twosfx(const char * word, int len,
1163     char in_compound, const FLAG needflag)
1164 {
1165     pfx = NULL;
1166     sfxappnd = NULL;
1167
1168     // Pass 1: zero length prefixes (slot 0), tried in list order.
1169     // Note that a hit here returns without storing the entry in pfx.
1170     for (PfxEntry * pe = pStart[0]; pe; pe = pe->getNext()) {
1171         struct hentry * hit = pe->check_twosfx(word, len, in_compound, needflag);
1172         if (hit) return hit;
1173     }
1174
1175     // Pass 2: the general case, the list selected by the first byte of word
1176     unsigned char sp = *((const unsigned char *)word);
1177     PfxEntry * pptr = pStart[sp];
1178
1179     while (pptr) {
1180         if (!isSubset(pptr->getKey(), word)) {
1181             // key does not lead the word: take the NextNE link
1182             pptr = pptr->getNextNE();
1183             continue;
1184         }
1185
1186         struct hentry * hit = pptr->check_twosfx(word, len, in_compound, needflag);
1187         if (hit) {
1188             pfx = pptr; // remember which prefix entry matched
1189             return hit;
1190         }
1191
1192         // key leads the word, but no result: take the NextEQ link
1193         pptr = pptr->getNextEQ();
1194     }
1195
1196     return NULL;
1197 }
1198
1199 // check word for prefixes, collecting the check_morph() results
1200 char * AffixMgr::prefix_check_morph(const char * word, int len, char in_compound,
1201     const FLAG needflag)
1202 {
1203     // concatenation of all partial results (bounded by MAXLNLEN)
1204     char result[MAXLNLEN];
1205     result[0] = '\0';
1206
1207     pfx = NULL;
1208     sfxappnd = NULL;
1209
1210     // Pass 1: zero length prefixes (slot 0); every entry is tried
1211     for (PfxEntry * pe = pStart[0]; pe; pe = pe->getNext()) {
1212        char * part = pe->check_morph(word, len, in_compound, needflag);
1213        if (!part) continue;
1214        mystrcat(result, part, MAXLNLEN);
1215        free(part);
1216     }
1217
1218     // Pass 2: the general case, the list selected by the first byte of word
1219     unsigned char sp = *((const unsigned char *)word);
1220     PfxEntry * pptr = pStart[sp];
1221
1222     while (pptr) {
1223         // key does not lead the word: take the NextNE link
1224         if (!isSubset(pptr->getKey(), word)) {
1225             pptr = pptr->getNextNE();
1226             continue;
1227         }
1228
1229         char * part = pptr->check_morph(word, len, in_compound, needflag);
1230         if (part) {
1231             // fogemorpheme: outside of compounds the results of prefixes
1232             // whose continuation flags contain onlyincompound are dropped
1233             const bool cpd_only = (in_compound == IN_CPD_NOT) && pptr->getCont() &&
1234                 TESTAFF(pptr->getCont(), onlyincompound, pptr->getContLen());
1235             if (!cpd_only) {
1236                 mystrcat(result, part, MAXLNLEN);
1237                 pfx = pptr;
1238             }
1239             free(part);
1240         }
1241         // key leads the word: continue with the NextEQ link
1242         pptr = pptr->getNextEQ();
1243     }
1244
1245     // NULL if nothing was collected, else the mystrdup() of the text
1246     return (*result) ? mystrdup(result) : NULL;
1247 }
1248
1249
1250 // check word for prefixes, collecting the check_twosfx_morph() results
1251 char * AffixMgr::prefix_check_twosfx_morph(const char * word, int len,
1252     char in_compound, const FLAG needflag)
1253 {
1254     // concatenation of all partial results (bounded by MAXLNLEN)
1255     char result[MAXLNLEN];
1256     result[0] = '\0';
1257
1258     pfx = NULL;
1259     sfxappnd = NULL;
1260
1261     // Pass 1: zero length prefixes (slot 0); every entry is tried
1262     for (PfxEntry * pe = pStart[0]; pe; pe = pe->getNext()) {
1263         char * part = pe->check_twosfx_morph(word, len, in_compound, needflag);
1264         if (!part) continue;
1265         mystrcat(result, part, MAXLNLEN);
1266         free(part);
1267     }
1268
1269     // Pass 2: the general case, the list selected by the first byte of word
1270     unsigned char sp = *((const unsigned char *)word);
1271     PfxEntry * pptr = pStart[sp];
1272
1273     while (pptr) {
1274         // key does not lead the word: take the NextNE link
1275         if (!isSubset(pptr->getKey(), word)) {
1276             pptr = pptr->getNextNE();
1277             continue;
1278         }
1279
1280         char * part = pptr->check_twosfx_morph(word, len, in_compound, needflag);
1281         if (part) {
1282             mystrcat(result, part, MAXLNLEN);
1283             free(part);
1284             pfx = pptr; // the last contributing entry stays in pfx
1285         }
1286
1287         // key leads the word: continue with the NextEQ link
1288         pptr = pptr->getNextEQ();
1289     }
1290
1291     // NULL if nothing was collected, else the mystrdup() of the text
1292     return (*result) ? mystrdup(result) : NULL;
1293 }
1294
1295 // Is word a non compound with a REP substitution (see checkcompoundrep)?
1296 int AffixMgr::cpdrep_check(const char * word, int wl)
1297 {
1298   char candidate[MAXLNLEN];
1299   if ((wl < 2) || !numrep) return 0;
1300   for (int i = 0; i < numrep; i++) {
1301       const char * r = word;
1302       const size_t lenr = strlen(reptable[i].pattern2);
1303       const size_t lenp = strlen(reptable[i].pattern);
1304       // search every occurrence of the pattern in the word
1305       while ((r = strstr(r, reptable[i].pattern)) != NULL) {
1306           const size_t head = (size_t) (r - word); // bytes kept in front of the match
1307           const size_t tail = strlen(r + lenp);    // bytes kept behind the match
1308           // size check first: nothing is written unless the candidate fits
1309           if (head + lenr + tail >= (size_t) MAXLNLEN) break;
1310           memcpy(candidate, word, head);
1311           memcpy(candidate + head, reptable[i].pattern2, lenr);
1312           memcpy(candidate + head + lenr, r + lenp, tail + 1); // with terminator
1313           if (candidate_check(candidate, (int) (head + lenr + tail))) return 1;
1314           if (*r == '\0') break; // an empty pattern matched at the terminator
1315           r++; // search for the next letter
1316       }
1317   }
1318   return 0;
1319 }
1320
1321 // forbid compoundings when there are special patterns at word bound (1 = forbidden)
1322 int AffixMgr::cpdpat_check(const char * word, int pos, hentry * r1, hentry * r2, const char affixed)
1323 {
1324   for (int i = 0; i < numcheckcpd; i++) {
1325       if (!isSubset(checkcpdtable[i].pattern2, word + pos)) continue; // part after pos
1326       if (r1 && checkcpdtable[i].cond &&      // flag condition of the first entry
1327           !(r1->astr && TESTAFF(r1->astr, checkcpdtable[i].cond, r1->alen))) continue;
1328       if (r2 && checkcpdtable[i].cond2 &&     // flag condition of the second entry
1329           !(r2->astr && TESTAFF(r2->astr, checkcpdtable[i].cond2, r2->alen))) continue;
1330       const char * pat = checkcpdtable[i].pattern;
1331       if (!*pat) return 1; // zero length pattern => only TESTAFF
1332       if (*pat == '0') {
1333           // zero pattern (0/flag) => unmodified stem (zero affixes allowed); needs r1
1334           if (r1 && (r1->blen <= pos) && strncmp(word + pos - r1->blen, r1->word, r1->blen) == 0) return 1;
1335       } else {
1336           const int len = (int) strlen(pat); // non-zero: *pat is not the terminator
1337           if ((len <= pos) && strncmp(word + pos - len, pat, len) == 0) return 1; // must fit before pos
1338       }
1339   }
1340   return 0;
1341 }
1342
1343 // forbid compounding with neighbouring upper and lower case characters at word bounds
1344 int AffixMgr::cpdcase_check(const char * word, int pos)
1345 {
1346   if (!utf8) {
1347       // 8-bit encoding: the bytes on both sides of the boundary
1348       const unsigned char a = word[pos - 1];
1349       const unsigned char b = word[pos];
1350       const bool cased = csconv[a].ccase || csconv[b].ccase;
1351       return (cased && (a != '-') && (b != '-')) ? 1 : 0;
1352   }
1353   w_char u, w;
1354   u8_u16(&u, 1, word + pos);               // character after the boundary
1355   const char * p = word + pos - 1;
1356   while ((*p & 0xc0) == 0x80) p--;         // back up over continuation bytes
1357   u8_u16(&w, 1, p);                        // character before the boundary
1358   const unsigned short a = (u.h << 8) + u.l;
1359   const unsigned short b = (w.h << 8) + w.l;
1360   const bool upper = (unicodetoupper(a, langnum) == a) || (unicodetoupper(b, langnum) == b);
1361   return (upper && (a != '-') && (b != '-')) ? 1 : 0;
1362 }
1363
1364 // check compound patterns
     //
     // Stores rv at (*words)[wnum] and tests the entries (*words)[0..wnum]
     // against the rules of defcpdtable. In a rule, '*' and '?' are
     // metacharacters that apply to the flag in front of them.
     //   words - in/out: address of the entry array; if *words is NULL, def
     //           is used as the array
     //   wnum  - index of the slot that receives rv
     //   rv    - entry of the word to add; dereferenced without a NULL check
     //   def   - fallback array used when *words is NULL (may be NULL itself)
     //   all   - NOTE(review): if zero, a rule seems to be accepted as soon as
     //           the entries so far matched, even if the rule is not finished
     //           (see the test after the backtracking loop) - confirm
     // Returns 1 on a match; rv then stays stored and *words keeps pointing
     // to def if it was NULL on entry. Returns 0 otherwise, after clearing the
     // slot and resetting *words to NULL if it was NULL on entry.
1365 int AffixMgr::defcpd_check(hentry *** words, short wnum, hentry * rv, hentry ** def, char all)
1366 {
1367   signed short btpp[MAXWORDLEN]; // metacharacter (*, ?) positions for backtracking
1368   signed short btwp[MAXWORDLEN]; // word positions for metacharacters
1369   int btnum[MAXWORDLEN]; // number of matched characters in metacharacter positions
1370   short bt = 0;
1371   int i, j;
1372   int ok;
1373   int w = 0;
1374
     // no array yet: fall back to def and remember that (w) for the cleanup
1375   if (!*words) {
1376     w = 1;
1377     *words = def;
1378   }
1379
     // neither an array nor a fallback: nothing can be checked
1380   if (!*words) {
1381     return 0;
1382   }
1383
1384   (*words)[wnum] = rv;
1385
1386   // has the last word COMPOUNDRULE flag?
1387   if (rv->alen == 0) {
1388     (*words)[wnum] = NULL;
1389     if (w) *words = NULL;
1390     return 0;
1391   }
     // quick rejection: rv has to carry at least one flag that occurs
     // (as a non-metacharacter item) in some rule
1392   ok = 0;
1393   for (i = 0; i < numdefcpd; i++) {
1394     for (j = 0; j < defcpdtable[i].len; j++) {
1395        if (defcpdtable[i].def[j] != '*' && defcpdtable[i].def[j] != '?' &&
1396           TESTAFF(rv->astr, defcpdtable[i].def[j], rv->alen)) ok = 1;
1397     }
1398   }
1399   if (ok == 0) {
1400     (*words)[wnum] = NULL;
1401     if (w) *words = NULL;
1402     return 0;
1403   }
1404
     // try the rules one by one
1405   for (i = 0; i < numdefcpd; i++) {
1406     signed short pp = 0; // pattern position
1407     signed short wp = 0; // "words" position
1408     int ok2;
1409     ok = 1;
1410     ok2 = 1;
1411     do {
1412       while ((pp < defcpdtable[i].len) && (wp <= wnum)) {
           // the item at pp is followed by a metacharacter
1413         if (((pp+1) < defcpdtable[i].len) &&
1414           ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) {
               // '?': at most one word is tested, '*': words up to the last one
1415             int wend = (defcpdtable[i].def[pp+1] == '?') ? wp : wnum;
1416             ok2 = 1;
1417             pp+=2;
               // record the position behind the metacharacter for backtracking
1418             btpp[bt] = pp;
1419             btwp[bt] = wp;
               // consume words as long as they carry the flag in front of the
               // metacharacter (def[pp-2] after the pp+=2 above)
1420             while (wp <= wend) {
1421                 if (!(*words)[wp]->alen ||
1422                   !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp-2], (*words)[wp]->alen)) {
1423                     ok2 = 0;
1424                     break;
1425                 }
1426                 wp++;
1427             }
               // words are left over after this item
1428             if (wp <= wnum) ok2 = 0;
1429             btnum[bt] = wp - btwp[bt];
               // only an item that consumed words is kept as a backtracking point
1430             if (btnum[bt] > 0) bt++;
1431             if (ok2) break;
1432         } else {
               // plain flag: the current word has to exist and to carry it
1433             ok2 = 1;
1434             if (!(*words)[wp] || !(*words)[wp]->alen ||
1435               !TESTAFF((*words)[wp]->astr, defcpdtable[i].def[pp], (*words)[wp]->alen)) {
1436                 ok = 0;
1437                 break;
1438             }
1439             pp++;
1440             wp++;
               // the rule is finished, but words are left over
1441             if ((defcpdtable[i].len == pp) && !(wp > wnum)) ok = 0;
1442         }
1443       }
       // matched so far: accept if every remaining item of the rule is a
       // flag followed by '*' or '?'
1444     if (ok && ok2) {
1445         int r = pp;
1446         while ((defcpdtable[i].len > r) && ((r+1) < defcpdtable[i].len) &&
1447             ((defcpdtable[i].def[r+1] == '*') || (defcpdtable[i].def[r+1] == '?'))) r+=2;
1448         if (defcpdtable[i].len <= r) return 1;
1449     }
1450     // backtrack
       // give one word back from the most recent metacharacter match and
       // resume behind that metacharacter; exhausted points are dropped
1451     if (bt) do {
1452         ok = 1;
1453         btnum[bt - 1]--;
1454         pp = btpp[bt - 1];
1455         wp = btwp[bt - 1] + (signed short) btnum[bt - 1];
1456     } while ((btnum[bt - 1] < 0) && --bt);
1457   } while (bt);
1458
1459   if (ok && ok2 && (!all || (defcpdtable[i].len <= pp))) return 1;
1460
1461   // check zero ending
1462   while (ok && ok2 && (defcpdtable[i].len > pp) && ((pp+1) < defcpdtable[i].len) &&
1463     ((defcpdtable[i].def[pp+1] == '*') || (defcpdtable[i].def[pp+1] == '?'))) pp+=2;
1464   if (ok && ok2 && (defcpdtable[i].len <= pp)) return 1;
1465   }
     // no rule matched: undo the changes made above
1466   (*words)[wnum] = NULL;
1467   if (w) *words = NULL;
1468   return 0;
1469 }
1470 // Return 1 if word is found by lookup() or accepted by affix_check(), else 0.
1471 inline int AffixMgr::candidate_check(const char * word, int len)
1472 {
1473   // a direct dictionary hit is enough
1474   if (lookup(word) != NULL) return 1;
1475
1476   // NOTE: a separate prefix_check(word,len,1) step was disabled here:
1477 //  rv = prefix_check(word,len,1);
1478 //  if (rv) return 1;
1479
1480   // otherwise the word has to pass the affix analysis
1481   struct hentry * analysed = affix_check(word, len);
1482   if (analysed != NULL) return 1;
1483   return 0;
1484 }
1485
1486 // calculate number of syllable for compound-checking (one per vowel found)
1487 short AffixMgr::get_syllable(const char * word, int wlen)
1488 {
1489     // with cpdmaxsyllable == 0 nothing is counted
1490     if (cpdmaxsyllable == 0) return 0;
1491     short vowels = 0;
1492     if (utf8) {
1493         // without a UTF-16 vowel table the count stays 0
1494         if (!cpdvowels_utf16) return vowels;
1495         w_char wide[MAXWORDUTF8LEN];
1496         // every converted character found in the vowel table counts once
1497         for (int left = u8_u16(wide, MAXWORDUTF8LEN, word); left > 0; left--) {
1498             if (flag_bsearch((unsigned short *) cpdvowels_utf16,
1499                 ((unsigned short *) wide)[left - 1], cpdvowels_utf16_len)) vowels++;
1500         }
1501         return vowels;
1502     }
1503     // 8-bit encoding: count the first wlen bytes of word that occur in cpdvowels
1504     for (int k = 0; k < wlen; k++) if (strchr(cpdvowels, word[k])) vowels++;
1505     return vowels;
1506 }
1507 // Set *cmin / *cmax from cpdmin; in UTF-8 mode cpdmin is counted in characters.
1508 void AffixMgr::setcminmax(int * cmin, int * cmax, const char * word, int len) {
1509     if (utf8) {
1510         int i;
1511         for (*cmin = 0, i = 0; (i < cpdmin) && word[*cmin]; i++) { // forward, stops at the terminator
1512           for ((*cmin)++; (word[*cmin] & 0xc0) == 0x80; (*cmin)++);
1513         }
1514         for (*cmax = len, i = 0; (i < (cpdmin - 1)) && (*cmax > 0); i++) { // backward, never below 0
1515           for ((*cmax)--; (*cmax > 0) && ((word[*cmax] & 0xc0) == 0x80); (*cmax)--);
1516         }
1517     } else {
1518         *cmin = cpdmin;
1519         *cmax = len - cpdmin + 1;
1520     }
1521 }
1522
1523
1524 // check if compound word is correctly spelled
1525 // hu_mov_rule = spec. Hungarian rule (XXX)
1526 struct hentry * AffixMgr::compound_check(const char * word, int len,
1527     short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words = NULL,
1528     char hu_mov_rule = 0, char is_sug = 0, int * info = NULL)
1529 {
1530     int i;
1531     short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
1532     struct hentry * rv = NULL;
1533     struct hentry * rv_first;
1534     struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
1535     char st [MAXWORDUTF8LEN + 4];
1536     char ch = '\0';
1537     int cmin;
1538     int cmax;
1539     int striple = 0;
1540     int scpd = 0;
1541     int soldi = 0;
1542     int oldcmin = 0;
1543     int oldcmax = 0;
1544     int oldlen = 0;
1545     int checkedstriple = 0;
1546     int onlycpdrule;
1547     int affixed = 0;
1548     hentry ** oldwords = words;
1549
1550     int checked_prefix;
1551
1552     setcminmax(&cmin, &cmax, word, len);
1553
1554     strcpy(st, word);
1555
1556     for (i = cmin; i < cmax; i++) {
1557         // go to end of the UTF-8 character
1558         if (utf8) {
1559             for (; (st[i] & 0xc0) == 0x80; i++);
1560             if (i >= cmax) return NULL;
1561         }
1562
1563         words = oldwords;
1564         onlycpdrule = (words) ? 1 : 0;
1565
1566         do { // onlycpdrule loop
1567
1568         oldnumsyllable = numsyllable;
1569         oldwordnum = wordnum;
1570         checked_prefix = 0;
1571
1572
1573         do { // simplified checkcompoundpattern loop
1574
1575         if (scpd > 0) {
1576           for (; scpd <= numcheckcpd && (!checkcpdtable[scpd-1].pattern3 ||
1577             strncmp(word + i, checkcpdtable[scpd-1].pattern3, strlen(checkcpdtable[scpd-1].pattern3)) != 0); scpd++);
1578
1579           if (scpd > numcheckcpd) break; // break simplified checkcompoundpattern loop
1580           strcpy(st + i, checkcpdtable[scpd-1].pattern);
1581           soldi = i;
1582           i += strlen(checkcpdtable[scpd-1].pattern);
1583           strcpy(st + i, checkcpdtable[scpd-1].pattern2);
1584           strcpy(st + i + strlen(checkcpdtable[scpd-1].pattern2), word + soldi + strlen(checkcpdtable[scpd-1].pattern3));
1585
1586           oldlen = len;
1587           len += strlen(checkcpdtable[scpd-1].pattern) + strlen(checkcpdtable[scpd-1].pattern2) - strlen(checkcpdtable[scpd-1].pattern3);
1588           oldcmin = cmin;
1589           oldcmax = cmax;
1590           setcminmax(&cmin, &cmax, st, len);
1591
1592           cmax = len - cpdmin + 1;
1593         }
1594
1595         ch = st[i];
1596         st[i] = '\0';
1597
1598         sfx = NULL;
1599         pfx = NULL;
1600
1601         // FIRST WORD
1602
1603         affixed = 1;
1604         rv = lookup(st); // perhaps without prefix
1605
1606         // search homonym with compound flag
1607         while ((rv) && !hu_mov_rule &&
1608             ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
1609                 !((compoundflag && !words && !onlycpdrule && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1610                   (compoundbegin && !wordnum && !onlycpdrule &&
1611                         TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
1612                   (compoundmiddle && wordnum && !words && !onlycpdrule &&
1613                     TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
1614                   (numdefcpd && onlycpdrule &&
1615                     ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
1616                     (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))) ||
1617                   (scpd != 0 && checkcpdtable[scpd-1].cond != FLAG_NULL &&
1618                     !TESTAFF(rv->astr, checkcpdtable[scpd-1].cond, rv->alen)))
1619                   ) {
1620             rv = rv->next_homonym;
1621         }
1622
1623         if (rv) affixed = 0;
1624
1625         if (!rv) {
1626             if (onlycpdrule) break;
1627             if (compoundflag &&
1628              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
1629                 if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
1630                         FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
1631                     sfx->getCont() &&
1632                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
1633                             sfx->getContLen())) || (compoundend &&
1634                         TESTAFF(sfx->getCont(), compoundend,
1635                             sfx->getContLen())))) {
1636                         rv = NULL;
1637                 }
1638             }
1639
1640             if (rv ||
1641               (((wordnum == 0) && compoundbegin &&
1642                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
1643                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
1644               ((wordnum > 0) && compoundmiddle &&
1645                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
1646                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
1647               ) checked_prefix = 1;
1648         // else check forbiddenwords and needaffix
1649         } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1650             TESTAFF(rv->astr, needaffix, rv->alen) ||
1651             TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
1652             (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen))
1653              )) {
1654                 st[i] = ch;
1655                 //continue;
1656                 break;
1657         }
1658
1659             // check non_compound flag in suffix and prefix
1660             if ((rv) && !hu_mov_rule &&
1661                 ((pfx && pfx->getCont() &&
1662                     TESTAFF(pfx->getCont(), compoundforbidflag,
1663                         pfx->getContLen())) ||
1664                 (sfx && sfx->getCont() &&
1665                     TESTAFF(sfx->getCont(), compoundforbidflag,
1666                         sfx->getContLen())))) {
1667                     rv = NULL;
1668             }
1669
1670             // check compoundend flag in suffix and prefix
1671             if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
1672                 ((pfx && pfx->getCont() &&
1673                     TESTAFF(pfx->getCont(), compoundend,
1674                         pfx->getContLen())) ||
1675                 (sfx && sfx->getCont() &&
1676                     TESTAFF(sfx->getCont(), compoundend,
1677                         sfx->getContLen())))) {
1678                     rv = NULL;
1679             }
1680
1681             // check compoundmiddle flag in suffix and prefix
1682             if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
1683                 ((pfx && pfx->getCont() &&
1684                     TESTAFF(pfx->getCont(), compoundmiddle,
1685                         pfx->getContLen())) ||
1686                 (sfx && sfx->getCont() &&
1687                     TESTAFF(sfx->getCont(), compoundmiddle,
1688                         sfx->getContLen())))) {
1689                     rv = NULL;
1690             }
1691
1692         // check forbiddenwords
1693         if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1694             TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
1695             (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) {
1696                 return NULL;
1697             }
1698
1699         // increment word number, if the second root has a compoundroot flag
1700         if ((rv) && compoundroot &&
1701             (TESTAFF(rv->astr, compoundroot, rv->alen))) {
1702                 wordnum++;
1703         }
1704
1705         // first word is acceptable in compound words?
1706         if (((rv) &&
1707           ( checked_prefix || (words && words[wnum]) ||
1708             (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1709             ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
1710             ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen))// ||
1711 //            (numdefcpd && )
1712
1713 // LANG_hu section: spec. Hungarian rule
1714             || ((langnum == LANG_hu) && hu_mov_rule && (
1715                     TESTAFF(rv->astr, 'F', rv->alen) || // XXX hardwired Hungarian dictionary codes
1716                     TESTAFF(rv->astr, 'G', rv->alen) ||
1717                     TESTAFF(rv->astr, 'H', rv->alen)
1718                 )
1719               )
1720 // END of LANG_hu section
1721           ) &&
1722           (
1723              // test CHECKCOMPOUNDPATTERN conditions
1724              scpd == 0 || checkcpdtable[scpd-1].cond == FLAG_NULL ||
1725                 TESTAFF(rv->astr, checkcpdtable[scpd-1].cond, rv->alen)
1726           )
1727           && ! (( checkcompoundtriple && scpd == 0 && !words && // test triple letters
1728                    (word[i-1]==word[i]) && (
1729                       ((i>1) && (word[i-1]==word[i-2])) ||
1730                       ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
1731                    )
1732                ) ||
1733                (
1734                  checkcompoundcase && scpd == 0 && !words && cpdcase_check(word, i)
1735                ))
1736          )
1737 // LANG_hu section: spec. Hungarian rule
1738          || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
1739               (sfx && sfx->getCont() && ( // XXX hardwired Hungarian dic. codes
1740                         TESTAFF(sfx->getCont(), (unsigned short) 'x', sfx->getContLen()) ||
1741                         TESTAFF(sfx->getCont(), (unsigned short) '%', sfx->getContLen())
1742                     )
1743                )
1744              )
1745          ) { // first word is ok condition
1746
1747 // LANG_hu section: spec. Hungarian rule
1748             if (langnum == LANG_hu) {
1749                 // calculate syllable number of the word
1750                 numsyllable += get_syllable(st, i);
1751                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
1752                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
1753             }
1754 // END of LANG_hu section
1755
1756             // NEXT WORD(S)
1757             rv_first = rv;
1758             st[i] = ch;
1759
1760         do { // striple loop
1761
1762             // check simplifiedtriple
1763             if (simplifiedtriple) {
1764               if (striple) {
1765                 checkedstriple = 1;
1766                 i--; // check "fahrt" instead of "ahrt" in "Schiffahrt"
1767               } else if (i > 2 && *(word+i - 1) == *(word + i - 2)) striple = 1;
1768             }
1769
1770             rv = lookup((st+i)); // perhaps without prefix
1771
1772         // search homonym with compound flag
1773         while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
1774                         !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1775                           (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
1776                            (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))) ||
1777                              (scpd != 0 && checkcpdtable[scpd-1].cond2 != FLAG_NULL &&
1778                                 !TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))
1779                            )) {
1780             rv = rv->next_homonym;
1781         }
1782
1783             // check FORCEUCASE
1784             if (rv && forceucase && (rv) &&
1785                 (TESTAFF(rv->astr, forceucase, rv->alen)) && !(info && *info & SPELL_ORIGCAP)) rv = NULL;
1786
1787             if (rv && words && words[wnum + 1]) return rv_first;
1788
1789             oldnumsyllable2 = numsyllable;
1790             oldwordnum2 = wordnum;
1791
1792
1793 // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary code
1794             if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
1795                 numsyllable--;
1796             }
1797 // END of LANG_hu section
1798
1799             // increment word number, if the second root has a compoundroot flag
1800             if ((rv) && (compoundroot) &&
1801                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
1802                     wordnum++;
1803             }
1804
1805             // check forbiddenwords
1806             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1807                 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
1808                (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
1809
1810             // second word is acceptable, as a root?
1811             // hungarian conventions: compounding is acceptable,
1812             // when compound forms consist of 2 words, or if more,
1813             // then the syllable number of root words must be 6, or lesser.
1814
1815             if ((rv) && (
1816                       (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
1817                       (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
1818                     )
1819                 && (
1820                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) ||
1821                       ((cpdmaxsyllable!=0) &&
1822                           (numsyllable + get_syllable(HENTRY_WORD(rv), rv->clen)<=cpdmaxsyllable))
1823                     ) &&
1824                (
1825                  // test CHECKCOMPOUNDPATTERN
1826                  !numcheckcpd || scpd != 0 || !cpdpat_check(word, i, rv_first, rv, 0)
1827                ) &&
1828                 (
1829                      (!checkcompounddup || (rv != rv_first))
1830                    )
1831             // test CHECKCOMPOUNDPATTERN conditions
1832                 && (scpd == 0 || checkcpdtable[scpd-1].cond2 == FLAG_NULL ||
1833                       TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))
1834                 )
1835                  {
1836                       // forbid compound word, if it is a non compound word with typical fault
1837                       if (checkcompoundrep && cpdrep_check(word,len)) return NULL;
1838                       return rv_first;
1839             }
1840
1841             numsyllable = oldnumsyllable2;
1842             wordnum = oldwordnum2;
1843
1844             // perhaps second word has prefix or/and suffix
1845             sfx = NULL;
1846             sfxflag = FLAG_NULL;
1847             rv = (compoundflag && !onlycpdrule) ? affix_check((word+i),strlen(word+i), compoundflag, IN_CPD_END) : NULL;
1848             if (!rv && compoundend && !onlycpdrule) {
1849                 sfx = NULL;
1850                 pfx = NULL;
1851                 rv = affix_check((word+i),strlen(word+i), compoundend, IN_CPD_END);
1852             }
1853
1854             if (!rv && numdefcpd && words) {
1855                 rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
1856                 if (rv && defcpd_check(&words, wnum + 1, rv, NULL, 1)) return rv_first;
1857                 rv = NULL;
1858             }
1859
1860             // test CHECKCOMPOUNDPATTERN conditions (allowed forms)
1861             if (rv && !(scpd == 0 || checkcpdtable[scpd-1].cond2 == FLAG_NULL ||
1862                 TESTAFF(rv->astr, checkcpdtable[scpd-1].cond2, rv->alen))) rv = NULL;
1863
1864             // test CHECKCOMPOUNDPATTERN conditions (forbidden compounds)
1865             if (rv && numcheckcpd && scpd == 0 && cpdpat_check(word, i, rv_first, rv, affixed)) rv = NULL;
1866
1867             // check non_compound flag in suffix and prefix
1868             if ((rv) &&
1869                 ((pfx && pfx->getCont() &&
1870                     TESTAFF(pfx->getCont(), compoundforbidflag,
1871                         pfx->getContLen())) ||
1872                 (sfx && sfx->getCont() &&
1873                     TESTAFF(sfx->getCont(), compoundforbidflag,
1874                         sfx->getContLen())))) {
1875                     rv = NULL;
1876             }
1877
1878             // check FORCEUCASE
1879             if (rv && forceucase && (rv) &&
1880                 (TESTAFF(rv->astr, forceucase, rv->alen)) && !(info && *info & SPELL_ORIGCAP)) rv = NULL;
1881
1882             // check forbiddenwords
1883             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
1884                 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
1885                (is_sug && nosuggest && TESTAFF(rv->astr, nosuggest, rv->alen)))) return NULL;
1886
1887             // pfxappnd = prefix of word+i, or NULL
1888             // calculate syllable number of prefix.
1889             // hungarian convention: when syllable number of prefix is more,
1890             // than 1, the prefix+word counts as two words.
1891
1892             if (langnum == LANG_hu) {
1893                 // calculate syllable number of the word
1894                 numsyllable += get_syllable(word + i, strlen(word + i));
1895
1896                 // - affix syllable num.
1897                 // XXX only second suffix (inflections, not derivations)
1898                 if (sfxappnd) {
1899                     char * tmp = myrevstrdup(sfxappnd);
1900                     numsyllable -= get_syllable(tmp, strlen(tmp));
1901                     free(tmp);
1902                 }
1903
1904                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
1905                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
1906
1907                 // increment syllable num, if last word has a SYLLABLENUM flag
1908                 // and the suffix is beginning `s'
1909
1910                 if (cpdsyllablenum) {
1911                     switch (sfxflag) {
1912                         case 'c': { numsyllable+=2; break; }
1913                         case 'J': { numsyllable += 1; break; }
1914                         case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
1915                     }
1916                 }
1917             }
1918
1919             // increment word number, if the second word has a compoundroot flag
1920             if ((rv) && (compoundroot) &&
1921                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
1922                     wordnum++;
1923             }
1924
1925             // second word is acceptable, as a word with prefix or/and suffix?
1926             // hungarian conventions: compounding is acceptable,
1927             // when compound forms consist 2 word, otherwise
1928             // the syllable number of root words is 6, or lesser.
1929             if ((rv) &&
1930                     (
1931                       ((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) ||
1932                       ((cpdmaxsyllable != 0) &&
1933                           (numsyllable <= cpdmaxsyllable))
1934                     )
1935                 && (
1936                    (!checkcompounddup || (rv != rv_first))
1937                    )) {
1938                     // forbid compound word, if it is a non compound word with typical fault
1939                     if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
1940                     return rv_first;
1941             }
1942
1943             numsyllable = oldnumsyllable2;
1944             wordnum = oldwordnum2;
1945
1946             // perhaps second word is a compound word (recursive call)
1947             if (wordnum < maxwordnum) {
1948                 rv = compound_check((st+i),strlen(st+i), wordnum+1,
1949                      numsyllable, maxwordnum, wnum + 1, words, 0, is_sug, info);
1950
1951                 if (rv && numcheckcpd && ((scpd == 0 && cpdpat_check(word, i, rv_first, rv, affixed)) ||
1952                    (scpd != 0 && !cpdpat_check(word, i, rv_first, rv, affixed)))) rv = NULL;
1953             } else {
1954                 rv=NULL;
1955             }
1956             if (rv) {
1957                 // forbid compound word, if it is a non compound word with typical fault
1958                 if (checkcompoundrep || forbiddenword) {
1959                     struct hentry * rv2 = NULL;
1960
1961                     if (checkcompoundrep && cpdrep_check(word, len)) return NULL;
1962
1963                     // check first part
1964                     if (strncmp(rv->word, word + i, rv->blen) == 0) {
1965                         char r = *(st + i + rv->blen);
1966                         *(st + i + rv->blen) = '\0';
1967
1968                         if (checkcompoundrep && cpdrep_check(st, i + rv->blen)) {
1969                             *(st + i + rv->blen) = r;
1970                             continue;
1971                         }
1972
1973                         if (forbiddenword) {
1974                             rv2 = lookup(word);
1975                             if (!rv2) rv2 = affix_check(word, len);
1976                             if (rv2 && rv2->astr && TESTAFF(rv2->astr, forbiddenword, rv2->alen) &&
1977                                 (strncmp(rv2->word, st, i + rv->blen) == 0)) {
1978                                     return NULL;
1979                             }
1980                         }
1981                         *(st + i + rv->blen) = r;
1982                     }
1983                 }
1984                 return rv_first;
1985             }
1986           } while (striple && !checkedstriple); // end of striple loop
1987
1988           if (checkedstriple) {
1989             i++;
1990             checkedstriple = 0;
1991             striple = 0;
1992           }
1993
1994         } // first word is ok condition
1995
1996         if (soldi != 0) {
1997           i = soldi;
1998           soldi = 0;
1999           len = oldlen;
2000           cmin = oldcmin;
2001           cmax = oldcmax;
2002         }
2003         scpd++;
2004
2005
2006         } while (!onlycpdrule && simplifiedcpd && scpd <= numcheckcpd); // end of simplifiedcpd loop
2007
2008         scpd = 0;
2009         wordnum = oldwordnum;
2010         numsyllable = oldnumsyllable;
2011
2012         if (soldi != 0) {
2013           i = soldi;
2014           strcpy(st, word); // XXX add more optim.
2015           soldi = 0;
2016         } else st[i] = ch;
2017
2018         } while (numdefcpd && oldwordnum == 0 && !onlycpdrule && (onlycpdrule = 1)); // end of onlycpd loop
2019
2020     }
2021
2022     return NULL;
2023 }
2024
2025 // check if compound word is correctly spelled
2026 // hu_mov_rule = spec. Hungarian rule (XXX)
2027 int AffixMgr::compound_check_morph(const char * word, int len,
2028     short wordnum, short numsyllable, short maxwordnum, short wnum, hentry ** words,
2029     char hu_mov_rule = 0, char ** result = NULL, char * partresult = NULL)
2030 {
2031     int i;
2032     short oldnumsyllable, oldnumsyllable2, oldwordnum, oldwordnum2;
2033     int ok = 0;
2034
2035     struct hentry * rv = NULL;
2036     struct hentry * rv_first;
2037     struct hentry * rwords[MAXWORDLEN]; // buffer for COMPOUND pattern checking
2038     char st [MAXWORDUTF8LEN + 4];
2039     char ch;
2040
2041     int checked_prefix;
2042     char presult[MAXLNLEN];
2043
2044     int cmin;
2045     int cmax;
2046
2047     int onlycpdrule;
2048     int affixed = 0;
2049     hentry ** oldwords = words;
2050
2051     setcminmax(&cmin, &cmax, word, len);
2052
2053     strcpy(st, word);
2054
2055     for (i = cmin; i < cmax; i++) {
2056         oldnumsyllable = numsyllable;
2057         oldwordnum = wordnum;
2058         checked_prefix = 0;
2059
2060         // go to end of the UTF-8 character
2061         if (utf8) {
2062             for (; (st[i] & 0xc0) == 0x80; i++);
2063             if (i >= cmax) return 0;
2064         }
2065
2066         words = oldwords;
2067         onlycpdrule = (words) ? 1 : 0;
2068
2069         do { // onlycpdrule loop
2070
2071         oldnumsyllable = numsyllable;
2072         oldwordnum = wordnum;
2073         checked_prefix = 0;
2074
2075         ch = st[i];
2076         st[i] = '\0';
2077         sfx = NULL;
2078
2079         // FIRST WORD
2080
2081         affixed = 1;
2082
2083         *presult = '\0';
2084         if (partresult) mystrcat(presult, partresult, MAXLNLEN);
2085
2086         rv = lookup(st); // perhaps without prefix
2087
2088         // search homonym with compound flag
2089         while ((rv) && !hu_mov_rule &&
2090             ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
2091                 !((compoundflag && !words && !onlycpdrule && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
2092                 (compoundbegin && !wordnum && !onlycpdrule &&
2093                         TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
2094                 (compoundmiddle && wordnum && !words && !onlycpdrule &&
2095                     TESTAFF(rv->astr, compoundmiddle, rv->alen)) ||
2096                   (numdefcpd && onlycpdrule &&
2097                     ((!words && !wordnum && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0)) ||
2098                     (words && defcpd_check(&words, wnum, rv, (hentry **) &rwords, 0))))
2099                   ))) {
2100             rv = rv->next_homonym;
2101         }
2102
2103         if (rv) affixed = 0;
2104
2105         if (rv)  {
2106             sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_PART, st);
2107             if (!HENTRY_FIND(rv, MORPH_STEM)) {
2108                 sprintf(presult + strlen(presult), "%c%s%s", MSEP_FLD, MORPH_STEM, st);
2109             }
2110             // store the pointer of the hash entry
2111 //            sprintf(presult + strlen(presult), "%c%s%p", MSEP_FLD, MORPH_HENTRY, rv);
2112             if (HENTRY_DATA(rv)) {
2113                 sprintf(presult + strlen(presult), "%c%s", MSEP_FLD, HENTRY_DATA2(rv));
2114             }
2115         }
2116
2117         if (!rv) {
2118             if (onlycpdrule) break;
2119             if (compoundflag &&
2120              !(rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundflag))) {
2121                 if ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL,
2122                         FLAG_NULL, compoundflag, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) && !hu_mov_rule &&
2123                     sfx->getCont() &&
2124                         ((compoundforbidflag && TESTAFF(sfx->getCont(), compoundforbidflag,
2125                             sfx->getContLen())) || (compoundend &&
2126                         TESTAFF(sfx->getCont(), compoundend,
2127                             sfx->getContLen())))) {
2128                         rv = NULL;
2129                 }
2130             }
2131
2132             if (rv ||
2133               (((wordnum == 0) && compoundbegin &&
2134                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundbegin, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
2135                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundbegin)))) ||
2136               ((wordnum > 0) && compoundmiddle &&
2137                 ((rv = suffix_check(st, i, 0, NULL, NULL, 0, NULL, FLAG_NULL, compoundmiddle, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN)) ||
2138                 (rv = prefix_check(st, i, hu_mov_rule ? IN_CPD_OTHER : IN_CPD_BEGIN, compoundmiddle)))))
2139               ) {
2140                 // char * p = prefix_check_morph(st, i, 0, compound);
2141                 char * p = NULL;
2142                 if (compoundflag) p = affix_check_morph(st, i, compoundflag);
2143                 if (!p || (*p == '\0')) {
2144                    if (p) free(p);
2145                    p = NULL;
2146                    if ((wordnum == 0) && compoundbegin) {
2147                      p = affix_check_morph(st, i, compoundbegin);
2148                    } else if ((wordnum > 0) && compoundmiddle) {
2149                      p = affix_check_morph(st, i, compoundmiddle);
2150                    }
2151                 }
2152                 if (p && (*p != '\0')) {
2153                     sprintf(presult + strlen(presult), "%c%s%s%s", MSEP_FLD,
2154                         MORPH_PART, st, line_uniq_app(&p, MSEP_REC));
2155                 }
2156                 if (p) free(p);
2157                 checked_prefix = 1;
2158             }
2159         // else check forbiddenwords
2160         } else if (rv->astr && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
2161             TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen) ||
2162             TESTAFF(rv->astr, needaffix, rv->alen))) {
2163                 st[i] = ch;
2164                 continue;
2165         }
2166
2167             // check non_compound flag in suffix and prefix
2168             if ((rv) && !hu_mov_rule &&
2169                 ((pfx && pfx->getCont() &&
2170                     TESTAFF(pfx->getCont(), compoundforbidflag,
2171                         pfx->getContLen())) ||
2172                 (sfx && sfx->getCont() &&
2173                     TESTAFF(sfx->getCont(), compoundforbidflag,
2174                         sfx->getContLen())))) {
2175                     continue;
2176             }
2177
2178             // check compoundend flag in suffix and prefix
2179             if ((rv) && !checked_prefix && compoundend && !hu_mov_rule &&
2180                 ((pfx && pfx->getCont() &&
2181                     TESTAFF(pfx->getCont(), compoundend,
2182                         pfx->getContLen())) ||
2183                 (sfx && sfx->getCont() &&
2184                     TESTAFF(sfx->getCont(), compoundend,
2185                         sfx->getContLen())))) {
2186                     continue;
2187             }
2188
2189             // check compoundmiddle flag in suffix and prefix
2190             if ((rv) && !checked_prefix && (wordnum==0) && compoundmiddle && !hu_mov_rule &&
2191                 ((pfx && pfx->getCont() &&
2192                     TESTAFF(pfx->getCont(), compoundmiddle,
2193                         pfx->getContLen())) ||
2194                 (sfx && sfx->getCont() &&
2195                     TESTAFF(sfx->getCont(), compoundmiddle,
2196                         sfx->getContLen())))) {
2197                     rv = NULL;
2198             }
2199
2200         // check forbiddenwords
2201         if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen)
2202             || TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) continue;
2203
2204         // increment word number, if the second root has a compoundroot flag
2205         if ((rv) && (compoundroot) &&
2206             (TESTAFF(rv->astr, compoundroot, rv->alen))) {
2207                 wordnum++;
2208         }
2209
2210         // first word is acceptable in compound words?
2211         if (((rv) &&
2212           ( checked_prefix || (words && words[wnum]) ||
2213             (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
2214             ((oldwordnum == 0) && compoundbegin && TESTAFF(rv->astr, compoundbegin, rv->alen)) ||
2215             ((oldwordnum > 0) && compoundmiddle && TESTAFF(rv->astr, compoundmiddle, rv->alen))
2216 // LANG_hu section: spec. Hungarian rule
2217             || ((langnum == LANG_hu) && // hu_mov_rule
2218                 hu_mov_rule && (
2219                     TESTAFF(rv->astr, 'F', rv->alen) ||
2220                     TESTAFF(rv->astr, 'G', rv->alen) ||
2221                     TESTAFF(rv->astr, 'H', rv->alen)
2222                 )
2223               )
2224 // END of LANG_hu section
2225           )
2226           && ! (( checkcompoundtriple && !words && // test triple letters
2227                    (word[i-1]==word[i]) && (
2228                       ((i>1) && (word[i-1]==word[i-2])) ||
2229                       ((word[i-1]==word[i+1])) // may be word[i+1] == '\0'
2230                    )
2231                ) ||
2232                (
2233                    // test CHECKCOMPOUNDPATTERN
2234                    numcheckcpd && !words && cpdpat_check(word, i, rv, NULL, affixed)
2235                ) ||
2236                (
2237                  checkcompoundcase && !words && cpdcase_check(word, i)
2238                ))
2239          )
2240 // LANG_hu section: spec. Hungarian rule
2241          || ((!rv) && (langnum == LANG_hu) && hu_mov_rule && (rv = affix_check(st,i)) &&
2242               (sfx && sfx->getCont() && (
2243                         TESTAFF(sfx->getCont(), (unsigned short) 'x', sfx->getContLen()) ||
2244                         TESTAFF(sfx->getCont(), (unsigned short) '%', sfx->getContLen())
2245                     )
2246                )
2247              )
2248 // END of LANG_hu section
2249          ) {
2250
2251 // LANG_hu section: spec. Hungarian rule
2252             if (langnum == LANG_hu) {
2253                 // calculate syllable number of the word
2254                 numsyllable += get_syllable(st, i);
2255
2256                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
2257                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
2258             }
2259 // END of LANG_hu section
2260
2261             // NEXT WORD(S)
2262             rv_first = rv;
2263             rv = lookup((word+i)); // perhaps without prefix
2264
2265         // search homonym with compound flag
2266         while ((rv) && ((needaffix && TESTAFF(rv->astr, needaffix, rv->alen)) ||
2267                         !((compoundflag && !words && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
2268                           (compoundend && !words && TESTAFF(rv->astr, compoundend, rv->alen)) ||
2269                            (numdefcpd && words && defcpd_check(&words, wnum + 1, rv, NULL,1))))) {
2270             rv = rv->next_homonym;
2271         }
2272
2273             if (rv && words && words[wnum + 1]) {
2274                   mystrcat(*result, presult, MAXLNLEN);
2275                   mystrcat(*result, " ", MAXLNLEN);
2276                   mystrcat(*result, MORPH_PART, MAXLNLEN);
2277                   mystrcat(*result, word+i, MAXLNLEN);
2278                   if (complexprefixes && HENTRY_DATA(rv)) mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
2279                   if (!HENTRY_FIND(rv, MORPH_STEM)) {
2280                     mystrcat(*result, " ", MAXLNLEN);
2281                     mystrcat(*result, MORPH_STEM, MAXLNLEN);
2282                     mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
2283                   }
2284                   // store the pointer of the hash entry
2285 //                  sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
2286                   if (!complexprefixes && HENTRY_DATA(rv)) {
2287                     mystrcat(*result, " ", MAXLNLEN);
2288                     mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
2289                   }
2290                   mystrcat(*result, "\n", MAXLNLEN);
2291                   ok = 1;
2292                   return 0;
2293             }
2294
2295             oldnumsyllable2 = numsyllable;
2296             oldwordnum2 = wordnum;
2297
2298 // LANG_hu section: spec. Hungarian rule
2299             if ((rv) && (langnum == LANG_hu) && (TESTAFF(rv->astr, 'I', rv->alen)) && !(TESTAFF(rv->astr, 'J', rv->alen))) {
2300                 numsyllable--;
2301             }
2302 // END of LANG_hu section
2303             // increment word number, if the second root has a compoundroot flag
2304             if ((rv) && (compoundroot) &&
2305                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
2306                     wordnum++;
2307             }
2308
2309             // check forbiddenwords
2310             if ((rv) && (rv->astr) && (TESTAFF(rv->astr, forbiddenword, rv->alen) ||
2311                 TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))) {
2312                 st[i] = ch;
2313                 continue;
2314             }
2315
2316             // second word is acceptable, as a root?
2317             // hungarian conventions: compounding is acceptable,
2318             // when compound forms consist of 2 words, or if more,
2319             // then the syllable number of root words must be 6, or lesser.
2320             if ((rv) && (
2321                       (compoundflag && TESTAFF(rv->astr, compoundflag, rv->alen)) ||
2322                       (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))
2323                     )
2324                 && (
2325                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) ||
2326                       ((cpdmaxsyllable!=0) &&
2327                           (numsyllable+get_syllable(HENTRY_WORD(rv),rv->blen)<=cpdmaxsyllable))
2328                     )
2329                 && (
2330                      (!checkcompounddup || (rv != rv_first))
2331                    )
2332                 )
2333                  {
2334                       // bad compound word
2335                       mystrcat(*result, presult, MAXLNLEN);
2336                       mystrcat(*result, " ", MAXLNLEN);
2337                       mystrcat(*result, MORPH_PART, MAXLNLEN);
2338                       mystrcat(*result, word+i, MAXLNLEN);
2339
2340                       if (HENTRY_DATA(rv)) {
2341                         if (complexprefixes) mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
2342                         if (! HENTRY_FIND(rv, MORPH_STEM)) {
2343                            mystrcat(*result, " ", MAXLNLEN);
2344                            mystrcat(*result, MORPH_STEM, MAXLNLEN);
2345                            mystrcat(*result, HENTRY_WORD(rv), MAXLNLEN);
2346                         }
2347                         // store the pointer of the hash entry
2348 //                        sprintf(*result + strlen(*result), " %s%p", MORPH_HENTRY, rv);
2349                         if (!complexprefixes) {
2350                             mystrcat(*result, " ", MAXLNLEN);
2351                             mystrcat(*result, HENTRY_DATA2(rv), MAXLNLEN);
2352                         }
2353                       }
2354                       mystrcat(*result, "\n", MAXLNLEN);
2355                               ok = 1;
2356             }
2357
2358             numsyllable = oldnumsyllable2 ;
2359             wordnum = oldwordnum2;
2360
2361             // perhaps second word has prefix or/and suffix
2362             sfx = NULL;
2363             sfxflag = FLAG_NULL;
2364
2365             if (compoundflag && !onlycpdrule) rv = affix_check((word+i),strlen(word+i), compoundflag); else rv = NULL;
2366
2367             if (!rv && compoundend && !onlycpdrule) {
2368                 sfx = NULL;
2369                 pfx = NULL;
2370                 rv = affix_check((word+i),strlen(word+i), compoundend);
2371             }
2372
2373             if (!rv && numdefcpd && words) {
2374                 rv = affix_check((word+i),strlen(word+i), 0, IN_CPD_END);
2375                 if (rv && words && defcpd_check(&words, wnum + 1, rv, NULL, 1)) {
2376                       char * m = NULL;
2377                       if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
2378                       if ((!m || *m == '\0') && compoundend) {
2379                             if (m) free(m);
2380                             m = affix_check_morph((word+i),strlen(word+i), compoundend);
2381                       }
2382                       mystrcat(*result, presult, MAXLNLEN);
2383                       if (m || (*m != '\0')) {
2384                         sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
2385                             MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
2386                       }
2387                       if (m) free(m);
2388                       mystrcat(*result, "\n", MAXLNLEN);
2389                       ok = 1;
2390                 }
2391             }
2392
2393             // check non_compound flag in suffix and prefix
2394             if ((rv) &&
2395                 ((pfx && pfx->getCont() &&
2396                     TESTAFF(pfx->getCont(), compoundforbidflag,
2397                         pfx->getContLen())) ||
2398                 (sfx && sfx->getCont() &&
2399                     TESTAFF(sfx->getCont(), compoundforbidflag,
2400                         sfx->getContLen())))) {
2401                     rv = NULL;
2402             }
2403
2404             // check forbiddenwords
2405             if ((rv) && (rv->astr) && (TESTAFF(rv->astr,forbiddenword,rv->alen) ||
2406                     TESTAFF(rv->astr, ONLYUPCASEFLAG, rv->alen))
2407                     && (! TESTAFF(rv->astr, needaffix, rv->alen))) {
2408                         st[i] = ch;
2409                         continue;
2410                     }
2411
2412             if (langnum == LANG_hu) {
2413                 // calculate syllable number of the word
2414                 numsyllable += get_syllable(word + i, strlen(word + i));
2415
2416                 // - affix syllable num.
2417                 // XXX only second suffix (inflections, not derivations)
2418                 if (sfxappnd) {
2419                     char * tmp = myrevstrdup(sfxappnd);
2420                     numsyllable -= get_syllable(tmp, strlen(tmp));
2421                     free(tmp);
2422                 }
2423
2424                 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
2425                 if (pfx && (get_syllable(pfx->getKey(),strlen(pfx->getKey())) > 1)) wordnum++;
2426
2427                 // increment syllable num, if last word has a SYLLABLENUM flag
2428                 // and the suffix is beginning `s'
2429
2430                 if (cpdsyllablenum) {
2431                     switch (sfxflag) {
2432                         case 'c': { numsyllable+=2; break; }
2433                         case 'J': { numsyllable += 1; break; }
2434                         case 'I': { if (rv && TESTAFF(rv->astr, 'J', rv->alen)) numsyllable += 1; break; }
2435                     }
2436                 }
2437             }
2438
2439             // increment word number, if the second word has a compoundroot flag
2440             if ((rv) && (compoundroot) &&
2441                 (TESTAFF(rv->astr, compoundroot, rv->alen))) {
2442                     wordnum++;
2443             }
2444             // second word is acceptable, as a word with prefix or/and suffix?
2445             // hungarian conventions: compounding is acceptable,
2446             // when compound forms consist 2 word, otherwise
2447             // the syllable number of root words is 6, or lesser.
2448             if ((rv) &&
2449                     (
2450                       ((cpdwordmax==-1) || (wordnum+1<cpdwordmax)) ||
2451                       ((cpdmaxsyllable!=0) &&
2452                           (numsyllable <= cpdmaxsyllable))
2453                     )
2454                 && (
2455                    (!checkcompounddup || (rv != rv_first))
2456                    )) {
2457                       char * m = NULL;
2458                       if (compoundflag) m = affix_check_morph((word+i),strlen(word+i), compoundflag);
2459                       if ((!m || *m == '\0') && compoundend) {
2460                             if (m) free(m);
2461                             m = affix_check_morph((word+i),strlen(word+i), compoundend);
2462                       }
2463                       mystrcat(*result, presult, MAXLNLEN);
2464                       if (m && (*m != '\0')) {
2465                         sprintf(*result + strlen(*result), "%c%s%s%s", MSEP_FLD,
2466                             MORPH_PART, word + i, line_uniq_app(&m, MSEP_REC));
2467                       }
2468                       if (m) free(m);
2469                       sprintf(*result + strlen(*result), "%c", MSEP_REC);
2470                       ok = 1;
2471             }
2472
2473             numsyllable = oldnumsyllable2;
2474             wordnum = oldwordnum2;
2475
2476             // perhaps second word is a compound word (recursive call)
2477             if ((wordnum < maxwordnum) && (ok == 0)) {
2478                         compound_check_morph((word+i),strlen(word+i), wordnum+1,
2479                              numsyllable, maxwordnum, wnum + 1, words, 0, result, presult);
2480             } else {
2481                 rv=NULL;
2482             }
2483         }
2484         st[i] = ch;
2485         wordnum = oldwordnum;
2486         numsyllable = oldnumsyllable;
2487
2488         } while (numdefcpd && oldwordnum == 0 && !onlycpdrule && (onlycpdrule = 1)); // end of onlycpd loop
2489
2490     }
2491     return 0;
2492 }
2493
2494  // return 1 if s1 (reversed) is a leading subset of end of s2
2495 /* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
2496  {
2497     while ((len > 0) && *s1 && (*s1 == *end_of_s2)) {
2498         s1++;
2499         end_of_s2--;
2500         len--;
2501     }
2502     return (*s1 == '\0');
2503  }
2504  */
2505
2506 inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
2507  {
2508     while ((len > 0) && (*s1 != '\0') && ((*s1 == *end_of_s2) || (*s1 == '.'))) {
2509         s1++;
2510         end_of_s2--;
2511         len--;
2512     }
2513     return (*s1 == '\0');
2514  }
2515
2516 // check word for suffixes
2517
2518 struct hentry * AffixMgr::suffix_check (const char * word, int len,
2519        int sfxopts, PfxEntry * ppfx, char ** wlst, int maxSug, int * ns,
2520        const FLAG cclass, const FLAG needflag, char in_compound)
2521 {
2522     struct hentry * rv = NULL;
2523     PfxEntry* ep = ppfx;
2524
2525     // first handle the special case of 0 length suffixes
2526     SfxEntry * se = sStart[0];
2527
2528     while (se) {
2529         if (!cclass || se->getCont()) {
2530             // suffixes are not allowed in beginning of compounds
2531             if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2532              // except when signed with compoundpermitflag flag
2533              (se->getCont() && compoundpermitflag &&
2534                 TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
2535               // no circumfix flag in prefix and suffix
2536               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2537                    circumfix, ep->getContLen())) &&
2538                (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
2539               // circumfix flag in prefix AND suffix
2540               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2541                    circumfix, ep->getContLen())) &&
2542                (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
2543             // fogemorpheme
2544               (in_compound ||
2545                  !(se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen())))) &&
2546             // needaffix on prefix or first suffix
2547               (cclass ||
2548                    !(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
2549                    (ppfx && !((ep->getCont()) &&
2550                      TESTAFF(ep->getCont(), needaffix,
2551                        ep->getContLen())))
2552               )) {
2553                 rv = se->checkword(word,len, sfxopts, ppfx, wlst, maxSug, ns, (FLAG) cclass,
2554                     needflag, (in_compound ? 0 : onlyincompound));
2555                 if (rv) {
2556                     sfx=se; // BUG: sfx not stateless
2557                     return rv;
2558                 }
2559             }
2560         }
2561        se = se->getNext();
2562     }
2563
2564     // now handle the general case
2565     if (len == 0) return NULL; // FULLSTRIP
2566     unsigned char sp= *((const unsigned char *)(word + len - 1));
2567     SfxEntry * sptr = sStart[sp];
2568
2569     while (sptr) {
2570         if (isRevSubset(sptr->getKey(), word + len - 1, len)
2571         ) {
2572             // suffixes are not allowed in beginning of compounds
2573             if ((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2574              // except when signed with compoundpermitflag flag
2575              (sptr->getCont() && compoundpermitflag &&
2576                 TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
2577               // no circumfix flag in prefix and suffix
2578               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2579                    circumfix, ep->getContLen())) &&
2580                (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
2581               // circumfix flag in prefix AND suffix
2582               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2583                    circumfix, ep->getContLen())) &&
2584                (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
2585             // fogemorpheme
2586               (in_compound ||
2587                  !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
2588             // needaffix on prefix or first suffix
2589               (cclass ||
2590                   !(sptr->getCont() && TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
2591                   (ppfx && !((ep->getCont()) &&
2592                      TESTAFF(ep->getCont(), needaffix,
2593                        ep->getContLen())))
2594               )
2595             ) if (in_compound != IN_CPD_END || ppfx || !(sptr->getCont() && TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))) {
2596                 rv = sptr->checkword(word,len, sfxopts, ppfx, wlst,
2597                     maxSug, ns, cclass, needflag, (in_compound ? 0 : onlyincompound));
2598                 if (rv) {
2599                     sfx=sptr; // BUG: sfx not stateless
2600                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2601                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
2602                     return rv;
2603                 }
2604              }
2605              sptr = sptr->getNextEQ();
2606         } else {
2607              sptr = sptr->getNextNE();
2608         }
2609     }
2610
2611     return NULL;
2612 }
2613
2614 // check word for two-level suffixes
2615
2616 struct hentry * AffixMgr::suffix_check_twosfx(const char * word, int len,
2617        int sfxopts, PfxEntry * ppfx, const FLAG needflag)
2618 {
2619     struct hentry * rv = NULL;
2620
2621     // first handle the special case of 0 length suffixes
2622     SfxEntry * se = sStart[0];
2623     while (se) {
2624         if (contclasses[se->getFlag()])
2625         {
2626             rv = se->check_twosfx(word,len, sfxopts, ppfx, needflag);
2627             if (rv) return rv;
2628         }
2629         se = se->getNext();
2630     }
2631
2632     // now handle the general case
2633     if (len == 0) return NULL; // FULLSTRIP
2634     unsigned char sp = *((const unsigned char *)(word + len - 1));
2635     SfxEntry * sptr = sStart[sp];
2636
2637     while (sptr) {
2638         if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
2639             if (contclasses[sptr->getFlag()])
2640             {
2641                 rv = sptr->check_twosfx(word,len, sfxopts, ppfx, needflag);
2642                 if (rv) {
2643                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2644                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
2645                     return rv;
2646                 }
2647             }
2648             sptr = sptr->getNextEQ();
2649         } else {
2650              sptr = sptr->getNextNE();
2651         }
2652     }
2653
2654     return NULL;
2655 }
2656
2657 char * AffixMgr::suffix_check_twosfx_morph(const char * word, int len,
2658        int sfxopts, PfxEntry * ppfx, const FLAG needflag)
2659 {
2660     char result[MAXLNLEN];
2661     char result2[MAXLNLEN];
2662     char result3[MAXLNLEN];
2663
2664     char * st;
2665
2666     result[0] = '\0';
2667     result2[0] = '\0';
2668     result3[0] = '\0';
2669
2670     // first handle the special case of 0 length suffixes
2671     SfxEntry * se = sStart[0];
2672     while (se) {
2673         if (contclasses[se->getFlag()])
2674         {
2675             st = se->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
2676             if (st) {
2677                 if (ppfx) {
2678                     if (ppfx->getMorph()) {
2679                         mystrcat(result, ppfx->getMorph(), MAXLNLEN);
2680                         mystrcat(result, " ", MAXLNLEN);
2681                     } else debugflag(result, ppfx->getFlag());
2682                 }
2683                 mystrcat(result, st, MAXLNLEN);
2684                 free(st);
2685                 if (se->getMorph()) {
2686                     mystrcat(result, " ", MAXLNLEN);
2687                     mystrcat(result, se->getMorph(), MAXLNLEN);
2688                 } else debugflag(result, se->getFlag());
2689                 mystrcat(result, "\n", MAXLNLEN);
2690             }
2691         }
2692         se = se->getNext();
2693     }
2694
2695     // now handle the general case
2696     if (len == 0) return NULL; // FULLSTRIP
2697     unsigned char sp = *((const unsigned char *)(word + len - 1));
2698     SfxEntry * sptr = sStart[sp];
2699
2700     while (sptr) {
2701         if (isRevSubset(sptr->getKey(), word + len - 1, len)) {
2702             if (contclasses[sptr->getFlag()])
2703             {
2704                 st = sptr->check_twosfx_morph(word,len, sfxopts, ppfx, needflag);
2705                 if (st) {
2706                     sfxflag = sptr->getFlag(); // BUG: sfxflag not stateless
2707                     if (!sptr->getCont()) sfxappnd=sptr->getKey(); // BUG: sfxappnd not stateless
2708                     strcpy(result2, st);
2709                     free(st);
2710
2711                 result3[0] = '\0';
2712
2713                 if (sptr->getMorph()) {
2714                     mystrcat(result3, " ", MAXLNLEN);
2715                     mystrcat(result3, sptr->getMorph(), MAXLNLEN);
2716                 } else debugflag(result3, sptr->getFlag());
2717                 strlinecat(result2, result3);
2718                 mystrcat(result2, "\n", MAXLNLEN);
2719                 mystrcat(result,  result2, MAXLNLEN);
2720                 }
2721             }
2722             sptr = sptr->getNextEQ();
2723         } else {
2724              sptr = sptr->getNextNE();
2725         }
2726     }
2727     if (*result) return mystrdup(result);
2728     return NULL;
2729 }
2730
2731 char * AffixMgr::suffix_check_morph(const char * word, int len,
2732        int sfxopts, PfxEntry * ppfx, const FLAG cclass, const FLAG needflag, char in_compound)
2733 {
2734     char result[MAXLNLEN];
2735
2736     struct hentry * rv = NULL;
2737
2738     result[0] = '\0';
2739
2740     PfxEntry* ep = ppfx;
2741
2742     // first handle the special case of 0 length suffixes
2743     SfxEntry * se = sStart[0];
2744     while (se) {
2745         if (!cclass || se->getCont()) {
2746             // suffixes are not allowed in beginning of compounds
2747             if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2748              // except when signed with compoundpermitflag flag
2749              (se->getCont() && compoundpermitflag &&
2750                 TESTAFF(se->getCont(),compoundpermitflag,se->getContLen()))) && (!circumfix ||
2751               // no circumfix flag in prefix and suffix
2752               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2753                    circumfix, ep->getContLen())) &&
2754                (!se->getCont() || !(TESTAFF(se->getCont(),circumfix,se->getContLen())))) ||
2755               // circumfix flag in prefix AND suffix
2756               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2757                    circumfix, ep->getContLen())) &&
2758                (se->getCont() && (TESTAFF(se->getCont(),circumfix,se->getContLen())))))  &&
2759             // fogemorpheme
2760               (in_compound ||
2761                  !((se->getCont() && (TESTAFF(se->getCont(), onlyincompound, se->getContLen()))))) &&
2762             // needaffix on prefix or first suffix
2763               (cclass ||
2764                    !(se->getCont() && TESTAFF(se->getCont(), needaffix, se->getContLen())) ||
2765                    (ppfx && !((ep->getCont()) &&
2766                      TESTAFF(ep->getCont(), needaffix,
2767                        ep->getContLen())))
2768               )
2769             ))
2770             rv = se->checkword(word, len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
2771          while (rv) {
2772            if (ppfx) {
2773                 if (ppfx->getMorph()) {
2774                     mystrcat(result, ppfx->getMorph(), MAXLNLEN);
2775                     mystrcat(result, " ", MAXLNLEN);
2776                 } else debugflag(result, ppfx->getFlag());
2777             }
2778             if (complexprefixes && HENTRY_DATA(rv)) mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
2779             if (! HENTRY_FIND(rv, MORPH_STEM)) {
2780                 mystrcat(result, " ", MAXLNLEN);
2781                 mystrcat(result, MORPH_STEM, MAXLNLEN);
2782                 mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
2783             }
2784             // store the pointer of the hash entry
2785 //            sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
2786
2787             if (!complexprefixes && HENTRY_DATA(rv)) {
2788                     mystrcat(result, " ", MAXLNLEN);
2789                     mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
2790             }
2791             if (se->getMorph()) {
2792                 mystrcat(result, " ", MAXLNLEN);
2793                 mystrcat(result, se->getMorph(), MAXLNLEN);
2794             } else debugflag(result, se->getFlag());
2795             mystrcat(result, "\n", MAXLNLEN);
2796             rv = se->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
2797          }
2798        }
2799        se = se->getNext();
2800     }
2801
2802     // now handle the general case
2803     if (len == 0) return NULL; // FULLSTRIP
2804     unsigned char sp = *((const unsigned char *)(word + len - 1));
2805     SfxEntry * sptr = sStart[sp];
2806
2807     while (sptr) {
2808         if (isRevSubset(sptr->getKey(), word + len - 1, len)
2809         ) {
2810             // suffixes are not allowed in beginning of compounds
2811             if (((((in_compound != IN_CPD_BEGIN)) || // && !cclass
2812              // except when signed with compoundpermitflag flag
2813              (sptr->getCont() && compoundpermitflag &&
2814                 TESTAFF(sptr->getCont(),compoundpermitflag,sptr->getContLen()))) && (!circumfix ||
2815               // no circumfix flag in prefix and suffix
2816               ((!ppfx || !(ep->getCont()) || !TESTAFF(ep->getCont(),
2817                    circumfix, ep->getContLen())) &&
2818                (!sptr->getCont() || !(TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))) ||
2819               // circumfix flag in prefix AND suffix
2820               ((ppfx && (ep->getCont()) && TESTAFF(ep->getCont(),
2821                    circumfix, ep->getContLen())) &&
2822                (sptr->getCont() && (TESTAFF(sptr->getCont(),circumfix,sptr->getContLen())))))  &&
2823             // fogemorpheme
2824               (in_compound ||
2825                  !((sptr->getCont() && (TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))) &&
2826             // needaffix on first suffix
2827               (cclass || !(sptr->getCont() &&
2828                    TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())))
2829             )) rv = sptr->checkword(word,len, sfxopts, ppfx, NULL, 0, 0, cclass, needflag);
2830             while (rv) {
2831                     if (ppfx) {
2832                         if (ppfx->getMorph()) {
2833                             mystrcat(result, ppfx->getMorph(), MAXLNLEN);
2834                             mystrcat(result, " ", MAXLNLEN);
2835                         } else debugflag(result, ppfx->getFlag());
2836                     }
2837                     if (complexprefixes && HENTRY_DATA(rv)) mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
2838                     if (! HENTRY_FIND(rv, MORPH_STEM)) {
2839                             mystrcat(result, " ", MAXLNLEN);
2840                             mystrcat(result, MORPH_STEM, MAXLNLEN);
2841                             mystrcat(result, HENTRY_WORD(rv), MAXLNLEN);
2842                     }
2843                     // store the pointer of the hash entry
2844 //                    sprintf(result + strlen(result), " %s%p", MORPH_HENTRY, rv);
2845
2846                     if (!complexprefixes && HENTRY_DATA(rv)) {
2847                         mystrcat(result, " ", MAXLNLEN);
2848                         mystrcat(result, HENTRY_DATA2(rv), MAXLNLEN);
2849                     }
2850
2851                 if (sptr->getMorph()) {
2852                     mystrcat(result, " ", MAXLNLEN);
2853                     mystrcat(result, sptr->getMorph(), MAXLNLEN);
2854                 } else debugflag(result, sptr->getFlag());
2855                 mystrcat(result, "\n", MAXLNLEN);
2856                 rv = sptr->get_next_homonym(rv, sfxopts, ppfx, cclass, needflag);
2857             }
2858              sptr = sptr->getNextEQ();
2859         } else {
2860              sptr = sptr->getNextNE();
2861         }
2862     }
2863
2864     if (*result) return mystrdup(result);
2865     return NULL;
2866 }
2867
2868 // check if word with affixes is correctly spelled
2869 struct hentry * AffixMgr::affix_check (const char * word, int len, const FLAG needflag, char in_compound)
2870 {
2871     struct hentry * rv= NULL;
2872
2873     // check all prefixes (also crossed with suffixes if allowed)
2874     rv = prefix_check(word, len, in_compound, needflag);
2875     if (rv) return rv;
2876
2877     // if still not found check all suffixes
2878     rv = suffix_check(word, len, 0, NULL, NULL, 0, NULL, FLAG_NULL, needflag, in_compound);
2879
2880     if (havecontclass) {
2881         sfx = NULL;
2882         pfx = NULL;
2883
2884         if (rv) return rv;
2885         // if still not found check all two-level suffixes
2886         rv = suffix_check_twosfx(word, len, 0, NULL, needflag);
2887
2888         if (rv) return rv;
2889         // if still not found check all two-level suffixes
2890         rv = prefix_check_twosfx(word, len, IN_CPD_NOT, needflag);
2891     }
2892
2893     return rv;
2894 }
2895
2896 // check if word with affixes is correctly spelled
2897 char * AffixMgr::affix_check_morph(const char * word, int len, const FLAG needflag, char in_compound)
2898 {
2899     char result[MAXLNLEN];
2900     char * st = NULL;
2901
2902     *result = '\0';
2903
2904     // check all prefixes (also crossed with suffixes if allowed)
2905     st = prefix_check_morph(word, len, in_compound);
2906     if (st) {
2907         mystrcat(result, st, MAXLNLEN);
2908         free(st);
2909     }
2910
2911     // if still not found check all suffixes
2912     st = suffix_check_morph(word, len, 0, NULL, '\0', needflag, in_compound);
2913     if (st) {
2914         mystrcat(result, st, MAXLNLEN);
2915         free(st);
2916     }
2917
2918     if (havecontclass) {
2919         sfx = NULL;
2920         pfx = NULL;
2921         // if still not found check all two-level suffixes
2922         st = suffix_check_twosfx_morph(word, len, 0, NULL, needflag);
2923         if (st) {
2924             mystrcat(result, st, MAXLNLEN);
2925             free(st);
2926         }
2927
2928         // if still not found check all two-level suffixes
2929         st = prefix_check_twosfx_morph(word, len, IN_CPD_NOT, needflag);
2930         if (st) {
2931             mystrcat(result, st, MAXLNLEN);
2932             free(st);
2933         }
2934     }
2935
2936     return mystrdup(result);
2937 }
2938
2939 char * AffixMgr::morphgen(char * ts, int wl, const unsigned short * ap,
2940     unsigned short al, char * morph, char * targetmorph, int level)
2941 {
2942     // handle suffixes
2943     char * stemmorph;
2944     char * stemmorphcatpos;
2945     char mymorph[MAXLNLEN];
2946
2947     if (!morph) return NULL;
2948
2949     // check substandard flag
2950     if (TESTAFF(ap, substandard, al)) return NULL;
2951
2952     if (morphcmp(morph, targetmorph) == 0) return mystrdup(ts);
2953
2954 //    int targetcount = get_sfxcount(targetmorph);
2955
2956     // use input suffix fields, if exist
2957     if (strstr(morph, MORPH_INFL_SFX) || strstr(morph, MORPH_DERI_SFX)) {
2958         stemmorph = mymorph;
2959         strcpy(stemmorph, morph);
2960         mystrcat(stemmorph, " ", MAXLNLEN);
2961         stemmorphcatpos = stemmorph + strlen(stemmorph);
2962     } else {
2963         stemmorph = morph;
2964         stemmorphcatpos = NULL;
2965     }
2966
2967     for (int i = 0; i < al; i++) {
2968         const unsigned char c = (unsigned char) (ap[i] & 0x00FF);
2969         SfxEntry * sptr = sFlag[c];
2970         while (sptr) {
2971             if (sptr->getFlag() == ap[i] && sptr->getMorph() && ((sptr->getContLen() == 0) ||
2972                 // don't generate forms with substandard affixes
2973                 !TESTAFF(sptr->getCont(), substandard, sptr->getContLen()))) {
2974
2975                 if (stemmorphcatpos) strcpy(stemmorphcatpos, sptr->getMorph());
2976                 else stemmorph = (char *) sptr->getMorph();
2977
2978                 int cmp = morphcmp(stemmorph, targetmorph);
2979
2980                 if (cmp == 0) {
2981                     char * newword = sptr->add(ts, wl);
2982                     if (newword) {
2983                         hentry * check = pHMgr->lookup(newword); // XXX extra dic
2984                         if (!check || !check->astr ||
2985                             !(TESTAFF(check->astr, forbiddenword, check->alen) ||
2986                               TESTAFF(check->astr, ONLYUPCASEFLAG, check->alen))) {
2987                                 return newword;
2988                         }
2989                         free(newword);
2990                     }
2991                 }
2992
2993                 // recursive call for secondary suffixes
2994                 if ((level == 0) && (cmp == 1) && (sptr->getContLen() > 0) &&
2995 //                    (get_sfxcount(stemmorph) < targetcount) &&
2996                     !TESTAFF(sptr->getCont(), substandard, sptr->getContLen())) {
2997                     char * newword = sptr->add(ts, wl);
2998                     if (newword) {
2999                         char * newword2 = morphgen(newword, strlen(newword), sptr->getCont(),
3000                             sptr->getContLen(), stemmorph, targetmorph, 1);
3001
3002                         if (newword2) {
3003                             free(newword);
3004                             return newword2;
3005                         }
3006                         free(newword);
3007                         newword = NULL;
3008                     }
3009                 }
3010             }
3011             sptr = sptr->getFlgNxt();
3012         }
3013     }
3014    return NULL;
3015 }
3016
3017
3018 int AffixMgr::expand_rootword(struct guessword * wlst, int maxn, const char * ts,
3019     int wl, const unsigned short * ap, unsigned short al, char * bad, int badl,
3020     char * phon)
3021 {
3022     int nh=0;
3023     // first add root word to list
3024     if ((nh < maxn) && !(al && ((needaffix && TESTAFF(ap, needaffix, al)) ||
3025          (onlyincompound && TESTAFF(ap, onlyincompound, al))))) {
3026        wlst[nh].word = mystrdup(ts);
3027        if (!wlst[nh].word) return 0;
3028        wlst[nh].allow = (1 == 0);
3029        wlst[nh].orig = NULL;
3030        nh++;
3031        // add special phonetic version
3032        if (phon && (nh < maxn)) {
3033             wlst[nh].word = mystrdup(phon);
3034             if (!wlst[nh].word) return nh - 1;
3035             wlst[nh].allow = (1 == 0);
3036             wlst[nh].orig = mystrdup(ts);
3037             if (!wlst[nh].orig) return nh - 1;
3038             nh++;
3039        }
3040     }
3041
3042     // handle suffixes
3043     for (int i = 0; i < al; i++) {
3044        const unsigned char c = (unsigned char) (ap[i] & 0x00FF);
3045        SfxEntry * sptr = sFlag[c];
3046        while (sptr) {
3047          if ((sptr->getFlag() == ap[i]) && (!sptr->getKeyLen() || ((badl > sptr->getKeyLen()) &&
3048                 (strcmp(sptr->getAffix(), bad + badl - sptr->getKeyLen()) == 0))) &&
3049                 // check needaffix flag
3050                 !(sptr->getCont() && ((needaffix &&
3051                       TESTAFF(sptr->getCont(), needaffix, sptr->getContLen())) ||
3052                   (circumfix &&
3053                       TESTAFF(sptr->getCont(), circumfix, sptr->getContLen())) ||
3054                   (onlyincompound &&
3055                       TESTAFF(sptr->getCont(), onlyincompound, sptr->getContLen()))))
3056                 ) {
3057             char * newword = sptr->add(ts, wl);
3058             if (newword) {
3059                 if (nh < maxn) {
3060                     wlst[nh].word = newword;
3061                     wlst[nh].allow = sptr->allowCross();
3062                     wlst[nh].orig = NULL;
3063                     nh++;
3064                     // add special phonetic version
3065                     if (phon && (nh < maxn)) {
3066                         char st[MAXWORDUTF8LEN];
3067                         strcpy(st, phon);
3068                         strcat(st, sptr->getKey());
3069                         reverseword(st + strlen(phon));
3070                         wlst[nh].word = mystrdup(st);
3071                         if (!wlst[nh].word) return nh - 1;
3072                         wlst[nh].allow = (1 == 0);
3073                         wlst[nh].orig = mystrdup(newword);
3074                         if (!wlst[nh].orig) return nh - 1;
3075                         nh++;
3076                     }
3077                 } else {
3078                     free(newword);
3079                 }
3080             }
3081          }
3082          sptr = sptr->getFlgNxt();
3083        }
3084     }
3085
3086     int n = nh;
3087
3088     // handle cross products of prefixes and suffixes
3089     for (int j=1;j<n ;j++)
3090        if (wlst[j].allow) {
3091           for (int k = 0; k < al; k++) {
3092              const unsigned char c = (unsigned char) (ap[k] & 0x00FF);
3093              PfxEntry * cptr = pFlag[c];
3094              while (cptr) {
3095                 if ((cptr->getFlag() == ap[k]) && cptr->allowCross() && (!cptr->getKeyLen() || ((badl > cptr->getKeyLen()) &&
3096                         (strncmp(cptr->getKey(), bad, cptr->getKeyLen()) == 0)))) {
3097                     int l1 = strlen(wlst[j].word);
3098                     char * newword = cptr->add(wlst[j].word, l1);
3099                     if (newword) {
3100                        if (nh < maxn) {
3101                           wlst[nh].word = newword;
3102                           wlst[nh].allow = cptr->allowCross();
3103                           wlst[nh].orig = NULL;
3104                           nh++;
3105                        } else {
3106                           free(newword);
3107                        }
3108                     }
3109                 }
3110                 cptr = cptr->getFlgNxt();
3111              }
3112           }
3113        }
3114
3115
3116     // now handle pure prefixes
3117     for (int m = 0; m < al; m ++) {
3118        const unsigned char c = (unsigned char) (ap[m] & 0x00FF);
3119        PfxEntry * ptr = pFlag[c];
3120        while (ptr) {
3121          if ((ptr->getFlag() == ap[m]) && (!ptr->getKeyLen() || ((badl > ptr->getKeyLen()) &&
3122                 (strncmp(ptr->getKey(), bad, ptr->getKeyLen()) == 0))) &&
3123                 // check needaffix flag
3124                 !(ptr->getCont() && ((needaffix &&
3125                       TESTAFF(ptr->getCont(), needaffix, ptr->getContLen())) ||
3126                      (circumfix &&
3127                       TESTAFF(ptr->getCont(), circumfix, ptr->getContLen())) ||
3128                   (onlyincompound &&
3129                       TESTAFF(ptr->getCont(), onlyincompound, ptr->getContLen()))))
3130                 ) {
3131             char * newword = ptr->add(ts, wl);
3132             if (newword) {
3133                 if (nh < maxn) {
3134                     wlst[nh].word = newword;
3135                     wlst[nh].allow = ptr->allowCross();
3136                     wlst[nh].orig = NULL;
3137                     nh++;
3138                 } else {
3139                     free(newword);
3140                 }
3141             }
3142          }
3143          ptr = ptr->getFlgNxt();
3144        }
3145     }
3146
3147     return nh;
3148 }
3149
3150 // return length of replacing table
3151 int AffixMgr::get_numrep() const
3152 {
3153   return numrep;
3154 }
3155
3156 // return replacing table
3157 struct replentry * AffixMgr::get_reptable() const
3158 {
3159   if (! reptable ) return NULL;
3160   return reptable;
3161 }
3162
3163 // return iconv table
3164 RepList * AffixMgr::get_iconvtable() const
3165 {
3166   if (! iconvtable ) return NULL;
3167   return iconvtable;
3168 }
3169
3170 // return oconv table
3171 RepList * AffixMgr::get_oconvtable() const
3172 {
3173   if (! oconvtable ) return NULL;
3174   return oconvtable;
3175 }
3176
3177 // return replacing table
3178 struct phonetable * AffixMgr::get_phonetable() const
3179 {
3180   if (! phone ) return NULL;
3181   return phone;
3182 }
3183
3184 // return length of character map table
3185 int AffixMgr::get_nummap() const
3186 {
3187   return nummap;
3188 }
3189
3190 // return character map table
3191 struct mapentry * AffixMgr::get_maptable() const
3192 {
3193   if (! maptable ) return NULL;
3194   return maptable;
3195 }
3196
3197 // return length of word break table
3198 int AffixMgr::get_numbreak() const
3199 {
3200   return numbreak;
3201 }
3202
3203 // return character map table
3204 char ** AffixMgr::get_breaktable() const
3205 {
3206   if (! breaktable ) return NULL;
3207   return breaktable;
3208 }
3209
3210 // return text encoding of dictionary
3211 char * AffixMgr::get_encoding()
3212 {
3213   if (! encoding ) encoding = mystrdup(SPELL_ENCODING);
3214   return mystrdup(encoding);
3215 }
3216
3217 // return text encoding of dictionary
3218 int AffixMgr::get_langnum() const
3219 {
3220   return langnum;
3221 }
3222
3223 // return double prefix option
3224 int AffixMgr::get_complexprefixes() const
3225 {
3226   return complexprefixes;
3227 }
3228
3229 // return FULLSTRIP option
3230 int AffixMgr::get_fullstrip() const
3231 {
3232   return fullstrip;
3233 }
3234
3235 FLAG AffixMgr::get_keepcase() const
3236 {
3237   return keepcase;
3238 }
3239
3240 FLAG AffixMgr::get_forceucase() const
3241 {
3242   return forceucase;
3243 }
3244
3245 FLAG AffixMgr::get_warn() const
3246 {
3247   return warn;
3248 }
3249
3250 int AffixMgr::get_forbidwarn() const
3251 {
3252   return forbidwarn;
3253 }
3254
3255 int AffixMgr::get_checksharps() const
3256 {
3257   return checksharps;
3258 }
3259
3260 char * AffixMgr::encode_flag(unsigned short aflag) const
3261 {
3262   return pHMgr->encode_flag(aflag);
3263 }
3264
3265
3266 // return the preferred ignore string for suggestions
3267 char * AffixMgr::get_ignore() const
3268 {
3269   if (!ignorechars) return NULL;
3270   return ignorechars;
3271 }
3272
3273 // return the preferred ignore string for suggestions
3274 unsigned short * AffixMgr::get_ignore_utf16(int * len) const
3275 {
3276   *len = ignorechars_utf16_len;
3277   return ignorechars_utf16;
3278 }
3279
3280 // return the keyboard string for suggestions
3281 char * AffixMgr::get_key_string()
3282 {
3283   if (! keystring ) keystring = mystrdup(SPELL_KEYSTRING);
3284   return mystrdup(keystring);
3285 }
3286
3287 // return the preferred try string for suggestions
3288 char * AffixMgr::get_try_string() const
3289 {
3290   if (! trystring ) return NULL;
3291   return mystrdup(trystring);
3292 }
3293
3294 // return the preferred try string for suggestions
3295 const char * AffixMgr::get_wordchars() const
3296 {
3297   return wordchars;
3298 }
3299
3300 unsigned short * AffixMgr::get_wordchars_utf16(int * len) const
3301 {
3302   *len = wordchars_utf16_len;
3303   return wordchars_utf16;
3304 }
3305
3306 // is there compounding?
3307 int AffixMgr::get_compound() const
3308 {
3309   return compoundflag || compoundbegin || numdefcpd;
3310 }
3311
3312 // return the compound words control flag
3313 FLAG AffixMgr::get_compoundflag() const
3314 {
3315   return compoundflag;
3316 }
3317
3318 // return the forbidden words control flag
3319 FLAG AffixMgr::get_forbiddenword() const
3320 {
3321   return forbiddenword;
3322 }
3323
3324 // return the forbidden words control flag
3325 FLAG AffixMgr::get_nosuggest() const
3326 {
3327   return nosuggest;
3328 }
3329
3330 // return the forbidden words control flag
3331 FLAG AffixMgr::get_nongramsuggest() const
3332 {
3333   return nongramsuggest;
3334 }
3335
3336 // return the forbidden words flag modify flag
3337 FLAG AffixMgr::get_needaffix() const
3338 {
3339   return needaffix;
3340 }
3341
3342 // return the onlyincompound flag
3343 FLAG AffixMgr::get_onlyincompound() const
3344 {
3345   return onlyincompound;
3346 }
3347
3348 // return the compound word signal flag
3349 FLAG AffixMgr::get_compoundroot() const
3350 {
3351   return compoundroot;
3352 }
3353
3354 // return the compound begin signal flag
3355 FLAG AffixMgr::get_compoundbegin() const
3356 {
3357   return compoundbegin;
3358 }
3359
3360 // return the value of checknum
3361 int AffixMgr::get_checknum() const
3362 {
3363   return checknum;
3364 }
3365
3366 // return the value of prefix
3367 const char * AffixMgr::get_prefix() const
3368 {
3369   if (pfx) return pfx->getKey();
3370   return NULL;
3371 }
3372
3373 // return the value of suffix
3374 const char * AffixMgr::get_suffix() const
3375 {
3376   return sfxappnd;
3377 }
3378
3379 // return the value of suffix
3380 const char * AffixMgr::get_version() const
3381 {
3382   return version;
3383 }
3384
3385 // return lemma_present flag
3386 FLAG AffixMgr::get_lemma_present() const
3387 {
3388   return lemma_present;
3389 }
3390
3391 // utility method to look up root words in hash table
3392 struct hentry * AffixMgr::lookup(const char * word)
3393 {
3394   int i;
3395   struct hentry * he = NULL;
3396   for (i = 0; i < *maxdic && !he; i++) {
3397     he = (alldic[i])->lookup(word);
3398   }
3399   return he;
3400 }
3401
3402 // return the value of suffix
3403 int AffixMgr::have_contclass() const
3404 {
3405   return havecontclass;
3406 }
3407
3408 // return utf8
3409 int AffixMgr::get_utf8() const
3410 {
3411   return utf8;
3412 }
3413
3414 int AffixMgr::get_maxngramsugs(void) const
3415 {
3416   return maxngramsugs;
3417 }
3418
3419 int AffixMgr::get_maxcpdsugs(void) const
3420 {
3421   return maxcpdsugs;
3422 }
3423
3424 int AffixMgr::get_maxdiff(void) const
3425 {
3426   return maxdiff;
3427 }
3428
3429 int AffixMgr::get_onlymaxdiff(void) const
3430 {
3431   return onlymaxdiff;
3432 }
3433
3434 // return nosplitsugs
3435 int AffixMgr::get_nosplitsugs(void) const
3436 {
3437   return nosplitsugs;
3438 }
3439
3440 // return sugswithdots
3441 int AffixMgr::get_sugswithdots(void) const
3442 {
3443   return sugswithdots;
3444 }
3445
3446 /* parse flag */
3447 int AffixMgr::parse_flag(char * line, unsigned short * out, FileMgr * af) {
3448    char * s = NULL;
3449    if (*out != FLAG_NULL && !(*out >= DEFAULTFLAGS)) {
3450       HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix file parameter\n", af->getlinenum());
3451       return 1;
3452    }
3453    if (parse_string(line, &s, af->getlinenum())) return 1;
3454    *out = pHMgr->decode_flag(s);
3455    free(s);
3456    return 0;
3457 }
3458
3459 /* parse num */
3460 int AffixMgr::parse_num(char * line, int * out, FileMgr * af) {
3461    char * s = NULL;
3462    if (*out != -1) {
3463       HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix file parameter\n", af->getlinenum());
3464       return 1;
3465    }
3466    if (parse_string(line, &s, af->getlinenum())) return 1;
3467    *out = atoi(s);
3468    free(s);
3469    return 0;
3470 }
3471
3472 /* parse in the max syllablecount of compound words and  */
3473 int  AffixMgr::parse_cpdsyllable(char * line, FileMgr * af)
3474 {
3475    char * tp = line;
3476    char * piece;
3477    int i = 0;
3478    int np = 0;
3479    w_char w[MAXWORDLEN];
3480    piece = mystrsep(&tp, 0);
3481    while (piece) {
3482       if (*piece != '\0') {
3483           switch(i) {
3484              case 0: { np++; break; }
3485              case 1: { cpdmaxsyllable = atoi(piece); np++; break; }
3486              case 2: {
3487                 if (!utf8) {
3488                     cpdvowels = mystrdup(piece);
3489                 } else {
3490                     int n = u8_u16(w, MAXWORDLEN, piece);
3491                     if (n > 0) {
3492                         flag_qsort((unsigned short *) w, 0, n);
3493                         cpdvowels_utf16 = (w_char *) malloc(n * sizeof(w_char));
3494                         if (!cpdvowels_utf16) return 1;
3495                         memcpy(cpdvowels_utf16, w, n * sizeof(w_char));
3496                     }
3497                     cpdvowels_utf16_len = n;
3498                 }
3499                 np++;
3500                 break;
3501              }
3502              default: break;
3503           }
3504           i++;
3505       }
3506       piece = mystrsep(&tp, 0);
3507    }
3508    if (np < 2) {
3509       HUNSPELL_WARNING(stderr, "error: line %d: missing compoundsyllable information\n", af->getlinenum());
3510       return 1;
3511    }
3512    if (np == 2) cpdvowels = mystrdup("aeiouAEIOU");
3513    return 0;
3514 }
3515
3516 /* parse in the typical fault correcting table */
3517 int  AffixMgr::parse_reptable(char * line, FileMgr * af)
3518 {
3519    if (numrep != 0) {
3520       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
3521       return 1;
3522    }
3523    char * tp = line;
3524    char * piece;
3525    int i = 0;
3526    int np = 0;
3527    piece = mystrsep(&tp, 0);
3528    while (piece) {
3529        if (*piece != '\0') {
3530           switch(i) {
3531              case 0: { np++; break; }
3532              case 1: {
3533                        numrep = atoi(piece);
3534                        if (numrep < 1) {
3535                           HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", af->getlinenum());
3536                           return 1;
3537                        }
3538                        reptable = (replentry *) malloc(numrep * sizeof(struct replentry));
3539                        if (!reptable) return 1;
3540                        np++;
3541                        break;
3542                      }
3543              default: break;
3544           }
3545           i++;
3546        }
3547        piece = mystrsep(&tp, 0);
3548    }
3549    if (np != 2) {
3550       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
3551       return 1;
3552    }
3553
3554    /* now parse the numrep lines to read in the remainder of the table */
3555    char * nl;
3556    for (int j=0; j < numrep; j++) {
3557         if (!(nl = af->getline())) return 1;
3558         mychomp(nl);
3559         tp = nl;
3560         i = 0;
3561         reptable[j].pattern = NULL;
3562         reptable[j].pattern2 = NULL;
3563         piece = mystrsep(&tp, 0);
3564         while (piece) {
3565            if (*piece != '\0') {
3566                switch(i) {
3567                   case 0: {
3568                              if (strncmp(piece,"REP",3) != 0) {
3569                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3570                                  numrep = 0;
3571                                  return 1;
3572                              }
3573                              break;
3574                           }
3575                   case 1: {
3576                             if (*piece == '^') reptable[j].start = true; else reptable[j].start = false;
3577                             reptable[j].pattern = mystrrep(mystrdup(piece + int(reptable[j].start)),"_"," ");
3578                             int lr = strlen(reptable[j].pattern) - 1;
3579                             if (reptable[j].pattern[lr] == '$') {
3580                                 reptable[j].end = true;
3581                                 reptable[j].pattern[lr] = '\0';
3582                             } else reptable[j].end = false;
3583                             break;
3584                           }
3585                   case 2: { reptable[j].pattern2 = mystrrep(mystrdup(piece),"_"," "); break; }
3586                   default: break;
3587                }
3588                i++;
3589            }
3590            piece = mystrsep(&tp, 0);
3591         }
3592         if ((!(reptable[j].pattern)) || (!(reptable[j].pattern2))) {
3593              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3594              numrep = 0;
3595              return 1;
3596         }
3597    }
3598    return 0;
3599 }
3600
3601 /* parse in the typical fault correcting table */
3602 int  AffixMgr::parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword)
3603 {
3604    if (*rl) {
3605       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
3606       return 1;
3607    }
3608    char * tp = line;
3609    char * piece;
3610    int i = 0;
3611    int np = 0;
3612    int numrl = 0;
3613    piece = mystrsep(&tp, 0);
3614    while (piece) {
3615        if (*piece != '\0') {
3616           switch(i) {
3617              case 0: { np++; break; }
3618              case 1: {
3619                        numrl = atoi(piece);
3620                        if (numrl < 1) {
3621                           HUNSPELL_WARNING(stderr, "error: line %d: incorrect entry number\n", af->getlinenum());
3622                           return 1;
3623                        }
3624                        *rl = new RepList(numrl);
3625                        if (!*rl) return 1;
3626                        np++;
3627                        break;
3628                      }
3629              default: break;
3630           }
3631           i++;
3632        }
3633        piece = mystrsep(&tp, 0);
3634    }
3635    if (np != 2) {
3636       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
3637       return 1;
3638    }
3639
3640    /* now parse the num lines to read in the remainder of the table */
3641    char * nl;
3642    for (int j=0; j < numrl; j++) {
3643         if (!(nl = af->getline())) return 1;
3644         mychomp(nl);
3645         tp = nl;
3646         i = 0;
3647         char * pattern = NULL;
3648         char * pattern2 = NULL;
3649         piece = mystrsep(&tp, 0);
3650         while (piece) {
3651            if (*piece != '\0') {
3652                switch(i) {
3653                   case 0: {
3654                              if (strncmp(piece, keyword, sizeof(keyword)) != 0) {
3655                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3656                                  delete *rl;
3657                                  *rl = NULL;
3658                                  return 1;
3659                              }
3660                              break;
3661                           }
3662                   case 1: { pattern = mystrrep(mystrdup(piece),"_"," "); break; }
3663                   case 2: {
3664                     pattern2 = mystrrep(mystrdup(piece),"_"," ");
3665                     break;
3666                   }
3667                   default: break;
3668                }
3669                i++;
3670            }
3671            piece = mystrsep(&tp, 0);
3672         }
3673         if (!pattern || !pattern2) {
3674             if (pattern)
3675                 free(pattern);
3676             if (pattern2)
3677                 free(pattern2);
3678             HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3679             return 1;
3680         }
3681         (*rl)->add(pattern, pattern2);
3682    }
3683    return 0;
3684 }
3685
3686
3687 /* parse in the typical fault correcting table */
3688 int  AffixMgr::parse_phonetable(char * line, FileMgr * af)
3689 {
3690    if (phone) {
3691       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
3692       return 1;
3693    }
3694    char * tp = line;
3695    char * piece;
3696    int i = 0;
3697    int np = 0;
3698    piece = mystrsep(&tp, 0);
3699    while (piece) {
3700        if (*piece != '\0') {
3701           switch(i) {
3702              case 0: { np++; break; }
3703              case 1: {
3704                        phone = (phonetable *) malloc(sizeof(struct phonetable));
3705                        if (!phone) return 1;
3706                        phone->num = atoi(piece);
3707                        phone->rules = NULL;
3708                        phone->utf8 = (char) utf8;
3709                        if (phone->num < 1) {
3710                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
3711                           return 1;
3712                        }
3713                        phone->rules = (char * *) malloc(2 * (phone->num + 1) * sizeof(char *));
3714                        if (!phone->rules) {
3715                           free(phone);
3716                           phone = NULL;
3717                           return 1;
3718                        }
3719                        np++;
3720                        break;
3721                      }
3722              default: break;
3723           }
3724           i++;
3725        }
3726        piece = mystrsep(&tp, 0);
3727    }
3728    if (np != 2) {
3729       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
3730       return 1;
3731    }
3732
3733    /* now parse the phone->num lines to read in the remainder of the table */
3734    char * nl;
3735    for (int j=0; j < phone->num; j++) {
3736         if (!(nl = af->getline())) return 1;
3737         mychomp(nl);
3738         tp = nl;
3739         i = 0;
3740         phone->rules[j * 2] = NULL;
3741         phone->rules[j * 2 + 1] = NULL;
3742         piece = mystrsep(&tp, 0);
3743         while (piece) {
3744            if (*piece != '\0') {
3745                switch(i) {
3746                   case 0: {
3747                              if (strncmp(piece,"PHONE",5) != 0) {
3748                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3749                                  phone->num = 0;
3750                                  return 1;
3751                              }
3752                              break;
3753                           }
3754                   case 1: { phone->rules[j * 2] = mystrrep(mystrdup(piece),"_",""); break; }
3755                   case 2: { phone->rules[j * 2 + 1] = mystrrep(mystrdup(piece),"_",""); break; }
3756                   default: break;
3757                }
3758                i++;
3759            }
3760            piece = mystrsep(&tp, 0);
3761         }
3762         if ((!(phone->rules[j * 2])) || (!(phone->rules[j * 2 + 1]))) {
3763              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3764              phone->num = 0;
3765              return 1;
3766         }
3767    }
3768    phone->rules[phone->num * 2] = mystrdup("");
3769    phone->rules[phone->num * 2 + 1] = mystrdup("");
3770    init_phonet_hash(*phone);
3771    return 0;
3772 }
3773
3774 /* parse in the checkcompoundpattern table */
3775 int  AffixMgr::parse_checkcpdtable(char * line, FileMgr * af)
3776 {
3777    if (numcheckcpd != 0) {
3778       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
3779       return 1;
3780    }
3781    char * tp = line;
3782    char * piece;
3783    int i = 0;
3784    int np = 0;
3785    piece = mystrsep(&tp, 0);
3786    while (piece) {
3787        if (*piece != '\0') {
3788           switch(i) {
3789              case 0: { np++; break; }
3790              case 1: {
3791                        numcheckcpd = atoi(piece);
3792                        if (numcheckcpd < 1) {
3793                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
3794                           return 1;
3795                        }
3796                        checkcpdtable = (patentry *) malloc(numcheckcpd * sizeof(struct patentry));
3797                        if (!checkcpdtable) return 1;
3798                        np++;
3799                        break;
3800                      }
3801              default: break;
3802           }
3803           i++;
3804        }
3805        piece = mystrsep(&tp, 0);
3806    }
3807    if (np != 2) {
3808       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n",  af->getlinenum());
3809       return 1;
3810    }
3811
3812    /* now parse the numcheckcpd lines to read in the remainder of the table */
3813    char * nl;
3814    for (int j=0; j < numcheckcpd; j++) {
3815         if (!(nl = af->getline())) return 1;
3816         mychomp(nl);
3817         tp = nl;
3818         i = 0;
3819         checkcpdtable[j].pattern = NULL;
3820         checkcpdtable[j].pattern2 = NULL;
3821         checkcpdtable[j].pattern3 = NULL;
3822         checkcpdtable[j].cond = FLAG_NULL;
3823         checkcpdtable[j].cond2 = FLAG_NULL;
3824         piece = mystrsep(&tp, 0);
3825         while (piece) {
3826            if (*piece != '\0') {
3827                switch(i) {
3828                   case 0: {
3829                              if (strncmp(piece,"CHECKCOMPOUNDPATTERN",20) != 0) {
3830                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3831                                  numcheckcpd = 0;
3832                                  return 1;
3833                              }
3834                              break;
3835                           }
3836                   case 1: {
3837                     checkcpdtable[j].pattern = mystrdup(piece);
3838                     char * p = strchr(checkcpdtable[j].pattern, '/');
3839                     if (p) {
3840                       *p = '\0';
3841                     checkcpdtable[j].cond = pHMgr->decode_flag(p + 1);
3842                     }
3843                     break; }
3844                   case 2: {
3845                     checkcpdtable[j].pattern2 = mystrdup(piece);
3846                     char * p = strchr(checkcpdtable[j].pattern2, '/');
3847                     if (p) {
3848                       *p = '\0';
3849                       checkcpdtable[j].cond2 = pHMgr->decode_flag(p + 1);
3850                     }
3851                     break;
3852                     }
3853                   case 3: { checkcpdtable[j].pattern3 = mystrdup(piece); simplifiedcpd = 1; break; }
3854                   default: break;
3855                }
3856                i++;
3857            }
3858            piece = mystrsep(&tp, 0);
3859         }
3860         if ((!(checkcpdtable[j].pattern)) || (!(checkcpdtable[j].pattern2))) {
3861              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3862              numcheckcpd = 0;
3863              return 1;
3864         }
3865    }
3866    return 0;
3867 }
3868
3869 /* parse in the compound rule table */
3870 int  AffixMgr::parse_defcpdtable(char * line, FileMgr * af)
3871 {
3872    if (numdefcpd != 0) {
3873       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
3874       return 1;
3875    }
3876    char * tp = line;
3877    char * piece;
3878    int i = 0;
3879    int np = 0;
3880    piece = mystrsep(&tp, 0);
3881    while (piece) {
3882        if (*piece != '\0') {
3883           switch(i) {
3884              case 0: { np++; break; }
3885              case 1: {
3886                        numdefcpd = atoi(piece);
3887                        if (numdefcpd < 1) {
3888                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
3889                           return 1;
3890                        }
3891                        defcpdtable = (flagentry *) malloc(numdefcpd * sizeof(flagentry));
3892                        if (!defcpdtable) return 1;
3893                        np++;
3894                        break;
3895                      }
3896              default: break;
3897           }
3898           i++;
3899        }
3900        piece = mystrsep(&tp, 0);
3901    }
3902    if (np != 2) {
3903       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
3904       return 1;
3905    }
3906
3907    /* now parse the numdefcpd lines to read in the remainder of the table */
3908    char * nl;
3909    for (int j=0; j < numdefcpd; j++) {
3910         if (!(nl = af->getline())) return 1;
3911         mychomp(nl);
3912         tp = nl;
3913         i = 0;
3914         defcpdtable[j].def = NULL;
3915         piece = mystrsep(&tp, 0);
3916         while (piece) {
3917            if (*piece != '\0') {
3918                switch(i) {
3919                   case 0: {
3920                              if (strncmp(piece, "COMPOUNDRULE", 12) != 0) {
3921                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3922                                  numdefcpd = 0;
3923                                  return 1;
3924                              }
3925                              break;
3926                           }
3927                   case 1: { // handle parenthesized flags
3928                             if (strchr(piece, '(')) {
3929                                 defcpdtable[j].def = (FLAG *) malloc(strlen(piece) * sizeof(FLAG));
3930                                 defcpdtable[j].len = 0;
3931                                 int end = 0;
3932                                 FLAG * conv;
3933                                 while (!end) {
3934                                     char * par = piece + 1;
3935                                     while (*par != '(' && *par != ')' && *par != '\0') par++;
3936                                     if (*par == '\0') end = 1; else *par = '\0';
3937                                     if (*piece == '(') piece++;
3938                                     if (*piece == '*' || *piece == '?') {
3939                                         defcpdtable[j].def[defcpdtable[j].len++] = (FLAG) *piece;
3940                                     } else if (*piece != '\0') {
3941                                         int l = pHMgr->decode_flags(&conv, piece, af);
3942                                         for (int k = 0; k < l; k++) defcpdtable[j].def[defcpdtable[j].len++] = conv[k];
3943                                         free(conv);
3944                                     }
3945                                     piece = par + 1;
3946                                 }
3947                             } else {
3948                                 defcpdtable[j].len = pHMgr->decode_flags(&(defcpdtable[j].def), piece, af);
3949                             }
3950                             break;
3951                            }
3952                   default: break;
3953                }
3954                i++;
3955            }
3956            piece = mystrsep(&tp, 0);
3957         }
3958         if (!defcpdtable[j].len) {
3959              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
3960              numdefcpd = 0;
3961              return 1;
3962         }
3963    }
3964    return 0;
3965 }
3966
3967
3968 /* parse in the character map table */
3969 int  AffixMgr::parse_maptable(char * line, FileMgr * af)
3970 {
3971    if (nummap != 0) {
3972       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
3973       return 1;
3974    }
3975    char * tp = line;
3976    char * piece;
3977    int i = 0;
3978    int np = 0;
3979    piece = mystrsep(&tp, 0);
3980    while (piece) {
3981        if (*piece != '\0') {
3982           switch(i) {
3983              case 0: { np++; break; }
3984              case 1: {
3985                        nummap = atoi(piece);
3986                        if (nummap < 1) {
3987                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
3988                           return 1;
3989                        }
3990                        maptable = (mapentry *) malloc(nummap * sizeof(struct mapentry));
3991                        if (!maptable) return 1;
3992                        np++;
3993                        break;
3994                      }
3995              default: break;
3996           }
3997           i++;
3998        }
3999        piece = mystrsep(&tp, 0);
4000    }
4001    if (np != 2) {
4002       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
4003       return 1;
4004    }
4005
4006    /* now parse the nummap lines to read in the remainder of the table */
4007    char * nl;
4008    for (int j=0; j < nummap; j++) {
4009         if (!(nl = af->getline())) return 1;
4010         mychomp(nl);
4011         tp = nl;
4012         i = 0;
4013         maptable[j].set = NULL;
4014         maptable[j].len = 0;
4015         piece = mystrsep(&tp, 0);
4016         while (piece) {
4017            if (*piece != '\0') {
4018                switch(i) {
4019                   case 0: {
4020                              if (strncmp(piece,"MAP",3) != 0) {
4021                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
4022                                  nummap = 0;
4023                                  return 1;
4024                              }
4025                              break;
4026                           }
4027                   case 1: {
4028                             int setn = 0;
4029                             maptable[j].len = strlen(piece);
4030                             maptable[j].set = (char **) malloc(maptable[j].len * sizeof(char*));
4031                             if (!maptable[j].set) return 1;
4032                             for (int k = 0; k < maptable[j].len; k++) {
4033                                 int chl = 1;
4034                                 int chb = k;
4035                                 if (piece[k] == '(') {
4036                                     char * parpos = strchr(piece + k, ')');
4037                                     if (parpos != NULL) {
4038                                         chb = k + 1;
4039                                         chl = (int)(parpos - piece) - k - 1;
4040                                         k = k + chl + 1;
4041                                     }
4042                                 } else {
4043                                     if (utf8 && (piece[k] & 0xc0) == 0xc0) {
4044                                         for (k++; utf8 && (piece[k] & 0xc0) == 0x80; k++);
4045                                         chl = k - chb;
4046                                         k--;
4047                                     }
4048                                 }
4049                                 maptable[j].set[setn] = (char *) malloc(chl + 1);
4050                                 if (!maptable[j].set[setn]) return 1;
4051                                 strncpy(maptable[j].set[setn], piece + chb, chl);
4052                                 maptable[j].set[setn][chl] = '\0';
4053                                 setn++;
4054                             }
4055                             maptable[j].len = setn;
4056                             break; }
4057                   default: break;
4058                }
4059                i++;
4060            }
4061            piece = mystrsep(&tp, 0);
4062         }
4063         if (!maptable[j].set || !maptable[j].len) {
4064              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
4065              nummap = 0;
4066              return 1;
4067         }
4068    }
4069    return 0;
4070 }
4071
4072 /* parse in the word breakpoint table */
4073 int  AffixMgr::parse_breaktable(char * line, FileMgr * af)
4074 {
4075    if (numbreak > -1) {
4076       HUNSPELL_WARNING(stderr, "error: line %d: multiple table definitions\n", af->getlinenum());
4077       return 1;
4078    }
4079    char * tp = line;
4080    char * piece;
4081    int i = 0;
4082    int np = 0;
4083    piece = mystrsep(&tp, 0);
4084    while (piece) {
4085        if (*piece != '\0') {
4086           switch(i) {
4087              case 0: { np++; break; }
4088              case 1: {
4089                        numbreak = atoi(piece);
4090                        if (numbreak < 0) {
4091                           HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n", af->getlinenum());
4092                           return 1;
4093                        }
4094                        if (numbreak == 0) return 0;
4095                        breaktable = (char **) malloc(numbreak * sizeof(char *));
4096                        if (!breaktable) return 1;
4097                        np++;
4098                        break;
4099                      }
4100              default: break;
4101           }
4102           i++;
4103        }
4104        piece = mystrsep(&tp, 0);
4105    }
4106    if (np != 2) {
4107       HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
4108       return 1;
4109    }
4110
4111    /* now parse the numbreak lines to read in the remainder of the table */
4112    char * nl;
4113    for (int j=0; j < numbreak; j++) {
4114         if (!(nl = af->getline())) return 1;
4115         mychomp(nl);
4116         tp = nl;
4117         i = 0;
4118         piece = mystrsep(&tp, 0);
4119         while (piece) {
4120            if (*piece != '\0') {
4121                switch(i) {
4122                   case 0: {
4123                              if (strncmp(piece,"BREAK",5) != 0) {
4124                                  HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
4125                                  numbreak = 0;
4126                                  return 1;
4127                              }
4128                              break;
4129                           }
4130                   case 1: {
4131                             breaktable[j] = mystrdup(piece);
4132                             break;
4133                           }
4134                   default: break;
4135                }
4136                i++;
4137            }
4138            piece = mystrsep(&tp, 0);
4139         }
4140         if (!breaktable) {
4141              HUNSPELL_WARNING(stderr, "error: line %d: table is corrupt\n", af->getlinenum());
4142              numbreak = 0;
4143              return 1;
4144         }
4145    }
4146    return 0;
4147 }
4148
4149 void AffixMgr::reverse_condition(char * piece) {
4150     int neg = 0;
4151     for (char * k = piece + strlen(piece) - 1; k >= piece; k--) {
4152         switch(*k) {
4153           case '[': {
4154                 if (neg) *(k+1) = '['; else *k = ']';
4155                     break;
4156             }
4157           case ']': {
4158                 *k = '[';
4159                 if (neg) *(k+1) = '^';
4160                 neg = 0;
4161                 break;
4162             }
4163           case '^': {
4164                if (*(k+1) == ']') neg = 1; else *(k+1) = *k;
4165                break;
4166                 }
4167           default: {
4168             if (neg) *(k+1) = *k;
4169           }
4170        }
4171     }
4172 }
4173
4174 int  AffixMgr::parse_affix(char * line, const char at, FileMgr * af, char * dupflags)
4175 {
4176    int numents = 0;      // number of affentry structures to parse
4177
4178    unsigned short aflag = 0;      // affix char identifier
4179
4180    char ff=0;
4181    std::vector<affentry> affentries;
4182
4183    char * tp = line;
4184    char * nl = line;
4185    char * piece;
4186    int i = 0;
4187
4188    // checking lines with bad syntax
4189 #ifdef DEBUG
4190    int basefieldnum = 0;
4191 #endif
4192
4193    // split affix header line into pieces
4194
4195    int np = 0;
4196
4197    piece = mystrsep(&tp, 0);
4198    while (piece) {
4199       if (*piece != '\0') {
4200           switch(i) {
4201              // piece 1 - is type of affix
4202              case 0: { np++; break; }
4203
4204              // piece 2 - is affix char
4205              case 1: {
4206                     np++;
4207                     aflag = pHMgr->decode_flag(piece);
4208                     if (((at == 'S') && (dupflags[aflag] & dupSFX)) ||
4209                         ((at == 'P') && (dupflags[aflag] & dupPFX))) {
4210                         HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions of an affix flag\n",
4211                             af->getlinenum());
4212                         // return 1; XXX permissive mode for bad dictionaries
4213                     }
4214                     dupflags[aflag] += (char) ((at == 'S') ? dupSFX : dupPFX);
4215                     break;
4216                     }
4217              // piece 3 - is cross product indicator
4218              case 2: { np++; if (*piece == 'Y') ff = aeXPRODUCT; break; }
4219
4220              // piece 4 - is number of affentries
4221              case 3: {
4222                        np++;
4223                        numents = atoi(piece);
4224                        if (numents == 0) {
4225                            char * err = pHMgr->encode_flag(aflag);
4226                            if (err) {
4227                                 HUNSPELL_WARNING(stderr, "error: line %d: bad entry number\n",
4228                                    af->getlinenum());
4229                                 free(err);
4230                            }
4231                            return 1;
4232                        }
4233                        affentries.resize(numents);
4234                        affentries[0].opts = ff;
4235                        if (utf8) affentries[0].opts += aeUTF8;
4236                        if (pHMgr->is_aliasf()) affentries[0].opts += aeALIASF;
4237                        if (pHMgr->is_aliasm()) affentries[0].opts += aeALIASM;
4238                        affentries[0].aflag = aflag;
4239                      }
4240
4241              default: break;
4242           }
4243           i++;
4244       }
4245       piece = mystrsep(&tp, 0);
4246    }
4247    // check to make sure we parsed enough pieces
4248    if (np != 4) {
4249        char * err = pHMgr->encode_flag(aflag);
4250        if (err) {
4251             HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", af->getlinenum());
4252             free(err);
4253        }
4254        return 1;
4255    }
4256
4257    // now parse numents affentries for this affix
4258    std::vector<affentry>::iterator start = affentries.begin();
4259    std::vector<affentry>::iterator end = affentries.end();
4260    for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
4261       if (!(nl = af->getline())) return 1;
4262       mychomp(nl);
4263       tp = nl;
4264       i = 0;
4265       np = 0;
4266
4267       // split line into pieces
4268       piece = mystrsep(&tp, 0);
4269       while (piece) {
4270          if (*piece != '\0') {
4271              switch(i) {
4272                 // piece 1 - is type
4273                 case 0: {
4274                           np++;
4275                           if (entry != start) entry->opts = start->opts &
4276                              (char) (aeXPRODUCT + aeUTF8 + aeALIASF + aeALIASM);
4277                           break;
4278                         }
4279
4280                 // piece 2 - is affix char
4281                 case 1: {
4282                           np++;
4283                           if (pHMgr->decode_flag(piece) != aflag) {
4284                               char * err = pHMgr->encode_flag(aflag);
4285                               if (err) {
4286                                 HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
4287                                     af->getlinenum(), err);
4288                                 free(err);
4289                               }
4290                               return 1;
4291                           }
4292
4293                           if (entry != start) entry->aflag = start->aflag;
4294                           break;
4295                         }
4296
4297                 // piece 3 - is string to strip or 0 for null
4298                 case 2: {
4299                           np++;
4300                           if (complexprefixes) {
4301                             if (utf8) reverseword_utf(piece); else reverseword(piece);
4302                           }
4303                           entry->strip = mystrdup(piece);
4304                           entry->stripl = (unsigned char) strlen(entry->strip);
4305                           if (strcmp(entry->strip,"0") == 0) {
4306                               free(entry->strip);
4307                               entry->strip=mystrdup("");
4308                               entry->stripl = 0;
4309                           }
4310                           break;
4311                         }
4312
4313                 // piece 4 - is affix string or 0 for null
4314                 case 3: {
4315                           char * dash;
4316                           entry->morphcode = NULL;
4317                           entry->contclass = NULL;
4318                           entry->contclasslen = 0;
4319                           np++;
4320                           dash = strchr(piece, '/');
4321                           if (dash) {
4322                             *dash = '\0';
4323
4324                             if (ignorechars) {
4325                               if (utf8) {
4326                                 remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
4327                               } else {
4328                                 remove_ignored_chars(piece,ignorechars);
4329                               }
4330                             }
4331
4332                             if (complexprefixes) {
4333                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
4334                             }
4335                             entry->appnd = mystrdup(piece);
4336
4337                             if (pHMgr->is_aliasf()) {
4338                                 int index = atoi(dash + 1);
4339                                 entry->contclasslen = (unsigned short) pHMgr->get_aliasf(index, &(entry->contclass), af);
4340                                 if (!entry->contclasslen) HUNSPELL_WARNING(stderr, "error: bad affix flag alias: \"%s\"\n", dash+1);
4341                             } else {
4342                                 entry->contclasslen = (unsigned short) pHMgr->decode_flags(&(entry->contclass), dash + 1, af);
4343                                 flag_qsort(entry->contclass, 0, entry->contclasslen);
4344                             }
4345                             *dash = '/';
4346
4347                             havecontclass = 1;
4348                             for (unsigned short _i = 0; _i < entry->contclasslen; _i++) {
4349                               contclasses[(entry->contclass)[_i]] = 1;
4350                             }
4351                           } else {
4352                             if (ignorechars) {
4353                               if (utf8) {
4354                                 remove_ignored_chars_utf(piece, ignorechars_utf16, ignorechars_utf16_len);
4355                               } else {
4356                                 remove_ignored_chars(piece,ignorechars);
4357                               }
4358                             }
4359
4360                             if (complexprefixes) {
4361                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
4362                             }
4363                             entry->appnd = mystrdup(piece);
4364                           }
4365
4366                           entry->appndl = (unsigned char) strlen(entry->appnd);
4367                           if (strcmp(entry->appnd,"0") == 0) {
4368                               free(entry->appnd);
4369                               entry->appnd=mystrdup("");
4370                               entry->appndl = 0;
4371                           }
4372                           break;
4373                         }
4374
4375                 // piece 5 - is the conditions descriptions
4376                 case 4: {
4377                           np++;
4378                           if (complexprefixes) {
4379                             if (utf8) reverseword_utf(piece); else reverseword(piece);
4380                             reverse_condition(piece);
4381                           }
4382                           if (entry->stripl && (strcmp(piece, ".") != 0) &&
4383                             redundant_condition(at, entry->strip, entry->stripl, piece, af->getlinenum()))
4384                                 strcpy(piece, ".");
4385                           if (at == 'S') {
4386                             reverseword(piece);
4387                             reverse_condition(piece);
4388                           }
4389                           if (encodeit(*entry, piece)) return 1;
4390                          break;
4391                 }
4392
4393                 case 5: {
4394                           np++;
4395                           if (pHMgr->is_aliasm()) {
4396                             int index = atoi(piece);
4397                             entry->morphcode = pHMgr->get_aliasm(index);
4398                           } else {
4399                             if (complexprefixes) { // XXX - fix me for morph. gen.
4400                                 if (utf8) reverseword_utf(piece); else reverseword(piece);
4401                             }
4402                             // add the remaining of the line
4403                             if (*tp) {
4404                                 *(tp - 1) = ' ';
4405                                 tp = tp + strlen(tp);
4406                             }
4407                             entry->morphcode = mystrdup(piece);
4408                             if (!entry->morphcode) return 1;
4409                           }
4410                           break;
4411                 }
4412                 default: break;
4413              }
4414              i++;
4415          }
4416          piece = mystrsep(&tp, 0);
4417       }
4418       // check to make sure we parsed enough pieces
4419       if (np < 4) {
4420           char * err = pHMgr->encode_flag(aflag);
4421           if (err) {
4422             HUNSPELL_WARNING(stderr, "error: line %d: affix %s is corrupt\n",
4423                 af->getlinenum(), err);
4424             free(err);
4425           }
4426           return 1;
4427       }
4428
4429 #ifdef DEBUG
4430       // detect unnecessary fields, excepting comments
4431       if (basefieldnum) {
4432         int fieldnum = !(entry->morphcode) ? 5 : ((*(entry->morphcode)=='#') ? 5 : 6);
4433           if (fieldnum != basefieldnum)
4434             HUNSPELL_WARNING(stderr, "warning: line %d: bad field number\n", af->getlinenum());
4435       } else {
4436         basefieldnum = !(entry->morphcode) ? 5 : ((*(entry->morphcode)=='#') ? 5 : 6);
4437       }
4438 #endif
4439    }
4440
4441    // now create SfxEntry or PfxEntry objects and use links to
4442    // build an ordered (sorted by affix string) list
4443    for (std::vector<affentry>::iterator entry = start; entry != end; ++entry) {
4444       if (at == 'P') {
4445           PfxEntry * pfxptr = new PfxEntry(this,&(*entry));
4446           build_pfxtree(pfxptr);
4447       } else {
4448           SfxEntry * sfxptr = new SfxEntry(this,&(*entry));
4449           build_sfxtree(sfxptr);
4450       }
4451    }
4452    return 0;
4453 }
4454
4455 int AffixMgr::redundant_condition(char ft, char * strip, int stripl, const char * cond, int linenum) {
4456   int condl = strlen(cond);
4457   int i;
4458   int j;
4459   int neg;
4460   int in;
4461   if (ft == 'P') { // prefix
4462     if (strncmp(strip, cond, condl) == 0) return 1;
4463     if (utf8) {
4464     } else {
4465       for (i = 0, j = 0; (i < stripl) && (j < condl); i++, j++) {
4466         if (cond[j] != '[') {
4467           if (cond[j] != strip[i]) {
4468             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
4469             return 0;
4470           }
4471         } else {
4472           neg = (cond[j+1] == '^') ? 1 : 0;
4473           in = 0;
4474           do {
4475             j++;
4476             if (strip[i] == cond[j]) in = 1;
4477           } while ((j < (condl - 1)) && (cond[j] != ']'));
4478           if (j == (condl - 1) && (cond[j] != ']')) {
4479             HUNSPELL_WARNING(stderr, "error: line %d: missing ] in condition:\n%s\n", linenum, cond);
4480             return 0;
4481           }
4482           if ((!neg && !in) || (neg && in)) {
4483             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
4484             return 0;
4485           }
4486         }
4487       }
4488       if (j >= condl) return 1;
4489     }
4490   } else { // suffix
4491     if ((stripl >= condl) && strcmp(strip + stripl - condl, cond) == 0) return 1;
4492     if (utf8) {
4493     } else {
4494       for (i = stripl - 1, j = condl - 1; (i >= 0) && (j >= 0); i--, j--) {
4495         if (cond[j] != ']') {
4496           if (cond[j] != strip[i]) {
4497             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
4498             return 0;
4499           }
4500         } else {
4501           in = 0;
4502           do {
4503             j--;
4504             if (strip[i] == cond[j]) in = 1;
4505           } while ((j > 0) && (cond[j] != '['));
4506           if ((j == 0) && (cond[j] != '[')) {
4507             HUNSPELL_WARNING(stderr, "error: line: %d: missing ] in condition:\n%s\n", linenum, cond);
4508             return 0;
4509           }
4510           neg = (cond[j+1] == '^') ? 1 : 0;
4511           if ((!neg && !in) || (neg && in)) {
4512             HUNSPELL_WARNING(stderr, "warning: line %d: incompatible stripping characters and condition\n", linenum);
4513             return 0;
4514           }
4515         }
4516       }
4517       if (j < 0) return 1;
4518     }
4519   }
4520   return 0;
4521 }