1 #include "license.hunspell"
2 #include "license.myspell"
16 #include "affixmgr.hxx"
17 #include "affentry.hxx"
18 #include "langnum.hxx"
22 #ifndef MOZILLA_CLIENT
// Constructor. Resets the compounding, flag and character-set members shown
// below to their defaults, then loads the affix description with
// parse_file(affpath).
//   affpath - path of the affix file handed to parse_file()
//   ptr     - hash manager (see the "register hash manager" note below)
// NOTE(review): several original lines (braces, the bodies of the two
// initialisation loops, some assignments) are not visible in this view, so
// only the statements shown here are described.
28 AffixMgr::AffixMgr(const char * affpath
, HashMgr
* ptr
)
30 // register hash manager and load affix data from aff file
46 compoundflag
= FLAG_NULL
; // permits word in compound forms
47 compoundbegin
= FLAG_NULL
; // may be first word in compound forms
48 compoundmiddle
= FLAG_NULL
; // may be middle word in compound forms
49 compoundend
= FLAG_NULL
; // may be last word in compound forms
50 compoundroot
= FLAG_NULL
; // compound word signing flag
51 compoundpermitflag
= FLAG_NULL
; // compound permitting flag for suffixed word
52 compoundforbidflag
= FLAG_NULL
; // compound forbidden flag for suffixed word
53 checkcompounddup
= 0; // forbid double words in compounds
54 checkcompoundrep
= 0; // forbid bad compounds (may be non compound word with a REP substitution)
55 checkcompoundcase
= 0; // forbid upper and lowercase combinations at word bounds
56 checkcompoundtriple
= 0; // forbid compounds with triple letters
57 forbiddenword
= FLAG_NULL
; // forbidden word signing flag
58 nosuggest
= FLAG_NULL
; // don't suggest words signed with NOSUGGEST flag
59 lang
= NULL
; // language
60 langnum
= 0; // language code (see http://l10n.openoffice.org/languages.html)
61 pseudoroot
= FLAG_NULL
; // forbidden root, allowed only with suffixes
62 cpdwordmax
= -1; // default: unlimited wordcount in compound words
63 cpdmin
= -1; // undefined
64 cpdmaxsyllable
= 0; // default: unlimited syllablecount in compound words
65 cpdvowels
=NULL
; // vowels (for calculating of Hungarian compounding limit, O(n) search! XXX)
66 cpdvowels_utf16
=NULL
; // vowels for UTF-8 encoding (bsearch instead of O(n) search)
67 cpdvowels_utf16_len
=0; // length of cpdvowels_utf16 (passed to flag_bsearch in get_syllable)
68 pfxappnd
=NULL
; // previous prefix for counting the syllables of prefix BUG
69 sfxappnd
=NULL
; // previous suffix for counting a special syllables BUG
70 cpdsyllablenum
=NULL
; // syllable count incrementing flag
71 checknum
=0; // checking numbers, and word with numbers
72 wordchars
=NULL
; // letters + spec. word characters
73 wordchars_utf16
=NULL
; // letters + spec. word characters
74 wordchars_utf16_len
=0; // letters + spec. word characters
75 ignorechars
=NULL
; // ignored characters (filled in by the IGNORE keyword in parse_file)
76 ignorechars_utf16
=NULL
; // ignored characters (second array filled in by the IGNORE keyword)
77 ignorechars_utf16_len
=0; // length filled in together with ignorechars_utf16
78 version
=NULL
; // affix and dictionary file version string
79 havecontclass
=0; // flags of possible continuing classes (double affix)
80 // LEMMA_PRESENT: do not put the root into the morphological output. The lemma is
81 // present in the morphological description in the dictionary file. It's often combined with PSEUDOROOT.
82 lemma_present
= FLAG_NULL
;
83 circumfix
= FLAG_NULL
;
84 onlyincompound
= FLAG_NULL
;
85 flag_mode
= FLAG_CHAR
; // default one-character flags in affix and dic file
86 maxngramsugs
= -1; // undefined
92 derived
= NULL
; // XXX not threadsafe variable for experimental stemming
// NOTE(review): the bodies of the next two loops are not visible here.
96 for (int i
=0; i
< SETSIZE
; i
++) {
103 for (int j
=0; j
< CONTSIZE
; j
++) {
// a non-zero result from parse_file() is reported as a load failure
107 if (parse_file(affpath
)) {
108 HUNSPELL_WARNING(stderr
, "Failure loading aff file %s\n",affpath
);
// built-in ASCII letter set for wordchars
// NOTE(review): presumably only applied on load failure; the closing brace is not visible
109 wordchars
= mystrdup("qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM");
// cpdmin still at its "undefined" marker: fall back to MINCPDLEN
112 if (cpdmin
== -1) cpdmin
= MINCPDLEN
;
// Destructor. Walks the prefix and suffix entry lists, then frees the strings
// and table entries shown below (map, break, rep, compound-rule and
// compound-pattern tables, flags, vowel and word-character buffers).
// NOTE(review): the statements that dispose of the list entries and of the
// table arrays themselves are not visible in this view.
117 AffixMgr::~AffixMgr()
120 // pass through linked prefix entries and clean up
121 for (int i
=0; i
< SETSIZE
;i
++) {
123 PfxEntry
* ptr
= (PfxEntry
*)pStart
[i
];
124 PfxEntry
* nptr
= NULL
;
// remember the successor before the current entry is handled
126 nptr
= ptr
->getNext();
133 // pass through linked suffix entries and clean up
134 for (int j
=0; j
< SETSIZE
; j
++) {
136 SfxEntry
* ptr
= (SfxEntry
*)sStart
[j
];
137 SfxEntry
* nptr
= NULL
;
139 nptr
= ptr
->getNext();
147 if (trystring
) free(trystring
);
149 if (encoding
) free(encoding
);
// release both character sets of every map table entry
152 for (int j
=0; j
< nummap
; j
++) {
153 if (maptable
[j
].set
) free(maptable
[j
].set
);
154 if (maptable
[j
].set_utf16
) free(maptable
[j
].set_utf16
);
155 maptable
[j
].set
= NULL
;
// release the break strings
163 for (int j
=0; j
< numbreak
; j
++) {
164 if (breaktable
[j
]) free(breaktable
[j
]);
165 breaktable
[j
] = NULL
;
// release both patterns of every replacement table entry
172 for (int j
=0; j
< numrep
; j
++) {
173 free(reptable
[j
].pattern
);
174 free(reptable
[j
].pattern2
);
175 reptable
[j
].pattern
= NULL
;
176 reptable
[j
].pattern2
= NULL
;
// release the compound rule definitions
182 for (int j
=0; j
< numdefcpd
; j
++) {
183 free(defcpdtable
[j
].def
);
184 defcpdtable
[j
].def
= NULL
;
// release both patterns of every compound pattern entry
191 for (int j
=0; j
< numcheckcpd
; j
++) {
192 free(checkcpdtable
[j
].pattern
);
193 free(checkcpdtable
[j
].pattern2
);
194 checkcpdtable
[j
].pattern
= NULL
;
195 checkcpdtable
[j
].pattern2
= NULL
;
198 checkcpdtable
= NULL
;
// hand every flag member to FREE_FLAG (macro defined outside this view)
201 FREE_FLAG(compoundflag
);
202 FREE_FLAG(compoundbegin
);
203 FREE_FLAG(compoundmiddle
);
204 FREE_FLAG(compoundend
);
205 FREE_FLAG(compoundpermitflag
);
206 FREE_FLAG(compoundforbidflag
);
207 FREE_FLAG(compoundroot
);
208 FREE_FLAG(forbiddenword
);
209 FREE_FLAG(nosuggest
);
210 FREE_FLAG(pseudoroot
);
211 FREE_FLAG(lemma_present
);
212 FREE_FLAG(circumfix
);
213 FREE_FLAG(onlyincompound
);
// remaining heap strings, each freed only when set
219 if (cpdvowels
) free(cpdvowels
);
220 if (cpdvowels_utf16
) free(cpdvowels_utf16
);
221 if (cpdsyllablenum
) free(cpdsyllablenum
);
223 if (lang
) free(lang
);
224 if (wordchars
) free(wordchars
);
225 if (wordchars_utf16
) free(wordchars_utf16
);
226 if (ignorechars
) free(ignorechars
);
227 if (ignorechars_utf16
) free(ignorechars_utf16
);
228 if (version
) free(version
);
229 if (derived
) free(derived
);
234 // read in aff file and build up prefix and suffix entry objects
235 int AffixMgr::parse_file(const char * affpath
)
239 char line
[MAXLNLEN
+1];
244 // checking flag duplication
245 char dupflags
[CONTSIZE
];
246 char dupflags_ini
= 1;
248 // first line indicator for removing byte order mark
251 // open the affix file
253 afflst
= fopen(affpath
,"r");
255 HUNSPELL_WARNING(stderr
, "error: could not open affix description file %s\n",affpath
);
259 // step one is to parse the affix file building up the internal
260 // affix data structures
263 // read in each line ignoring any that do not
264 // start with a known line type indicator
265 while (fgets(line
,MAXLNLEN
,afflst
)) {
268 /* remove byte order mark */
271 if (strncmp(line
,"",3) == 0) {
272 memmove(line
, line
+3, strlen(line
+3)+1);
273 HUNSPELL_WARNING(stderr
, "warning: affix file begins with byte order mark: possible incompatibility with old Hunspell versions\n");
277 /* parse in the try string */
278 if (strncmp(line
,"TRY",3) == 0) {
279 if (parse_string(line
, &trystring
, "TRY")) {
285 /* parse in the name of the character set used by the .dict and .aff */
286 if (strncmp(line
,"SET",3) == 0) {
287 if (parse_string(line
, &encoding
, "SET")) {
291 if (strcmp(encoding
, "UTF-8") == 0) {
293 #ifndef OPENOFFICEORG
294 #ifndef MOZILLA_CLIENT
295 if (initialize_utf_tbl()) return 1;
301 /* parse COMPLEXPREFIXES for agglutinative languages with right-to-left writing system */
302 if (strncmp(line
,"COMPLEXPREFIXES",15) == 0)
305 /* parse in the flag used by the controlled compound words */
306 if (strncmp(line
,"COMPOUNDFLAG",12) == 0) {
307 if (parse_flag(line
, &compoundflag
, "COMPOUNDFLAG")) {
313 /* parse in the flag used by compound words */
314 if (strncmp(line
,"COMPOUNDBEGIN",13) == 0) {
315 if (complexprefixes
) {
316 if (parse_flag(line
, &compoundend
, "COMPOUNDBEGIN")) {
321 if (parse_flag(line
, &compoundbegin
, "COMPOUNDBEGIN")) {
328 /* parse in the flag used by compound words */
329 if (strncmp(line
,"COMPOUNDMIDDLE",14) == 0) {
330 if (parse_flag(line
, &compoundmiddle
, "COMPOUNDMIDDLE")) {
335 /* parse in the flag used by compound words */
336 if (strncmp(line
,"COMPOUNDEND",11) == 0) {
337 if (complexprefixes
) {
338 if (parse_flag(line
, &compoundbegin
, "COMPOUNDEND")) {
343 if (parse_flag(line
, &compoundend
, "COMPOUNDEND")) {
350 /* parse in the data used by compound_check() method */
351 if (strncmp(line
,"COMPOUNDWORDMAX",15) == 0) {
352 if (parse_num(line
, &cpdwordmax
, "COMPOUNDWORDMAX")) {
358 /* parse in the flag sign compounds in dictionary */
359 if (strncmp(line
,"COMPOUNDROOT",12) == 0) {
360 if (parse_flag(line
, &compoundroot
, "COMPOUNDROOT")) {
366 /* parse in the flag used by compound_check() method */
367 if (strncmp(line
,"COMPOUNDPERMITFLAG",18) == 0) {
368 if (parse_flag(line
, &compoundpermitflag
, "COMPOUNDPERMITFLAG")) {
374 /* parse in the flag used by compound_check() method */
375 if (strncmp(line
,"COMPOUNDFORBIDFLAG",18) == 0) {
376 if (parse_flag(line
, &compoundforbidflag
, "COMPOUNDFORBIDFLAG")) {
382 if (strncmp(line
,"CHECKCOMPOUNDDUP",16) == 0) {
383 checkcompounddup
= 1;
386 if (strncmp(line
,"CHECKCOMPOUNDREP",16) == 0) {
387 checkcompoundrep
= 1;
390 if (strncmp(line
,"CHECKCOMPOUNDTRIPLE",19) == 0) {
391 checkcompoundtriple
= 1;
394 if (strncmp(line
,"CHECKCOMPOUNDCASE",17) == 0) {
395 checkcompoundcase
= 1;
398 if (strncmp(line
,"NOSUGGEST",9) == 0) {
399 if (parse_flag(line
, &nosuggest
, "NOSUGGEST")) {
405 /* parse in the flag used by forbidden words */
406 if (strncmp(line
,"FORBIDDENWORD",13) == 0) {
407 if (parse_flag(line
, &forbiddenword
, "FORBIDDENWORD")) {
413 /* parse in the LEMMA_PRESENT flag */
414 if (strncmp(line
,"LEMMA_PRESENT",13) == 0) {
415 if (parse_flag(line
, &lemma_present
, "LEMMA_PRESENT")) {
421 /* parse in the flag used by circumfixes */
422 if (strncmp(line
,"CIRCUMFIX",9) == 0) {
423 if (parse_flag(line
, &circumfix
, "CIRCUMFIX")) {
429 /* parse in the flag used by fogemorphemes */
430 if (strncmp(line
,"ONLYINCOMPOUND",14) == 0) {
431 if (parse_flag(line
, &onlyincompound
, "ONLYINCOMPOUND")) {
437 /* parse in the flag used by `pseudoroots' */
438 if (strncmp(line
,"PSEUDOROOT",10) == 0) {
439 if (parse_flag(line
, &pseudoroot
, "PSEUDOROOT")) {
445 /* parse in the flag used by `pseudoroots' */
446 if (strncmp(line
,"NEEDAFFIX",9) == 0) {
447 if (parse_flag(line
, &pseudoroot
, "NEEDAFFIX")) {
453 /* parse in the minimal length for words in compounds */
454 if (strncmp(line
,"COMPOUNDMIN",11) == 0) {
455 if (parse_num(line
, &cpdmin
, "COMPOUNDMIN")) {
459 if (cpdmin
< 1) cpdmin
= 1;
462 /* parse in the max. words and syllables in compounds */
463 if (strncmp(line
,"COMPOUNDSYLLABLE",16) == 0) {
464 if (parse_cpdsyllable(line
)) {
470 /* parse in the flag used by compound_check() method */
471 if (strncmp(line
,"SYLLABLENUM",11) == 0) {
472 if (parse_string(line
, &cpdsyllablenum
, "SYLLABLENUM")) {
478 /* handle the CHECKNUM keyword */
479 if (strncmp(line
,"CHECKNUM",8) == 0) {
483 /* parse in the extra word characters */
484 if (strncmp(line
,"WORDCHARS",9) == 0) {
485 if (parse_array(line
, &wordchars
, &wordchars_utf16
, &wordchars_utf16_len
, "WORDCHARS", utf8
)) {
491 /* parse in the ignored characters (for example, Arabic optional diacritic characters) */
492 if (strncmp(line
,"IGNORE",6) == 0) {
493 if (parse_array(line
, &ignorechars
, &ignorechars_utf16
, &ignorechars_utf16_len
, "IGNORE", utf8
)) {
499 /* parse in the typical fault correcting table */
500 if (strncmp(line
,"REP",3) == 0) {
501 if (parse_reptable(line
, afflst
)) {
507 /* parse in the checkcompoundpattern table */
508 if (strncmp(line
,"CHECKCOMPOUNDPATTERN",20) == 0) {
509 if (parse_checkcpdtable(line
, afflst
)) {
515 /* parse in the defcompound table */
516 if (strncmp(line
,"COMPOUNDRULE",12) == 0) {
517 if (parse_defcpdtable(line
, afflst
)) {
523 /* parse in the related character map table */
524 if (strncmp(line
,"MAP",3) == 0) {
525 if (parse_maptable(line
, afflst
)) {
531 /* parse in the word breakpoints table */
532 if (strncmp(line
,"BREAK",5) == 0) {
533 if (parse_breaktable(line
, afflst
)) {
539 /* parse in the language for language specific codes */
540 if (strncmp(line
,"LANG",4) == 0) {
541 if (parse_string(line
, &lang
, "LANG")) {
545 langnum
= get_lang_num(lang
);
548 if (strncmp(line
,"VERSION",7) == 0) {
549 if (parse_string(line
, &version
, "VERSION")) {
555 if (strncmp(line
,"MAXNGRAMSUGS",12) == 0) {
556 if (parse_num(line
, &maxngramsugs
, "MAXNGRAMSUGS")) {
562 if (strncmp(line
,"NOSPLITSUGS",11) == 0) {
566 if (strncmp(line
,"SUGSWITHDOTS",12) == 0) {
570 /* parse in the KEEPCASE flag */
571 if (strncmp(line
,"KEEPCASE",8) == 0) {
572 if (parse_flag(line
, &keepcase
, "KEEPCASE")) {
578 if (strncmp(line
,"CHECKSHARPS",11) == 0) {
582 /* parse this affix: P - prefix, S - suffix */
584 if (strncmp(line
,"PFX",3) == 0) ft
= complexprefixes
? 'S' : 'P';
585 if (strncmp(line
,"SFX",3) == 0) ft
= complexprefixes
? 'P' : 'S';
588 for (int i
= 0; i
< CONTSIZE
; i
++) dupflags
[i
] = 0;
591 if (parse_affix(line
, ft
, afflst
, dupflags
)) {
593 process_pfx_tree_to_list();
594 process_sfx_tree_to_list();
602 // convert affix trees to sorted list
603 process_pfx_tree_to_list();
604 process_sfx_tree_to_list();
606 // now we can speed up performance greatly taking advantage of the
607 // relationship between the affixes and the idea of "subsets".
609 // View each prefix as a potential leading subset of another and view
610 // each suffix (reversed) as a potential trailing subset of another.
612 // To illustrate this relationship if we know the prefix "ab" is found in the
613 // word to examine, only prefixes that "ab" is a leading subset of need be examined.
614 // Furthermore, if "ab" is not present then none of the prefixes that "ab"
615 // is a leading subset of need be examined.
616 // The same argument goes for suffix string that are reversed.
618 // Then to top this off why not examine the first char of the word to quickly
619 // limit the set of prefixes to examine (i.e. the prefixes to examine must
620 // be leading supersets of the first character of the word (if they exist)
622 // To take advantage of this "subset" relationship, we need to add two links
623 // from entry. One to take next if the current prefix is found (call it nexteq)
624 // and one to take next if the current prefix is not found (call it nextne).
626 // Since we have built ordered lists, all that remains is to properly initialize
627 // the nextne and nexteq pointers that relate them
632 // expand wordchars string, based on csutil (for external tokenization)
634 char * enc
= get_encoding();
635 csconv
= get_current_cs(enc
);
641 strcpy(expw
, wordchars
);
645 for (int i
= 0; i
<= 255; i
++) {
646 if ( (csconv
[i
].cupper
!= csconv
[i
].clower
) &&
647 (! strchr(expw
, (char) i
))) {
648 *(expw
+ strlen(expw
) + 1) = '\0';
649 *(expw
+ strlen(expw
)) = (char) i
;
653 wordchars
= mystrdup(expw
);
655 // temporary BREAK definition for German dash handling (OOo issue 64400)
656 if ((langnum
== LANG_de
) && (!breaktable
)) {
657 breaktable
= (char **) malloc(sizeof(char *));
658 if (!breaktable
) return 1;
659 breaktable
[0] = mystrdup("-");
666 // we want to be able to quickly access prefix information
667 // both by prefix flag, and sorted by prefix string itself
668 // so we need to set up two indexes
// pfxptr is handled as a PfxEntry. It is stored in pFlag[] under the low byte
// of its flag, and in pStart[] under the first byte of its key (slot 0 when
// the key is empty).
// NOTE(review): the return statements and the linking calls between the
// visible lines are not shown in this view.
670 int AffixMgr::build_pfxtree(AffEntry
* pfxptr
)
674 PfxEntry
* ep
= (PfxEntry
*) pfxptr
;
676 // get the right starting points
677 const char * key
= ep
->getKey();
// only the low 8 bits of the flag are used as the pFlag index
678 const unsigned char flg
= (unsigned char) (ep
->getFlag() & 0x00FF);
680 // first index by flag which must exist
681 ptr
= (PfxEntry
*)pFlag
[flg
];
683 pFlag
[flg
] = (AffEntry
*) ep
;
686 // handle the special case of null affix string
687 if (strlen(key
) == 0) {
688 // always insert them at head of list at element 0
689 ptr
= (PfxEntry
*)pStart
[0];
691 pStart
[0] = (AffEntry
*)ep
;
695 // now handle the normal case
// sp: first byte of the key, used as the pStart index
699 unsigned char sp
= *((const unsigned char *)key
);
700 ptr
= (PfxEntry
*)pStart
[sp
];
702 // handle the first insert
704 pStart
[sp
] = (AffEntry
*)ep
;
709 // otherwise use binary tree insertion so that a sorted
710 // list can easily be generated later
// keys comparing <= follow the NextEQ link, the others the NextNE link
714 if (strcmp(ep
->getKey(), ptr
->getKey() ) <= 0) {
715 ptr
= ptr
->getNextEQ();
721 ptr
= ptr
->getNextNE();
731 // we want to be able to quickly access suffix information
732 // both by suffix flag, and sorted by the reverse of the
733 // suffix string itself; so we need to set up two indexes
// sfxptr is handled as an SfxEntry. It is stored in sFlag[] under the low byte
// of its flag, and in sStart[] under the first byte of its key (slot 0 when
// the key is empty).
// NOTE(review): the return statements and the linking calls between the
// visible lines are not shown in this view.
734 int AffixMgr::build_sfxtree(AffEntry
* sfxptr
)
738 SfxEntry
* ep
= (SfxEntry
*) sfxptr
;
740 /* get the right starting point */
741 const char * key
= ep
->getKey();
// only the low 8 bits of the flag are used as the sFlag index
742 const unsigned char flg
= (unsigned char) (ep
->getFlag() & 0x00FF);
744 // first index by flag which must exist
745 ptr
= (SfxEntry
*)sFlag
[flg
];
747 sFlag
[flg
] = (AffEntry
*) ep
;
749 // next index by affix string
751 // handle the special case of null affix string
752 if (strlen(key
) == 0) {
753 // always insert them at head of list at element 0
754 ptr
= (SfxEntry
*)sStart
[0];
756 sStart
[0] = (AffEntry
*)ep
;
760 // now handle the normal case
// sp: first byte of the key, used as the sStart index
764 unsigned char sp
= *((const unsigned char *)key
);
765 ptr
= (SfxEntry
*)sStart
[sp
];
767 // handle the first insert
769 sStart
[sp
] = (AffEntry
*)ep
;
773 // otherwise use binary tree insertion so that a sorted
774 // list can easily be generated later
// keys comparing <= follow the NextEQ link, the others the NextNE link
778 if (strcmp(ep
->getKey(), ptr
->getKey() ) <= 0) {
779 ptr
= ptr
->getNextEQ();
785 ptr
= ptr
->getNextNE();
795 // convert from binary tree to sorted list
// Replaces pStart[i], for i from 1 to SETSIZE-1, with the result of
// process_pfx_in_order(); slot 0 is not visited by this loop.
// NOTE(review): the return statement is not visible in this view.
796 int AffixMgr::process_pfx_tree_to_list()
798 for (int i
=1; i
< SETSIZE
; i
++) {
799 pStart
[i
] = process_pfx_in_order(pStart
[i
],NULL
);
// Recursive helper for process_pfx_tree_to_list(): processes the NextNE
// subtree of ptr first, stores that result as ptr's next link, then processes
// the NextEQ subtree with ptr passed as the second argument.
// NOTE(review): the null check on ptr and the return statement are not
// visible in this view.
805 AffEntry
* AffixMgr::process_pfx_in_order(AffEntry
* ptr
, AffEntry
* nptr
)
808 nptr
= process_pfx_in_order(((PfxEntry
*) ptr
)->getNextNE(), nptr
);
809 ((PfxEntry
*) ptr
)->setNext((PfxEntry
*) nptr
);
810 nptr
= process_pfx_in_order(((PfxEntry
*) ptr
)->getNextEQ(), ptr
);
816 // convert from binary tree to sorted list
// Replaces sStart[i], for i from 1 to SETSIZE-1, with the result of
// process_sfx_in_order(); slot 0 is not visited by this loop.
// NOTE(review): the return statement is not visible in this view.
817 int AffixMgr:: process_sfx_tree_to_list()
819 for (int i
=1; i
< SETSIZE
; i
++) {
820 sStart
[i
] = process_sfx_in_order(sStart
[i
],NULL
);
// Recursive helper for process_sfx_tree_to_list(): processes the NextNE
// subtree of ptr first, stores that result as ptr's next link, then processes
// the NextEQ subtree with ptr passed as the second argument.
// NOTE(review): the null check on ptr and the return statement are not
// visible in this view.
825 AffEntry
* AffixMgr::process_sfx_in_order(AffEntry
* ptr
, AffEntry
* nptr
)
828 nptr
= process_sfx_in_order(((SfxEntry
*) ptr
)->getNextNE(), nptr
);
829 ((SfxEntry
*) ptr
)->setNext((SfxEntry
*) nptr
);
830 nptr
= process_sfx_in_order(((SfxEntry
*) ptr
)->getNextEQ(), ptr
);
836 // reinitialize the PfxEntry links NextEQ and NextNE to speed searching
837 // using the idea of leading subsets this time
// Works on the lists in pStart[1..SETSIZE-1] in two passes per list: the
// first sets NextNE/NextEQ from isSubset() comparisons of the keys, the
// second clears selected NextNE links.
// NOTE(review): the statements that assign mptr in the second pass and the
// return statement are not visible in this view.
838 int AffixMgr::process_pfx_order()
842 // loop through each prefix list starting point
843 for (int i
=1; i
< SETSIZE
; i
++) {
845 ptr
= (PfxEntry
*)pStart
[i
];
847 // look through the remainder of the list
848 // and find next entry with affix that
849 // the current one is not a subset of
850 // mark that as destination for NextNE
851 // use next in list that you are a subset
854 for (; ptr
!= NULL
; ptr
= ptr
->getNext()) {
856 PfxEntry
* nptr
= ptr
->getNext();
// advance nptr to the first later entry whose key ptr's key does not lead
857 for (; nptr
!= NULL
; nptr
= nptr
->getNext()) {
858 if (! isSubset( ptr
->getKey() , nptr
->getKey() )) break;
860 ptr
->setNextNE(nptr
);
861 ptr
->setNextEQ(NULL
);
// NextEQ is the immediate successor when ptr's key is a leading subset of it
862 if ((ptr
->getNext()) && isSubset(ptr
->getKey() , (ptr
->getNext())->getKey()))
863 ptr
->setNextEQ(ptr
->getNext());
866 // now clean up by adding smart search termination strings:
867 // if you are already a superset of the previous prefix
868 // but not a subset of the next, search can end here
869 // so set NextNE properly
871 ptr
= (PfxEntry
*) pStart
[i
];
872 for (; ptr
!= NULL
; ptr
= ptr
->getNext()) {
873 PfxEntry
* nptr
= ptr
->getNext();
874 PfxEntry
* mptr
= NULL
;
875 for (; nptr
!= NULL
; nptr
= nptr
->getNext()) {
876 if (! isSubset(ptr
->getKey(),nptr
->getKey())) break;
879 if (mptr
) mptr
->setNextNE(NULL
);
885 // initialize the SfxEntry links NextEQ and NextNE to speed searching
886 // using the idea of leading subsets this time
// Works on the lists in sStart[1..SETSIZE-1] in two passes per list: the
// first sets NextNE/NextEQ from isSubset() comparisons of the keys, the
// second clears selected NextNE links.
// NOTE(review): the statements that assign mptr in the second pass and the
// return statement are not visible in this view.
887 int AffixMgr::process_sfx_order()
891 // loop through each suffix list starting point
892 for (int i
=1; i
< SETSIZE
; i
++) {
894 ptr
= (SfxEntry
*) sStart
[i
];
896 // look through the remainder of the list
897 // and find next entry with affix that
898 // the current one is not a subset of
899 // mark that as destination for NextNE
900 // use next in list that you are a subset
903 for (; ptr
!= NULL
; ptr
= ptr
->getNext()) {
904 SfxEntry
* nptr
= ptr
->getNext();
// advance nptr to the first later entry whose key ptr's key does not lead
905 for (; nptr
!= NULL
; nptr
= nptr
->getNext()) {
906 if (! isSubset(ptr
->getKey(),nptr
->getKey())) break;
908 ptr
->setNextNE(nptr
);
909 ptr
->setNextEQ(NULL
);
// NextEQ is the immediate successor when ptr's key is a leading subset of it
910 if ((ptr
->getNext()) && isSubset(ptr
->getKey(),(ptr
->getNext())->getKey()))
911 ptr
->setNextEQ(ptr
->getNext());
915 // now clean up by adding smart search termination strings:
916 // if you are already a superset of the previous suffix
917 // but not a subset of the next, search can end here
918 // so set NextNE properly
920 ptr
= (SfxEntry
*) sStart
[i
];
921 for (; ptr
!= NULL
; ptr
= ptr
->getNext()) {
922 SfxEntry
* nptr
= ptr
->getNext();
923 SfxEntry
* mptr
= NULL
;
924 for (; nptr
!= NULL
; nptr
= nptr
->getNext()) {
925 if (! isSubset(ptr
->getKey(),nptr
->getKey())) break;
928 if (mptr
) mptr
->setNextNE(NULL
);
936 // takes aff file condition string and creates the
937 // conds array - please see the appendix at the end of the
938 // file affentry.cxx which describes what is going on here
939 // in much more detail
//   ptr - affix entry whose conds member is filled in
//   cs  - condition string; a lone "." is handled specially (see below)
// Returns 1 when one of the malloc() calls below fails.
// Condition number n is recorded as bit (1 << n) in the per-character tables
// (conds.base, or conds.utf8.ascii in the UTF-8 branch).
// NOTE(review): the scanning loop header, most branch headers and the other
// return statements are not visible in this view.
941 int AffixMgr::encodeit(struct affentry
* ptr
, char * cs
)
// mbr collects the raw bytes of the current group, wmbr/wpos its wide characters
945 unsigned char mbr
[MAXLNLEN
];
946 w_char wmbr
[MAXLNLEN
];
947 w_char
* wpos
= wmbr
;
949 // now clear the conditions array
950 for (i
=0;i
<SETSIZE
;i
++) ptr
->conds
.base
[i
] = (unsigned char) 0;
952 // now parse the string to create the conds array
954 unsigned char neg
= 0; // complement indicator
955 int grp
= 0; // group indicator
956 unsigned char n
= 0; // number of conditions
957 int ec
= 0; // end condition indicator
958 int nm
= 0; // number of member in group
960 // if no condition just return
961 if (strcmp(cs
,".")==0) {
// c: current byte of the condition string
968 c
= *((unsigned char *)(cs
+ i
));
970 // start group indicator
// '^' inside a group (complement marker)
977 if ((grp
== 1) && (c
== '^')) {
982 // end group indicator
988 // add character of group to list
989 if ((grp
== 1) && (c
!= 0)) {
1004 // set the proper bits in the condition array vals for those chars
1005 for (j
=0;j
<nm
;j
++) {
1006 k
= (unsigned int) mbr
[j
];
1007 ptr
->conds
.base
[k
] = ptr
->conds
.base
[k
] | ((unsigned char)1 << n
);
1010 // complement so set all of them and then unset indicated ones
1011 for (j
=0;j
<SETSIZE
;j
++) ptr
->conds
.base
[j
] = ptr
->conds
.base
[j
] | ((unsigned char)1 << n
);
1012 for (j
=0;j
<nm
;j
++) {
1013 k
= (unsigned int) mbr
[j
];
1014 ptr
->conds
.base
[k
] = ptr
->conds
.base
[k
] & ~((unsigned char)1 << n
);
1021 // not a group so just set the proper bit for this char
1022 // but first handle special case of . inside condition
1024 // wild card character so set them all
1025 for (j
=0;j
<SETSIZE
;j
++) ptr
->conds
.base
[j
] = ptr
->conds
.base
[j
] | ((unsigned char)1 << n
);
1027 ptr
->conds
.base
[(unsigned int) c
] = ptr
->conds
.base
[(unsigned int)c
] | ((unsigned char)1 << n
);
1032 } else { // UTF-8 character set
1034 ptr
->conds
.utf8
.neg
[n
] = neg
;
1036 // set the proper bits in the condition array vals for those chars
1037 for (j
=0;j
<nm
;j
++) {
1038 k
= (unsigned int) mbr
[j
];
// convert the character starting at mbr + j into *wpos
1040 u8_u16(wpos
, 1, (char *) mbr
+ j
);
1042 if ((k
& 0xe0) == 0xe0) j
+=2; else j
++; // 3-byte UTF-8 character
1044 ptr
->conds
.utf8
.ascii
[k
] = ptr
->conds
.utf8
.ascii
[k
] | ((unsigned char)1 << n
);
1047 } else { // neg == 1
1048 // complement so set all of them and then unset indicated ones
1049 for (j
=0;j
<(SETSIZE
/2);j
++) ptr
->conds
.utf8
.ascii
[j
] = ptr
->conds
.utf8
.ascii
[j
] | ((unsigned char)1 << n
);
1050 for (j
=0;j
<nm
;j
++) {
1051 k
= (unsigned int) mbr
[j
];
1053 u8_u16(wpos
, 1, (char *) mbr
+ j
);
1055 if ((k
& 0xe0) == 0xe0) j
+=2; else j
++; // 3-byte UTF-8 character
1057 ptr
->conds
.utf8
.ascii
[k
] = ptr
->conds
.utf8
.ascii
[k
] & ~((unsigned char)1 << n
);
// store the collected wide characters of this group (if any) and sort the copy
1064 ptr
->conds
.utf8
.wlen
[n
] = wpos
- wmbr
;
1065 if ((wpos
- wmbr
) != 0) {
1066 ptr
->conds
.utf8
.wchars
[n
] = (w_char
*) malloc(sizeof(w_char
) * (wpos
- wmbr
));
1067 if (!ptr
->conds
.utf8
.wchars
[n
]) return 1;
1068 memcpy(ptr
->conds
.utf8
.wchars
[n
], wmbr
, sizeof(w_char
) * (wpos
- wmbr
));
1069 flag_qsort((unsigned short *) ptr
->conds
.utf8
.wchars
[n
], 0, ptr
->conds
.utf8
.wlen
[n
]);
1072 } else { // grp == 0
1073 // is UTF-8 character?
// single wide character: store it as a one-element wchars array
1075 ptr
->conds
.utf8
.wchars
[n
] = (w_char
*) malloc(sizeof(w_char
));
1076 if (!ptr
->conds
.utf8
.wchars
[n
]) return 1;
1077 ptr
->conds
.utf8
.wlen
[n
] = 1;
1078 u8_u16(ptr
->conds
.utf8
.wchars
[n
], 1, cs
+ i
);
1079 if ((c
& 0xe0) == 0xe0) i
+=2; else i
++; // 3-byte UTF-8 character
1081 ptr
->conds
.utf8
.wchars
[n
] = NULL
;
1082 // not a group so just set the proper bit for this char
1083 // but first handle special case of . inside condition
1085 ptr
->conds
.utf8
.all
[n
] = 1;
1086 // wild card character so set them all
1087 for (j
=0;j
<(SETSIZE
/2);j
++) ptr
->conds
.utf8
.ascii
[j
] = ptr
->conds
.utf8
.ascii
[j
] | ((unsigned char)1 << n
);
1089 ptr
->conds
.utf8
.all
[n
] = 0;
1090 ptr
->conds
.utf8
.ascii
[(unsigned int) c
] = ptr
->conds
.utf8
.ascii
[(unsigned int)c
] | ((unsigned char)1 << n
);
1107 // return 1 if s1 is a leading subset of s2
1108 /* inline int AffixMgr::isSubset(const char * s1, const char * s2)
1110 while ((*s1 == *s2) && *s1) {
1114 return (*s1 == '\0');
1118 // return 1 if s1 is a leading subset of s2 (dots are for infixes)
// A '.' in s1 is accepted against any character of s2. The result is non-zero
// exactly when scanning stopped at the terminating '\0' of s1.
// NOTE(review): the loop body (advancing s1 and s2) is not visible in this view.
1119 inline int AffixMgr::isSubset(const char * s1
, const char * s2
)
1121 while (((*s1
== *s2
) || (*s1
== '.')) && (*s1
!= '\0')) {
1125 return (*s1
== '\0');
1129 // check word for prefixes
// Tries the zero-length prefixes in pStart[0] first, then the list selected by
// the first byte of word. Candidates are tested with PfxEntry::checkword(),
// and the matching entry is remembered in the member pfx.
// NOTE(review): loop headers, the tests on rv and the return statements are
// not visible in this view.
1130 struct hentry
* AffixMgr::prefix_check(const char * word
, int len
, char in_compound
,
1131 const FLAG needflag
)
1133 struct hentry
* rv
= NULL
;
1139 // first handle the special case of 0 length prefixes
1140 PfxEntry
* pe
= (PfxEntry
*) pStart
[0];
// outside compounds, prefixes carrying the onlyincompound flag are refused
1144 ((in_compound
!= IN_CPD_NOT
) || !(pe
->getCont() &&
1145 (TESTAFF(pe
->getCont(), onlyincompound
, pe
->getContLen())))) &&
1146 // permit prefixes in compounds
1147 ((in_compound
!= IN_CPD_END
) || (pe
->getCont() &&
1148 (TESTAFF(pe
->getCont(), compoundpermitflag
, pe
->getContLen()))))
1151 rv
= pe
->checkword(word
, len
, in_compound
, needflag
);
1153 pfx
=(AffEntry
*)pe
; // BUG: pfx not stateless
1160 // now handle the general case
// sp: first byte of word, selects the prefix list to walk
1161 unsigned char sp
= *((const unsigned char *)word
);
1162 PfxEntry
* pptr
= (PfxEntry
*)pStart
[sp
];
1165 if (isSubset(pptr
->getKey(),word
)) {
// same two compound restrictions as for the zero-length prefixes above
1168 ((in_compound
!= IN_CPD_NOT
) || !(pptr
->getCont() &&
1169 (TESTAFF(pptr
->getCont(), onlyincompound
, pptr
->getContLen())))) &&
1170 // permit prefixes in compounds
1171 ((in_compound
!= IN_CPD_END
) || (pptr
->getCont() &&
1172 (TESTAFF(pptr
->getCont(), compoundpermitflag
, pptr
->getContLen()))))
1175 rv
= pptr
->checkword(word
, len
, in_compound
, needflag
);
1177 pfx
=(AffEntry
*)pptr
; // BUG: pfx not stateless
// the walk continues through either the NextEQ or the NextNE link
// NOTE(review): presumably NextEQ after a key match and NextNE otherwise; the branch structure is not visible
1181 pptr
= pptr
->getNextEQ();
1183 pptr
= pptr
->getNextNE();
1190 // check word for prefixes
// Same traversal as prefix_check(), but each candidate is tested with
// PfxEntry::check_twosfx().
// NOTE(review): loop headers, the tests on rv and the return statements are
// not visible in this view.
1191 struct hentry
* AffixMgr::prefix_check_twosfx(const char * word
, int len
,
1192 char in_compound
, const FLAG needflag
)
1194 struct hentry
* rv
= NULL
;
1199 // first handle the special case of 0 length prefixes
1200 PfxEntry
* pe
= (PfxEntry
*) pStart
[0];
1203 rv
= pe
->check_twosfx(word
, len
, in_compound
, needflag
);
1208 // now handle the general case
// sp: first byte of word, selects the prefix list to walk
1209 unsigned char sp
= *((const unsigned char *)word
);
1210 PfxEntry
* pptr
= (PfxEntry
*)pStart
[sp
];
1213 if (isSubset(pptr
->getKey(),word
)) {
1214 rv
= pptr
->check_twosfx(word
, len
, in_compound
, needflag
);
// remember the prefix entry in the member pfx
1216 pfx
= (AffEntry
*)pptr
;
1219 pptr
= pptr
->getNextEQ();
1221 pptr
= pptr
->getNextNE();
1228 #ifdef HUNSPELL_EXPERIMENTAL
1229 // check word for prefixes
// Morphological variant: candidates are tested with PfxEntry::check_morph().
// When the local buffer result is non-empty at the end, a copy made with
// mystrdup() is returned.
// NOTE(review): the code that appends st to result, the loop headers and the
// final fallback return are not visible in this view.
1230 char * AffixMgr::prefix_check_morph(const char * word
, int len
, char in_compound
,
1231 const FLAG needflag
)
1235 char result
[MAXLNLEN
];
1241 // first handle the special case of 0 length prefixes
1242 PfxEntry
* pe
= (PfxEntry
*) pStart
[0];
1244 st
= pe
->check_morph(word
,len
,in_compound
, needflag
);
1249 // if (rv) return rv;
1253 // now handle the general case
// sp: first byte of word, selects the prefix list to walk
1254 unsigned char sp
= *((const unsigned char *)word
);
1255 PfxEntry
* pptr
= (PfxEntry
*)pStart
[sp
];
1258 if (isSubset(pptr
->getKey(),word
)) {
1259 st
= pptr
->check_morph(word
,len
,in_compound
, needflag
);
// outside compounds, prefixes carrying the onlyincompound flag are refused
1262 if ((in_compound
!= IN_CPD_NOT
) || !((pptr
->getCont() &&
1263 (TESTAFF(pptr
->getCont(), onlyincompound
, pptr
->getContLen()))))) {
1265 pfx
= (AffEntry
*)pptr
;
1269 pptr
= pptr
->getNextEQ();
1271 pptr
= pptr
->getNextNE();
// a non-empty result is returned as a copy made by mystrdup()
1275 if (*result
) return mystrdup(result
);
1280 // check word for prefixes
// Morphological two-suffix variant: candidates are tested with
// PfxEntry::check_twosfx_morph(). When the local buffer result is non-empty at
// the end, a copy made with mystrdup() is returned.
// NOTE(review): the code that appends st to result, the loop headers and the
// final fallback return are not visible in this view.
1281 char * AffixMgr::prefix_check_twosfx_morph(const char * word
, int len
,
1282 char in_compound
, const FLAG needflag
)
1286 char result
[MAXLNLEN
];
1292 // first handle the special case of 0 length prefixes
1293 PfxEntry
* pe
= (PfxEntry
*) pStart
[0];
1295 st
= pe
->check_twosfx_morph(word
,len
,in_compound
, needflag
);
1303 // now handle the general case
// sp: first byte of word, selects the prefix list to walk
1304 unsigned char sp
= *((const unsigned char *)word
);
1305 PfxEntry
* pptr
= (PfxEntry
*)pStart
[sp
];
1308 if (isSubset(pptr
->getKey(),word
)) {
1309 st
= pptr
->check_twosfx_morph(word
, len
, in_compound
, needflag
);
1313 pfx
= (AffEntry
*)pptr
;
1315 pptr
= pptr
->getNextEQ();
1317 pptr
= pptr
->getNextNE();
// a non-empty result is returned as a copy made by mystrdup()
1321 if (*result
) return mystrdup(result
);
1324 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1327 // Is word a non compound with a REP substitution (see checkcompoundrep)?
// For every REP table entry, each occurrence of its pattern in word is
// replaced by pattern2 in a local copy, and the copy is passed to
// candidate_check(). Returns 1 on the first accepted candidate, and 0 at once
// when wl < 2 or the REP table is empty.
// NOTE(review): the initialisation of r and the final return are not visible
// in this view.
1328 int AffixMgr::cpdrep_check(const char * word
, int wl
)
1330 char candidate
[MAXLNLEN
];
1334 if ((wl
< 2) || !numrep
) return 0;
1336 for (int i
=0; i
< numrep
; i
++ ) {
// lenr: length of the replacement, lenp: length of the pattern searched for
1338 lenr
= strlen(reptable
[i
].pattern2
);
1339 lenp
= strlen(reptable
[i
].pattern
);
1340 // search every occurrence of the pattern in the word
1341 while ((r
=strstr(r
, reptable
[i
].pattern
)) != NULL
) {
1342 strcpy(candidate
, word
);
// give up on this entry when the substituted word would not fit into MAXLNLEN
1343 if (r
-word
+ lenr
+ strlen(r
+lenp
) >= MAXLNLEN
) break;
1344 strcpy(candidate
+(r
-word
),reptable
[i
].pattern2
);
1345 strcpy(candidate
+(r
-word
)+lenr
, r
+lenp
);
1346 if (candidate_check(candidate
,strlen(candidate
))) return 1;
1347 r
++; // search for the next letter
1353 // forbid compoundings when there are special patterns at word bound
// Returns 1 when, for some CHECKCOMPOUNDPATTERN entry, pattern2 is a leading
// subset of the text at word + pos and a non-empty pattern, shorter than pos,
// ends exactly at pos.
// NOTE(review): the declaration of len and the final return are not visible
// in this view.
1354 int AffixMgr::cpdpat_check(const char * word
, int pos
)
1357 for (int i
= 0; i
< numcheckcpd
; i
++) {
1358 if (isSubset(checkcpdtable
[i
].pattern2
, word
+ pos
) &&
// len is assigned inside the condition; an empty pattern (len == 0) never matches
1359 (len
= strlen(checkcpdtable
[i
].pattern
)) && (pos
> len
) &&
1360 (strncmp(word
+ pos
- len
, checkcpdtable
[i
].pattern
, len
) == 0)) return 1;
1365 // forbid compounding with neighbouring upper and lower case characters at word bounds
// Two code paths are visible: one converts the characters around pos with
// u8_u16() and compares them through unicodetoupper(), the other looks up the
// bytes at pos - 1 and pos in csconv. Either path returns 1 when its test holds.
// NOTE(review): the condition choosing between the paths, the conversion that
// fills w and the final return are not visible in this view.
1366 int AffixMgr::cpdcase_check(const char * word
, int pos
)
1371 u8_u16(&u
, 1, word
+ pos
);
// step back over bytes of the form 10xxxxxx to reach the start of the previous character
1372 for (p
= word
+ pos
- 1; (*p
& 0xc0) == 0x80; p
--);
1374 unsigned short a
= (u
.h
<< 8) + u
.l
;
1375 unsigned short b
= (w
.h
<< 8) + w
.l
;
// returns 1 when either character is unchanged by unicodetoupper()
1376 if (((unicodetoupper(a
, langnum
) == a
) || (unicodetoupper(b
, langnum
) == b
))) return 1;
1378 unsigned char a
= *(word
+ pos
- 1);
1379 unsigned char b
= *(word
+ pos
);
// returns 1 when either byte has its ccase entry set and neither is '-'
1380 if ((csconv
[a
].ccase
|| csconv
[b
].ccase
) && (a
!= '-') && (b
!= '-')) return 1;
1385 // check compound patterns
// Stores rv at (*words)[wnum] and tests the word list against every entry of
// defcpdtable. In a pattern, '*' and '?' following a flag act as
// metacharacters, and the bt* arrays keep backtracking state for them.
// Returns 1 as soon as a pattern is satisfied; before the function ends,
// (*words)[wnum] is reset to NULL.
// NOTE(review): the declarations of i, bt, ok, ok2, r and w, the handling of
// def, several branch bodies and the final return are not visible in this view.
1386 int AffixMgr::defcpd_check(hentry
*** words
, short wnum
, hentry
* rv
, hentry
** def
, char all
)
1388 signed short btpp
[MAXWORDLEN
]; // metacharacter (*, ?) positions for backtracking
1389 signed short btwp
[MAXWORDLEN
]; // word positions for metacharacters
1390 int btnum
[MAXWORDLEN
]; // number of matched characters in metacharacter positions
1399 (*words
)[wnum
] = rv
;
1401 for (i
= 0; i
< numdefcpd
; i
++) {
1402 signed short pp
= 0; // pattern position
1403 signed short wp
= 0; // "words" position
1408 while ((pp
< defcpdtable
[i
].len
) && (wp
<= wnum
)) {
// is the flag at pp followed by a '*' or '?' metacharacter?
1409 if (((pp
+1) < defcpdtable
[i
].len
) &&
1410 ((defcpdtable
[i
].def
[pp
+1] == '*') || (defcpdtable
[i
].def
[pp
+1] == '?'))) {
// '?' limits matching to the current word, '*' allows up to the last word
1411 int wend
= (defcpdtable
[i
].def
[pp
+1] == '?') ? wp
: wnum
;
1416 while (wp
<= wend
) {
// NOTE(review): the index pp-2 suggests pp was advanced by 2 on a line not visible here
1417 if (!(*words
)[wp
]->alen
||
1418 !TESTAFF((*words
)[wp
]->astr
, defcpdtable
[i
].def
[pp
-2], (*words
)[wp
]->alen
)) {
1424 if (wp
<= wnum
) ok2
= 0;
1425 btnum
[bt
] = wp
- btwp
[bt
];
1426 if (btnum
[bt
] > 0) bt
++;
// plain flag: the current word must exist and carry the flag at pp
1430 if (!(*words
)[wp
] || !(*words
)[wp
]->alen
||
1431 !TESTAFF((*words
)[wp
]->astr
, defcpdtable
[i
].def
[pp
], (*words
)[wp
]->alen
)) {
1437 if ((defcpdtable
[i
].len
== pp
) && !(wp
> wnum
)) ok
= 0;
// skip optional (flag + metacharacter) pairs; success when the pattern is used up
1442 while ((defcpdtable
[i
].len
> r
) && ((r
+1) < defcpdtable
[i
].len
) &&
1443 ((defcpdtable
[i
].def
[r
+1] == '*') || (defcpdtable
[i
].def
[r
+1] == '?'))) r
+=2;
1444 if (defcpdtable
[i
].len
<= r
) return 1;
1451 wp
= btwp
[bt
- 1] + btnum
[bt
- 1];
1452 } while ((btnum
[bt
- 1] < 0) && --bt
);
1455 if (ok
&& ok2
&& (!all
|| (defcpdtable
[i
].len
<= pp
))) return 1;
1456 // check zero ending
1457 while (ok
&& ok2
&& (defcpdtable
[i
].len
> pp
) && ((pp
+1) < defcpdtable
[i
].len
) &&
1458 ((defcpdtable
[i
].def
[pp
+1] == '*') || (defcpdtable
[i
].def
[pp
+1] == '?'))) pp
+=2;
1459 if (ok
&& ok2
&& (defcpdtable
[i
].len
<= pp
)) return 1;
// no pattern matched: clear the slot written at the top
1461 (*words
)[wnum
] = NULL
;
1462 if (w
) *words
= NULL
;
1466 inline int AffixMgr::candidate_check(const char * word
, int len
)
1468 struct hentry
* rv
=NULL
;
1473 // rv = prefix_check(word,len,1);
1474 // if (rv) return 1;
1476 rv
= affix_check(word
,len
);
1481 // calculate the number of syllables for compound checking
1482 short AffixMgr::get_syllable(const char * word
, int wlen
)
1484 if (cpdmaxsyllable
==0) return 0;
1489 for (int i
=0; i
<wlen
; i
++) {
1490 if (strchr(cpdvowels
, word
[i
])) num
++;
1492 } else if (cpdvowels_utf16
) {
1493 w_char w
[MAXWORDUTF8LEN
];
1494 int i
= u8_u16(w
, MAXWORDUTF8LEN
, word
);
1496 if (flag_bsearch((unsigned short *) cpdvowels_utf16
,
1497 ((unsigned short *) w
)[i
- 1], cpdvowels_utf16_len
)) num
++;
1503 // check if compound word is correctly spelled
1504 // hu_mov_rule = spec. Hungarian rule (XXX)
1505 struct hentry
* AffixMgr::compound_check(const char * word
, int len
,
1506 short wordnum
, short numsyllable
, short maxwordnum
, short wnum
, hentry
** words
= NULL
,
1507 char hu_mov_rule
= 0, int * cmpdstemnum
= NULL
, int * cmpdstem
= NULL
, char is_sug
= 0)
1510 short oldnumsyllable
, oldnumsyllable2
, oldwordnum
, oldwordnum2
;
1511 int oldcmpdstemnum
= 0;
1512 struct hentry
* rv
= NULL
;
1513 struct hentry
* rv_first
;
1514 struct hentry
* rwords
[MAXWORDLEN
]; // buffer for COMPOUND pattern checking
1515 char st
[MAXWORDUTF8LEN
+ 4];
1532 for (cmin
= 0, i
= 0; (i
< cpdmin
) && word
[cmin
]; i
++) {
1534 for (; (word
[cmin
] & 0xc0) == 0x80; cmin
++);
1536 for (cmax
= len
, i
= 0; (i
< (cpdmin
- 1)) && cmax
; i
++) {
1538 for (; (word
[cmax
] & 0xc0) == 0x80; cmax
--);
1542 cmax
= len
- cpdmin
+ 1;
1547 for (i
= cmin
; i
< cmax
; i
++) {
1549 oldnumsyllable
= numsyllable
;
1550 oldwordnum
= wordnum
;
1553 // go to end of the UTF-8 character
1555 for (; (st
[i
] & 0xc0) == 0x80; i
++);
1556 if (i
>= cmax
) return NULL
;
1568 rv
= lookup(st
); // perhaps without prefix
1570 // search homonym with compound flag
1571 while ((rv
) && !hu_mov_rule
&&
1572 ((pseudoroot
&& TESTAFF(rv
->astr
, pseudoroot
, rv
->alen
)) ||
1573 !((compoundflag
&& !words
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
1574 (compoundbegin
&& !wordnum
&&
1575 TESTAFF(rv
->astr
, compoundbegin
, rv
->alen
)) ||
1576 (compoundmiddle
&& wordnum
&& !words
&&
1577 TESTAFF(rv
->astr
, compoundmiddle
, rv
->alen
)) ||
1579 ((!words
&& !wordnum
&& defcpd_check(&words
, wnum
, rv
, (hentry
**) &rwords
, 0)) ||
1580 (words
&& defcpd_check(&words
, wnum
, rv
, (hentry
**) &rwords
, 0))))
1582 rv
= rv
->next_homonym
;
1587 !(rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundflag
))) {
1588 if ((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
,
1589 FLAG_NULL
, compoundflag
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) && !hu_mov_rule
&&
1590 ((SfxEntry
*)sfx
)->getCont() &&
1591 ((compoundforbidflag
&& TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundforbidflag
,
1592 ((SfxEntry
*)sfx
)->getContLen())) || (compoundend
&&
1593 TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundend
,
1594 ((SfxEntry
*)sfx
)->getContLen())))) {
1599 (((wordnum
== 0) && compoundbegin
&&
1600 ((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
, FLAG_NULL
, compoundbegin
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) ||
1601 (rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundbegin
)))) ||
1602 ((wordnum
> 0) && compoundmiddle
&&
1603 ((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
, FLAG_NULL
, compoundmiddle
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) ||
1604 (rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundmiddle
)))))
1605 ) checked_prefix
= 1;
1606 // else check forbiddenwords and pseudoroot
1607 } else if (rv
->astr
&& (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
) ||
1608 TESTAFF(rv
->astr
, pseudoroot
, rv
->alen
) ||
1609 (is_sug
&& nosuggest
&& TESTAFF(rv
->astr
, nosuggest
, rv
->alen
))
1615 // check non_compound flag in suffix and prefix
1616 if ((rv
) && !hu_mov_rule
&&
1617 ((pfx
&& ((PfxEntry
*)pfx
)->getCont() &&
1618 TESTAFF(((PfxEntry
*)pfx
)->getCont(), compoundforbidflag
,
1619 ((PfxEntry
*)pfx
)->getContLen())) ||
1620 (sfx
&& ((SfxEntry
*)sfx
)->getCont() &&
1621 TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundforbidflag
,
1622 ((SfxEntry
*)sfx
)->getContLen())))) {
1626 // check compoundend flag in suffix and prefix
1627 if ((rv
) && !checked_prefix
&& compoundend
&& !hu_mov_rule
&&
1628 ((pfx
&& ((PfxEntry
*)pfx
)->getCont() &&
1629 TESTAFF(((PfxEntry
*)pfx
)->getCont(), compoundend
,
1630 ((PfxEntry
*)pfx
)->getContLen())) ||
1631 (sfx
&& ((SfxEntry
*)sfx
)->getCont() &&
1632 TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundend
,
1633 ((SfxEntry
*)sfx
)->getContLen())))) {
1637 // check compoundmiddle flag in suffix and prefix
1638 if ((rv
) && !checked_prefix
&& (wordnum
==0) && compoundmiddle
&& !hu_mov_rule
&&
1639 ((pfx
&& ((PfxEntry
*)pfx
)->getCont() &&
1640 TESTAFF(((PfxEntry
*)pfx
)->getCont(), compoundmiddle
,
1641 ((PfxEntry
*)pfx
)->getContLen())) ||
1642 (sfx
&& ((SfxEntry
*)sfx
)->getCont() &&
1643 TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundmiddle
,
1644 ((SfxEntry
*)sfx
)->getContLen())))) {
1648 // check forbiddenwords
1649 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
) ||
1650 (is_sug
&& nosuggest
&& TESTAFF(rv
->astr
, nosuggest
, rv
->alen
)))) {
1654 // increment word number, if the second root has a compoundroot flag
1655 if ((rv
) && compoundroot
&&
1656 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
1660 // first word is acceptable in compound words?
1662 ( checked_prefix
|| (words
&& words
[wnum
]) ||
1663 (compoundflag
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
1664 ((oldwordnum
== 0) && compoundbegin
&& TESTAFF(rv
->astr
, compoundbegin
, rv
->alen
)) ||
1665 ((oldwordnum
> 0) && compoundmiddle
&& TESTAFF(rv
->astr
, compoundmiddle
, rv
->alen
))// ||
1668 // LANG_hu section: spec. Hungarian rule
1669 || ((langnum
== LANG_hu
) && hu_mov_rule
&& (
1670 TESTAFF(rv
->astr
, 'F', rv
->alen
) || // XXX hardwired Hungarian dictionary codes
1671 TESTAFF(rv
->astr
, 'G', rv
->alen
) ||
1672 TESTAFF(rv
->astr
, 'H', rv
->alen
)
1675 // END of LANG_hu section
1677 && ! (( checkcompoundtriple
&& // test triple letters
1678 (word
[i
-1]==word
[i
]) && (
1679 ((i
>1) && (word
[i
-1]==word
[i
-2])) ||
1680 ((word
[i
-1]==word
[i
+1])) // may be word[i+1] == '\0'
1684 // test CHECKCOMPOUNDPATTERN
1685 numcheckcpd
&& cpdpat_check(word
, i
)
1688 checkcompoundcase
&& cpdcase_check(word
, i
)
1691 // LANG_hu section: spec. Hungarian rule
1692 || ((!rv
) && (langnum
== LANG_hu
) && hu_mov_rule
&& (rv
= affix_check(st
,i
)) &&
1693 (sfx
&& ((SfxEntry
*)sfx
)->getCont() && ( // XXX hardwired Hungarian dic. codes
1694 TESTAFF(((SfxEntry
*)sfx
)->getCont(), (unsigned short) 'x', ((SfxEntry
*)sfx
)->getContLen()) ||
1695 TESTAFF(((SfxEntry
*)sfx
)->getCont(), (unsigned short) '%', ((SfxEntry
*)sfx
)->getContLen())
1699 // END of LANG_hu section
1702 // LANG_hu section: spec. Hungarian rule
1703 if (langnum
== LANG_hu
) {
1704 // calculate syllable number of the word
1705 numsyllable
+= get_syllable(st
, i
);
1707 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
1708 if (pfx
&& (get_syllable(((PfxEntry
*)pfx
)->getKey(),strlen(((PfxEntry
*)pfx
)->getKey())) > 1)) wordnum
++;
1710 // END of LANG_hu section
1713 if (cmpdstem
) cmpdstem
[*cmpdstemnum
- 1] = i
;
1718 rv
= lookup((word
+i
)); // perhaps without prefix
1720 // search homonym with compound flag
1721 while ((rv
) && ((pseudoroot
&& TESTAFF(rv
->astr
, pseudoroot
, rv
->alen
)) ||
1722 !((compoundflag
&& !words
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
1723 (compoundend
&& !words
&& TESTAFF(rv
->astr
, compoundend
, rv
->alen
)) ||
1724 (numdefcpd
&& words
&& defcpd_check(&words
, wnum
+ 1, rv
, NULL
,1))))) {
1725 rv
= rv
->next_homonym
;
1728 if (rv
&& words
&& words
[wnum
+ 1]) return rv
;
1730 oldnumsyllable2
= numsyllable
;
1731 oldwordnum2
= wordnum
;
1733 // LANG_hu section: spec. Hungarian rule, XXX hardwired dictionary code
1734 if ((rv
) && (langnum
== LANG_hu
) && (TESTAFF(rv
->astr
, 'I', rv
->alen
)) && !(TESTAFF(rv
->astr
, 'J', rv
->alen
))) {
1737 // END of LANG_hu section
1739 // increment word number, if the second root has a compoundroot flag
1740 if ((rv
) && (compoundroot
) &&
1741 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
1745 // check forbiddenwords
1746 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
) ||
1747 (is_sug
&& nosuggest
&& TESTAFF(rv
->astr
, nosuggest
, rv
->alen
)))) return NULL
;
1749 // second word is acceptable, as a root?
1750 // Hungarian convention: compounding is acceptable
1751 // when the compound consists of 2 words or, if more,
1752 // when the syllable count of the root words is 6 or fewer.
1755 (compoundflag
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
1756 (compoundend
&& TESTAFF(rv
->astr
, compoundend
, rv
->alen
))
1759 ((cpdwordmax
==-1) || (wordnum
+1<cpdwordmax
)) ||
1760 ((cpdmaxsyllable
==0) ||
1761 (numsyllable
+ get_syllable(rv
->word
,rv
->wlen
)<=cpdmaxsyllable
))
1764 (!checkcompounddup
|| (rv
!= rv_first
))
1768 // forbid compound word, if it is a non compound word with typical fault
1769 if (checkcompoundrep
&& cpdrep_check(word
,len
)) return NULL
;
1773 numsyllable
= oldnumsyllable2
;
1774 wordnum
= oldwordnum2
;
1776 // perhaps the second word has a prefix and/or suffix
1778 sfxflag
= FLAG_NULL
;
1779 rv
= (compoundflag
) ? affix_check((word
+i
),strlen(word
+i
), compoundflag
, IN_CPD_END
) : NULL
;
1780 if (!rv
&& compoundend
) {
1783 rv
= affix_check((word
+i
),strlen(word
+i
), compoundend
, IN_CPD_END
);
1786 if (!rv
&& numdefcpd
&& words
) {
1787 rv
= affix_check((word
+i
),strlen(word
+i
), 0, IN_CPD_END
);
1788 if (rv
&& defcpd_check(&words
, wnum
+ 1, rv
, NULL
, 1)) return rv
;
1791 // check non_compound flag in suffix and prefix
1793 ((pfx
&& ((PfxEntry
*)pfx
)->getCont() &&
1794 TESTAFF(((PfxEntry
*)pfx
)->getCont(), compoundforbidflag
,
1795 ((PfxEntry
*)pfx
)->getContLen())) ||
1796 (sfx
&& ((SfxEntry
*)sfx
)->getCont() &&
1797 TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundforbidflag
,
1798 ((SfxEntry
*)sfx
)->getContLen())))) {
1802 // check forbiddenwords
1803 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
) ||
1804 (is_sug
&& nosuggest
&& TESTAFF(rv
->astr
, nosuggest
, rv
->alen
)))) return NULL
;
1806 // pfxappnd = prefix of word+i, or NULL
1807 // calculate the syllable count of the prefix.
1808 // Hungarian convention: when the prefix has more than
1809 // 1 syllable, the prefix+word counts as two words.
1811 if (langnum
== LANG_hu
) {
1812 // calculate syllable number of the word
1813 numsyllable
+= get_syllable(word
+ i
, strlen(word
+ i
));
1815 // - affix syllable num.
1816 // XXX only second suffix (inflections, not derivations)
1818 char * tmp
= myrevstrdup(sfxappnd
);
1819 numsyllable
-= get_syllable(tmp
, strlen(tmp
));
1823 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
1824 if (pfx
&& (get_syllable(((PfxEntry
*)pfx
)->getKey(),strlen(((PfxEntry
*)pfx
)->getKey())) > 1)) wordnum
++;
1826 // increment the syllable count if the last word has a SYLLABLENUM flag
1827 // and the suffix begins with `s'
1829 if (cpdsyllablenum
) {
1831 case 'c': { numsyllable
+=2; break; }
1832 case 'J': { numsyllable
+= 1; break; }
1833 case 'I': { if (TESTAFF(rv
->astr
, 'J', rv
->alen
)) numsyllable
+= 1; break; }
1838 // increment word number, if the second word has a compoundroot flag
1839 if ((rv
) && (compoundroot
) &&
1840 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
1844 // is the second word acceptable as a word with a prefix and/or suffix?
1845 // Hungarian convention: compounding is acceptable
1846 // when the compound consists of 2 words; otherwise
1847 // the syllable count of the root words must be 6 or fewer.
1850 ((cpdwordmax
== -1) || (wordnum
+ 1 < cpdwordmax
)) ||
1851 ((cpdmaxsyllable
== 0) ||
1852 (numsyllable
<= cpdmaxsyllable
))
1855 (!checkcompounddup
|| (rv
!= rv_first
))
1857 // forbid compound word, if it is a non compound word with typical fault
1858 if (checkcompoundrep
&& cpdrep_check(word
, len
)) return NULL
;
1862 numsyllable
= oldnumsyllable2
;
1863 wordnum
= oldwordnum2
;
1865 if (cmpdstemnum
) oldcmpdstemnum
= *cmpdstemnum
;
1867 // perhaps second word is a compound word (recursive call)
1868 if (wordnum
< maxwordnum
) {
1869 rv
= compound_check((word
+i
),strlen(word
+i
), wordnum
+1,
1870 numsyllable
, maxwordnum
, wnum
+ 1, words
,
1871 0, cmpdstemnum
, cmpdstem
, is_sug
);
1876 // forbid compound word, if it is a non compound word with typical fault
1877 if (checkcompoundrep
&& cpdrep_check(word
, len
)) return NULL
;
1881 if (cmpdstemnum
) *cmpdstemnum
= oldcmpdstemnum
;
1886 wordnum
= oldwordnum
;
1887 numsyllable
= oldnumsyllable
;
1893 #ifdef HUNSPELL_EXPERIMENTAL
1894 // check if compound word is correctly spelled
1895 // hu_mov_rule = spec. Hungarian rule (XXX)
1896 int AffixMgr::compound_check_morph(const char * word
, int len
,
1897 short wordnum
, short numsyllable
, short maxwordnum
, short wnum
, hentry
** words
,
1898 char hu_mov_rule
= 0, char ** result
= NULL
, char * partresult
= NULL
)
1901 short oldnumsyllable
, oldnumsyllable2
, oldwordnum
, oldwordnum2
;
1904 struct hentry
* rv
= NULL
;
1905 struct hentry
* rv_first
;
1906 struct hentry
* rwords
[MAXWORDLEN
]; // buffer for COMPOUND pattern checking
1907 char st
[MAXWORDUTF8LEN
+ 4];
1911 char presult
[MAXLNLEN
];
1917 for (cmin
= 0, i
= 0; (i
< cpdmin
) && word
[cmin
]; i
++) {
1919 for (; (word
[cmin
] & 0xc0) == 0x80; cmin
++);
1921 for (cmax
= len
, i
= 0; (i
< (cpdmin
- 1)) && cmax
; i
++) {
1923 for (; (word
[cmax
] & 0xc0) == 0x80; cmax
--);
1927 cmax
= len
- cpdmin
+ 1;
1932 for (i
= cmin
; i
< cmax
; i
++) {
1933 oldnumsyllable
= numsyllable
;
1934 oldwordnum
= wordnum
;
1937 // go to end of the UTF-8 character
1939 for (; (st
[i
] & 0xc0) == 0x80; i
++);
1940 if (i
>= cmax
) return 0;
1949 if (partresult
) strcat(presult
, partresult
);
1951 rv
= lookup(st
); // perhaps without prefix
1953 // search homonym with compound flag
1954 while ((rv
) && !hu_mov_rule
&&
1955 ((pseudoroot
&& TESTAFF(rv
->astr
, pseudoroot
, rv
->alen
)) ||
1956 !((compoundflag
&& !words
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
1957 (compoundbegin
&& !wordnum
&&
1958 TESTAFF(rv
->astr
, compoundbegin
, rv
->alen
)) ||
1959 (compoundmiddle
&& wordnum
&& !words
&&
1960 TESTAFF(rv
->astr
, compoundmiddle
, rv
->alen
)) ||
1962 ((!words
&& !wordnum
&& defcpd_check(&words
, wnum
, rv
, (hentry
**) &rwords
, 0)) ||
1963 (words
&& defcpd_check(&words
, wnum
, rv
, (hentry
**) &rwords
, 0))))
1965 rv
= rv
->next_homonym
;
1969 if (rv
->description
) {
1970 if ((!rv
->astr
) || !TESTAFF(rv
->astr
, lemma_present
, rv
->alen
))
1971 strcat(presult
, st
);
1972 strcat(presult
, rv
->description
);
1978 !(rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundflag
))) {
1979 if ((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
,
1980 FLAG_NULL
, compoundflag
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) && !hu_mov_rule
&&
1981 ((SfxEntry
*)sfx
)->getCont() &&
1982 ((compoundforbidflag
&& TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundforbidflag
,
1983 ((SfxEntry
*)sfx
)->getContLen())) || (compoundend
&&
1984 TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundend
,
1985 ((SfxEntry
*)sfx
)->getContLen())))) {
1991 (((wordnum
== 0) && compoundbegin
&&
1992 ((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
, FLAG_NULL
, compoundbegin
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) ||
1993 (rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundbegin
)))) ||
1994 ((wordnum
> 0) && compoundmiddle
&&
1995 ((rv
= suffix_check(st
, i
, 0, NULL
, NULL
, 0, NULL
, FLAG_NULL
, compoundmiddle
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
)) ||
1996 (rv
= prefix_check(st
, i
, hu_mov_rule
? IN_CPD_OTHER
: IN_CPD_BEGIN
, compoundmiddle
)))))
1998 //char * p = prefix_check_morph(st, i, 0, compound);
2000 if (compoundflag
) p
= affix_check_morph(st
, i
, compoundflag
);
2001 if (!p
|| (*p
== '\0')) {
2002 if ((wordnum
== 0) && compoundbegin
) {
2003 p
= affix_check_morph(st
, i
, compoundbegin
);
2004 } else if ((wordnum
> 0) && compoundmiddle
) {
2005 p
= affix_check_morph(st
, i
, compoundmiddle
);
2010 if (strchr(p
, '\n')) {
2011 strcat(presult
, "(");
2012 strcat(presult
, line_join(p
, '|'));
2013 strcat(presult
, ")");
2018 if (presult
[strlen(presult
) - 1] == '\n') {
2019 presult
[strlen(presult
) - 1] = '\0';
2022 //strcat(presult, "+");
2024 // else check forbiddenwords
2025 } else if (rv
->astr
&& (TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
) ||
2026 TESTAFF(rv
->astr
, pseudoroot
, rv
->alen
))) {
2031 // check non_compound flag in suffix and prefix
2032 if ((rv
) && !hu_mov_rule
&&
2033 ((pfx
&& ((PfxEntry
*)pfx
)->getCont() &&
2034 TESTAFF(((PfxEntry
*)pfx
)->getCont(), compoundforbidflag
,
2035 ((PfxEntry
*)pfx
)->getContLen())) ||
2036 (sfx
&& ((SfxEntry
*)sfx
)->getCont() &&
2037 TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundforbidflag
,
2038 ((SfxEntry
*)sfx
)->getContLen())))) {
2042 // check compoundend flag in suffix and prefix
2043 if ((rv
) && !checked_prefix
&& compoundend
&& !hu_mov_rule
&&
2044 ((pfx
&& ((PfxEntry
*)pfx
)->getCont() &&
2045 TESTAFF(((PfxEntry
*)pfx
)->getCont(), compoundend
,
2046 ((PfxEntry
*)pfx
)->getContLen())) ||
2047 (sfx
&& ((SfxEntry
*)sfx
)->getCont() &&
2048 TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundend
,
2049 ((SfxEntry
*)sfx
)->getContLen())))) {
2053 // check compoundmiddle flag in suffix and prefix
2054 if ((rv
) && !checked_prefix
&& (wordnum
==0) && compoundmiddle
&& !hu_mov_rule
&&
2055 ((pfx
&& ((PfxEntry
*)pfx
)->getCont() &&
2056 TESTAFF(((PfxEntry
*)pfx
)->getCont(), compoundmiddle
,
2057 ((PfxEntry
*)pfx
)->getContLen())) ||
2058 (sfx
&& ((SfxEntry
*)sfx
)->getCont() &&
2059 TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundmiddle
,
2060 ((SfxEntry
*)sfx
)->getContLen())))) {
2064 // check forbiddenwords
2065 if ((rv
) && (rv
->astr
) && TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
)) continue;
2067 // increment word number, if the second root has a compoundroot flag
2068 if ((rv
) && (compoundroot
) &&
2069 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
2073 // first word is acceptable in compound words?
2075 ( checked_prefix
|| (words
&& words
[wnum
]) ||
2076 (compoundflag
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
2077 ((oldwordnum
== 0) && compoundbegin
&& TESTAFF(rv
->astr
, compoundbegin
, rv
->alen
)) ||
2078 ((oldwordnum
> 0) && compoundmiddle
&& TESTAFF(rv
->astr
, compoundmiddle
, rv
->alen
))
2079 // LANG_hu section: spec. Hungarian rule
2080 || ((langnum
== LANG_hu
) && // hu_mov_rule
2082 TESTAFF(rv
->astr
, 'F', rv
->alen
) ||
2083 TESTAFF(rv
->astr
, 'G', rv
->alen
) ||
2084 TESTAFF(rv
->astr
, 'H', rv
->alen
)
2087 // END of LANG_hu section
2089 && ! (( checkcompoundtriple
&& // test triple letters
2090 (word
[i
-1]==word
[i
]) && (
2091 ((i
>1) && (word
[i
-1]==word
[i
-2])) ||
2092 ((word
[i
-1]==word
[i
+1])) // may be word[i+1] == '\0'
2096 // test CHECKCOMPOUNDPATTERN
2097 numcheckcpd
&& cpdpat_check(word
, i
)
2100 checkcompoundcase
&& cpdcase_check(word
, i
)
2103 // LANG_hu section: spec. Hungarian rule
2104 || ((!rv
) && (langnum
== LANG_hu
) && hu_mov_rule
&& (rv
= affix_check(st
,i
)) &&
2105 (sfx
&& ((SfxEntry
*)sfx
)->getCont() && (
2106 TESTAFF(((SfxEntry
*)sfx
)->getCont(), (unsigned short) 'x', ((SfxEntry
*)sfx
)->getContLen()) ||
2107 TESTAFF(((SfxEntry
*)sfx
)->getCont(), (unsigned short) '%', ((SfxEntry
*)sfx
)->getContLen())
2111 // END of LANG_hu section
2114 // LANG_hu section: spec. Hungarian rule
2115 if (langnum
== LANG_hu
) {
2116 // calculate syllable number of the word
2117 numsyllable
+= get_syllable(st
, i
);
2119 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
2120 if (pfx
&& (get_syllable(((PfxEntry
*)pfx
)->getKey(),strlen(((PfxEntry
*)pfx
)->getKey())) > 1)) wordnum
++;
2122 // END of LANG_hu section
2126 rv
= lookup((word
+i
)); // perhaps without prefix
2128 // search homonym with compound flag
2129 while ((rv
) && ((pseudoroot
&& TESTAFF(rv
->astr
, pseudoroot
, rv
->alen
)) ||
2130 !((compoundflag
&& !words
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
2131 (compoundend
&& !words
&& TESTAFF(rv
->astr
, compoundend
, rv
->alen
)) ||
2132 (numdefcpd
&& defcpd_check(&words
, wnum
+ 1, rv
, NULL
,1))))) {
2133 rv
= rv
->next_homonym
;
2136 if (rv
&& words
&& words
[wnum
+ 1]) {
2137 strcat(*result
, presult
);
2138 if (complexprefixes
&& rv
->description
) strcat(*result
, rv
->description
);
2139 if (rv
->description
&& ((!rv
->astr
) ||
2140 !TESTAFF(rv
->astr
, lemma_present
, rv
->alen
)))
2141 strcat(*result
, rv
->word
);
2142 if (!complexprefixes
&& rv
->description
) strcat(*result
, rv
->description
);
2143 strcat(*result
, "\n");
2148 oldnumsyllable2
= numsyllable
;
2149 oldwordnum2
= wordnum
;
2151 // LANG_hu section: spec. Hungarian rule
2152 if ((rv
) && (langnum
== LANG_hu
) && (TESTAFF(rv
->astr
, 'I', rv
->alen
)) && !(TESTAFF(rv
->astr
, 'J', rv
->alen
))) {
2155 // END of LANG_hu section
2156 // increment word number, if the second root has a compoundroot flag
2157 if ((rv
) && (compoundroot
) &&
2158 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
2162 // check forbiddenwords
2163 if ((rv
) && (rv
->astr
) && TESTAFF(rv
->astr
, forbiddenword
, rv
->alen
)) {
2168 // second word is acceptable, as a root?
2169 // Hungarian convention: compounding is acceptable
2170 // when the compound consists of 2 words or, if more,
2171 // when the syllable count of the root words is 6 or fewer.
2173 (compoundflag
&& TESTAFF(rv
->astr
, compoundflag
, rv
->alen
)) ||
2174 (compoundend
&& TESTAFF(rv
->astr
, compoundend
, rv
->alen
))
2177 ((cpdwordmax
==-1) || (wordnum
+1<cpdwordmax
)) ||
2178 ((cpdmaxsyllable
==0) ||
2179 (numsyllable
+get_syllable(rv
->word
,rv
->wlen
)<=cpdmaxsyllable
))
2182 (!checkcompounddup
|| (rv
!= rv_first
))
2186 // bad compound word
2187 strcat(*result
, presult
);
2189 if (rv
->description
) {
2190 if (complexprefixes
) strcat(*result
, rv
->description
);
2191 if ((!rv
->astr
) || !TESTAFF(rv
->astr
, lemma_present
, rv
->alen
))
2192 strcat(*result
, rv
->word
);
2193 if (!complexprefixes
) strcat(*result
, rv
->description
);
2195 strcat(*result
, "\n");
2199 numsyllable
= oldnumsyllable2
;
2200 wordnum
= oldwordnum2
;
2202 // perhaps the second word has a prefix and/or suffix
2204 sfxflag
= FLAG_NULL
;
2206 if (compoundflag
) rv
= affix_check((word
+i
),strlen(word
+i
), compoundflag
); else rv
= NULL
;
2208 if (!rv
&& compoundend
) {
2211 rv
= affix_check((word
+i
),strlen(word
+i
), compoundend
);
2214 if (!rv
&& numdefcpd
&& words
) {
2215 rv
= affix_check((word
+i
),strlen(word
+i
), 0, IN_CPD_END
);
2216 if (rv
&& words
&& defcpd_check(&words
, wnum
+ 1, rv
, NULL
, 1)) {
2218 if (compoundflag
) m
= affix_check_morph((word
+i
),strlen(word
+i
), compoundflag
);
2219 if ((!m
|| *m
== '\0') && compoundend
)
2220 m
= affix_check_morph((word
+i
),strlen(word
+i
), compoundend
);
2221 strcat(*result
, presult
);
2224 if (strchr(m
, '\n')) {
2225 strcat(*result
, "(");
2226 strcat(*result
, line_join(m
, '|'));
2227 strcat(*result
, ")");
2233 strcat(*result
, "\n");
2238 // check non_compound flag in suffix and prefix
2240 ((pfx
&& ((PfxEntry
*)pfx
)->getCont() &&
2241 TESTAFF(((PfxEntry
*)pfx
)->getCont(), compoundforbidflag
,
2242 ((PfxEntry
*)pfx
)->getContLen())) ||
2243 (sfx
&& ((SfxEntry
*)sfx
)->getCont() &&
2244 TESTAFF(((SfxEntry
*)sfx
)->getCont(), compoundforbidflag
,
2245 ((SfxEntry
*)sfx
)->getContLen())))) {
2249 // check forbiddenwords
2250 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
,forbiddenword
,rv
->alen
))
2251 && (! TESTAFF(rv
->astr
, pseudoroot
, rv
->alen
))) {
2256 if (langnum
== LANG_hu
) {
2257 // calculate syllable number of the word
2258 numsyllable
+= get_syllable(word
+ i
, strlen(word
+ i
));
2260 // - affix syllable num.
2261 // XXX only second suffix (inflections, not derivations)
2263 char * tmp
= myrevstrdup(sfxappnd
);
2264 numsyllable
-= get_syllable(tmp
, strlen(tmp
));
2268 // + 1 word, if syllable number of the prefix > 1 (hungarian convention)
2269 if (pfx
&& (get_syllable(((PfxEntry
*)pfx
)->getKey(),strlen(((PfxEntry
*)pfx
)->getKey())) > 1)) wordnum
++;
2271 // increment the syllable count if the last word has a SYLLABLENUM flag
2272 // and the suffix begins with `s'
2274 if (cpdsyllablenum
) {
2276 case 'c': { numsyllable
+=2; break; }
2277 case 'J': { numsyllable
+= 1; break; }
2278 case 'I': { if (rv
&& TESTAFF(rv
->astr
, 'J', rv
->alen
)) numsyllable
+= 1; break; }
2283 // increment word number, if the second word has a compoundroot flag
2284 if ((rv
) && (compoundroot
) &&
2285 (TESTAFF(rv
->astr
, compoundroot
, rv
->alen
))) {
2288 // is the second word acceptable as a word with a prefix and/or suffix?
2289 // Hungarian convention: compounding is acceptable
2290 // when the compound consists of 2 words; otherwise
2291 // the syllable count of the root words must be 6 or fewer.
2294 ((cpdwordmax
==-1) || (wordnum
+1<cpdwordmax
)) ||
2295 ((cpdmaxsyllable
==0) ||
2296 (numsyllable
<= cpdmaxsyllable
))
2299 (!checkcompounddup
|| (rv
!= rv_first
))
2302 if (compoundflag
) m
= affix_check_morph((word
+i
),strlen(word
+i
), compoundflag
);
2303 if ((!m
|| *m
== '\0') && compoundend
)
2304 m
= affix_check_morph((word
+i
),strlen(word
+i
), compoundend
);
2305 strcat(*result
, presult
);
2308 if (strchr(m
, '\n')) {
2309 strcat(*result
, "(");
2310 strcat(*result
, line_join(m
, '|'));
2311 strcat(*result
, ")");
2317 strcat(*result
, "\n");
2321 numsyllable
= oldnumsyllable2
;
2322 wordnum
= oldwordnum2
;
2324 // perhaps second word is a compound word (recursive call)
2325 if ((wordnum
< maxwordnum
) && (ok
== 0)) {
2326 compound_check_morph((word
+i
),strlen(word
+i
), wordnum
+1,
2327 numsyllable
, maxwordnum
, wnum
+ 1, words
, 0, result
, presult
);
2333 wordnum
= oldwordnum
;
2334 numsyllable
= oldnumsyllable
;
2338 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
2340 // return 1 if s1 (reversed) is a leading subset of end of s2
2341 /* inline int AffixMgr::isRevSubset(const char * s1, const char * end_of_s2, int len)
2343 while ((len > 0) && *s1 && (*s1 == *end_of_s2)) {
2348 return (*s1 == '\0');
2352 inline int AffixMgr::isRevSubset(const char * s1
, const char * end_of_s2
, int len
)
2354 while ((len
> 0) && (*s1
!= '\0') && ((*s1
== *end_of_s2
) || (*s1
== '.'))) {
2359 return (*s1
== '\0');
2362 // check word for suffixes
2364 struct hentry
* AffixMgr::suffix_check (const char * word
, int len
,
2365 int sfxopts
, AffEntry
* ppfx
, char ** wlst
, int maxSug
, int * ns
,
2366 const FLAG cclass
, const FLAG needflag
, char in_compound
)
2368 struct hentry
* rv
= NULL
;
2369 char result
[MAXLNLEN
];
2371 PfxEntry
* ep
= (PfxEntry
*) ppfx
;
2373 // first handle the special case of 0 length suffixes
2374 SfxEntry
* se
= (SfxEntry
*) sStart
[0];
2377 if (!cclass
|| se
->getCont()) {
2378 // suffixes are not allowed in beginning of compounds
2379 if ((((in_compound
!= IN_CPD_BEGIN
)) || // && !cclass
2380 // except when signed with compoundpermitflag flag
2381 (se
->getCont() && compoundpermitflag
&&
2382 TESTAFF(se
->getCont(),compoundpermitflag
,se
->getContLen()))) && (!circumfix
||
2383 // no circumfix flag in prefix and suffix
2384 ((!ppfx
|| !(ep
->getCont()) || !TESTAFF(ep
->getCont(),
2385 circumfix
, ep
->getContLen())) &&
2386 (!se
->getCont() || !(TESTAFF(se
->getCont(),circumfix
,se
->getContLen())))) ||
2387 // circumfix flag in prefix AND suffix
2388 ((ppfx
&& (ep
->getCont()) && TESTAFF(ep
->getCont(),
2389 circumfix
, ep
->getContLen())) &&
2390 (se
->getCont() && (TESTAFF(se
->getCont(),circumfix
,se
->getContLen()))))) &&
2393 !((se
->getCont() && (TESTAFF(se
->getCont(), onlyincompound
, se
->getContLen()))))) &&
2394 // pseudoroot on prefix or first suffix
2396 !(se
->getCont() && TESTAFF(se
->getCont(), pseudoroot
, se
->getContLen())) ||
2397 (ppfx
&& !((ep
->getCont()) &&
2398 TESTAFF(ep
->getCont(), pseudoroot
,
2402 rv
= se
->checkword(word
,len
, sfxopts
, ppfx
, wlst
, maxSug
, ns
, (FLAG
) cclass
,
2403 needflag
, (in_compound
? 0 : onlyincompound
));
2405 sfx
=(AffEntry
*)se
; // BUG: sfx not stateless
2413 // now handle the general case
2414 unsigned char sp
= *((const unsigned char *)(word
+ len
- 1));
2415 SfxEntry
* sptr
= (SfxEntry
*) sStart
[sp
];
2418 if (isRevSubset(sptr
->getKey(), word
+ len
- 1, len
)
2420 // suffixes are not allowed in beginning of compounds
2421 if ((((in_compound
!= IN_CPD_BEGIN
)) || // && !cclass
2422 // except when signed with compoundpermitflag flag
2423 (sptr
->getCont() && compoundpermitflag
&&
2424 TESTAFF(sptr
->getCont(),compoundpermitflag
,sptr
->getContLen()))) && (!circumfix
||
2425 // no circumfix flag in prefix and suffix
2426 ((!ppfx
|| !(ep
->getCont()) || !TESTAFF(ep
->getCont(),
2427 circumfix
, ep
->getContLen())) &&
2428 (!sptr
->getCont() || !(TESTAFF(sptr
->getCont(),circumfix
,sptr
->getContLen())))) ||
2429 // circumfix flag in prefix AND suffix
2430 ((ppfx
&& (ep
->getCont()) && TESTAFF(ep
->getCont(),
2431 circumfix
, ep
->getContLen())) &&
2432 (sptr
->getCont() && (TESTAFF(sptr
->getCont(),circumfix
,sptr
->getContLen()))))) &&
2435 !((sptr
->getCont() && (TESTAFF(sptr
->getCont(), onlyincompound
, sptr
->getContLen()))))) &&
2436 // pseudoroot on prefix or first suffix
2438 !(sptr
->getCont() && TESTAFF(sptr
->getCont(), pseudoroot
, sptr
->getContLen())) ||
2439 (ppfx
&& !((ep
->getCont()) &&
2440 TESTAFF(ep
->getCont(), pseudoroot
,
2444 rv
= sptr
->checkword(word
,len
, sfxopts
, ppfx
, wlst
,
2445 maxSug
, ns
, cclass
, needflag
, (in_compound
? 0 : onlyincompound
));
2447 sfx
=(AffEntry
*)sptr
; // BUG: sfx not stateless
2448 sfxflag
= sptr
->getFlag(); // BUG: sfxflag not stateless
2449 if (!sptr
->getCont()) sfxappnd
=sptr
->getKey(); // BUG: sfxappnd not stateless
2450 if (cclass
|| sptr
->getCont()) {
2452 derived
= mystrdup(word
);
2454 strcpy(result
, derived
); // XXX check size
2455 strcat(result
, "\n");
2456 strcat(result
, word
);
2458 derived
= mystrdup(result
);
2464 sptr
= sptr
->getNextEQ();
2466 sptr
= sptr
->getNextNE();
2473 // check word for two-level suffixes
2475 struct hentry
* AffixMgr::suffix_check_twosfx(const char * word
, int len
,
2476 int sfxopts
, AffEntry
* ppfx
, const FLAG needflag
)
2478 struct hentry
* rv
= NULL
;
2480 // first handle the special case of 0 length suffixes
2481 SfxEntry
* se
= (SfxEntry
*) sStart
[0];
2483 if (contclasses
[se
->getFlag()])
2485 rv
= se
->check_twosfx(word
,len
, sfxopts
, ppfx
, needflag
);
2491 // now handle the general case
2492 unsigned char sp
= *((const unsigned char *)(word
+ len
- 1));
2493 SfxEntry
* sptr
= (SfxEntry
*) sStart
[sp
];
2496 if (isRevSubset(sptr
->getKey(), word
+ len
- 1, len
)) {
2497 if (contclasses
[sptr
->getFlag()])
2499 rv
= sptr
->check_twosfx(word
,len
, sfxopts
, ppfx
, needflag
);
2501 sfxflag
= sptr
->getFlag(); // BUG: sfxflag not stateless
2502 if (!sptr
->getCont()) sfxappnd
=sptr
->getKey(); // BUG: sfxappnd not stateless
2506 sptr
= sptr
->getNextEQ();
2508 sptr
= sptr
->getNextNE();
2515 #ifdef HUNSPELL_EXPERIMENTAL
2516 char * AffixMgr::suffix_check_twosfx_morph(const char * word
, int len
,
2517 int sfxopts
, AffEntry
* ppfx
, const FLAG needflag
)
2519 char result
[MAXLNLEN
];
2520 char result2
[MAXLNLEN
];
2521 char result3
[MAXLNLEN
];
2529 // first handle the special case of 0 length suffixes
2530 SfxEntry
* se
= (SfxEntry
*) sStart
[0];
2532 if (contclasses
[se
->getFlag()])
2534 st
= se
->check_twosfx_morph(word
,len
, sfxopts
, ppfx
, needflag
);
2537 if (((PfxEntry
*) ppfx
)->getMorph()) strcat(result
, ((PfxEntry
*) ppfx
)->getMorph());
2541 if (se
->getMorph()) strcat(result
, se
->getMorph());
2542 strcat(result
, "\n");
2548 // now handle the general case
2549 unsigned char sp
= *((const unsigned char *)(word
+ len
- 1));
2550 SfxEntry
* sptr
= (SfxEntry
*) sStart
[sp
];
2553 if (isRevSubset(sptr
->getKey(), word
+ len
- 1, len
)) {
2554 if (contclasses
[sptr
->getFlag()])
2556 st
= sptr
->check_twosfx_morph(word
,len
, sfxopts
, ppfx
, needflag
);
2558 sfxflag
= sptr
->getFlag(); // BUG: sfxflag not stateless
2559 if (!sptr
->getCont()) sfxappnd
=sptr
->getKey(); // BUG: sfxappnd not stateless
2560 strcpy(result2
, st
);
2565 unsigned short flag
= sptr
->getFlag();
2566 if (flag_mode
== FLAG_NUM
) {
2567 sprintf(result3
, "<%d>", sptr
->getKey());
2568 } else if (flag_mode
== FLAG_LONG
) {
2569 sprintf(result3
, "<%c%c>", flag
>> 8, (flag
<< 8) >>8);
2570 } else sprintf(result3
, "<%c>", flag
);
2571 strcat(result3
, ":");
2573 if (sptr
->getMorph()) strcat(result3
, sptr
->getMorph());
2574 strlinecat(result2
, result3
);
2575 strcat(result2
, "\n");
2576 strcat(result
, result2
);
2579 sptr
= sptr
->getNextEQ();
2581 sptr
= sptr
->getNextNE();
2584 if (result
) return mystrdup(result
);
2588 char * AffixMgr::suffix_check_morph(const char * word
, int len
,
2589 int sfxopts
, AffEntry
* ppfx
, const FLAG cclass
, const FLAG needflag
, char in_compound
)
2591 char result
[MAXLNLEN
];
2593 struct hentry
* rv
= NULL
;
2597 PfxEntry
* ep
= (PfxEntry
*) ppfx
;
2599 // first handle the special case of 0 length suffixes
2600 SfxEntry
* se
= (SfxEntry
*) sStart
[0];
2602 if (!cclass
|| se
->getCont()) {
2603 // suffixes are not allowed in beginning of compounds
2604 if (((((in_compound
!= IN_CPD_BEGIN
)) || // && !cclass
2605 // except when signed with compoundpermitflag flag
2606 (se
->getCont() && compoundpermitflag
&&
2607 TESTAFF(se
->getCont(),compoundpermitflag
,se
->getContLen()))) && (!circumfix
||
2608 // no circumfix flag in prefix and suffix
2609 ((!ppfx
|| !(ep
->getCont()) || !TESTAFF(ep
->getCont(),
2610 circumfix
, ep
->getContLen())) &&
2611 (!se
->getCont() || !(TESTAFF(se
->getCont(),circumfix
,se
->getContLen())))) ||
2612 // circumfix flag in prefix AND suffix
2613 ((ppfx
&& (ep
->getCont()) && TESTAFF(ep
->getCont(),
2614 circumfix
, ep
->getContLen())) &&
2615 (se
->getCont() && (TESTAFF(se
->getCont(),circumfix
,se
->getContLen()))))) &&
2618 !((se
->getCont() && (TESTAFF(se
->getCont(), onlyincompound
, se
->getContLen()))))) &&
2619 // pseudoroot on prefix or first suffix
2621 !(se
->getCont() && TESTAFF(se
->getCont(), pseudoroot
, se
->getContLen())) ||
2622 (ppfx
&& !((ep
->getCont()) &&
2623 TESTAFF(ep
->getCont(), pseudoroot
,
2627 rv
= se
->checkword(word
,len
, sfxopts
, ppfx
, NULL
, 0, 0, cclass
, needflag
);
2630 if (((PfxEntry
*) ppfx
)->getMorph()) strcat(result
, ((PfxEntry
*) ppfx
)->getMorph());
2632 if (complexprefixes
&& rv
->description
) strcat(result
, rv
->description
);
2633 if (rv
->description
&& ((!rv
->astr
) ||
2634 !TESTAFF(rv
->astr
, lemma_present
, rv
->alen
)))
2635 strcat(result
, rv
->word
);
2636 if (!complexprefixes
&& rv
->description
) strcat(result
, rv
->description
);
2637 if (se
->getMorph()) strcat(result
, se
->getMorph());
2638 strcat(result
, "\n");
2639 rv
= se
->get_next_homonym(rv
, sfxopts
, ppfx
, cclass
, needflag
);
2645 // now handle the general case
2646 unsigned char sp
= *((const unsigned char *)(word
+ len
- 1));
2647 SfxEntry
* sptr
= (SfxEntry
*) sStart
[sp
];
2650 if (isRevSubset(sptr
->getKey(), word
+ len
- 1, len
)
2652 // suffixes are not allowed in beginning of compounds
2653 if (((((in_compound
!= IN_CPD_BEGIN
)) || // && !cclass
2654 // except when signed with compoundpermitflag flag
2655 (sptr
->getCont() && compoundpermitflag
&&
2656 TESTAFF(sptr
->getCont(),compoundpermitflag
,sptr
->getContLen()))) && (!circumfix
||
2657 // no circumfix flag in prefix and suffix
2658 ((!ppfx
|| !(ep
->getCont()) || !TESTAFF(ep
->getCont(),
2659 circumfix
, ep
->getContLen())) &&
2660 (!sptr
->getCont() || !(TESTAFF(sptr
->getCont(),circumfix
,sptr
->getContLen())))) ||
2661 // circumfix flag in prefix AND suffix
2662 ((ppfx
&& (ep
->getCont()) && TESTAFF(ep
->getCont(),
2663 circumfix
, ep
->getContLen())) &&
2664 (sptr
->getCont() && (TESTAFF(sptr
->getCont(),circumfix
,sptr
->getContLen()))))) &&
2667 !((sptr
->getCont() && (TESTAFF(sptr
->getCont(), onlyincompound
, sptr
->getContLen()))))) &&
2668 // pseudoroot on first suffix
2669 (cclass
|| !(sptr
->getCont() &&
2670 TESTAFF(sptr
->getCont(), pseudoroot
, sptr
->getContLen())))
2671 )) rv
= sptr
->checkword(word
,len
, sfxopts
, ppfx
, NULL
, 0, 0, cclass
, needflag
);
2674 if (((PfxEntry
*) ppfx
)->getMorph()) strcat(result
, ((PfxEntry
*) ppfx
)->getMorph());
2676 if (complexprefixes
&& rv
->description
) strcat(result
, rv
->description
);
2677 if (rv
->description
&& ((!rv
->astr
) ||
2678 !TESTAFF(rv
->astr
, lemma_present
, rv
->alen
))) strcat(result
, rv
->word
);
2679 if (!complexprefixes
&& rv
->description
) strcat(result
, rv
->description
);
2681 unsigned short flag
= sptr
->getFlag();
2682 if (flag_mode
== FLAG_NUM
) {
2683 sprintf(result
, "<%d>", sptr
->getKey());
2684 } else if (flag_mode
== FLAG_LONG
) {
2685 sprintf(result
, "<%c%c>", flag
>> 8, (flag
<< 8) >>8);
2686 } else sprintf(result
, "<%c>", flag
);
2687 strcat(result
, ":");
2690 if (sptr
->getMorph()) strcat(result
, sptr
->getMorph());
2691 strcat(result
, "\n");
2692 rv
= sptr
->get_next_homonym(rv
, sfxopts
, ppfx
, cclass
, needflag
);
2694 sptr
= sptr
->getNextEQ();
2696 sptr
= sptr
->getNextNE();
2700 if (*result
) return mystrdup(result
);
2703 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
2706 // check if word with affixes is correctly spelled
2707 struct hentry
* AffixMgr::affix_check (const char * word
, int len
, const FLAG needflag
, char in_compound
)
2709 struct hentry
* rv
= NULL
;
2710 if (derived
) free(derived
);
2713 // check all prefixes (also crossed with suffixes if allowed)
2714 rv
= prefix_check(word
, len
, in_compound
, needflag
);
2717 // if still not found check all suffixes
2718 rv
= suffix_check(word
, len
, 0, NULL
, NULL
, 0, NULL
, FLAG_NULL
, needflag
, in_compound
);
2720 if (havecontclass
) {
2724 // if still not found check all two-level suffixes
2725 rv
= suffix_check_twosfx(word
, len
, 0, NULL
, needflag
);
2727 // if still not found check prefixes combined with two-level suffixes
2728 rv
= prefix_check_twosfx(word
, len
, IN_CPD_NOT
, needflag
);
2733 #ifdef HUNSPELL_EXPERIMENTAL
2734 // check if word with affixes is correctly spelled
2735 char * AffixMgr::affix_check_morph(const char * word
, int len
, const FLAG needflag
, char in_compound
)
2737 char result
[MAXLNLEN
];
2742 // check all prefixes (also crossed with suffixes if allowed)
2743 st
= prefix_check_morph(word
, len
, in_compound
);
2749 // if still not found check all suffixes
2750 st
= suffix_check_morph(word
, len
, 0, NULL
, '\0', needflag
, in_compound
);
2756 if (havecontclass
) {
2759 // if still not found check all two-level suffixes
2760 st
= suffix_check_twosfx_morph(word
, len
, 0, NULL
, needflag
);
2766 // if still not found check prefixes combined with two-level suffixes
2767 st
= prefix_check_twosfx_morph(word
, len
, IN_CPD_NOT
, needflag
);
2774 return mystrdup(result
);
2776 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
2779 int AffixMgr::expand_rootword(struct guessword
* wlst
, int maxn
, const char * ts
,
2780 int wl
, const unsigned short * ap
, unsigned short al
, char * bad
, int badl
)
2785 // first add root word to list
2786 if ((nh
< maxn
) && !(al
&& ((pseudoroot
&& TESTAFF(ap
, pseudoroot
, al
)) ||
2787 (onlyincompound
&& TESTAFF(ap
, onlyincompound
, al
))))) {
2788 wlst
[nh
].word
= mystrdup(ts
);
2789 wlst
[nh
].allow
= (1 == 0);
2794 for (int i
= 0; i
< al
; i
++) {
2795 unsigned short c
= (unsigned short) ap
[i
];
2796 SfxEntry
* sptr
= (SfxEntry
*)sFlag
[c
];
2798 if (!sptr
->getKeyLen() || ((badl
> sptr
->getKeyLen()) &&
2799 (strcmp(sptr
->getAffix(), bad
+ badl
- sptr
->getKeyLen()) == 0)) &&
2800 // check pseudoroot flag
2801 !(sptr
->getCont() && ((pseudoroot
&&
2802 TESTAFF(sptr
->getCont(), pseudoroot
, sptr
->getContLen())) ||
2804 TESTAFF(sptr
->getCont(), circumfix
, sptr
->getContLen())) ||
2806 TESTAFF(sptr
->getCont(), onlyincompound
, sptr
->getContLen()))))
2808 char * newword
= sptr
->add(ts
, wl
);
2811 wlst
[nh
].word
= newword
;
2812 wlst
[nh
].allow
= sptr
->allowCross();
2819 sptr
= (SfxEntry
*)sptr
->getFlgNxt();
2825 // handle cross products of prefixes and suffixes
2826 for (int j
=1;j
<n
;j
++)
2827 if (wlst
[j
].allow
) {
2828 for (int k
= 0; k
< al
; k
++) {
2829 unsigned short c
= (unsigned short) ap
[k
];
2830 PfxEntry
* cptr
= (PfxEntry
*) pFlag
[c
];
2832 if (cptr
->allowCross() && (!cptr
->getKeyLen() || ((badl
> cptr
->getKeyLen()) &&
2833 (strncmp(cptr
->getKey(), bad
, cptr
->getKeyLen()) == 0)))) {
2834 int l1
= strlen(wlst
[j
].word
);
2835 char * newword
= cptr
->add(wlst
[j
].word
, l1
);
2838 wlst
[nh
].word
= newword
;
2839 wlst
[nh
].allow
= cptr
->allowCross();
2846 cptr
= (PfxEntry
*)cptr
->getFlgNxt();
2852 // now handle pure prefixes
2853 for (int m
= 0; m
< al
; m
++) {
2854 unsigned short c
= (unsigned short) ap
[m
];
2855 PfxEntry
* ptr
= (PfxEntry
*) pFlag
[c
];
2857 if (!ptr
->getKeyLen() || ((badl
> ptr
->getKeyLen()) &&
2858 (strncmp(ptr
->getKey(), bad
, ptr
->getKeyLen()) == 0)) &&
2859 // check pseudoroot flag
2860 !(ptr
->getCont() && ((pseudoroot
&&
2861 TESTAFF(ptr
->getCont(), pseudoroot
, ptr
->getContLen())) ||
2863 TESTAFF(ptr
->getCont(), circumfix
, ptr
->getContLen())) ||
2865 TESTAFF(ptr
->getCont(), onlyincompound
, ptr
->getContLen()))))
2867 char * newword
= ptr
->add(ts
, wl
);
2870 wlst
[nh
].word
= newword
;
2871 wlst
[nh
].allow
= ptr
->allowCross();
2878 ptr
= (PfxEntry
*)ptr
->getFlgNxt();
2887 // return length of replacing table
2888 int AffixMgr::get_numrep()
2893 // return replacing table
2894 struct replentry
* AffixMgr::get_reptable()
2896 if (! reptable
) return NULL
;
2900 // return length of character map table
2901 int AffixMgr::get_nummap()
2906 // return character map table
2907 struct mapentry
* AffixMgr::get_maptable()
2909 if (! maptable
) return NULL
;
2913 // return length of word break table
2914 int AffixMgr::get_numbreak()
2919 // return word break table
2920 char ** AffixMgr::get_breaktable()
2922 if (! breaktable
) return NULL
;
2926 // return text encoding of dictionary
2927 char * AffixMgr::get_encoding()
2930 encoding
= mystrdup("ISO8859-1");
2932 return mystrdup(encoding
);
2935 // return language code (langnum) of dictionary
2936 int AffixMgr::get_langnum()
2941 // return double prefix option
2942 int AffixMgr::get_complexprefixes()
2944 return complexprefixes
;
2947 FLAG
AffixMgr::get_keepcase()
2952 int AffixMgr::get_checksharps()
2957 // return the preferred ignore string for suggestions
2958 char * AffixMgr::get_ignore()
2960 if (!ignorechars
) return NULL
;
2961 return mystrdup(ignorechars
);
2964 // return the ignored characters in UTF-16 form and store their length in *len
2965 unsigned short * AffixMgr::get_ignore_utf16(int * len
)
2967 *len
= ignorechars_utf16_len
;
2968 return ignorechars_utf16
;
2971 // return the preferred try string for suggestions
2972 char * AffixMgr::get_try_string()
2974 if (! trystring
) return NULL
;
2975 return mystrdup(trystring
);
2978 // return the word characters (wordchars)
2979 const char * AffixMgr::get_wordchars()
2984 unsigned short * AffixMgr::get_wordchars_utf16(int * len
)
2986 *len
= wordchars_utf16_len
;
2987 return wordchars_utf16
;
2990 // is there compounding?
2991 int AffixMgr::get_compound()
2993 return compoundflag
|| compoundbegin
|| numdefcpd
;
2996 // return the compound words control flag
2997 FLAG
AffixMgr::get_compoundflag()
2999 return compoundflag
;
3002 // return the forbidden words control flag
3003 FLAG
AffixMgr::get_forbiddenword()
3005 return forbiddenword
;
3008 // return the nosuggest flag (words signed with it are not suggested)
3009 FLAG
AffixMgr::get_nosuggest()
3014 // return the pseudoroot flag (forbidden root, allowed only with suffixes)
3015 FLAG
AffixMgr::get_pseudoroot()
3020 // return the onlyincompound flag
3021 FLAG
AffixMgr::get_onlyincompound()
3023 return onlyincompound
;
3026 // return the compound word signal flag
3027 FLAG
AffixMgr::get_compoundroot()
3029 return compoundroot
;
3032 // return the compound begin signal flag
3033 FLAG
AffixMgr::get_compoundbegin()
3035 return compoundbegin
;
3038 // return the value of checknum
3039 int AffixMgr::get_checknum()
3044 // return the value of prefix
3045 const char * AffixMgr::get_prefix()
3047 if (pfx
) return ((PfxEntry
*)pfx
)->getKey();
3051 // return the value of suffix
3052 const char * AffixMgr::get_suffix()
3057 // return the value of derived form (base word with first suffix).
3058 const char * AffixMgr::get_derived()
3063 // return the value of version
3064 const char * AffixMgr::get_version()
3069 // return lemma_present flag
3070 FLAG
AffixMgr::get_lemma_present()
3072 return lemma_present
;
3075 // utility method to look up root words in hash table
3076 struct hentry
* AffixMgr::lookup(const char * word
)
3078 if (! pHMgr
) return NULL
;
3079 return pHMgr
->lookup(word
);
3082 // return the value of havecontclass
3083 const int AffixMgr::have_contclass()
3085 return havecontclass
;
3089 int AffixMgr::get_utf8()
3094 // return maxngramsugs
3095 int AffixMgr::get_maxngramsugs(void)
3097 return maxngramsugs
;
3100 // return nosplitsugs
3101 int AffixMgr::get_nosplitsugs(void)
3106 // return sugswithdots
3107 int AffixMgr::get_sugswithdots(void)
3109 return sugswithdots
;
3113 int AffixMgr::parse_flag(char * line
, unsigned short * out
, const char * name
) {
3115 if (*out
!= FLAG_NULL
) {
3116 HUNSPELL_WARNING(stderr
, "error: duplicate %s line\n", name
);
3119 if (parse_string(line
, &s
, name
)) return 1;
3120 *out
= pHMgr
->decode_flag(s
);
3126 int AffixMgr::parse_num(char * line
, int * out
, const char * name
) {
3129 HUNSPELL_WARNING(stderr
, "error: duplicate %s line\n", name
);
3132 if (parse_string(line
, &s
, name
)) return 1;
3138 /* parse in the max syllablecount of compound words and the compound vowels */
3139 int AffixMgr::parse_cpdsyllable(char * line
)
3145 w_char w
[MAXWORDLEN
];
3146 piece
= mystrsep(&tp
, 0);
3148 if (*piece
!= '\0') {
3150 case 0: { np
++; break; }
3151 case 1: { cpdmaxsyllable
= atoi(piece
); np
++; break; }
3154 cpdvowels
= mystrdup(piece
);
3156 int n
= u8_u16(w
, MAXWORDLEN
, piece
);
3158 flag_qsort((unsigned short *) w
, 0, n
);
3159 cpdvowels_utf16
= (w_char
*) malloc(n
* sizeof(w_char
));
3160 if (!cpdvowels_utf16
) return 1;
3161 memcpy(cpdvowels_utf16
, w
, n
* sizeof(w_char
));
3163 cpdvowels_utf16_len
= n
;
3173 piece
= mystrsep(&tp
, 0);
3176 HUNSPELL_WARNING(stderr
, "error: missing compoundsyllable information\n");
3179 if (np
== 2) cpdvowels
= mystrdup("aeiouAEIOU");
3183 /* parse in the typical fault correcting table */
3184 int AffixMgr::parse_reptable(char * line
, FILE * af
)
3187 HUNSPELL_WARNING(stderr
, "error: duplicate REP tables used\n");
3194 piece
= mystrsep(&tp
, 0);
3196 if (*piece
!= '\0') {
3198 case 0: { np
++; break; }
3200 numrep
= atoi(piece
);
3202 HUNSPELL_WARNING(stderr
, "incorrect number of entries in replacement table\n");
3206 reptable
= (replentry
*) malloc(numrep
* sizeof(struct replentry
));
3207 if (!reptable
) return 1;
3216 piece
= mystrsep(&tp
, 0);
3219 HUNSPELL_WARNING(stderr
, "error: missing replacement table information\n");
3223 /* now parse the numrep lines to read in the remainder of the table */
3225 for (int j
=0; j
< numrep
; j
++) {
3226 if (!fgets(nl
,MAXLNLEN
,af
)) return 1;
3230 reptable
[j
].pattern
= NULL
;
3231 reptable
[j
].pattern2
= NULL
;
3232 piece
= mystrsep(&tp
, 0);
3234 if (*piece
!= '\0') {
3237 if (strncmp(piece
,"REP",3) != 0) {
3238 HUNSPELL_WARNING(stderr
, "error: replacement table is corrupt\n");
3244 case 1: { reptable
[j
].pattern
= mystrrep(mystrdup(piece
),"_"," "); break; }
3245 case 2: { reptable
[j
].pattern2
= mystrrep(mystrdup(piece
),"_"," "); break; }
3251 piece
= mystrsep(&tp
, 0);
3253 if ((!(reptable
[j
].pattern
)) || (!(reptable
[j
].pattern2
))) {
3254 HUNSPELL_WARNING(stderr
, "error: replacement table is corrupt\n");
3261 /* parse in the checkcompoundpattern table */
3262 int AffixMgr::parse_checkcpdtable(char * line
, FILE * af
)
3264 if (numcheckcpd
!= 0) {
3265 HUNSPELL_WARNING(stderr
, "error: duplicate compound pattern tables used\n");
3272 piece
= mystrsep(&tp
, 0);
3274 if (*piece
!= '\0') {
3276 case 0: { np
++; break; }
3278 numcheckcpd
= atoi(piece
);
3279 if (numcheckcpd
< 1) {
3280 HUNSPELL_WARNING(stderr
, "incorrect number of entries in compound pattern table\n");
3284 checkcpdtable
= (replentry
*) malloc(numcheckcpd
* sizeof(struct replentry
));
3285 if (!checkcpdtable
) return 1;
3294 piece
= mystrsep(&tp
, 0);
3297 HUNSPELL_WARNING(stderr
, "error: missing compound pattern table information\n");
3301 /* now parse the numcheckcpd lines to read in the remainder of the table */
3303 for (int j
=0; j
< numcheckcpd
; j
++) {
3304 if (!fgets(nl
,MAXLNLEN
,af
)) return 1;
3308 checkcpdtable
[j
].pattern
= NULL
;
3309 checkcpdtable
[j
].pattern2
= NULL
;
3310 piece
= mystrsep(&tp
, 0);
3312 if (*piece
!= '\0') {
3315 if (strncmp(piece
,"CHECKCOMPOUNDPATTERN",20) != 0) {
3316 HUNSPELL_WARNING(stderr
, "error: compound pattern table is corrupt\n");
3322 case 1: { checkcpdtable
[j
].pattern
= mystrdup(piece
); break; }
3323 case 2: { checkcpdtable
[j
].pattern2
= mystrdup(piece
); break; }
3329 piece
= mystrsep(&tp
, 0);
3331 if ((!(checkcpdtable
[j
].pattern
)) || (!(checkcpdtable
[j
].pattern2
))) {
3332 HUNSPELL_WARNING(stderr
, "error: compound pattern table is corrupt\n");
3339 /* parse in the compound rule table */
3340 int AffixMgr::parse_defcpdtable(char * line
, FILE * af
)
3342 if (numdefcpd
!= 0) {
3343 HUNSPELL_WARNING(stderr
, "error: duplicate compound rule tables used\n");
3350 piece
= mystrsep(&tp
, 0);
3352 if (*piece
!= '\0') {
3354 case 0: { np
++; break; }
3356 numdefcpd
= atoi(piece
);
3357 if (numdefcpd
< 1) {
3358 HUNSPELL_WARNING(stderr
, "incorrect number of entries in compound rule table\n");
3362 defcpdtable
= (flagentry
*) malloc(numdefcpd
* sizeof(flagentry
));
3363 if (!defcpdtable
) return 1;
3372 piece
= mystrsep(&tp
, 0);
3375 HUNSPELL_WARNING(stderr
, "error: missing compound rule table information\n");
3379 /* now parse the numdefcpd lines to read in the remainder of the table */
3381 for (int j
=0; j
< numdefcpd
; j
++) {
3382 if (!fgets(nl
,MAXLNLEN
,af
)) return 1;
3386 defcpdtable
[j
].def
= NULL
;
3387 piece
= mystrsep(&tp
, 0);
3389 if (*piece
!= '\0') {
3392 if (strncmp(piece
, "COMPOUNDRULE", 12) != 0) {
3393 HUNSPELL_WARNING(stderr
, "error: compound rule table is corrupt\n");
3400 defcpdtable
[j
].len
=
3401 pHMgr
->decode_flags(&(defcpdtable
[j
].def
), piece
);
3409 piece
= mystrsep(&tp
, 0);
3411 if (!defcpdtable
[j
].len
) {
3412 HUNSPELL_WARNING(stderr
, "error: compound rule table is corrupt\n");
3420 /* parse in the character map table */
3421 int AffixMgr::parse_maptable(char * line
, FILE * af
)
3424 HUNSPELL_WARNING(stderr
, "error: duplicate MAP tables used\n");
3431 piece
= mystrsep(&tp
, 0);
3433 if (*piece
!= '\0') {
3435 case 0: { np
++; break; }
3437 nummap
= atoi(piece
);
3439 HUNSPELL_WARNING(stderr
, "incorrect number of entries in map table\n");
3443 maptable
= (mapentry
*) malloc(nummap
* sizeof(struct mapentry
));
3444 if (!maptable
) return 1;
3453 piece
= mystrsep(&tp
, 0);
3456 HUNSPELL_WARNING(stderr
, "error: missing map table information\n");
3460 /* now parse the nummap lines to read in the remainder of the table */
3462 for (int j
=0; j
< nummap
; j
++) {
3463 if (!fgets(nl
,MAXLNLEN
,af
)) return 1;
3467 maptable
[j
].set
= NULL
;
3468 maptable
[j
].len
= 0;
3469 piece
= mystrsep(&tp
, 0);
3471 if (*piece
!= '\0') {
3474 if (strncmp(piece
,"MAP",3) != 0) {
3475 HUNSPELL_WARNING(stderr
, "error: map table is corrupt\n");
3482 maptable
[j
].len
= 0;
3483 maptable
[j
].set
= NULL
;
3484 maptable
[j
].set_utf16
= NULL
;
3486 maptable
[j
].set
= mystrdup(piece
);
3487 maptable
[j
].len
= strlen(maptable
[j
].set
);
3489 w_char w
[MAXWORDLEN
];
3490 int n
= u8_u16(w
, MAXWORDLEN
, piece
);
3492 flag_qsort((unsigned short *) w
, 0, n
);
3493 maptable
[j
].set_utf16
= (w_char
*) malloc(n
* sizeof(w_char
));
3494 if (!maptable
[j
].set_utf16
) return 1;
3495 memcpy(maptable
[j
].set_utf16
, w
, n
* sizeof(w_char
));
3497 maptable
[j
].len
= n
;
3505 piece
= mystrsep(&tp
, 0);
3507 if ((!(maptable
[j
].set
|| maptable
[j
].set_utf16
)) || (!(maptable
[j
].len
))) {
3508 HUNSPELL_WARNING(stderr
, "error: map table is corrupt\n");
3515 /* parse in the word breakpoint table */
3516 int AffixMgr::parse_breaktable(char * line
, FILE * af
)
3518 if (numbreak
!= 0) {
3519 HUNSPELL_WARNING(stderr
, "error: duplicate word breakpoint tables used\n");
3526 piece
= mystrsep(&tp
, 0);
3528 if (*piece
!= '\0') {
3530 case 0: { np
++; break; }
3532 numbreak
= atoi(piece
);
3534 HUNSPELL_WARNING(stderr
, "incorrect number of entries in BREAK table\n");
3538 breaktable
= (char **) malloc(numbreak
* sizeof(char *));
3539 if (!breaktable
) return 1;
3548 piece
= mystrsep(&tp
, 0);
3551 HUNSPELL_WARNING(stderr
, "error: missing word breakpoint table information\n");
3555 /* now parse the numbreak lines to read in the remainder of the table */
3557 for (int j
=0; j
< numbreak
; j
++) {
3558 if (!fgets(nl
,MAXLNLEN
,af
)) return 1;
3562 piece
= mystrsep(&tp
, 0);
3564 if (*piece
!= '\0') {
3567 if (strncmp(piece
,"BREAK",5) != 0) {
3568 HUNSPELL_WARNING(stderr
, "error: BREAK table is corrupt\n");
3575 breaktable
[j
] = mystrdup(piece
);
3583 piece
= mystrsep(&tp
, 0);
3586 HUNSPELL_WARNING(stderr
, "error: BREAK table is corrupt\n");
3593 int AffixMgr::parse_affix(char * line
, const char at
, FILE * af
, char * dupflags
)
3595 int numents
= 0; // number of affentry structures to parse
3597 unsigned short aflag
= 0; // affix char identifier
3600 struct affentry
* ptr
= NULL
;
3601 struct affentry
* nptr
= NULL
;
3608 // checking lines with bad syntax
3610 int basefieldnum
= 0;
3613 // split affix header line into pieces
3616 piece
= mystrsep(&tp
, 0);
3618 if (*piece
!= '\0') {
3620 // piece 1 - is type of affix
3621 case 0: { np
++; break; }
3623 // piece 2 - is affix char
3626 aflag
= pHMgr
->decode_flag(piece
);
3627 if (((at
== 'S') && (dupflags
[aflag
] & dupSFX
)) ||
3628 ((at
== 'P') && (dupflags
[aflag
] & dupPFX
))) {
3629 HUNSPELL_WARNING(stderr
, "error: duplicate affix flag %s in line %s\n", piece
, nl
);
3630 // return 1; XXX permissive mode for bad dictionaries
3632 dupflags
[aflag
] += ((at
== 'S') ? dupSFX
: dupPFX
);
3635 // piece 3 - is cross product indicator
3636 case 2: { np
++; if (*piece
== 'Y') ff
= aeXPRODUCT
; break; }
3638 // piece 4 - is number of affentries
3641 numents
= atoi(piece
);
3643 char * err
= pHMgr
->encode_flag(aflag
);
3644 HUNSPELL_WARNING(stderr
, "error: affix %s header has incorrect entry count in line %s\n",
3649 ptr
= (struct affentry
*) malloc(numents
* sizeof(struct affentry
));
3652 if (utf8
) ptr
->opts
+= aeUTF8
;
3653 if (pHMgr
->is_aliasf()) ptr
->opts
+= aeALIASF
;
3654 #ifdef HUNSPELL_EXPERIMENTAL
3655 if (pHMgr
->is_aliasm()) ptr
->opts
+= aeALIASM
;
3665 piece
= mystrsep(&tp
, 0);
3667 // check to make sure we parsed enough pieces
3669 char * err
= pHMgr
->encode_flag(aflag
);
3670 HUNSPELL_WARNING(stderr
, "error: affix %s header has insufficient data in line %s\n", err
, nl
);
3676 // store away ptr to first affentry
3679 // now parse numents affentries for this affix
3680 for (int j
=0; j
< numents
; j
++) {
3681 if (!fgets(nl
,MAXLNLEN
,af
)) return 1;
3687 // split line into pieces
3688 piece
= mystrsep(&tp
, 0);
3690 if (*piece
!= '\0') {
3692 // piece 1 - is type
3695 if (nptr
!= ptr
) nptr
->opts
= ptr
->opts
;
3699 // piece 2 - is affix char
3702 if (pHMgr
->decode_flag(piece
) != aflag
) {
3703 char * err
= pHMgr
->encode_flag(aflag
);
3704 HUNSPELL_WARNING(stderr
, "error: affix %s is corrupt near line %s\n", err
, nl
);
3705 HUNSPELL_WARNING(stderr
, "error: possible incorrect count\n");
3711 if (nptr
!= ptr
) nptr
->aflag
= ptr
->aflag
;
3715 // piece 3 - is string to strip or 0 for null
3718 if (complexprefixes
) {
3719 if (utf8
) reverseword_utf(piece
); else reverseword(piece
);
3721 nptr
->strip
= mystrdup(piece
);
3722 nptr
->stripl
= (unsigned char) strlen(nptr
->strip
);
3723 if (strcmp(nptr
->strip
,"0") == 0) {
3725 nptr
->strip
=mystrdup("");
3731 // piece 4 - is affix string or 0 for null
3734 #ifdef HUNSPELL_EXPERIMENTAL
3735 nptr
->morphcode
= NULL
;
3737 nptr
->contclass
= NULL
;
3738 nptr
->contclasslen
= 0;
3740 dash
= strchr(piece
, '/');
3746 remove_ignored_chars_utf(piece
, ignorechars_utf16
, ignorechars_utf16_len
);
3748 remove_ignored_chars(piece
,ignorechars
);
3752 if (complexprefixes
) {
3753 if (utf8
) reverseword_utf(piece
); else reverseword(piece
);
3755 nptr
->appnd
= mystrdup(piece
);
3757 if (pHMgr
->is_aliasf()) {
3758 int index
= atoi(dash
+ 1);
3759 nptr
->contclasslen
= (unsigned short) pHMgr
->get_aliasf(index
, &(nptr
->contclass
));
3761 nptr
->contclasslen
= (unsigned short) pHMgr
->decode_flags(&(nptr
->contclass
), dash
+ 1);
3762 flag_qsort(nptr
->contclass
, 0, nptr
->contclasslen
);
3767 for (unsigned short _i
= 0; _i
< nptr
->contclasslen
; _i
++) {
3768 contclasses
[(nptr
->contclass
)[_i
]] = 1;
3773 remove_ignored_chars_utf(piece
, ignorechars_utf16
, ignorechars_utf16_len
);
3775 remove_ignored_chars(piece
,ignorechars
);
3779 if (complexprefixes
) {
3780 if (utf8
) reverseword_utf(piece
); else reverseword(piece
);
3782 nptr
->appnd
= mystrdup(piece
);
3785 nptr
->appndl
= (unsigned char) strlen(nptr
->appnd
);
3786 if (strcmp(nptr
->appnd
,"0") == 0) {
3788 nptr
->appnd
=mystrdup("");
3794 // piece 5 - is the conditions descriptions
3797 if (complexprefixes
) {
3799 if (utf8
) reverseword_utf(piece
); else reverseword(piece
);
3800 // reverse condition
3801 for (char * k
= piece
+ strlen(piece
) - 1; k
>= piece
; k
--) {
3804 if (neg
) *(k
+1) = '['; else *k
= ']';
3809 if (neg
) *(k
+1) = '^';
3814 if (*(k
+1) == ']') neg
= 1; else *(k
+1) = *k
;
3818 if (neg
) *(k
+1) = *k
;
3823 if (nptr
->stripl
&& (strcmp(piece
, ".") != 0) &&
3824 redundant_condition(at
, nptr
->strip
, nptr
->stripl
, piece
, nl
))
3826 if (encodeit(nptr
,piece
)) return 1;
3830 #ifdef HUNSPELL_EXPERIMENTAL
3833 if (pHMgr
->is_aliasm()) {
3834 int index
= atoi(piece
);
3835 nptr
->morphcode
= pHMgr
->get_aliasm(index
);
3837 if (complexprefixes
) {
3838 if (utf8
) reverseword_utf(piece
); else reverseword(piece
);
3840 nptr
->morphcode
= mystrdup(piece
);
3851 piece
= mystrsep(&tp
, 0);
3853 // check to make sure we parsed enough pieces
3855 char * err
= pHMgr
->encode_flag(aflag
);
3856 HUNSPELL_WARNING(stderr
, "error: affix %s is corrupt near line %s\n", err
, nl
);
3863 #ifdef HUNSPELL_EXPERIMENTAL
3864 // detect unnecessary fields, excepting comments
3866 int fieldnum
= !(nptr
->morphcode
) ? 5 : ((*(nptr
->morphcode
)=='#') ? 5 : 6);
3867 if (fieldnum
!= basefieldnum
)
3868 HUNSPELL_WARNING(stderr
, "warning: bad field number:\n%s\n", nl
);
3870 basefieldnum
= !(nptr
->morphcode
) ? 5 : ((*(nptr
->morphcode
)=='#') ? 5 : 6);
3877 // now create SfxEntry or PfxEntry objects and use links to
3878 // build an ordered (sorted by affix string) list
3880 for (int k
= 0; k
< numents
; k
++) {
3882 PfxEntry
* pfxptr
= new PfxEntry(this,nptr
);
3883 build_pfxtree((AffEntry
*)pfxptr
);
3885 SfxEntry
* sfxptr
= new SfxEntry(this,nptr
);
3886 build_sfxtree((AffEntry
*)sfxptr
);
3894 int AffixMgr::redundant_condition(char ft
, char * strip
, int stripl
, const char * cond
, char * line
) {
3895 int condl
= strlen(cond
);
3900 if (ft
== 'P') { // prefix
3901 if (strncmp(strip
, cond
, condl
) == 0) return 1;
3904 for (i
= 0, j
= 0; (i
< stripl
) && (j
< condl
); i
++, j
++) {
3905 if (cond
[j
] != '[') {
3906 if (cond
[j
] != strip
[i
]) {
3907 HUNSPELL_WARNING(stderr
, "warning: incompatible stripping characters and condition:\n%s\n", line
);
3910 neg
= (cond
[j
+1] == '^') ? 1 : 0;
3914 if (strip
[i
] == cond
[j
]) in
= 1;
3915 } while ((j
< (condl
- 1)) && (cond
[j
] != ']'));
3916 if (j
== (condl
- 1) && (cond
[j
] != ']')) {
3917 HUNSPELL_WARNING(stderr
, "error: missing ] in condition:\n%s\n", line
);
3920 if ((!neg
&& !in
) || (neg
&& in
)) {
3921 HUNSPELL_WARNING(stderr
, "warning: incompatible stripping characters and condition:\n%s\n", line
);
3926 if (j
>= condl
) return 1;
3929 if ((stripl
>= condl
) && strcmp(strip
+ stripl
- condl
, cond
) == 0) return 1;
3932 for (i
= stripl
- 1, j
= condl
- 1; (i
>= 0) && (j
>= 0); i
--, j
--) {
3933 if (cond
[j
] != ']') {
3934 if (cond
[j
] != strip
[i
]) {
3935 HUNSPELL_WARNING(stderr
, "warning: incompatible stripping characters and condition:\n%s\n", line
);
3941 if (strip
[i
] == cond
[j
]) in
= 1;
3942 } while ((j
> 0) && (cond
[j
] != '['));
3943 if ((j
== 0) && (cond
[j
] != '[')) {
3944 HUNSPELL_WARNING(stderr
, "error: missing ] in condition:\n%s\n", line
);
3947 neg
= (cond
[j
+1] == '^') ? 1 : 0;
3948 if ((!neg
&& !in
) || (neg
&& in
)) {
3949 HUNSPELL_WARNING(stderr
, "warning: incompatible stripping characters and condition:\n%s\n", line
);
3954 if (j
< 0) return 1;