1 #include "license.hunspell"
2 #include "license.myspell"
16 #include "suggestmgr.hxx"
18 #ifndef MOZILLA_CLIENT
25 SuggestMgr::SuggestMgr(const char * tryme
, int maxn
,
29 // register affix manager and check in string of chars to
30 // try when building candidate suggestions
39 maxngramsugs
= MAXNGRAMSUGS
;
45 char * enc
= pAMgr
->get_encoding();
46 csconv
= get_current_cs(enc
);
48 nosplitsugs
= pAMgr
->get_nosplitsugs();
49 if (pAMgr
->get_maxngramsugs() >= 0) maxngramsugs
= pAMgr
->get_maxngramsugs();
50 utf8
= pAMgr
->get_utf8();
51 complexprefixes
= pAMgr
->get_complexprefixes();
57 ctryl
= u8_u16(t
, MAXSWL
, tryme
);
58 ctry_utf
= (w_char
*) malloc(ctryl
* sizeof(w_char
));
59 memcpy(ctry_utf
, t
, ctryl
* sizeof(w_char
));
61 ctry
= mystrdup(tryme
);
68 SuggestMgr::~SuggestMgr()
73 if (ctry_utf
) free(ctry_utf
);
79 int SuggestMgr::testsug(char** wlst
, const char * candidate
, int wl
, int ns
, int cpdsuggest
,
80 int * timer
, time_t * timelimit
) {
82 if (ns
== maxSug
) return maxSug
;
83 for (int k
=0; k
< ns
; k
++) {
84 if (strcmp(candidate
,wlst
[k
]) == 0) cwrd
= 0;
86 if ((cwrd
) && checkword(candidate
, wl
, cpdsuggest
, timer
, timelimit
)) {
87 wlst
[ns
] = mystrdup(candidate
);
88 if (wlst
[ns
] == NULL
) {
89 for (int j
=0; j
<ns
; j
++) free(wlst
[j
]);
97 // generate suggestions for a misspelled word
98 // pass in address of array of char * pointers
100 int SuggestMgr::suggest(char*** slst
, const char * w
, int nsug
)
102 int nocompoundtwowords
= 0;
104 w_char word_utf
[MAXSWL
];
107 char w2
[MAXWORDUTF8LEN
];
108 const char * word
= w
;
110 // word reversing wrapper for complex prefixes
111 if (complexprefixes
) {
113 if (utf8
) reverseword_utf(w2
); else reverseword(w2
);
120 wlst
= (char **) malloc(maxSug
* sizeof(char *));
121 if (wlst
== NULL
) return -1;
122 for (int i
= 0; i
< maxSug
; i
++) {
128 wl
= u8_u16(word_utf
, MAXSWL
, word
);
131 for (int cpdsuggest
=0; (cpdsuggest
<2) && (nocompoundtwowords
==0); cpdsuggest
++) {
133 // suggestions for an uppercase word (html -> HTML)
134 if ((nsug
< maxSug
) && (nsug
> -1)) {
135 nsug
= (utf8
) ? capchars_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
136 capchars(wlst
, word
, nsug
, cpdsuggest
);
139 // perhaps we made a typical fault of spelling
140 if ((nsug
< maxSug
) && (nsug
> -1))
141 nsug
= replchars(wlst
, word
, nsug
, cpdsuggest
);
143 // perhaps we chose the wrong char from a related set
144 if ((nsug
< maxSug
) && (nsug
> -1) && (cpdsuggest
== 0)) {
145 nsug
= mapchars(wlst
, word
, nsug
);
148 // did we swap the order of chars by mistake
149 if ((nsug
< maxSug
) && (nsug
> -1)) {
150 nsug
= (utf8
) ? swapchar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
151 swapchar(wlst
, word
, nsug
, cpdsuggest
);
154 // did we swap the order of non adjacent chars by mistake
155 if ((nsug
< maxSug
) && (nsug
> -1)) {
156 nsug
= (utf8
) ? longswapchar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
157 longswapchar(wlst
, word
, nsug
, cpdsuggest
);
160 // did we forget a char
161 if ((nsug
< maxSug
) && (nsug
> -1)) {
162 nsug
= (utf8
) ? forgotchar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
163 forgotchar(wlst
, word
, nsug
, cpdsuggest
);
166 // did we move a char
167 if ((nsug
< maxSug
) && (nsug
> -1)) {
168 nsug
= (utf8
) ? movechar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
169 movechar(wlst
, word
, nsug
, cpdsuggest
);
172 // did we add a char that should not be there
173 if ((nsug
< maxSug
) && (nsug
> -1)) {
174 nsug
= (utf8
) ? extrachar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
175 extrachar(wlst
, word
, nsug
, cpdsuggest
);
178 // did we just hit the wrong key in place of a good char
179 if ((nsug
< maxSug
) && (nsug
> -1)) {
180 nsug
= (utf8
) ? badchar_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
181 badchar(wlst
, word
, nsug
, cpdsuggest
);
184 // did we double two characters
185 if ((nsug
< maxSug
) && (nsug
> -1)) {
186 nsug
= (utf8
) ? doubletwochars_utf(wlst
, word_utf
, wl
, nsug
, cpdsuggest
) :
187 doubletwochars(wlst
, word
, nsug
, cpdsuggest
);
191 // only suggest compound words when no other suggestion
192 if ((cpdsuggest
==0) && (nsug
>0)) nocompoundtwowords
=1;
194 // perhaps we forgot to hit space and two words ran together
195 if ((!nosplitsugs
) && (nsug
< maxSug
) && (nsug
> -1)) {
196 nsug
= twowords(wlst
, word
, nsug
, cpdsuggest
);
199 } // repeating ``for'' statement compounding support
202 // we ran out of memory - we should free up as much as possible
203 for (int i
= 0; i
< maxSug
; i
++)
204 if (wlst
[i
] != NULL
) free(wlst
[i
]);
213 // generate suggestions for a word with typical mistake
214 // pass in address of array of char * pointers
215 #ifdef HUNSPELL_EXPERIMENTAL
216 int SuggestMgr::suggest_auto(char*** slst
, const char * w
, int nsug
)
218 int nocompoundtwowords
= 0;
221 char w2
[MAXWORDUTF8LEN
];
222 const char * word
= w
;
224 // word reversing wrapper for complex prefixes
225 if (complexprefixes
) {
227 if (utf8
) reverseword_utf(w2
); else reverseword(w2
);
234 wlst
= (char **) malloc(maxSug
* sizeof(char *));
235 if (wlst
== NULL
) return -1;
238 for (int cpdsuggest
=0; (cpdsuggest
<2) && (nocompoundtwowords
==0); cpdsuggest
++) {
240 // perhaps we made a typical fault of spelling
241 if ((nsug
< maxSug
) && (nsug
> -1))
242 nsug
= replchars(wlst
, word
, nsug
, cpdsuggest
);
244 // perhaps we chose the wrong char from a related set
245 if ((nsug
< maxSug
) && (nsug
> -1) && (cpdsuggest
== 0))
246 nsug
= mapchars(wlst
, word
, nsug
);
248 if ((cpdsuggest
==0) && (nsug
>0)) nocompoundtwowords
=1;
250 // perhaps we forgot to hit space and two words ran together
252 if ((nsug
< maxSug
) && (nsug
> -1) && check_forbidden(word
, strlen(word
))) {
253 nsug
= twowords(wlst
, word
, nsug
, cpdsuggest
);
256 } // repeating ``for'' statement compounding support
259 for (int i
=0;i
<maxSug
; i
++)
260 if (wlst
[i
] != NULL
) free(wlst
[i
]);
268 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
270 // suggestions for an uppercase word (html -> HTML)
271 int SuggestMgr::capchars_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
273 char candidate
[MAXSWUTF8L
];
274 w_char candidate_utf
[MAXSWL
];
275 memcpy(candidate_utf
, word
, wl
* sizeof(w_char
));
276 mkallcap_utf(candidate_utf
, wl
, pAMgr
->get_langnum());
277 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
278 return testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
281 // suggestions for an uppercase word (html -> HTML)
282 int SuggestMgr::capchars(char** wlst
, const char * word
, int ns
, int cpdsuggest
)
284 char candidate
[MAXSWUTF8L
];
285 strcpy(candidate
, word
);
286 mkallcap(candidate
, csconv
);
287 return testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
290 // suggestions for when chose the wrong char out of a related set
291 int SuggestMgr::mapchars(char** wlst
, const char * word
, int ns
)
296 int wl
= strlen(word
);
297 if (wl
< 2 || ! pAMgr
) return ns
;
299 int nummap
= pAMgr
->get_nummap();
300 struct mapentry
* maptable
= pAMgr
->get_maptable();
301 if (maptable
==NULL
) return ns
;
303 timelimit
= time(NULL
);
307 int len
= u8_u16(w
, MAXSWL
, word
);
308 ns
= map_related_utf(w
, len
, 0, wlst
, ns
, maptable
, nummap
, &timer
, &timelimit
);
309 } else ns
= map_related(word
, 0, wlst
, ns
, maptable
, nummap
, &timer
, &timelimit
);
313 int SuggestMgr::map_related(const char * word
, int i
, char** wlst
, int ns
,
314 const mapentry
* maptable
, int nummap
, int * timer
, time_t * timelimit
)
316 char c
= *(word
+ i
);
319 int wl
= strlen(word
);
320 for (int m
=0; m
< ns
; m
++)
321 if (strcmp(word
,wlst
[m
]) == 0) cwrd
= 0;
322 if ((cwrd
) && (checkword(word
, wl
, 0, timer
, timelimit
) ||
323 checkword(word
, wl
, 1, timer
, timelimit
))) {
325 wlst
[ns
] = mystrdup(word
);
326 if (wlst
[ns
] == NULL
) return -1;
333 for (int j
= 0; j
< nummap
; j
++) {
334 if (strchr(maptable
[j
].set
,c
) != 0) {
336 char * newword
= mystrdup(word
);
337 for (int k
= 0; k
< maptable
[j
].len
; k
++) {
338 *(newword
+ i
) = *(maptable
[j
].set
+ k
);
339 ns
= map_related(newword
, (i
+1), wlst
, ns
, maptable
, nummap
, timer
, timelimit
);
340 if (!(*timelimit
)) return ns
;
347 ns
= map_related(word
, i
, wlst
, ns
, maptable
, nummap
, timer
, timelimit
);
352 int SuggestMgr::map_related_utf(w_char
* word
, int len
, int i
, char** wlst
, int ns
,
353 const mapentry
* maptable
, int nummap
, int * timer
, time_t * timelimit
)
359 u16_u8(s
, MAXSWUTF8L
, word
, len
);
361 for (int m
=0; m
< ns
; m
++)
362 if (strcmp(s
,wlst
[m
]) == 0) cwrd
= 0;
363 if ((cwrd
) && (checkword(s
, wl
, 0, timer
, timelimit
) ||
364 checkword(s
, wl
, 1, timer
, timelimit
))) {
366 wlst
[ns
] = mystrdup(s
);
367 if (wlst
[ns
] == NULL
) return -1;
374 unsigned short c
= *((unsigned short *) word
+ i
);
375 for (int j
= 0; j
< nummap
; j
++) {
376 if (flag_bsearch((unsigned short *) maptable
[j
].set_utf16
, c
, maptable
[j
].len
)) {
378 for (int k
= 0; k
< maptable
[j
].len
; k
++) {
379 *(word
+ i
) = *(maptable
[j
].set_utf16
+ k
);
380 ns
= map_related_utf(word
, len
, i
+ 1, wlst
, ns
, maptable
, nummap
, timer
, timelimit
);
381 if (!(*timelimit
)) return ns
;
383 *((unsigned short *) word
+ i
) = c
;
388 ns
= map_related_utf(word
, len
, i
, wlst
, ns
, maptable
, nummap
, timer
, timelimit
);
395 // suggestions for a typical spelling fault that
396 // differs from the right form by more than 1 letter.
397 int SuggestMgr::replchars(char** wlst
, const char * word
, int ns
, int cpdsuggest
)
399 char candidate
[MAXSWUTF8L
];
402 int wl
= strlen(word
);
403 if (wl
< 2 || ! pAMgr
) return ns
;
404 int numrep
= pAMgr
->get_numrep();
405 struct replentry
* reptable
= pAMgr
->get_reptable();
406 if (reptable
==NULL
) return ns
;
407 for (int i
=0; i
< numrep
; i
++ ) {
409 lenr
= strlen(reptable
[i
].pattern2
);
410 lenp
= strlen(reptable
[i
].pattern
);
411 // search every occurrence of the pattern in the word
412 while ((r
=strstr(r
, reptable
[i
].pattern
)) != NULL
) {
413 strcpy(candidate
, word
);
414 if (r
-word
+ lenr
+ strlen(r
+lenp
) >= MAXSWUTF8L
) break;
415 strcpy(candidate
+(r
-word
),reptable
[i
].pattern2
);
416 strcpy(candidate
+(r
-word
)+lenr
, r
+lenp
);
417 ns
= testsug(wlst
, candidate
, wl
-lenp
+lenr
, ns
, cpdsuggest
, NULL
, NULL
);
418 if (ns
== -1) return -1;
419 r
++; // search for the next letter
425 // perhaps we doubled two characters (pattern aba -> ababa, for example vacation -> vacacation)
426 int SuggestMgr::doubletwochars(char** wlst
, const char * word
, int ns
, int cpdsuggest
)
428 char candidate
[MAXSWUTF8L
];
430 int wl
= strlen(word
);
431 if (wl
< 5 || ! pAMgr
) return ns
;
432 for (int i
=2; i
< wl
; i
++ ) {
433 if (word
[i
]==word
[i
-2]) {
436 strcpy(candidate
,word
);
437 strcpy(candidate
+i
-1,word
+i
+1);
438 ns
= testsug(wlst
, candidate
, wl
-2, ns
, cpdsuggest
, NULL
, NULL
);
439 if (ns
== -1) return -1;
449 // perhaps we doubled two characters (pattern aba -> ababa, for example vacation -> vacacation)
450 int SuggestMgr::doubletwochars_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
453 w_char candidate_utf
[MAXSWL
];
454 char candidate
[MAXSWUTF8L
];
456 if (wl
< 5 || ! pAMgr
) return ns
;
457 for (int i
=2; i
< wl
; i
++) {
458 if ((word
[i
].l
==word
[i
-2].l
) && (word
[i
].h
==word
[i
-2].h
)) {
461 memcpy(candidate_utf
, word
, (i
- 1) * sizeof(w_char
));
462 memcpy(candidate_utf
+i
-1, word
+i
+1, (wl
-i
-1) * sizeof(w_char
));
463 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
-2);
464 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
465 if (ns
== -1) return -1;
475 // error is wrong char in place of correct one
476 int SuggestMgr::badchar(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
479 char candidate
[MAXSWUTF8L
];
480 time_t timelimit
= time(NULL
);
481 int timer
= MINTIMER
;
482 int wl
= strlen(word
);
483 strcpy(candidate
, word
);
484 // swap out each char one by one and try all the tryme
485 // chars in its place to see if that makes a good word
486 for (int i
=0; i
< wl
; i
++) {
488 for (int j
=0; j
< ctryl
; j
++) {
489 if (ctry
[j
] == tmpc
) continue;
490 candidate
[i
] = ctry
[j
];
491 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, &timer
, &timelimit
);
492 if (ns
== -1) return -1;
493 if (!timelimit
) return ns
;
500 // error is wrong char in place of correct one
501 int SuggestMgr::badchar_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
504 w_char candidate_utf
[MAXSWL
];
505 char candidate
[MAXSWUTF8L
];
506 time_t timelimit
= time(NULL
);
507 int timer
= MINTIMER
;
508 memcpy(candidate_utf
, word
, wl
* sizeof(w_char
));
509 // swap out each char one by one and try all the tryme
510 // chars in its place to see if that makes a good word
511 for (int i
=0; i
< wl
; i
++) {
512 tmpc
= candidate_utf
[i
];
513 for (int j
=0; j
< ctryl
; j
++) {
514 if ((ctry_utf
[j
].l
== tmpc
.l
) && (ctry_utf
[j
].h
== tmpc
.h
)) continue;
515 candidate_utf
[i
] = ctry_utf
[j
];
516 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
517 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, &timer
, &timelimit
);
518 if (ns
== -1) return -1;
519 if (!timelimit
) return ns
;
520 candidate_utf
[i
] = tmpc
;
526 // error is word has an extra letter it does not need
527 int SuggestMgr::extrachar_utf(char** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
529 char candidate
[MAXSWUTF8L
];
530 w_char candidate_utf
[MAXSWL
];
533 if (wl
< 2) return ns
;
534 // try omitting one char of word at a time
535 memcpy(candidate_utf
, word
+ 1, (wl
- 1) * sizeof(w_char
));
536 for (p
= word
, r
= candidate_utf
; p
< word
+ wl
; ) {
537 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
- 1);
538 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
539 if (ns
== -1) return -1;
545 // error is word has an extra letter it does not need
546 int SuggestMgr::extrachar(char** wlst
, const char * word
, int ns
, int cpdsuggest
)
548 char candidate
[MAXSWUTF8L
];
551 int wl
= strlen(word
);
552 if (wl
< 2) return ns
;
553 // try omitting one char of word at a time
554 strcpy (candidate
, word
+ 1);
555 for (p
= word
, r
= candidate
; *p
!= 0; ) {
556 ns
= testsug(wlst
, candidate
, wl
-1, ns
, cpdsuggest
, NULL
, NULL
);
557 if (ns
== -1) return -1;
564 // error is missing a letter it needs
565 int SuggestMgr::forgotchar(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
567 char candidate
[MAXSWUTF8L
];
571 time_t timelimit
= time(NULL
);
572 int timer
= MINTIMER
;
573 int wl
= strlen(word
);
574 // try inserting a tryme character before every letter
575 strcpy(candidate
+ 1, word
);
576 for (p
= word
, q
= candidate
; *p
!= 0; ) {
577 for (int i
= 0; i
< ctryl
; i
++) {
579 ns
= testsug(wlst
, candidate
, wl
+1, ns
, cpdsuggest
, &timer
, &timelimit
);
580 if (ns
== -1) return -1;
581 if (!timelimit
) return ns
;
585 // now try adding one to end
586 for (int i
= 0; i
< ctryl
; i
++) {
588 ns
= testsug(wlst
, candidate
, wl
+1, ns
, cpdsuggest
, NULL
, NULL
);
589 if (ns
== -1) return -1;
594 // error is missing a letter it needs
595 int SuggestMgr::forgotchar_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
597 w_char candidate_utf
[MAXSWL
];
598 char candidate
[MAXSWUTF8L
];
602 time_t timelimit
= time(NULL
);
603 int timer
= MINTIMER
;
604 // try inserting a tryme character before every letter
605 memcpy (candidate_utf
+ 1, word
, wl
* sizeof(w_char
));
606 for (p
= word
, q
= candidate_utf
; p
< (word
+ wl
); ) {
607 for (int i
= 0; i
< ctryl
; i
++) {
610 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
+ 1);
611 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, &timer
, &timelimit
);
612 if (ns
== -1) return -1;
613 if (!timelimit
) return ns
;
617 // now try adding one to end
618 for (int i
= 0; i
< ctryl
; i
++) {
621 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
+ 1);
622 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
623 if (ns
== -1) return -1;
629 /* error is should have been two words */
630 int SuggestMgr::twowords(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
632 char candidate
[MAXSWUTF8L
];
639 if (wl
< 3) return ns
;
641 if (pAMgr
->get_langnum() == LANG_hu
) forbidden
= check_forbidden(word
, wl
);
643 strcpy(candidate
+ 1, word
);
645 // split the string into two pieces after every char
646 // if both pieces are good words make them a suggestion
647 for (p
= candidate
+ 1; p
[1] != '\0'; p
++) {
649 // go to end of the UTF-8 character
650 while (utf8
&& ((p
[1] & 0xc0) == 0x80)) {
655 c1
= checkword(candidate
,strlen(candidate
), cpdsuggest
, NULL
, NULL
);
657 c2
= checkword((p
+1),strlen(p
+1), cpdsuggest
, NULL
, NULL
);
661 // spec. Hungarian code (need a better compound word support)
662 if ((pAMgr
->get_langnum() == LANG_hu
) && !forbidden
&&
663 // if 3 repeating letters, use - instead of space
664 (((p
[-1] == p
[1]) && (((p
>candidate
+1) && (p
[-1] == p
[-2])) || (p
[-1] == p
[2]))) ||
665 // or multiple compounding, with more than 6 syllables
666 ((c1
== 3) && (c2
>= 2)))) *p
= '-';
669 for (int k
=0; k
< ns
; k
++)
670 if (strcmp(candidate
,wlst
[k
]) == 0) cwrd
= 0;
673 wlst
[ns
] = mystrdup(candidate
);
674 if (wlst
[ns
] == NULL
) return -1;
685 // error is adjacent letters were swapped
686 int SuggestMgr::swapchar(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
688 char candidate
[MAXSWUTF8L
];
692 // try swapping adjacent chars one by one
693 strcpy(candidate
, word
);
694 for (p
= candidate
; p
[1] != 0; p
++) {
698 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
699 if (ns
== -1) return -1;
// swapchar_utf: UTF-16 variant of the "adjacent letters swapped" suggestion.
// The visible statements copy the wl units of word into candidate_utf, walk p
// over every position that has a right-hand neighbour, convert the working
// buffer to UTF-8 and hand it to testsug(); -1 from testsug() is propagated.
// NOTE(review): the statements that actually exchange *p and p[1] (and restore
// them afterwards) are not part of this excerpt.
706 // error is adjacent letters were swapped
707 int SuggestMgr::swapchar_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
709 w_char candidate_utf
[MAXSWL
];
710 char candidate
[MAXSWUTF8L
];
713 // try swapping adjacent chars one by one
714 memcpy (candidate_utf
, word
, wl
* sizeof(w_char
));
715 for (p
= candidate_utf
; p
< (candidate_utf
+ wl
- 1); p
++) {
719 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
// NOTE(review): wl counts w_char units, not bytes of the UTF-8 candidate.
// The other *_utf routines in this file (capchars_utf, badchar_utf,
// extrachar_utf, forgotchar_utf, longswapchar_utf, doubletwochars_utf)
// pass strlen(candidate) here; confirm whether this call should too.
720 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
721 if (ns
== -1) return -1;
728 // error is non-adjacent letters were swapped
729 int SuggestMgr::longswapchar(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
731 char candidate
[MAXSWUTF8L
];
736 // try swapping not adjacent chars one by one
737 strcpy(candidate
, word
);
738 for (p
= candidate
; *p
!= 0; p
++) {
739 for (q
= candidate
; *q
!= 0; q
++) {
744 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
745 if (ns
== -1) return -1;
755 // error is non-adjacent letters were swapped
756 int SuggestMgr::longswapchar_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
758 w_char candidate_utf
[MAXSWL
];
759 char candidate
[MAXSWUTF8L
];
763 // try swapping not adjacent chars
764 memcpy (candidate_utf
, word
, wl
* sizeof(w_char
));
765 for (p
= candidate_utf
; p
< (candidate_utf
+ wl
); p
++) {
766 for (q
= candidate_utf
; q
< (candidate_utf
+ wl
); q
++) {
771 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
772 if (ns
== -1) return -1;
781 // error is a letter was moved
782 int SuggestMgr::movechar(char ** wlst
, const char * word
, int ns
, int cpdsuggest
)
784 char candidate
[MAXSWUTF8L
];
791 strcpy(candidate
, word
);
792 for (p
= candidate
; *p
!= 0; p
++) {
793 for (q
= p
+ 1; (*q
!= 0) && ((q
- p
) < 10); q
++) {
797 if ((q
-p
) < 2) continue; // omit swap char
798 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
799 if (ns
== -1) return -1;
801 strcpy(candidate
, word
);
803 for (p
= candidate
+ wl
- 1; p
> candidate
; p
--) {
804 for (q
= p
- 1; (q
>= candidate
) && ((p
- q
) < 10); q
--) {
808 if ((p
-q
) < 2) continue; // omit swap char
809 ns
= testsug(wlst
, candidate
, strlen(candidate
), ns
, cpdsuggest
, NULL
, NULL
);
810 if (ns
== -1) return -1;
812 strcpy(candidate
, word
);
// movechar_utf: UTF-16 variant of the "a letter was moved" suggestion.
// Visible structure: a forward pass (p ascending, q running over the following
// positions while q - p < 10) and a backward pass (p descending, q running over
// the preceding positions while p - q < 10). Distances below 2 are skipped
// because that case is an adjacent swap. Each remaining candidate is
// converted to UTF-8 and offered to testsug(); -1 from testsug() is propagated.
// candidate_utf is re-copied from word between iterations.
// NOTE(review): the statements that shift the characters are not part of this
// excerpt.
817 // error is a letter was moved
818 int SuggestMgr::movechar_utf(char ** wlst
, const w_char
* word
, int wl
, int ns
, int cpdsuggest
)
820 w_char candidate_utf
[MAXSWL
];
821 char candidate
[MAXSWUTF8L
];
826 memcpy (candidate_utf
, word
, wl
* sizeof(w_char
));
// forward pass: candidates with a character moved towards the end
827 for (p
= candidate_utf
; p
< (candidate_utf
+ wl
); p
++) {
828 for (q
= p
+ 1; (q
< (candidate_utf
+ wl
)) && ((q
- p
) < 10); q
++) {
832 if ((q
-p
) < 2) continue; // omit swap char
833 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
// NOTE(review): wl is the w_char count, not the byte length of candidate;
// sibling *_utf routines pass strlen(candidate) to testsug() - confirm.
834 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
835 if (ns
== -1) return -1;
// restore the pristine word before the next start position
837 memcpy (candidate_utf
, word
, wl
* sizeof(w_char
));
// backward pass: candidates with a character moved towards the beginning
839 for (p
= candidate_utf
+ wl
- 1; p
> candidate_utf
; p
--) {
840 for (q
= p
- 1; (q
>= candidate_utf
) && ((p
- q
) < 10); q
--) {
844 if ((p
-q
) < 2) continue; // omit swap char
845 u16_u8(candidate
, MAXSWUTF8L
, candidate_utf
, wl
);
// NOTE(review): same length question as in the forward pass (wl vs
// strlen(candidate)).
846 ns
= testsug(wlst
, candidate
, wl
, ns
, cpdsuggest
, NULL
, NULL
);
847 if (ns
== -1) return -1;
849 memcpy (candidate_utf
, word
, wl
* sizeof(w_char
));
854 // generate a set of suggestions for very poorly spelled words
855 int SuggestMgr::ngsuggest(char** wlst
, char * w
, HashMgr
* pHMgr
)
864 if (!pHMgr
) return 0;
866 // exhaustively search through all root words
867 // keeping track of the MAX_ROOTS most similar root words
868 struct hentry
* roots
[MAX_ROOTS
];
869 int scores
[MAX_ROOTS
];
870 for (i
= 0; i
< MAX_ROOTS
; i
++) {
872 scores
[i
] = -100 * i
;
876 char w2
[MAXWORDUTF8LEN
];
879 // word reversing wrapper for complex prefixes
880 if (complexprefixes
) {
882 if (utf8
) reverseword_utf(w2
); else reverseword(w2
);
888 int nc
= strlen(word
);
889 int n
= (utf8
) ? u8_u16(u8
, MAXSWL
, word
) : nc
;
891 // set character based ngram suggestion for words with non-BMP Unicode characters
898 struct hentry
* hp
= NULL
;
900 while ((hp
= pHMgr
->walk_hashtable(col
, hp
))) {
901 if ((hp
->astr
) && (pAMgr
) &&
902 (TESTAFF(hp
->astr
, pAMgr
->get_forbiddenword(), hp
->alen
) ||
903 TESTAFF(hp
->astr
, pAMgr
->get_nosuggest(), hp
->alen
) ||
904 TESTAFF(hp
->astr
, pAMgr
->get_onlyincompound(), hp
->alen
))) continue;
905 sc
= ngram(3, word
, hp
->word
, NGRAM_LONGER_WORSE
);
906 if (sc
> scores
[lp
]) {
910 for (j
=0; j
< MAX_ROOTS
; j
++)
911 if (scores
[j
] < lval
) {
918 // find minimum threshold for a passable suggestion
919 // mangle original word three different ways
920 // and score them to generate a minimum acceptable score
922 for (int sp
= 1; sp
< 4; sp
++) {
924 for (int k
=sp
; k
< n
; k
+=4) *((unsigned short *) u8
+ k
) = '*';
925 u16_u8(mw
, MAXSWUTF8L
, u8
, n
);
926 thresh
= thresh
+ ngram(n
, word
, mw
, NGRAM_ANY_MISMATCH
);
929 for (int k
=sp
; k
< n
; k
+=4) *(mw
+ k
) = '*';
930 thresh
= thresh
+ ngram(n
, word
, mw
, NGRAM_ANY_MISMATCH
);
936 // now expand affixes on each of these root words
937 // and use length adjusted ngram scores to select
938 // possible suggestions
939 char * guess
[MAX_GUESS
];
940 int gscore
[MAX_GUESS
];
941 for(i
=0;i
<MAX_GUESS
;i
++) {
943 gscore
[i
] = -100 * i
;
948 struct guessword
* glst
;
949 glst
= (struct guessword
*) calloc(MAX_WORDS
,sizeof(struct guessword
));
951 if (nonbmp
) utf8
= 1;
955 for (i
= 0; i
< MAX_ROOTS
; i
++) {
958 struct hentry
* rp
= roots
[i
];
959 int nw
= pAMgr
->expand_rootword(glst
, MAX_WORDS
, rp
->word
, rp
->wlen
,
960 rp
->astr
, rp
->alen
, word
, nc
);
962 for (int k
= 0; k
< nw
; k
++) {
963 sc
= ngram(n
, word
, glst
[k
].word
, NGRAM_ANY_MISMATCH
);
965 if (sc
> gscore
[lp
]) {
966 if (guess
[lp
]) free (guess
[lp
]);
968 guess
[lp
] = glst
[k
].word
;
970 for (j
=0; j
< MAX_GUESS
; j
++)
971 if (gscore
[j
] < lval
) {
975 } else free (glst
[k
].word
);
976 } else free(glst
[k
].word
);
982 // now we are done generating guesses
983 // sort in order of decreasing score
985 bubblesort(&guess
[0], &gscore
[0], MAX_GUESS
);
987 // weight suggestions with a similarity index, based on
988 // the longest common subsequence algorithm and re-sort
991 for (i
=0; i
< MAX_GUESS
; i
++) {
998 len
= u8_u16(_w
, MAXSWL
, guess
[i
]);
999 mkallsmall_utf(_w
, len
, pAMgr
->get_langnum());
1000 u16_u8(gl
, MAXSWUTF8L
, _w
, len
);
1002 strcpy(gl
, guess
[i
]);
1003 mkallsmall(gl
, csconv
);
1004 len
= strlen(guess
[i
]);
1007 int _lcs
= lcslen(word
, gl
);
1009 // same characters with different casing
1010 if ((n
== len
) && (n
== _lcs
)) {
1015 // heuristic weighting of ngram scores
1017 // length of longest common subsequence minus length difference
1018 2 * _lcs
- abs((int) (n
- len
)) +
1019 // weight equal first letter
1020 equalfirstletter(word
, gl
) +
1021 // weight equal character positions
1022 ((_lcs
== commoncharacterpositions(word
, gl
, &is_swap
)) ? 1: 0) +
1023 // swap character (not neighboring)
1024 ((is_swap
) ? 1000 : 0);
1028 bubblesort(&guess
[0], &gscore
[0], MAX_GUESS
);
1034 for (i
=0; i
< MAX_GUESS
; i
++) {
1036 if ((ns
< maxngramsugs
) && (ns
< maxSug
) && (!same
|| (gscore
[i
] > 1000))) {
1038 // we have excellent suggestion(s)
1039 if (gscore
[i
] > 1000) same
= 1;
1040 for (j
=0; j
< ns
; j
++)
1041 // don't suggest previous suggestions or a previous suggestion with prefixes or affixes
1042 if (strstr(guess
[i
], wlst
[j
]) ||
1043 // check forbidden words
1044 !checkword(guess
[i
], strlen(guess
[i
]), 0, NULL
, NULL
)) unique
= 0;
1045 if (unique
) wlst
[ns
++] = guess
[i
]; else free(guess
[i
]);
1046 } else free(guess
[i
]);
1050 if (nonbmp
) utf8
= 1;
1055 // see if a candidate suggestion is spelled correctly
1056 // needs to check both root words and words with affixes
1058 // obsolete MySpell-HU modifications:
1059 // return value 2 and 3 marks compounding with hyphen (-)
1060 // `3' marks roots without suffix
1061 int SuggestMgr::checkword(const char * word
, int len
, int cpdsuggest
, int * timer
, time_t * timelimit
)
1063 struct hentry
* rv
=NULL
;
1069 if (!(*timer
) && timelimit
) {
1070 if (time(NULL
) > *timelimit
) {
1074 *timer
= MAXPLUSTIMER
;
1079 if (cpdsuggest
==1) {
1080 if (pAMgr
->get_compound()) {
1081 rv
= pAMgr
->compound_check(word
,len
,0,0,0,0,NULL
,0,NULL
,NULL
,1);
1082 if (rv
) return 3; // XXX obsolote categorisation
1087 rv
= pAMgr
->lookup(word
);
1090 if ((rv
->astr
) && (TESTAFF(rv
->astr
,pAMgr
->get_forbiddenword(),rv
->alen
)
1091 || TESTAFF(rv
->astr
,pAMgr
->get_nosuggest(),rv
->alen
))) return 0;
1092 if (rv
->astr
&& (TESTAFF(rv
->astr
,pAMgr
->get_pseudoroot(),rv
->alen
) ||
1093 TESTAFF(rv
->astr
,pAMgr
->get_onlyincompound(),rv
->alen
))) rv
= NULL
;
1094 } else rv
= pAMgr
->prefix_check(word
, len
, 0); // only prefix, and prefix + suffix XXX
1099 rv
= pAMgr
->suffix_check(word
, len
, 0, NULL
, NULL
, 0, NULL
); // only suffix
1102 if (!rv
&& pAMgr
->have_contclass()) {
1103 rv
= pAMgr
->suffix_check_twosfx(word
, len
, 0, NULL
, FLAG_NULL
);
1104 if (!rv
) rv
= pAMgr
->prefix_check_twosfx(word
, len
, 1, FLAG_NULL
);
1107 // check forbidden words
1108 if ((rv
) && (rv
->astr
) && (TESTAFF(rv
->astr
,pAMgr
->get_forbiddenword(),rv
->alen
)
1109 || TESTAFF(rv
->astr
,pAMgr
->get_nosuggest(),rv
->alen
) ||
1110 TESTAFF(rv
->astr
,pAMgr
->get_onlyincompound(),rv
->alen
))) return 0;
1112 if (rv
) { // XXX obsolote
1113 if ((pAMgr
->get_compoundflag()) &&
1114 TESTAFF(rv
->astr
, pAMgr
->get_compoundflag(), rv
->alen
)) return 2 + nosuffix
;
1121 int SuggestMgr::check_forbidden(const char * word
, int len
)
1123 struct hentry
* rv
= NULL
;
1126 rv
= pAMgr
->lookup(word
);
1127 if (rv
&& rv
->astr
&& (TESTAFF(rv
->astr
,pAMgr
->get_pseudoroot(),rv
->alen
) ||
1128 TESTAFF(rv
->astr
,pAMgr
->get_onlyincompound(),rv
->alen
))) rv
= NULL
;
1129 if (!(pAMgr
->prefix_check(word
,len
,1)))
1130 rv
= pAMgr
->suffix_check(word
,len
, 0, NULL
, NULL
, 0, NULL
); // prefix+suffix, suffix
1131 // check forbidden words
1132 if ((rv
) && (rv
->astr
) && TESTAFF(rv
->astr
,pAMgr
->get_forbiddenword(),rv
->alen
)) return 1;
1137 #ifdef HUNSPELL_EXPERIMENTAL
// suggest_stems (compiled only under HUNSPELL_EXPERIMENTAL): collects stem
// suggestions for w through fixstems().
// Visible flow: an optional reversed working copy for complexprefixes, a
// calloc'ed list of maxSug entries (returns -1 when the allocation fails), a
// first fixstems() call on the whole word, and - when that adds nothing - a
// retry driven by the last '-' found in a duplicate of the word. The final
// loop frees every non-NULL list entry.
// NOTE(review): several statements between the visible lines are missing from
// this excerpt, so the exact control flow around the '-' handling and the
// cleanup loop cannot be confirmed here.
1138 // suggest stems, XXX experimental code
1139 int SuggestMgr::suggest_stems(char*** slst
, const char * w
, int nsug
)
1141 char buf
[MAXSWUTF8L
];
1143 int prevnsug
= nsug
;
1145 char w2
[MAXWORDUTF8LEN
];
1146 const char * word
= w
;
1148 // word reversing wrapper for complex prefixes
1149 if (complexprefixes
) {
1151 if (utf8
) reverseword_utf(w2
); else reverseword(w2
);
1158 wlst
= (char **) calloc(maxSug
, sizeof(char *));
1159 if (wlst
== NULL
) return -1;
1161 // perhaps there is a fix stem in the dictionary
1162 if ((nsug
< maxSug
) && (nsug
> -1)) {
1164 nsug
= fixstems(wlst
, word
, nsug
);
1165 if (nsug
== prevnsug
) {
// NOTE(review): the result of mystrdup() is used without a NULL check on the
// next statements, although other call sites in this file test it.
1166 char * s
= mystrdup(word
);
1167 char * p
= s
+ strlen(s
);
// scan backwards from the terminator to the last '-' (or to the start of s)
1168 while ((*p
!= '-') && (p
!= s
)) p
--;
1171 nsug
= fixstems(wlst
, s
, nsug
);
1172 if ((nsug
== prevnsug
) && (nsug
< maxSug
) && (nsug
>= 0)) {
// NOTE(review): (t[0] >= '0') || (t[0] <= '9') holds for every character, so
// this loop always runs to the terminator and the "# " branch below can never
// be taken; a digit test would need && instead of ||.
1175 for (t
= s
; (t
[0] != '\0') && ((t
[0] >= '0') || (t
[0] <= '9')); t
++); // is a number?
1176 if (*t
!= '\0') strcpy(buf
, "# ");
1178 wlst
[nsug
] = mystrdup(buf
);
1179 if (wlst
[nsug
] == NULL
) return -1;
1183 nsug
= fixstems(wlst
, p
, nsug
);
1191 for (int i
=0;i
<maxSug
; i
++)
1192 if (wlst
[i
] != NULL
) free(wlst
[i
]);
1202 // there are fix stems in dictionary
1203 int SuggestMgr::fixstems(char ** wlst
, const char * word
, int ns
)
1205 char buf
[MAXSWUTF8L
];
1206 char prefix
[MAXSWUTF8L
] = "";
1208 int dicstem
= 1; // 0 = lookup, 1= affix, 2 = compound
1210 struct hentry
* rv
= NULL
;
1212 int wl
= strlen(word
);
1214 int cmpdstem
[MAXCOMPOUND
];
1217 rv
= pAMgr
->lookup(word
);
1221 // try stripping off affixes
1222 rv
= pAMgr
->affix_check(word
, wl
);
1224 // else try check compound word
1225 if (!rv
&& pAMgr
->get_compound()) {
1226 rv
= pAMgr
->compound_check(word
, wl
,
1227 0, 0, 100, 0, NULL
, 0, &cmpdstemnum
, cmpdstem
,1);
1231 for (int j
= 0; j
< cmpdstemnum
; j
++) {
1232 cpdindex
+= cmpdstem
[j
];
1234 if(! (pAMgr
->lookup(word
+ cpdindex
)))
1235 pAMgr
->affix_check(word
+ cpdindex
, wl
- cpdindex
); // for prefix
1240 if (pAMgr
->get_prefix()) {
1241 strcpy(prefix
, pAMgr
->get_prefix());
1244 // XXX obsolete, will be a general solution for stemming
1245 if ((prefix
) && (strncmp(prefix
, "leg", 3)==0)) prefix
[0] = '\0'; // (HU)
1252 if ((rv
) && (ns
< maxSug
)) {
1254 // check fixstem flag and not_valid_stem flag
1256 if ((ns
< maxSug
) && (dicstem
< 2)) {
1257 strcpy(buf
, prefix
);
1258 if ((dicstem
> 0) && pAMgr
->get_derived()) {
1260 if (strlen(prefix
) == 1) {
1261 strcat(buf
, (pAMgr
->get_derived()) + 1);
1263 strcat(buf
, pAMgr
->get_derived());
1266 // special stem in affix description
1267 const char * wordchars
= pAMgr
->get_wordchars();
1268 if (rv
->description
&&
1269 (strchr(wordchars
, *(rv
->description
)))) {
1270 char * desc
= (rv
->description
) + 1;
1271 while (strchr(wordchars
, *desc
)) desc
++;
1272 strncat(buf
, rv
->description
, desc
- (rv
->description
));
1274 strcat(buf
, rv
->word
);
1277 wlst
[ns
] = mystrdup(buf
);
1278 if (wlst
[ns
] == NULL
) return -1;
1286 // if (rv->astr && (strchr(rv->astr, '0') == NULL)) {
1289 buf
[cpdindex
] = '\0';
1290 if (prefix
) strcat(buf
, prefix
);
1291 if (pAMgr
->get_derived()) {
1292 strcat(buf
, pAMgr
->get_derived());
1294 // special stem in affix description
1295 const char * wordchars
= pAMgr
->get_wordchars();
1296 if (rv
->description
&&
1297 (strchr(wordchars
, *(rv
->description
)))) {
1298 char * desc
= (rv
->description
) + 1;
1299 while (strchr(wordchars
, *desc
)) desc
++;
1300 strncat(buf
, rv
->description
, desc
- (rv
->description
));
1302 strcat(buf
, rv
->word
);
1306 wlst
[ns
] = mystrdup(buf
);
1307 if (wlst
[ns
] == NULL
) return -1;
// Collect possible stems of `w` into a suggestion list by running the affix
// manager's suffix_check over the (optionally reversed) word.
//   slst - in/out pointer to the suggestion list (how *slst is read and
//          written back is not visible in this excerpt)
//   w    - word to analyse
//   nsug - entry count; passed by address to suffix_check and then used as
//          the bound of the clean-up loop
// Returns -1 when the list cannot be allocated.
1316 // suggest possible stems
1317 int SuggestMgr::suggest_pos_stems(char*** slst
, const char * w
, int nsug
)
1321 struct hentry
* rv
= NULL
;
1323 char w2
[MAXSWUTF8L
];
1324 const char * word
= w
;
1326 // word reversing wrapper for complex prefixes
1327 if (complexprefixes
) {
// reverse w2 with the UTF-8 aware helper when the dictionary is UTF-8
1329 if (utf8
) reverseword_utf(w2
); else reverseword(w2
);
1333 int wl
= strlen(word
);
// zero-initialised list with room for maxSug entries
1339 wlst
= (char **) calloc(maxSug
, sizeof(char *));
1340 if (wlst
== NULL
) return -1;
// suffix_check receives the list, its capacity and the entry counter
1343 rv
= pAMgr
->suffix_check(word
, wl
, 0, NULL
, wlst
, maxSug
, &nsug
);
1345 // delete dash from end of word
// NOTE(review): strlen(wlst[j]) - 1 assumes every entry is non-empty and
// non-NULL -- confirm suffix_check guarantees this.
1347 for (int j
=0; j
< nsug
; j
++) {
1348 if (wlst
[j
][strlen(wlst
[j
]) - 1] == '-') wlst
[j
][strlen(wlst
[j
]) - 1] = '\0';
// Build a morphological description of `w`.
// The text is accumulated in the local buffer `result` and the function
// returns mystrdup(line_uniq(delete_zeros(result))), or NULL when the buffer
// is empty or no affix manager is set.
// NOTE(review): the strcat calls below append to the fixed-size
// result[MAXLNLEN] without a visible length check -- confirm inputs are
// bounded.
1357 char * SuggestMgr::suggest_morph(const char * w
)
1359 char result
[MAXLNLEN
];
// r aliases result; its address is passed to compound_check_morph below
1360 char * r
= (char *) result
;
1363 struct hentry
* rv
= NULL
;
1367 if (! pAMgr
) return NULL
;
1369 char w2
[MAXSWUTF8L
];
1370 const char * word
= w
;
1372 // word reversing wrapper for complex prefixes
1373 if (complexprefixes
) {
1375 if (utf8
) reverseword_utf(w2
); else reverseword(w2
);
// dictionary lookup; entries are walked through next_homonym below
1379 rv
= pAMgr
->lookup(word
);
// skip entries flagged forbiddenword, pseudoroot or onlyincompound
1382 if ((!rv
->astr
) || !(TESTAFF(rv
->astr
, pAMgr
->get_forbiddenword(), rv
->alen
) ||
1383 TESTAFF(rv
->astr
, pAMgr
->get_pseudoroot(), rv
->alen
) ||
1384 TESTAFF(rv
->astr
,pAMgr
->get_onlyincompound(),rv
->alen
))) {
// the word itself is emitted only when the entry has a description and is
// not flagged lemma_present
1385 if (rv
->description
&& ((!rv
->astr
) ||
1386 !TESTAFF(rv
->astr
, pAMgr
->get_lemma_present(), rv
->alen
)))
1387 strcat(result
, word
);
1388 if (rv
->description
) strcat(result
, rv
->description
);
// each accepted entry ends with a newline
1389 strcat(result
, "\n");
1391 rv
= rv
->next_homonym
;
// affix-based analysis of the whole word
1394 st
= pAMgr
->affix_check_morph(word
,strlen(word
));
// compound analysis runs only when nothing has been written to result so far
1400 if (pAMgr
->get_compound() && (*result
== '\0'))
1401 pAMgr
->compound_check_morph(word
, strlen(word
),
1402 0, 0, 100, 0,NULL
, 0, &r
, NULL
);
1404 return (*result
) ? mystrdup(line_uniq(delete_zeros(result
))) : NULL
;
1407 char * SuggestMgr::suggest_morph_for_spelling_error(const char * word
)
1410 char ** wlst
= (char **) calloc(maxSug
, sizeof(char *));
1411 // we will use only the first suggestion
1412 for (int i
= 0; i
< maxSug
- 1; i
++) wlst
[i
] = "";
1413 int ns
= suggest(&wlst
, word
, maxSug
- 1);
1415 p
= suggest_morph(wlst
[maxSug
- 1]);
1416 free(wlst
[maxSug
- 1]);
1418 if (wlst
) free(wlst
);
1421 #endif // END OF HUNSPELL_EXPERIMENTAL CODE
1424 // generate an n-gram score comparing s1 and s2
// For every gram length j in 1..n the per-length count ns is added to nscore;
// a length penalty selected by `uselen` is subtracted at the end.
//   n      - largest gram length examined
//   s1     - first word; the 8-bit path temporarily writes '\0' into it,
//            which is why it is not const
//   s2     - second word (the "dictionary word" that gets decapitalized)
//   uselen - NGRAM_LONGER_WORSE or NGRAM_ANY_MISMATCH enables the penalty
1425 int SuggestMgr::ngram(int n
, char * s1
, const char * s2
, int uselen
)
// wide-character path: convert both words to w_char arrays
1435 l1
= u8_u16(su1
, MAXSWL
, s1
);
1436 l2
= u8_u16(su2
, MAXSWL
, s2
);
// an empty s2 or a failed conversion (-1) scores 0
1437 if (!l2
|| (l1
==-1) || (l2
==-1)) return 0;
1438 // decapitalize dictionary word
// with complex prefixes the last character is lowered, otherwise the first
1439 if (complexprefixes
) {
1440 mkallsmall_utf(su2
+l2
-1, 1, pAMgr
->get_langnum());
1442 mkallsmall_utf(su2
, 1, pAMgr
->get_langnum());
1444 for (int j
= 1; j
<= n
; j
++) {
// i is the start of a j-gram in su1, l the start of a candidate match in su2
1446 for (int i
= 0; i
<= (l1
-j
); i
++) {
1447 for (int l
= 0; l
<= (l2
-j
); l
++) {
// compare the two j-grams one w_char at a time, stopping at the first mismatch
1449 for (k
= 0; (k
< j
); k
++) {
1450 w_char
* c1
= su1
+ i
+ k
;
1451 w_char
* c2
= su2
+ l
+ k
;
1452 if ((c1
->l
!= c2
->l
) || (c1
->h
!= c2
->h
)) break;
1460 nscore
= nscore
+ ns
;
// 8-bit path: t is the decapitalized working copy that is searched below
1469 if (complexprefixes
) {
1470 *(t
+l2
-1) = csconv
[((unsigned char)*(t
+l2
-1))].clower
;
1472 mkallsmall(t
, csconv
);
1474 for (int j
= 1; j
<= n
; j
++) {
1476 for (int i
= 0; i
<= (l1
-j
); i
++) {
// cut s1 after the j-gram starting at i so that strstr sees only that gram;
// the overwritten character is saved in c (presumably restored afterwards --
// the restore is not visible in this excerpt)
1477 char c
= *(s1
+ i
+ j
);
1478 *(s1
+ i
+ j
) = '\0';
1479 if (strstr(t
,(s1
+i
))) ns
++;
1482 nscore
= nscore
+ ns
;
// length penalty: a length difference of up to 2 characters is free
1488 if (uselen
== NGRAM_LONGER_WORSE
) ns
= (l2
-l1
)-2;
1489 if (uselen
== NGRAM_ANY_MISMATCH
) ns
= abs(l2
-l1
)-2;
// only a positive penalty is subtracted from the accumulated score
1490 ns
= (nscore
- ((ns
> 0) ? ns
: 0));
1494 int SuggestMgr::equalfirstletter(char * s1
, const char * s2
) {
1498 // decapitalize dictionary word
1499 if (complexprefixes
) {
1500 int l1
= u8_u16(su1
, MAXSWL
, s1
);
1501 int l2
= u8_u16(su2
, MAXSWL
, s2
);
1502 if (*((short *)su1
+l1
-1) == *((short *)su2
+l2
-1)) return 1;
1506 if (*((short *)su1
) == *((short *)su2
)) return 1;
1509 if (complexprefixes
) {
1510 int l1
= strlen(s1
);
1511 int l2
= strlen(s2
);
1512 if (*(s2
+l1
-1) == *(s2
+l2
-1)) return 1;
1514 if (*s1
== *s2
) return 1;
// Compare s1 and s2 position by position.
// Mismatching positions are counted in `diff`, and the first two are recorded
// in diffpos. *is_swap is set to 1 when the words have the same length,
// differ in exactly two positions, and those two characters are exchanged.
//   s1      - first word
//   s2      - second word (the "dictionary word" that gets decapitalized)
//   is_swap - out flag, see above
1520 int SuggestMgr::commoncharacterpositions(char * s1
, const char * s2
, int * is_swap
) {
// wide-character path: convert both words to w_char arrays
1528 int l1
= u8_u16(su1
, MAXSWL
, s1
);
1529 int l2
= u8_u16(su2
, MAXSWL
, s2
);
// an empty s2 or a failed conversion (-1) yields 0
1530 if (!l2
|| (l1
==-1) || (l2
==-1)) return 0;
1531 // decapitalize dictionary word
// with complex prefixes the last character is lowered, otherwise the first
1532 if (complexprefixes
) {
1533 mkallsmall_utf(su2
+l2
-1, 1, pAMgr
->get_langnum());
1535 mkallsmall_utf(su2
, 1, pAMgr
->get_langnum());
// walk the common prefix length; each w_char is compared as one short
1537 for (int i
= 0; (i
< l1
) && (i
< l2
); i
++) {
1538 if (((short *) su1
)[i
] == ((short *) su2
)[i
]) {
// remember only the first two mismatch positions
1541 if (diff
< 2) diffpos
[diff
] = i
;
// swap test: equal lengths, exactly two mismatches, characters crossed over
1545 if ((diff
== 2) && (l1
== l2
) &&
1546 (((short *) su1
)[diffpos
[0]] == ((short *) su2
)[diffpos
[1]]) &&
1547 (((short *) su1
)[diffpos
[1]] == ((short *) su2
)[diffpos
[0]])) *is_swap
= 1;
1552 // decapitalize dictionary word
// 8-bit path: t is the decapitalized working copy compared against s1
1553 if (complexprefixes
) {
1555 *(t
+l2
-1) = csconv
[((unsigned char)*(t
+l2
-1))].clower
;
1557 mkallsmall(t
, csconv
);
// walk both strings until either one ends
1559 for (i
= 0; (*(s1
+i
) != 0) && (*(t
+i
) != 0); i
++) {
1560 if (*(s1
+i
) == *(t
+i
)) {
1563 if (diff
< 2) diffpos
[diff
] = i
;
// swap test: both strings ended at the same index (equal length), exactly two
// mismatches, characters crossed over
1567 if ((diff
== 2) && (*(s1
+i
) == 0) && (*(t
+i
) == 0) &&
1568 (*(s1
+diffpos
[0]) == *(t
+diffpos
[1])) &&
1569 (*(s1
+diffpos
[1]) == *(t
+diffpos
[0]))) *is_swap
= 1;
1574 int SuggestMgr::mystrlen(const char * word
) {
1577 return u8_u16(w
, MAXSWL
, word
);
1578 } else return strlen(word
);
1581 // sort in decreasing order of score
1582 void SuggestMgr::bubblesort(char** rword
, int* rsc
, int n
)
1588 if (rsc
[j
-1] < rsc
[j
]) {
1589 int sctmp
= rsc
[j
-1];
1590 char * wdtmp
= rword
[j
-1];
1592 rword
[j
-1] = rword
[j
];
1603 // longest common subsequence
1604 void SuggestMgr::lcs(const char * s
, const char * s2
, int * l1
, int * l2
, char ** result
) {
1613 m
= u8_u16(su
, MAXSWL
, s
);
1614 n
= u8_u16(su2
, MAXSWL
, s2
);
1619 c
= (char *) malloc((m
+ 1) * (n
+ 1));
1620 b
= (char *) malloc((m
+ 1) * (n
+ 1));
1621 for (i
= 1; i
<= m
; i
++) c
[i
*(n
+1)] = 0;
1622 for (j
= 0; j
<= n
; j
++) c
[j
] = 0;
1623 for (i
= 1; i
<= m
; i
++) {
1624 for (j
= 1; j
<= n
; j
++) {
1625 if ((utf8
) && (*((short *) su
+i
-1) == *((short *)su2
+j
-1))
1626 || (!utf8
) && ((*(s
+i
-1)) == (*(s2
+j
-1)))) {
1627 c
[i
*(n
+1) + j
] = c
[(i
-1)*(n
+1) + j
-1]+1;
1628 b
[i
*(n
+1) + j
] = LCS_UPLEFT
;
1629 } else if (c
[(i
-1)*(n
+1) + j
] >= c
[i
*(n
+1) + j
-1]) {
1630 c
[i
*(n
+1) + j
] = c
[(i
-1)*(n
+1) + j
];
1631 b
[i
*(n
+1) + j
] = LCS_UP
;
1633 c
[i
*(n
+1) + j
] = c
[i
*(n
+1) + j
-1];
1634 b
[i
*(n
+1) + j
] = LCS_LEFT
;
1644 int SuggestMgr::lcslen(const char * s
, const char* s2
) {
1651 lcs(s
, s2
, &m
, &n
, &result
);
1654 while ((i
!= 0) && (j
!= 0)) {
1655 if (result
[i
*(n
+1) + j
] == LCS_UPLEFT
) {
1659 } else if (result
[i
*(n
+1) + j
] == LCS_UP
) {
1663 if (result
) free(result
);