src/spell.c

   1 /* vi:set ts=8 sts=4 sw=4:
   2  *
   3  * VIM - Vi IMproved    by Bram Moolenaar
   4  *
   5  * Do ":help uganda"  in Vim to read copying and usage conditions.
   6  * Do ":help credits" in Vim to see a list of people who contributed.
   7  * See README.txt for an overview of the Vim source code.
   8  */
   9
  10 /*
  11  * spell.c: code for spell checking
  12  *
  13  * The spell checking mechanism uses a tree (aka trie).  Each node in the tree
  14  * has a list of bytes that can appear (siblings).  For each byte there is a
  15  * pointer to the node with the byte that follows in the word (child).
  16  *
  17  * A NUL byte is used where the word may end.  The bytes are sorted, so that
  18  * binary searching can be used and the NUL bytes are at the start.  The
  19  * number of possible bytes is stored before the list of bytes.
  20  *
  21  * The tree uses two arrays: "byts" stores the characters, "idxs" stores
  22  * either the next index or flags.  The tree starts at index 0.  For example,
  23  * to lookup "vi" this sequence is followed:
  24  *      i = 0
  25  *      len = byts[i]
  26  *      n = where "v" appears in byts[i + 1] to byts[i + len]
  27  *      i = idxs[n]
  28  *      len = byts[i]
  29  *      n = where "i" appears in byts[i + 1] to byts[i + len]
  30  *      i = idxs[n]
  31  *      len = byts[i]
  32  *      find that byts[i + 1] is 0, idxs[i + 1] has flags for "vi".
  33  *
  34  * There are two word trees: one with case-folded words and one with words in
  35  * original case.  The second one is only used for keep-case words and is
  36  * usually small.
  37  *
  38  * There is one additional tree for when not all prefixes are applied when
  39  * generating the .spl file.  This tree stores all the possible prefixes, as
  40  * if they were words.  At each word (prefix) end the prefix nr is stored, the
  41  * following word must support this prefix nr.  And the condition nr is
  42  * stored, used to lookup the condition that the word must match with.
  43  *
  44  * Thanks to Olaf Seibert for providing an example implementation of this tree
  45  * and the compression mechanism.
  46  * LZ trie ideas:
  47  *      http://www.irb.hr/hr/home/ristov/papers/RistovLZtrieRevision1.pdf
  48  * More papers: http://www-igm.univ-mlv.fr/~laporte/publi_en.html
  49  *
  50  * Matching involves checking the caps type: Onecap ALLCAP KeepCap.
  51  *
  52  * Why doesn't Vim use aspell/ispell/myspell/etc.?
  53  * See ":help develop-spell".
  54  */
  55
  56 /* Use SPELL_PRINTTREE for debugging: dump the word tree after adding a word.
  57  * Only use it for small word lists! */
  58 #if 0   /* disabled; change to 1 to define SPELL_PRINTTREE */
  59 # define SPELL_PRINTTREE
  60 #endif
  61
  62 /* Use DEBUG_TRIEWALK to print the changes made in suggest_trie_walk() for a
  63  * specific word. */
  64 #if 0   /* disabled; change to 1 to define DEBUG_TRIEWALK */
  65 # define DEBUG_TRIEWALK
  66 #endif
  67
  68 /*
  69  * Use this to adjust the score after finding suggestions, based on the
  70  * suggested word sounding like the bad word.  This is much faster than doing
  71  * it for every possible suggestion.
  72  * Disadvantage: When "the" is typed as "hte" it sounds quite different ("@"
  73  * vs "ht") and goes down in the list.
  74  * Used when 'spellsuggest' is set to "best".
  75  * Both arguments are parenthesized, so expressions can be passed. */
  76 #define RESCORE(word_score, sound_score) ((3 * (word_score) + (sound_score)) / 4)
  77
  78 /*
  79  * Do the opposite of RESCORE(): based on a maximum end score and a known sound
  80  * score, compute the maximum word score that can be used.
  81  * Both arguments are parenthesized, so expressions can be passed. */
  82 #define MAXSCORE(word_score, sound_score) ((4 * (word_score) - (sound_score)) / 3)
  83
  84 /*
  85  * Vim spell file format: <HEADER>
  86  *                        <SECTIONS>
  87  *                        <LWORDTREE>
  88  *                        <KWORDTREE>
  89  *                        <PREFIXTREE>
  90  *
  91  * <HEADER>: <fileID> <versionnr>
  92  *
  93  * <fileID>     8 bytes    "VIMspell"
  94  * <versionnr>  1 byte      VIMSPELLVERSION
  95  *
  96  *
  97  * Sections make it possible to add information to the .spl file without
  98  * making it incompatible with previous versions.  There are two kinds of
  99  * sections:
 100  * 1. Not essential for correct spell checking.  E.g. for making suggestions.
 101  *    These are skipped when not supported.
 102  * 2. Optional information, but essential for spell checking when present.
 103  *    E.g. conditions for affixes.  When this section is present but not
 104  *    supported an error message is given.
 105  *
 106  * <SECTIONS>: <section> ... <sectionend>
 107  *
 108  * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
 109  *
 110  * <sectionID>    1 byte    number from 0 to 254 identifying the section
 111  *
 112  * <sectionflags> 1 byte    SNF_REQUIRED: this section is required for correct
 113  *                                          spell checking
 114  *
 115  * <sectionlen>   4 bytes   length of section contents, MSB first
 116  *
 117  * <sectionend>   1 byte    SN_END
 118  *
 119  *
 120  * sectionID == SN_INFO: <infotext>
 121  * <infotext>    N bytes    free format text with spell file info (version,
 122  *                          website, etc)
 123  *
 124  * sectionID == SN_REGION: <regionname> ...
 125  * <regionname>  2 bytes    Up to 8 region names: ca, au, etc.  Lower case.
 126  *                          First <regionname> is region 1.
 127  *
 128  * sectionID == SN_CHARFLAGS: <charflagslen> <charflags>
 129  *                              <folcharslen> <folchars>
 130  * <charflagslen> 1 byte    Number of bytes in <charflags> (should be 128).
 131  * <charflags>  N bytes     List of flags (first one is for character 128):
 132  *                          0x01  word character        CF_WORD
 133  *                          0x02  upper-case character  CF_UPPER
 134  * <folcharslen>  2 bytes   Number of bytes in <folchars>.
 135  * <folchars>     N bytes   Folded characters, first one is for character 128.
 136  *
 137  * sectionID == SN_MIDWORD: <midword>
 138  * <midword>     N bytes    Characters that are word characters only when used
 139  *                          in the middle of a word.
 140  *
 141  * sectionID == SN_PREFCOND: <prefcondcnt> <prefcond> ...
 142  * <prefcondcnt> 2 bytes    Number of <prefcond> items following.
 143  * <prefcond> : <condlen> <condstr>
 144  * <condlen>    1 byte      Length of <condstr>.
 145  * <condstr>    N bytes     Condition for the prefix.
 146  *
 147  * sectionID == SN_REP: <repcount> <rep> ...
 148  * <repcount>    2 bytes    number of <rep> items, MSB first.
 149  * <rep> : <repfromlen> <repfrom> <reptolen> <repto>
 150  * <repfromlen>  1 byte     length of <repfrom>
 151  * <repfrom>     N bytes    "from" part of replacement
 152  * <reptolen>    1 byte     length of <repto>
 153  * <repto>       N bytes    "to" part of replacement
 154  *
 155  * sectionID == SN_REPSAL: <repcount> <rep> ...
 156  *   just like SN_REP but for soundfolded words
 157  *
 158  * sectionID == SN_SAL: <salflags> <salcount> <sal> ...
 159  * <salflags>    1 byte     flags for soundsalike conversion:
 160  *                          SAL_F0LLOWUP
 161  *                          SAL_COLLAPSE
 162  *                          SAL_REM_ACCENTS
 163  * <salcount>    2 bytes    number of <sal> items following
 164  * <sal> : <salfromlen> <salfrom> <saltolen> <salto>
 165  * <salfromlen>  1 byte     length of <salfrom>
 166  * <salfrom>     N bytes    "from" part of soundsalike
 167  * <saltolen>    1 byte     length of <salto>
 168  * <salto>       N bytes    "to" part of soundsalike
 169  *
 170  * sectionID == SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
 171  * <sofofromlen> 2 bytes    length of <sofofrom>
 172  * <sofofrom>    N bytes    "from" part of soundfold
 173  * <sofotolen>   2 bytes    length of <sofoto>
 174  * <sofoto>      N bytes    "to" part of soundfold
 175  *
 176  * sectionID == SN_SUGFILE: <timestamp>
 177  * <timestamp>   8 bytes    time in seconds that must match with .sug file
 178  *
 179  * sectionID == SN_NOSPLITSUGS: nothing
 180  *
 181  * sectionID == SN_WORDS: <word> ...
 182  * <word>        N bytes    NUL terminated common word
 183  *
 184  * sectionID == SN_MAP: <mapstr>
 185  * <mapstr>      N bytes    String with sequences of similar characters,
 186  *                          separated by slashes.
 187  *
 188  * sectionID == SN_COMPOUND: <compmax> <compminlen> <compsylmax> <compoptions>
 189  *                              <comppatcount> <comppattern> ... <compflags>
 190  * <compmax>     1 byte     Maximum nr of words in compound word.
 191  * <compminlen>  1 byte     Minimal word length for compounding.
 192  * <compsylmax>  1 byte     Maximum nr of syllables in compound word.
 193  * <compoptions> 2 bytes    COMP_ flags.
 194  * <comppatcount> 2 bytes   number of <comppattern> following
 195  * <compflags>   N bytes    Flags from COMPOUNDRULE items, separated by
 196  *                          slashes.
 197  *
 198  * <comppattern>: <comppatlen> <comppattext>
 199  * <comppatlen>  1 byte     length of <comppattext>
 200  * <comppattext> N bytes    end or begin chars from CHECKCOMPOUNDPATTERN
 201  *
 202  * sectionID == SN_NOBREAK: (empty, its presence is what matters)
 203  *
 204  * sectionID == SN_SYLLABLE: <syllable>
 205  * <syllable>    N bytes    String from SYLLABLE item.
 206  *
 207  * <LWORDTREE>: <wordtree>
 208  *
 209  * <KWORDTREE>: <wordtree>
 210  *
 211  * <PREFIXTREE>: <wordtree>
 212  *
 213  *
 214  * <wordtree>: <nodecount> <nodedata> ...
 215  *
 216  * <nodecount>  4 bytes     Number of nodes following.  MSB first.
 217  *
 218  * <nodedata>: <siblingcount> <sibling> ...
 219  *
 220  * <siblingcount> 1 byte    Number of siblings in this node.  The siblings
 221  *                          follow in sorted order.
 222  *
 223  * <sibling>: <byte> [ <nodeidx> <xbyte>
 224  *                    | <flags> [<flags2>] [<region>] [<affixID>]
 225  *                    | [<pflags>] <affixID> <prefcondnr> ]
 226  *
 227  * <byte>       1 byte      Byte value of the sibling.  Special cases:
 228  *                          BY_NOFLAGS: End of word without flags and for all
 229  *                                      regions.
 230  *                                      For PREFIXTREE <affixID> and
 231  *                                      <prefcondnr> follow.
 232  *                          BY_FLAGS:   End of word, <flags> follow.
 233  *                                      For PREFIXTREE <pflags>, <affixID>
 234  *                                      and <prefcondnr> follow.
 235  *                          BY_FLAGS2:  End of word, <flags> and <flags2>
 236  *                                      follow.  Not used in PREFIXTREE.
 237  *                          BY_INDEX:   Child of sibling is shared, <nodeidx>
 238  *                                      and <xbyte> follow.
 239  *
 240  * <nodeidx>    3 bytes     Index of child for this sibling, MSB first.
 241  *
 242  * <xbyte>      1 byte      byte value of the sibling.
 243  *
 244  * <flags>      1 byte      bitmask of:
 245  *                          WF_ALLCAP   word must have only capitals
 246  *                          WF_ONECAP   first char of word must be capital
 247  *                          WF_KEEPCAP  keep-case word
 248  *                          WF_FIXCAP   keep-case word, all caps not allowed
 249  *                          WF_RARE     rare word
 250  *                          WF_BANNED   bad word
 251  *                          WF_REGION   <region> follows
 252  *                          WF_AFX      <affixID> follows
 253  *
 254  * <flags2>     1 byte      Bitmask of:
 255  *                          WF_HAS_AFF >> 8   word includes affix
 256  *                          WF_NEEDCOMP >> 8  word only valid in compound
 257  *                          WF_NOSUGGEST >> 8  word not used for suggestions
 258  *                          WF_COMPROOT >> 8  word already a compound
 259  *                          WF_NOCOMPBEF >> 8 no compounding before this word
 260  *                          WF_NOCOMPAFT >> 8 no compounding after this word
 261  *
 262  * <pflags>     1 byte      bitmask of:
 263  *                          WFP_RARE    rare prefix
 264  *                          WFP_NC      non-combining prefix
 265  *                          WFP_UP      letter after prefix made upper case
 266  *
 267  * <region>     1 byte      Bitmask for regions in which word is valid.  When
 268  *                          omitted it's valid in all regions.
 269  *                          Lowest bit is for region 1.
 270  *
 271  * <affixID>    1 byte      ID of affix that can be used with this word.  In
 272  *                          PREFIXTREE used for the required prefix ID.
 273  *
 274  * <prefcondnr> 2 bytes     Prefix condition number, index in <prefcond> list
 275  *                          from HEADER.
 276  *
 277  * All text characters are in 'encoding', but stored as single bytes.
 278  */
 279
 280 /*
 281  * Vim .sug file format:  <SUGHEADER>
 282  *                        <SUGWORDTREE>
 283  *                        <SUGTABLE>
 284  *
 285  * <SUGHEADER>: <fileID> <versionnr> <timestamp>
 286  *
 287  * <fileID>     6 bytes     "VIMsug"
 288  * <versionnr>  1 byte      VIMSUGVERSION
 289  * <timestamp>  8 bytes     timestamp that must match with .spl file
 290  *
 291  *
 292  * <SUGWORDTREE>: <wordtree>  (see above, no flags or region used)
 293  *
 294  *
 295  * <SUGTABLE>: <sugwcount> <sugline> ...
 296  *
 297  * <sugwcount>  4 bytes     number of <sugline> following
 298  *
 299  * <sugline>: <sugnr> ... NUL
 300  *
 301  * <sugnr>:     X bytes     word number that results in this soundfolded word,
 302  *                          stored as an offset to the previous number in as
 303  *                          few bytes as possible, see offset2bytes())
 304  */
 305
 306 #if defined(MSDOS) || defined(WIN16) || defined(WIN32) || defined(_WIN64)
 307 # include "vimio.h"     /* for lseek(), must be before vim.h */
 308 #endif
 309
 310 #include "vim.h"
 311
 312 #if defined(FEAT_SPELL) || defined(PROTO)
 313
 314 #ifndef UNIX            /* it's in os_unix.h for Unix */
 315 # include <time.h>      /* for time_t */
 316 #endif
 317
 318 #define MAXWLEN 250             /* Assume max. word len is this many bytes.
 319                                    Some places assume a word length fits in a
 320                                    byte, thus it can't be above 255. */
 321
 322 /* The type used for indexes in the word tree needs to be at least 4 bytes.
 323  * If int is 8 bytes we could use something smaller, but what? */
 324 #if SIZEOF_INT > 3
 325 typedef int idx_T;      /* int is at least 4 bytes */
 326 #else
 327 typedef long idx_T;     /* int is too small, use long */
 328 #endif
 329
 330 /* Flags used for a word.  Only the lowest byte can be used, the region byte
 331  * comes above it. */
 332 #define WF_REGION   0x01        /* region byte follows */
 333 #define WF_ONECAP   0x02        /* word with one capital (or all capitals) */
 334 #define WF_ALLCAP   0x04        /* word must be all capitals */
 335 #define WF_RARE     0x08        /* rare word */
 336 #define WF_BANNED   0x10        /* bad word */
 337 #define WF_AFX      0x20        /* affix ID follows */
 338 #define WF_FIXCAP   0x40        /* keep-case word, allcap not allowed */
 339 #define WF_KEEPCAP  0x80        /* keep-case word */
 340
 341 /* for <flags2>, shifted up one byte to be used in wn_flags */
 342 #define WF_HAS_AFF  0x0100      /* word includes affix */
 343 #define WF_NEEDCOMP 0x0200      /* word only valid in compound */
 344 #define WF_NOSUGGEST 0x0400     /* word not to be suggested */
 345 #define WF_COMPROOT 0x0800      /* already compounded word, COMPOUNDROOT */
 346 #define WF_NOCOMPBEF 0x1000     /* no compounding before this word */
 347 #define WF_NOCOMPAFT 0x2000     /* no compounding after this word */
 348
 349 /* only used for su_badflags; note it has the same value as WF_AFX */
 350 #define WF_MIXCAP   0x20        /* mix of upper and lower case: macaRONI */
 351
 352 #define WF_CAPMASK (WF_ONECAP | WF_ALLCAP | WF_KEEPCAP | WF_FIXCAP) /* all caps-type flags */
 353
 354 /* bit flags for <pflags>, used in the prefix tree */
 355 #define WFP_RARE            0x01        /* rare prefix */
 356 #define WFP_NC              0x02        /* prefix is not combining */
 357 #define WFP_UP              0x04        /* to-upper prefix */
 358 #define WFP_COMPPERMIT      0x08        /* prefix with COMPOUNDPERMITFLAG */
 359 #define WFP_COMPFORBID      0x10        /* prefix with COMPOUNDFORBIDFLAG */
 360
 361 /* Flags for postponed prefixes in "sl_pidxs".  Must be above affixID (one
 362  * byte) and prefcondnr (two bytes), hence the shift by 24 bits. */
 363 #define WF_RAREPFX  (WFP_RARE << 24)    /* rare postponed prefix */
 364 #define WF_PFX_NC   (WFP_NC << 24)      /* non-combining postponed prefix */
 365 #define WF_PFX_UP   (WFP_UP << 24)      /* to-upper postponed prefix */
 366 #define WF_PFX_COMPPERMIT (WFP_COMPPERMIT << 24) /* postponed prefix with
 367                                                   * COMPOUNDPERMITFLAG */
 368 #define WF_PFX_COMPFORBID (WFP_COMPFORBID << 24) /* postponed prefix with
 369                                                   * COMPOUNDFORBIDFLAG */
 370
 371
 372 /* bit flags for <compoptions> in the SN_COMPOUND section */
 373 #define COMP_CHECKDUP           1       /* CHECKCOMPOUNDDUP */
 374 #define COMP_CHECKREP           2       /* CHECKCOMPOUNDREP */
 375 #define COMP_CHECKCASE          4       /* CHECKCOMPOUNDCASE */
 376 #define COMP_CHECKTRIPLE        8       /* CHECKCOMPOUNDTRIPLE */
 377
 378 /* Special byte values for <byte>.  Some are only used in the tree for
 379  * postponed prefixes, some only in the other trees.  This is a bit messy... */
 380 #define BY_NOFLAGS      0       /* end of word without flags or region; for
 381                                  * postponed prefix: no <pflags> */
 382 #define BY_INDEX        1       /* child is shared, <nodeidx> and <xbyte> follow */
 383 #define BY_FLAGS        2       /* end of word, <flags> byte follows; for
 384                                  * postponed prefix: <pflags> follows */
 385 #define BY_FLAGS2       3       /* end of word, <flags> and <flags2> bytes
 386                                  * follow; never used in prefix tree */
 387 #define BY_SPECIAL  BY_FLAGS2   /* highest special byte value */
 388
 389 /* Info from "REP", "REPSAL" and "SAL" entries in ".aff" file used in si_rep,
 390  * si_repsal, sl_rep, sl_repsal and si_sal.  Not for sl_sal!
 391  * One replacement: from "ft_from" to "ft_to". */
 392 typedef struct fromto_S
 393 {
 394     char_u      *ft_from;       /* text to be replaced */
 395     char_u      *ft_to;         /* text it is replaced with */
 396 } fromto_T;
 397
 398 /* Info from "SAL" entries in ".aff" file used in sl_sal.
 399  * The info is split for quick processing by spell_soundfold().
 400  * Note that "sm_oneof" and "sm_rules" point into "sm_lead". */
 401 typedef struct salitem_S
 402 {
 403     char_u      *sm_lead;       /* leading letters */
 404     int         sm_leadlen;     /* length of "sm_lead" */
 405     char_u      *sm_oneof;      /* letters from () or NULL */
 406     char_u      *sm_rules;      /* rules like ^, $, priority */
 407     char_u      *sm_to;         /* replacement */
 408 #ifdef FEAT_MBYTE
 409     int         *sm_lead_w;     /* wide character copy of "sm_lead" */
 410     int         *sm_oneof_w;    /* wide character copy of "sm_oneof" */
 411     int         *sm_to_w;       /* wide character copy of "sm_to" */
 412 #endif /* FEAT_MBYTE */
 413 } salitem_T;
 414 /* salfirst_T is the element type of "sl_sal_first[]" in slang_T. */
 415 #ifdef FEAT_MBYTE
 416 typedef int salfirst_T;
 417 #else
 418 typedef short salfirst_T;
 419 #endif
 420
 421 /* Values for SP_*ERROR are negative, positive values are used by
 422  * read_cnt_string(). */
 423 #define SP_TRUNCERROR   -1      /* spell file truncated error */
 424 #define SP_FORMERROR    -2      /* format error in spell file */
 425 #define SP_OTHERERROR   -3      /* other error while reading spell file */
 426
 427 /*
 428  * Structure used to store words and other info for one language, loaded from
 429  * a .spl file.
 430  * The main access is through the tree in "sl_fbyts/sl_fidxs", storing the
 431  * case-folded words.  "sl_kbyts/sl_kidxs" is for keep-case words.
 432  *
 433  * The "byts" array stores the possible bytes in each tree node, preceded by
 434  * the number of possible bytes, sorted on byte value:
 435  *      <len> <byte1> <byte2> ...
 436  * The "idxs" array stores the index of the child node corresponding to the
 437  * byte in "byts".
 438  * Exception: when the byte is zero, the word may end here and "idxs" holds
 439  * the flags, region mask and affixID for the word.  There may be several
 440  * zeros in sequence for alternative flag/region/affixID combinations.
 441  */
 442 typedef struct slang_S slang_T;
 443 struct slang_S
 444 {
 445     slang_T     *sl_next;       /* next language */
 446     char_u      *sl_name;       /* language name "en", "en.rare", "nl", etc. */
 447     char_u      *sl_fname;      /* name of .spl file */
 448     int         sl_add;         /* TRUE if it's a .add file. */
 449
 450     char_u      *sl_fbyts;      /* case-folded word bytes */
 451     idx_T       *sl_fidxs;      /* case-folded word indexes */
 452     char_u      *sl_kbyts;      /* keep-case word bytes */
 453     idx_T       *sl_kidxs;      /* keep-case word indexes */
 454     char_u      *sl_pbyts;      /* prefix tree word bytes */
 455     idx_T       *sl_pidxs;      /* prefix tree word indexes */
 456
 457     char_u      *sl_info;       /* infotext string or NULL */
 458
 459     char_u      sl_regions[17]; /* up to 8 two-byte region names plus NUL */
 460
 461     char_u      *sl_midword;    /* MIDWORD string or NULL */
 462
 463     hashtab_T   sl_wordcount;   /* hashtable with word count, wordcount_T */
 464
 465     int         sl_compmax;     /* COMPOUNDWORDMAX (default: MAXWLEN) */
 466     int         sl_compminlen;  /* COMPOUNDMIN (default: 0) */
 467     int         sl_compsylmax;  /* COMPOUNDSYLMAX (default: MAXWLEN) */
 468     int         sl_compoptions; /* COMP_* flags */
 469     garray_T    sl_comppat;     /* CHECKCOMPOUNDPATTERN items */
 470     regprog_T   *sl_compprog;   /* COMPOUNDRULE turned into a regexp program
 471                                  * (NULL when no compounding) */
 472     char_u      *sl_compstartflags; /* flags for first compound word */
 473     char_u      *sl_compallflags; /* all flags for compound words */
 474     char_u      sl_nobreak;     /* When TRUE: no spaces between words */
 475     char_u      *sl_syllable;   /* SYLLABLE repeatable chars or NULL */
 476     garray_T    sl_syl_items;   /* syllable items */
 477
 478     int         sl_prefixcnt;   /* number of items in "sl_prefprog" */
 479     regprog_T   **sl_prefprog;  /* table with regprogs for prefixes */
 480
 481     garray_T    sl_rep;         /* list of fromto_T entries from REP lines */
 482     short       sl_rep_first[256];  /* indexes where byte first appears, -1 if
 483                                        there is none */
 484     garray_T    sl_sal;         /* list of salitem_T entries from SAL lines */
 485     salfirst_T  sl_sal_first[256];  /* indexes where byte first appears, -1 if
 486                                        there is none */
 487     int         sl_followup;    /* SAL followup */
 488     int         sl_collapse;    /* SAL collapse_result */
 489     int         sl_rem_accents; /* SAL remove_accents */
 490     int         sl_sofo;        /* SOFOFROM and SOFOTO instead of SAL items:
 491                                  * "sl_sal_first" maps chars, when has_mbyte
 492                                  * "sl_sal" is a list of wide char lists. */
 493     garray_T    sl_repsal;      /* list of fromto_T entries from REPSAL lines */
 494     short       sl_repsal_first[256];  /* like sl_rep_first, for REPSAL lines */
 495     int         sl_nosplitsugs; /* don't suggest splitting a word */
 496
 497     /* Info from the .sug file.  Loaded on demand. */
 498     time_t      sl_sugtime;     /* timestamp for .sug file */
 499     char_u      *sl_sbyts;      /* soundfolded word bytes */
 500     idx_T       *sl_sidxs;      /* soundfolded word indexes */
 501     buf_T       *sl_sugbuf;     /* buffer with word number table */
 502     int         sl_sugloaded;   /* TRUE when .sug file was loaded or failed to
 503                                    load */
 504
 505     int         sl_has_map;     /* TRUE if there is a MAP line */
 506 #ifdef FEAT_MBYTE
 507     hashtab_T   sl_map_hash;    /* MAP for multi-byte chars */
 508     int         sl_map_array[256]; /* MAP for first 256 chars */
 509 #else
 510     char_u      sl_map_array[256]; /* MAP for first 256 chars */
 511 #endif
 512     hashtab_T   sl_sounddone;   /* table with soundfolded words that have
 513                                    been handled, see add_sound_suggest() */
 514 };
 515
 516 /* First language that is loaded, start of the linked list of loaded
 517  * languages, which are chained through "sl_next". */
 518 static slang_T *first_lang = NULL;
 519
 520 /* Bit values for <salflags> in the SN_SAL section of the .spl file. */
 521 #define SAL_F0LLOWUP            1       /* sic: spelled with a zero */
 522 #define SAL_COLLAPSE            2
 523 #define SAL_REM_ACCENTS         4
 524
 525 /*
 526  * Structure used in "b_langp", filled from 'spelllang'.
 527  */
 528 typedef struct langp_S
 529 {
 530     slang_T     *lp_slang;      /* info for this language */
 531     slang_T     *lp_sallang;    /* language used for sound folding or NULL */
 532     slang_T     *lp_replang;    /* language used for REP items or NULL */
 533     int         lp_region;      /* bitmask for region or REGION_ALL */
 534 } langp_T;
 535 /* Pointer to item "i" of growarray "ga", which holds langp_T items. */
 536 #define LANGP_ENTRY(ga, i)      (((langp_T *)(ga).ga_data) + (i))
 537
 538 #define REGION_ALL 0xff         /* word valid in all regions */
 539
 540 #define VIMSPELLMAGIC "VIMspell"  /* string at start of Vim spell file */
 541 #define VIMSPELLMAGICL 8          /* length of VIMSPELLMAGIC in bytes */
 542 #define VIMSPELLVERSION 50        /* <versionnr> of the .spl file format */
 543
 544 #define VIMSUGMAGIC "VIMsug"    /* string at start of Vim .sug file */
 545 #define VIMSUGMAGICL 6          /* length of VIMSUGMAGIC in bytes */
 546 #define VIMSUGVERSION 1         /* <versionnr> of the .sug file format */
 547
 548 /* Section IDs (<sectionID>).  Only renumber them when VIMSPELLVERSION changes! */
 549 #define SN_REGION       0       /* <regionname> section */
 550 #define SN_CHARFLAGS    1       /* charflags section */
 551 #define SN_MIDWORD      2       /* <midword> section */
 552 #define SN_PREFCOND     3       /* <prefcond> section */
 553 #define SN_REP          4       /* REP items section */
 554 #define SN_SAL          5       /* SAL items section */
 555 #define SN_SOFO         6       /* soundfolding section */
 556 #define SN_MAP          7       /* MAP items section */
 557 #define SN_COMPOUND     8       /* compound words section */
 558 #define SN_SYLLABLE     9       /* syllable section */
 559 #define SN_NOBREAK      10      /* NOBREAK section */
 560 #define SN_SUGFILE      11      /* timestamp for .sug file */
 561 #define SN_REPSAL       12      /* REPSAL items section */
 562 #define SN_WORDS        13      /* common words */
 563 #define SN_NOSPLITSUGS  14      /* don't split word for suggestions */
 564 #define SN_INFO         15      /* info section */
 565 #define SN_END          255     /* <sectionend>: end of sections */
 566
 567 #define SNF_REQUIRED    1       /* <sectionflags>: required section */
 568
 569 /* Result values.  Lower number is accepted over higher one. */
 570 #define SP_BANNED       (-1)    /* parenthesized: safe in any expression */
 571 #define SP_OK           0
 572 #define SP_RARE         1
 573 #define SP_LOCAL        2
 574 #define SP_BAD          3
 575
 576 /* file used for "zG" and "zW" */
 577 static char_u   *int_wordlist = NULL;
 578 /* Item stored in the "sl_wordcount" hashtable of slang_T. */
 579 typedef struct wordcount_S
 580 {
 581     short_u     wc_count;           /* nr of times word was seen */
 582     char_u      wc_word[1];         /* word, actually longer */
 583 } wordcount_T;
 584 /* "dumwc" is used to compute WC_KEY_OFF, the offset of wc_word in wordcount_T. */
 585 static wordcount_T dumwc;
 586 #define WC_KEY_OFF  (unsigned)(dumwc.wc_word - (char_u *)&dumwc)
 587 #define HI2WC(hi)     ((wordcount_T *)((hi)->hi_key - WC_KEY_OFF)) /* hash item to wordcount_T */
 588 #define MAXWORDCOUNT 0xffff /* presumably the upper limit for wc_count - confirm */
 589
 590 /*
 591  * Information used when looking for suggestions.
 592  */
 593 typedef struct suginfo_S
 594 {
 595     garray_T    su_ga;              /* suggestions, contains "suggest_T" */
 596     int         su_maxcount;        /* max. number of suggestions displayed */
 597     int         su_maxscore;        /* maximum score for adding to su_ga */
 598     int         su_sfmaxscore;      /* like su_maxscore, for soundfolded words */
 599     garray_T    su_sga;             /* like su_ga, sound-folded scoring */
 600     char_u      *su_badptr;         /* start of bad word in line */
 601     int         su_badlen;          /* length of detected bad word in line */
 602     int         su_badflags;        /* caps flags for bad word (WF_* values) */
 603     char_u      su_badword[MAXWLEN]; /* bad word truncated at su_badlen */
 604     char_u      su_fbadword[MAXWLEN]; /* su_badword case-folded */
 605     char_u      su_sal_badword[MAXWLEN]; /* su_badword soundfolded */
 606     hashtab_T   su_banned;          /* table with banned words, see WAS_BANNED() */
 607     slang_T     *su_sallang;        /* default language for sound folding */
 608 } suginfo_T;
 609
 610 /* One word suggestion.  Used in "su_ga" and "su_sga". */
 611 typedef struct suggest_S
 612 {
 613     char_u      *st_word;       /* suggested word, allocated string */
 614     int         st_wordlen;     /* STRLEN(st_word) */
 615     int         st_orglen;      /* length of replaced text */
 616     int         st_score;       /* lower is better */
 617     int         st_altscore;    /* used when st_score compares equal */
 618     int         st_salscore;    /* st_score is for soundalike */
 619     int         st_had_bonus;   /* bonus already included in score */
 620     slang_T     *st_slang;      /* language used for sound folding */
 621 } suggest_T;
 622 /* Item "i" of growarray "ga", accessed as a suggest_T. */
 623 #define SUG(ga, i) (((suggest_T *)(ga).ga_data)[i])
 624
 625 /* TRUE if "word" is found in the "su_banned" hashtable of suginfo_T "su". */
 626 #define WAS_BANNED(su, word) (!HASHITEM_EMPTY(hash_find(&(su)->su_banned, (word))))
 627
 628 /* Number of suggestions kept when cleaning up: 150, or su_maxcount + 20 when
 629  * su_maxcount is 130 or more.  More is kept than what is displayed, because
 630  * rescore_suggestions() may change scores and wrong ones are removed later. */
 631 #define SUG_CLEAN_COUNT(su)    ((su)->su_maxcount < 130 ? 150 : (su)->su_maxcount + 20)
 632
 633 /* Threshold for sorting and cleaning up suggestions: 50 above the clean count.
 634  * Don't want to keep lots of suggestions that are not going to be displayed. */
 635 #define SUG_MAX_COUNT(su)       (SUG_CLEAN_COUNT(su) + 50)
 636
 637 /* scores for various changes; a lower score is a better suggestion */
 638 #define SCORE_SPLIT     149     /* split bad word */
 639 #define SCORE_SPLIT_NO  249     /* split bad word with NOSPLITSUGS */
 640 #define SCORE_ICASE     52      /* slightly different case */
 641 #define SCORE_REGION    200     /* word is for different region */
 642 #define SCORE_RARE      180     /* rare word */
 643 #define SCORE_SWAP      75      /* swap two characters */
 644 #define SCORE_SWAP3     110     /* swap two characters in three */
 645 #define SCORE_REP       65      /* REP replacement */
 646 #define SCORE_SUBST     93      /* substitute a character */
 647 #define SCORE_SIMILAR   33      /* substitute a similar character */
 648 #define SCORE_SUBCOMP   33      /* substitute a composing character */
 649 #define SCORE_DEL       94      /* delete a character */
 650 #define SCORE_DELDUP    66      /* delete a duplicated character */
 651 #define SCORE_DELCOMP   28      /* delete a composing character */
 652 #define SCORE_INS       96      /* insert a character */
 653 #define SCORE_INSDUP    67      /* insert a duplicate character */
 654 #define SCORE_INSCOMP   30      /* insert a composing character */
 655 #define SCORE_NONWORD   103     /* change non-word to word char */
 656
 657 #define SCORE_FILE      30      /* suggestion from a file */
 658 #define SCORE_MAXINIT   350     /* Initial maximum score: higher == slower.
 659                                  * 350 allows for about three changes. */
 660
 661 #define SCORE_COMMON1   30      /* subtracted for words seen before */
 662 #define SCORE_COMMON2   40      /* subtracted for words often seen */
 663 #define SCORE_COMMON3   50      /* subtracted for words very often seen */
 664 #define SCORE_THRES2    10      /* word count threshold for COMMON2 */
 665 #define SCORE_THRES3    100     /* word count threshold for COMMON3 */
 666
 667 /* When trying changed soundfold words it becomes slow when trying more than
 668  * two changes.  With fewer than two changes it's slightly faster but we miss a
 669  * few good suggestions.  In rare cases we need to try three or four changes.
 670  */
 671 #define SCORE_SFMAX1    200     /* maximum score for first try */
 672 #define SCORE_SFMAX2    300     /* maximum score for second try */
 673 #define SCORE_SFMAX3    400     /* maximum score for third try */
 674
 675 #define SCORE_BIG       SCORE_INS * 3   /* big difference */
 676 #define SCORE_MAXMAX    999999          /* accept any score */
 677 #define SCORE_LIMITMAX  350             /* for spell_edit_score_limit() */
 678
 679 /* for spell_edit_score_limit() we need to know the minimum value of
 680  * SCORE_ICASE, SCORE_SWAP, SCORE_DEL, SCORE_SIMILAR and SCORE_INS */
 681 #define SCORE_EDIT_MIN  SCORE_SIMILAR
 682
 683 /*
 684  * Structure to store info for word matching.
 685  */
 686 typedef struct matchinf_S
 687 {
 688     langp_T     *mi_lp;                 /* info for language and region */
 689
 690     /* pointers to original text to be checked */
 691     char_u      *mi_word;               /* start of word being checked */
 692     char_u      *mi_end;                /* end of matching word so far */
 693     char_u      *mi_fend;               /* next char to be added to mi_fword */
 694     char_u      *mi_cend;               /* char after what was used for
 695                                            mi_capflags */
 696
 697     /* case-folded text */
 698     char_u      mi_fword[MAXWLEN + 1];  /* mi_word case-folded */
 699     int         mi_fwordlen;            /* nr of valid bytes in mi_fword */
 700
 701     /* for when checking word after a prefix */
 702     int         mi_prefarridx;          /* index in sl_pidxs with list of
 703                                            affixID/condition */
 704     int         mi_prefcnt;             /* number of entries at mi_prefarridx */
 705     int         mi_prefixlen;           /* byte length of prefix */
 706 #ifdef FEAT_MBYTE
 707     int         mi_cprefixlen;          /* byte length of prefix in original
 708                                            case */
 709 #else
 710 # define mi_cprefixlen mi_prefixlen     /* it's the same value */
 711 #endif
 712
 713     /* for when checking a compound word */
 714     int         mi_compoff;             /* start of following word offset */
 715     char_u      mi_compflags[MAXWLEN];  /* flags for compound words used */
 716     int         mi_complen;             /* nr of compound words used */
 717     int         mi_compextra;           /* nr of COMPOUNDROOT words */
 718
 719     /* others */
 720     int         mi_result;              /* result so far: SP_BAD, SP_OK, etc. */
 721     int         mi_capflags;            /* WF_ONECAP WF_ALLCAP WF_KEEPCAP */
 722     buf_T       *mi_buf;                /* buffer being checked */
 723
 724     /* for NOBREAK */
 725     int         mi_result2;             /* "mi_resul" without following word */
 726     char_u      *mi_end2;               /* "mi_end" without following word */
 727 } matchinf_T;
 728
 729 /*
 730  * The tables used for recognizing word characters according to spelling.
 731  * These are only used for the first 256 characters of 'encoding'.
 732  */
 733 typedef struct spelltab_S
 734 {
 735     char_u  st_isw[256];        /* flags: is word char */
 736     char_u  st_isu[256];        /* flags: is uppercase char */
 737     char_u  st_fold[256];       /* chars: folded case */
 738     char_u  st_upper[256];      /* chars: upper case */
 739 } spelltab_T;
 740
 741 static spelltab_T   spelltab;
 742 static int          did_set_spelltab;
 743
 744 #define CF_WORD         0x01
 745 #define CF_UPPER        0x02
 746
 747 static void clear_spell_chartab __ARGS((spelltab_T *sp));
 748 static int set_spell_finish __ARGS((spelltab_T  *new_st));
 749 static int spell_iswordp __ARGS((char_u *p, buf_T *buf));
 750 static int spell_iswordp_nmw __ARGS((char_u *p));
 751 #ifdef FEAT_MBYTE
 752 static int spell_mb_isword_class __ARGS((int cl));
 753 static int spell_iswordp_w __ARGS((int *p, buf_T *buf));
 754 #endif
 755 static int write_spell_prefcond __ARGS((FILE *fd, garray_T *gap));
 756
 757 /*
 758  * For finding suggestions: At each node in the tree these states are tried:
 759  */
 760 typedef enum
 761 {
 762     STATE_START = 0,    /* At start of node check for NUL bytes (goodword
 763                          * ends); if badword ends there is a match, otherwise
 764                          * try splitting word. */
 765     STATE_NOPREFIX,     /* try without prefix */
 766     STATE_SPLITUNDO,    /* Undo splitting. */
 767     STATE_ENDNUL,       /* Past NUL bytes at start of the node. */
 768     STATE_PLAIN,        /* Use each byte of the node. */
 769     STATE_DEL,          /* Delete a byte from the bad word. */
 770     STATE_INS_PREP,     /* Prepare for inserting bytes. */
 771     STATE_INS,          /* Insert a byte in the bad word. */
 772     STATE_SWAP,         /* Swap two bytes. */
 773     STATE_UNSWAP,       /* Undo swap two characters. */
 774     STATE_SWAP3,        /* Swap two characters over three. */
 775     STATE_UNSWAP3,      /* Undo Swap two characters over three. */
 776     STATE_UNROT3L,      /* Undo rotate three characters left */
 777     STATE_UNROT3R,      /* Undo rotate three characters right */
 778     STATE_REP_INI,      /* Prepare for using REP items. */
 779     STATE_REP,          /* Use matching REP items from the .aff file. */
 780     STATE_REP_UNDO,     /* Undo a REP item replacement. */
 781     STATE_FINAL         /* End of this node. */
 782 } state_T;
 783
 784 /*
 785  * Struct to keep the state at each level in suggest_try_change().
 786  */
 787 typedef struct trystate_S
 788 {
 789     state_T     ts_state;       /* state at this level, STATE_ */
 790     int         ts_score;       /* score */
 791     idx_T       ts_arridx;      /* index in tree array, start of node */
 792     short       ts_curi;        /* index in list of child nodes */
 793     char_u      ts_fidx;        /* index in fword[], case-folded bad word */
 794     char_u      ts_fidxtry;     /* ts_fidx at which bytes may be changed */
 795     char_u      ts_twordlen;    /* valid length of tword[] */
 796     char_u      ts_prefixdepth; /* stack depth for end of prefix or
 797                                  * PFD_PREFIXTREE or PFD_NOPREFIX */
 798     char_u      ts_flags;       /* TSF_ flags */
 799 #ifdef FEAT_MBYTE
 800     char_u      ts_tcharlen;    /* number of bytes in tword character */
 801     char_u      ts_tcharidx;    /* current byte index in tword character */
 802     char_u      ts_isdiff;      /* DIFF_ values */
 803     char_u      ts_fcharstart;  /* index in fword where badword char started */
 804 #endif
 805     char_u      ts_prewordlen;  /* length of word in "preword[]" */
 806     char_u      ts_splitoff;    /* index in "tword" after last split */
 807     char_u      ts_splitfidx;   /* "ts_fidx" at word split */
 808     char_u      ts_complen;     /* nr of compound words used */
 809     char_u      ts_compsplit;   /* index for "compflags" where word was spit */
 810     char_u      ts_save_badflags;   /* su_badflags saved here */
 811     char_u      ts_delidx;      /* index in fword for char that was deleted,
 812                                    valid when "ts_flags" has TSF_DIDDEL */
 813 } trystate_T;
 814
 815 /* values for ts_isdiff */
 816 #define DIFF_NONE       0       /* no different byte (yet) */
 817 #define DIFF_YES        1       /* different byte found */
 818 #define DIFF_INSERT     2       /* inserting character */
 819
 820 /* values for ts_flags */
 821 #define TSF_PREFIXOK    1       /* already checked that prefix is OK */
 822 #define TSF_DIDSPLIT    2       /* tried split at this point */
 823 #define TSF_DIDDEL      4       /* did a delete, "ts_delidx" has index */
 824
 825 /* special values ts_prefixdepth */
 826 #define PFD_NOPREFIX    0xff    /* not using prefixes */
 827 #define PFD_PREFIXTREE  0xfe    /* walking through the prefix tree */
 828 #define PFD_NOTSPECIAL  0xfd    /* highest value that's not special */
 829
 830 /* mode values for find_word */
 831 #define FIND_FOLDWORD       0   /* find word case-folded */
 832 #define FIND_KEEPWORD       1   /* find keep-case word */
 833 #define FIND_PREFIX         2   /* find word after prefix */
 834 #define FIND_COMPOUND       3   /* find case-folded compound word */
 835 #define FIND_KEEPCOMPOUND   4   /* find keep-case compound word */
 836
 837 static slang_T *slang_alloc __ARGS((char_u *lang));
 838 static void slang_free __ARGS((slang_T *lp));
 839 static void slang_clear __ARGS((slang_T *lp));
 840 static void slang_clear_sug __ARGS((slang_T *lp));
 841 static void find_word __ARGS((matchinf_T *mip, int mode));
 842 static int can_compound __ARGS((slang_T *slang, char_u *word, char_u *flags));
 843 static int valid_word_prefix __ARGS((int totprefcnt, int arridx, int flags, char_u *word, slang_T *slang, int cond_req));
 844 static void find_prefix __ARGS((matchinf_T *mip, int mode));
 845 static int fold_more __ARGS((matchinf_T *mip));
 846 static int spell_valid_case __ARGS((int wordflags, int treeflags));
 847 static int no_spell_checking __ARGS((win_T *wp));
 848 static void spell_load_lang __ARGS((char_u *lang));
 849 static char_u *spell_enc __ARGS((void));
 850 static void int_wordlist_spl __ARGS((char_u *fname));
 851 static void spell_load_cb __ARGS((char_u *fname, void *cookie));
 852 static slang_T *spell_load_file __ARGS((char_u *fname, char_u *lang, slang_T *old_lp, int silent));
 853 static int get2c __ARGS((FILE *fd));
 854 static int get3c __ARGS((FILE *fd));
 855 static int get4c __ARGS((FILE *fd));
 856 static time_t get8c __ARGS((FILE *fd));
 857 static char_u *read_cnt_string __ARGS((FILE *fd, int cnt_bytes, int *lenp));
 858 static char_u *read_string __ARGS((FILE *fd, int cnt));
 859 static int read_region_section __ARGS((FILE *fd, slang_T *slang, int len));
 860 static int read_charflags_section __ARGS((FILE *fd));
 861 static int read_prefcond_section __ARGS((FILE *fd, slang_T *lp));
 862 static int read_rep_section __ARGS((FILE *fd, garray_T *gap, short *first));
 863 static int read_sal_section __ARGS((FILE *fd, slang_T *slang));
 864 static int read_words_section __ARGS((FILE *fd, slang_T *lp, int len));
 865 static void count_common_word __ARGS((slang_T *lp, char_u *word, int len, int count));
 866 static int score_wordcount_adj __ARGS((slang_T *slang, int score, char_u *word, int split));
 867 static int read_sofo_section __ARGS((FILE *fd, slang_T *slang));
 868 static int read_compound __ARGS((FILE *fd, slang_T *slang, int len));
 869 static int byte_in_str __ARGS((char_u *str, int byte));
 870 static int init_syl_tab __ARGS((slang_T *slang));
 871 static int count_syllables __ARGS((slang_T *slang, char_u *word));
 872 static int set_sofo __ARGS((slang_T *lp, char_u *from, char_u *to));
 873 static void set_sal_first __ARGS((slang_T *lp));
 874 #ifdef FEAT_MBYTE
 875 static int *mb_str2wide __ARGS((char_u *s));
 876 #endif
 877 static int spell_read_tree __ARGS((FILE *fd, char_u **bytsp, idx_T **idxsp, int prefixtree, int prefixcnt));
 878 static idx_T read_tree_node __ARGS((FILE *fd, char_u *byts, idx_T *idxs, int maxidx, int startidx, int prefixtree, int maxprefcondnr));
 879 static void clear_midword __ARGS((buf_T *buf));
 880 static void use_midword __ARGS((slang_T *lp, buf_T *buf));
 881 static int find_region __ARGS((char_u *rp, char_u *region));
 882 static int captype __ARGS((char_u *word, char_u *end));
 883 static int badword_captype __ARGS((char_u *word, char_u *end));
 884 static void spell_reload_one __ARGS((char_u *fname, int added_word));
 885 static void set_spell_charflags __ARGS((char_u *flags, int cnt, char_u *upp));
 886 static int set_spell_chartab __ARGS((char_u *fol, char_u *low, char_u *upp));
 887 static int spell_casefold __ARGS((char_u *p, int len, char_u *buf, int buflen));
 888 static int check_need_cap __ARGS((linenr_T lnum, colnr_T col));
 889 static void spell_find_suggest __ARGS((char_u *badptr, int badlen, suginfo_T *su, int maxcount, int banbadword, int need_cap, int interactive));
 890 #ifdef FEAT_EVAL
 891 static void spell_suggest_expr __ARGS((suginfo_T *su, char_u *expr));
 892 #endif
 893 static void spell_suggest_file __ARGS((suginfo_T *su, char_u *fname));
 894 static void spell_suggest_intern __ARGS((suginfo_T *su, int interactive));
 895 static void suggest_load_files __ARGS((void));
 896 static void tree_count_words __ARGS((char_u *byts, idx_T *idxs));
 897 static void spell_find_cleanup __ARGS((suginfo_T *su));
 898 static void onecap_copy __ARGS((char_u *word, char_u *wcopy, int upper));
 899 static void allcap_copy __ARGS((char_u *word, char_u *wcopy));
 900 static void suggest_try_special __ARGS((suginfo_T *su));
 901 static void suggest_try_change __ARGS((suginfo_T *su));
 902 static void suggest_trie_walk __ARGS((suginfo_T *su, langp_T *lp, char_u *fword, int soundfold));
 903 static void go_deeper __ARGS((trystate_T *stack, int depth, int score_add));
 904 #ifdef FEAT_MBYTE
 905 static int nofold_len __ARGS((char_u *fword, int flen, char_u *word));
 906 #endif
 907 static void find_keepcap_word __ARGS((slang_T *slang, char_u *fword, char_u *kword));
 908 static void score_comp_sal __ARGS((suginfo_T *su));
 909 static void score_combine __ARGS((suginfo_T *su));
 910 static int stp_sal_score __ARGS((suggest_T *stp, suginfo_T *su, slang_T *slang, char_u *badsound));
 911 static void suggest_try_soundalike_prep __ARGS((void));
 912 static void suggest_try_soundalike __ARGS((suginfo_T *su));
 913 static void suggest_try_soundalike_finish __ARGS((void));
 914 static void add_sound_suggest __ARGS((suginfo_T *su, char_u *goodword, int score, langp_T *lp));
 915 static int soundfold_find __ARGS((slang_T *slang, char_u *word));
 916 static void make_case_word __ARGS((char_u *fword, char_u *cword, int flags));
 917 static void set_map_str __ARGS((slang_T *lp, char_u *map));
 918 static int similar_chars __ARGS((slang_T *slang, int c1, int c2));
 919 static void add_suggestion __ARGS((suginfo_T *su, garray_T *gap, char_u *goodword, int badlen, int score, int altscore, int had_bonus, slang_T *slang, int maxsf));
 920 static void check_suggestions __ARGS((suginfo_T *su, garray_T *gap));
 921 static void add_banned __ARGS((suginfo_T *su, char_u *word));
 922 static void rescore_suggestions __ARGS((suginfo_T *su));
 923 static void rescore_one __ARGS((suginfo_T *su, suggest_T *stp));
 924 static int cleanup_suggestions __ARGS((garray_T *gap, int maxscore, int keep));
 925 static void spell_soundfold __ARGS((slang_T *slang, char_u *inword, int folded, char_u *res));
 926 static void spell_soundfold_sofo __ARGS((slang_T *slang, char_u *inword, char_u *res));
 927 static void spell_soundfold_sal __ARGS((slang_T *slang, char_u *inword, char_u *res));
 928 #ifdef FEAT_MBYTE
 929 static void spell_soundfold_wsal __ARGS((slang_T *slang, char_u *inword, char_u *res));
 930 #endif
 931 static int soundalike_score __ARGS((char_u *goodsound, char_u *badsound));
 932 static int spell_edit_score __ARGS((slang_T *slang, char_u *badword, char_u *goodword));
 933 static int spell_edit_score_limit __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit));
 934 #ifdef FEAT_MBYTE
 935 static int spell_edit_score_limit_w __ARGS((slang_T *slang, char_u *badword, char_u *goodword, int limit));
 936 #endif
 937 static void dump_word __ARGS((slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T lnum));
 938 static linenr_T dump_prefixes __ARGS((slang_T *slang, char_u *word, char_u *pat, int *dir, int round, int flags, linenr_T startlnum));
 939 static buf_T *open_spellbuf __ARGS((void));
 940 static void close_spellbuf __ARGS((buf_T *buf));
 941
 942 /*
 943  * Use our own character-case definitions, because the current locale may
 944  * differ from what the .spl file uses.
 945  * These must not be called with negative number!
 946  */
 947 #ifndef FEAT_MBYTE
 948 /* Non-multi-byte implementation. */
 949 # define SPELL_TOFOLD(c) ((c) < 256 ? spelltab.st_fold[c] : (c))
 950 # define SPELL_TOUPPER(c) ((c) < 256 ? spelltab.st_upper[c] : (c))
 951 # define SPELL_ISUPPER(c) ((c) < 256 ? spelltab.st_isu[c] : FALSE)
 952 #else
 953 # if defined(HAVE_WCHAR_H)
 954 #  include <wchar.h>        /* for towupper() and towlower() */
 955 # endif
 956 /* Multi-byte implementation.  For Unicode we can call utf_*(), but don't do
 957  * that for ASCII, because we don't want to use 'casemap' here.  Otherwise use
 958  * the "w" library function for characters above 255 if available. */
 959 # ifdef HAVE_TOWLOWER
 960 #  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
 961             : (c) < 256 ? spelltab.st_fold[c] : towlower(c))
 962 # else
 963 #  define SPELL_TOFOLD(c) (enc_utf8 && (c) >= 128 ? utf_fold(c) \
 964             : (c) < 256 ? spelltab.st_fold[c] : (c))
 965 # endif
 966
 967 # ifdef HAVE_TOWUPPER
 968 #  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
 969             : (c) < 256 ? spelltab.st_upper[c] : towupper(c))
 970 # else
 971 #  define SPELL_TOUPPER(c) (enc_utf8 && (c) >= 128 ? utf_toupper(c) \
 972             : (c) < 256 ? spelltab.st_upper[c] : (c))
 973 # endif
 974
 975 # ifdef HAVE_ISWUPPER
 976 #  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
 977             : (c) < 256 ? spelltab.st_isu[c] : iswupper(c))
 978 # else
 979 #  define SPELL_ISUPPER(c) (enc_utf8 && (c) >= 128 ? utf_isupper(c) \
 980             : (c) < 256 ? spelltab.st_isu[c] : (FALSE))
 981 # endif
 982 #endif
 983
 984
 985 static char *e_format = N_("E759: Format error in spell file");
 986 static char *e_spell_trunc = N_("E758: Truncated spell file");
 987 static char *e_afftrailing = N_("Trailing text in %s line %d: %s");
 988 static char *e_affname = N_("Affix name too long in %s line %d: %s");
 989 static char *e_affform = N_("E761: Format error in affix file FOL, LOW or UPP");
 990 static char *e_affrange = N_("E762: Character in FOL, LOW or UPP is out of range");
 991 static char *msg_compressing = N_("Compressing word tree...");
 992
 993 /* Remember what "z?" replaced. */
 994 static char_u   *repl_from = NULL;
 995 static char_u   *repl_to = NULL;
 996
 997 /*
 998  * Main spell-checking function.
 999  * "ptr" points to a character that could be the start of a word.
1000  * "*attrp" is set to the highlight index for a badly spelled word.  For a
1001  * non-word or when it's OK it remains unchanged.
1002  * This must only be called when 'spelllang' is not empty.
1003  *
1004  * "capcol" is used to check for a Capitalised word after the end of a
1005  * sentence.  If it's zero then perform the check.  Return the column where to
1006  * check next, or -1 when no sentence end was found.  If it's NULL then don't
1007  * worry.
1008  *
1009  * Returns the length of the word in bytes, also when it's OK, so that the
1010  * caller can skip over the word.
1011  */
1012     int
1013 spell_check(wp, ptr, attrp, capcol, docount)
1014     win_T       *wp;            /* current window */
1015     char_u      *ptr;
1016     hlf_T       *attrp;
1017     int         *capcol;        /* column to check for Capital */
1018     int         docount;        /* count good words */
1019 {
1020     matchinf_T  mi;             /* Most things are put in "mi" so that it can
1021                                    be passed to functions quickly. */
1022     int         nrlen = 0;      /* found a number first */
1023     int         c;
1024     int         wrongcaplen = 0;
1025     int         lpi;
1026     int         count_word = docount;
1027
1028     /* A word never starts at a space or a control character.  Return quickly
1029      * then, skipping over the character. */
1030     if (*ptr <= ' ')
1031         return 1;
1032
1033     /* Return here when loading language files failed. */
1034     if (wp->w_buffer->b_langp.ga_len == 0)
1035         return 1;
1036
1037     vim_memset(&mi, 0, sizeof(matchinf_T));
1038
1039     /* A number is always OK.  Also skip hexadecimal numbers 0xFF99 and
1040      * 0X99FF.  But always do check spelling to find "3GPP" and "11
1041      * julifeest". */
1042     if (*ptr >= '0' && *ptr <= '9')
1043     {
1044         if (*ptr == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
1045             mi.mi_end = skiphex(ptr + 2);
1046         else
1047             mi.mi_end = skipdigits(ptr);
1048         nrlen = (int)(mi.mi_end - ptr);
1049     }
1050
1051     /* Find the normal end of the word (until the next non-word character). */
1052     mi.mi_word = ptr;
1053     mi.mi_fend = ptr;
1054     if (spell_iswordp(mi.mi_fend, wp->w_buffer))
1055     {
1056         do
1057         {
1058             mb_ptr_adv(mi.mi_fend);
1059         } while (*mi.mi_fend != NUL && spell_iswordp(mi.mi_fend, wp->w_buffer));
1060
1061         if (capcol != NULL && *capcol == 0 && wp->w_buffer->b_cap_prog != NULL)
1062         {
1063             /* Check word starting with capital letter. */
1064             c = PTR2CHAR(ptr);
1065             if (!SPELL_ISUPPER(c))
1066                 wrongcaplen = (int)(mi.mi_fend - ptr);
1067         }
1068     }
1069     if (capcol != NULL)
1070         *capcol = -1;
1071
1072     /* We always use the characters up to the next non-word character,
1073      * also for bad words. */
1074     mi.mi_end = mi.mi_fend;
1075
1076     /* Check caps type later. */
1077     mi.mi_buf = wp->w_buffer;
1078
1079     /* case-fold the word with one non-word character, so that we can check
1080      * for the word end. */
1081     if (*mi.mi_fend != NUL)
1082         mb_ptr_adv(mi.mi_fend);
1083
1084     (void)spell_casefold(ptr, (int)(mi.mi_fend - ptr), mi.mi_fword,
1085                                                              MAXWLEN + 1);
1086     mi.mi_fwordlen = (int)STRLEN(mi.mi_fword);
1087
1088     /* The word is bad unless we recognize it. */
1089     mi.mi_result = SP_BAD;
1090     mi.mi_result2 = SP_BAD;
1091
1092     /*
1093      * Loop over the languages specified in 'spelllang'.
1094      * We check them all, because a word may be matched longer in another
1095      * language.
1096      */
1097     for (lpi = 0; lpi < wp->w_buffer->b_langp.ga_len; ++lpi)
1098     {
1099         mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, lpi);
1100
1101         /* If reloading fails the language is still in the list but everything
1102          * has been cleared. */
1103         if (mi.mi_lp->lp_slang->sl_fidxs == NULL)
1104             continue;
1105
1106         /* Check for a matching word in case-folded words. */
1107         find_word(&mi, FIND_FOLDWORD);
1108
1109         /* Check for a matching word in keep-case words. */
1110         find_word(&mi, FIND_KEEPWORD);
1111
1112         /* Check for matching prefixes. */
1113         find_prefix(&mi, FIND_FOLDWORD);
1114
1115         /* For a NOBREAK language, may want to use a word without a following
1116          * word as a backup. */
1117         if (mi.mi_lp->lp_slang->sl_nobreak && mi.mi_result == SP_BAD
1118                                                    && mi.mi_result2 != SP_BAD)
1119         {
1120             mi.mi_result = mi.mi_result2;
1121             mi.mi_end = mi.mi_end2;
1122         }
1123
1124         /* Count the word in the first language where it's found to be OK. */
1125         if (count_word && mi.mi_result == SP_OK)
1126         {
1127             count_common_word(mi.mi_lp->lp_slang, ptr,
1128                                                    (int)(mi.mi_end - ptr), 1);
1129             count_word = FALSE;
1130         }
1131     }
1132
1133     if (mi.mi_result != SP_OK)
1134     {
1135         /* If we found a number skip over it.  Allows for "42nd".  Do flag
1136          * rare and local words, e.g., "3GPP". */
1137         if (nrlen > 0)
1138         {
1139             if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
1140                 return nrlen;
1141         }
1142
1143         /* When we are at a non-word character there is no error, just
1144          * skip over the character (try looking for a word after it). */
1145         else if (!spell_iswordp_nmw(ptr))
1146         {
1147             if (capcol != NULL && wp->w_buffer->b_cap_prog != NULL)
1148             {
1149                 regmatch_T      regmatch;
1150
1151                 /* Check for end of sentence. */
1152                 regmatch.regprog = wp->w_buffer->b_cap_prog;
1153                 regmatch.rm_ic = FALSE;
1154                 if (vim_regexec(&regmatch, ptr, 0))
1155                     *capcol = (int)(regmatch.endp[0] - ptr);
1156             }
1157
1158 #ifdef FEAT_MBYTE
1159             if (has_mbyte)
1160                 return (*mb_ptr2len)(ptr);
1161 #endif
1162             return 1;
1163         }
1164         else if (mi.mi_end == ptr)
1165             /* Always include at least one character.  Required for when there
1166              * is a mixup in "midword". */
1167             mb_ptr_adv(mi.mi_end);
1168         else if (mi.mi_result == SP_BAD
1169                 && LANGP_ENTRY(wp->w_buffer->b_langp, 0)->lp_slang->sl_nobreak)
1170         {
1171             char_u      *p, *fp;
1172             int         save_result = mi.mi_result;
1173
1174             /* First language in 'spelllang' is NOBREAK.  Find first position
1175              * at which any word would be valid. */
1176             mi.mi_lp = LANGP_ENTRY(wp->w_buffer->b_langp, 0);
1177             if (mi.mi_lp->lp_slang->sl_fidxs != NULL)
1178             {
1179                 p = mi.mi_word;
1180                 fp = mi.mi_fword;
1181                 for (;;)
1182                 {
1183                     mb_ptr_adv(p);
1184                     mb_ptr_adv(fp);
1185                     if (p >= mi.mi_end)
1186                         break;
1187                     mi.mi_compoff = (int)(fp - mi.mi_fword);
1188                     find_word(&mi, FIND_COMPOUND);
1189                     if (mi.mi_result != SP_BAD)
1190                     {
1191                         mi.mi_end = p;
1192                         break;
1193                     }
1194                 }
1195                 mi.mi_result = save_result;
1196             }
1197         }
1198
1199         if (mi.mi_result == SP_BAD || mi.mi_result == SP_BANNED)
1200             *attrp = HLF_SPB;
1201         else if (mi.mi_result == SP_RARE)
1202             *attrp = HLF_SPR;
1203         else
1204             *attrp = HLF_SPL;
1205     }
1206
1207     if (wrongcaplen > 0 && (mi.mi_result == SP_OK || mi.mi_result == SP_RARE))
1208     {
1209         /* Report SpellCap only when the word isn't badly spelled. */
1210         *attrp = HLF_SPC;
1211         return wrongcaplen;
1212     }
1213
1214     return (int)(mi.mi_end - ptr);
1215 }
1216
1217 /*
1218  * Check if the word at "mip->mi_word" is in the tree.
1219  * When "mode" is FIND_FOLDWORD check in fold-case word tree.
1220  * When "mode" is FIND_KEEPWORD check in keep-case word tree.
1221  * When "mode" is FIND_PREFIX check for word after prefix in fold-case word
1222  * tree.
1223  *
1224  * For a match mip->mi_result is updated.
1225  */
1226     static void
1227 find_word(mip, mode)
1228     matchinf_T  *mip;
1229     int         mode;
1230 {
1231     idx_T       arridx = 0;
1232     int         endlen[MAXWLEN];    /* length at possible word endings */
1233     idx_T       endidx[MAXWLEN];    /* possible word endings */
1234     int         endidxcnt = 0;
1235     int         len;
1236     int         wlen = 0;
1237     int         flen;
1238     int         c;
1239     char_u      *ptr;
1240     idx_T       lo, hi, m;
1241 #ifdef FEAT_MBYTE
1242     char_u      *s;
1243 #endif
1244     char_u      *p;
1245     int         res = SP_BAD;
1246     slang_T     *slang = mip->mi_lp->lp_slang;
1247     unsigned    flags;
1248     char_u      *byts;
1249     idx_T       *idxs;
1250     int         word_ends;
1251     int         prefix_found;
1252     int         nobreak_result;
1253
1254     if (mode == FIND_KEEPWORD || mode == FIND_KEEPCOMPOUND)
1255     {
1256         /* Check for word with matching case in keep-case tree. */
1257         ptr = mip->mi_word;
1258         flen = 9999;                /* no case folding, always enough bytes */
1259         byts = slang->sl_kbyts;
1260         idxs = slang->sl_kidxs;
1261
1262         if (mode == FIND_KEEPCOMPOUND)
1263             /* Skip over the previously found word(s). */
1264             wlen += mip->mi_compoff;
1265     }
1266     else
1267     {
1268         /* Check for case-folded in case-folded tree. */
1269         ptr = mip->mi_fword;
1270         flen = mip->mi_fwordlen;    /* available case-folded bytes */
1271         byts = slang->sl_fbyts;
1272         idxs = slang->sl_fidxs;
1273
1274         if (mode == FIND_PREFIX)
1275         {
1276             /* Skip over the prefix. */
1277             wlen = mip->mi_prefixlen;
1278             flen -= mip->mi_prefixlen;
1279         }
1280         else if (mode == FIND_COMPOUND)
1281         {
1282             /* Skip over the previously found word(s). */
1283             wlen = mip->mi_compoff;
1284             flen -= mip->mi_compoff;
1285         }
1286
1287     }
1288
1289     if (byts == NULL)
1290         return;                 /* array is empty */
1291
1292     /*
1293      * Repeat advancing in the tree until:
1294      * - there is a byte that doesn't match,
1295      * - we reach the end of the tree,
1296      * - or we reach the end of the line.
1297      */
1298     for (;;)
1299     {
1300         if (flen <= 0 && *mip->mi_fend != NUL)
1301             flen = fold_more(mip);
1302
1303         len = byts[arridx++];
1304
1305         /* If the first possible byte is a zero the word could end here.
1306          * Remember this index, we first check for the longest word. */
1307         if (byts[arridx] == 0)
1308         {
1309             if (endidxcnt == MAXWLEN)
1310             {
1311                 /* Must be a corrupted spell file. */
1312                 EMSG(_(e_format));
1313                 return;
1314             }
1315             endlen[endidxcnt] = wlen;
1316             endidx[endidxcnt++] = arridx++;
1317             --len;
1318
1319             /* Skip over the zeros, there can be several flag/region
1320              * combinations. */
1321             while (len > 0 && byts[arridx] == 0)
1322             {
1323                 ++arridx;
1324                 --len;
1325             }
1326             if (len == 0)
1327                 break;      /* no children, word must end here */
1328         }
1329
1330         /* Stop looking at end of the line. */
1331         if (ptr[wlen] == NUL)
1332             break;
1333
1334         /* Perform a binary search in the list of accepted bytes. */
1335         c = ptr[wlen];
1336         if (c == TAB)       /* <Tab> is handled like <Space> */
1337             c = ' ';
1338         lo = arridx;
1339         hi = arridx + len - 1;
1340         while (lo < hi)
1341         {
1342             m = (lo + hi) / 2;
1343             if (byts[m] > c)
1344                 hi = m - 1;
1345             else if (byts[m] < c)
1346                 lo = m + 1;
1347             else
1348             {
1349                 lo = hi = m;
1350                 break;
1351             }
1352         }
1353
1354         /* Stop if there is no matching byte. */
1355         if (hi < lo || byts[lo] != c)
1356             break;
1357
1358         /* Continue at the child (if there is one). */
1359         arridx = idxs[lo];
1360         ++wlen;
1361         --flen;
1362
1363         /* One space in the good word may stand for several spaces in the
1364          * checked word. */
1365         if (c == ' ')
1366         {
1367             for (;;)
1368             {
1369                 if (flen <= 0 && *mip->mi_fend != NUL)
1370                     flen = fold_more(mip);
1371                 if (ptr[wlen] != ' ' && ptr[wlen] != TAB)
1372                     break;
1373                 ++wlen;
1374                 --flen;
1375             }
1376         }
1377     }
1378
1379     /*
1380      * Verify that one of the possible endings is valid.  Try the longest
1381      * first.
1382      */
1383     while (endidxcnt > 0)
1384     {
1385         --endidxcnt;
1386         arridx = endidx[endidxcnt];
1387         wlen = endlen[endidxcnt];
1388
1389 #ifdef FEAT_MBYTE
1390         if ((*mb_head_off)(ptr, ptr + wlen) > 0)
1391             continue;       /* not at first byte of character */
1392 #endif
1393         if (spell_iswordp(ptr + wlen, mip->mi_buf))
1394         {
1395             if (slang->sl_compprog == NULL && !slang->sl_nobreak)
1396                 continue;           /* next char is a word character */
1397             word_ends = FALSE;
1398         }
1399         else
1400             word_ends = TRUE;
1401         /* The prefix flag is before compound flags.  Once a valid prefix flag
1402          * has been found we try compound flags. */
1403         prefix_found = FALSE;
1404
1405 #ifdef FEAT_MBYTE
1406         if (mode != FIND_KEEPWORD && has_mbyte)
1407         {
1408             /* Compute byte length in original word, length may change
1409              * when folding case.  This can be slow, take a shortcut when the
1410              * case-folded word is equal to the keep-case word. */
1411             p = mip->mi_word;
1412             if (STRNCMP(ptr, p, wlen) != 0)
1413             {
1414                 for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1415                     mb_ptr_adv(p);
1416                 wlen = (int)(p - mip->mi_word);
1417             }
1418         }
1419 #endif
1420
1421         /* Check flags and region.  For FIND_PREFIX check the condition and
1422          * prefix ID.
1423          * Repeat this if there are more flags/region alternatives until there
1424          * is a match. */
1425         res = SP_BAD;
1426         for (len = byts[arridx - 1]; len > 0 && byts[arridx] == 0;
1427                                                               --len, ++arridx)
1428         {
1429             flags = idxs[arridx];
1430
1431             /* For the fold-case tree check that the case of the checked word
1432              * matches with what the word in the tree requires.
1433              * For keep-case tree the case is always right.  For prefixes we
1434              * don't bother to check. */
1435             if (mode == FIND_FOLDWORD)
1436             {
1437                 if (mip->mi_cend != mip->mi_word + wlen)
1438                 {
1439                     /* mi_capflags was set for a different word length, need
1440                      * to do it again. */
1441                     mip->mi_cend = mip->mi_word + wlen;
1442                     mip->mi_capflags = captype(mip->mi_word, mip->mi_cend);
1443                 }
1444
1445                 if (mip->mi_capflags == WF_KEEPCAP
1446                                 || !spell_valid_case(mip->mi_capflags, flags))
1447                     continue;
1448             }
1449
1450             /* When mode is FIND_PREFIX the word must support the prefix:
1451              * check the prefix ID and the condition.  Do that for the list at
1452              * mip->mi_prefarridx that find_prefix() filled. */
1453             else if (mode == FIND_PREFIX && !prefix_found)
1454             {
1455                 c = valid_word_prefix(mip->mi_prefcnt, mip->mi_prefarridx,
1456                                     flags,
1457                                     mip->mi_word + mip->mi_cprefixlen, slang,
1458                                     FALSE);
1459                 if (c == 0)
1460                     continue;
1461
1462                 /* Use the WF_RARE flag for a rare prefix. */
1463                 if (c & WF_RAREPFX)
1464                     flags |= WF_RARE;
1465                 prefix_found = TRUE;
1466             }
1467
1468             if (slang->sl_nobreak)
1469             {
1470                 if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND)
1471                         && (flags & WF_BANNED) == 0)
1472                 {
1473                     /* NOBREAK: found a valid following word.  That's all we
1474                      * need to know, so return. */
1475                     mip->mi_result = SP_OK;
1476                     break;
1477                 }
1478             }
1479
1480             else if ((mode == FIND_COMPOUND || mode == FIND_KEEPCOMPOUND
1481                                                                 || !word_ends))
1482             {
1483                 /* If there is no compound flag or the word is shorter than
1484                  * COMPOUNDMIN reject it quickly.
1485                  * Makes you wonder why someone puts a compound flag on a word
1486                  * that's too short...  Myspell compatibility requires this
1487                  * anyway. */
1488                 if (((unsigned)flags >> 24) == 0
1489                              || wlen - mip->mi_compoff < slang->sl_compminlen)
1490                     continue;
1491 #ifdef FEAT_MBYTE
1492                 /* For multi-byte chars check character length against
1493                  * COMPOUNDMIN. */
1494                 if (has_mbyte
1495                         && slang->sl_compminlen > 0
1496                         && mb_charlen_len(mip->mi_word + mip->mi_compoff,
1497                                 wlen - mip->mi_compoff) < slang->sl_compminlen)
1498                         continue;
1499 #endif
1500
1501                 /* Limit the number of compound words to COMPOUNDWORDMAX if no
1502                  * maximum for syllables is specified. */
1503                 if (!word_ends && mip->mi_complen + mip->mi_compextra + 2
1504                                                            > slang->sl_compmax
1505                                            && slang->sl_compsylmax == MAXWLEN)
1506                     continue;
1507
1508                 /* Don't allow compounding on a side where an affix was added,
1509                  * unless COMPOUNDPERMITFLAG was used. */
1510                 if (mip->mi_complen > 0 && (flags & WF_NOCOMPBEF))
1511                     continue;
1512                 if (!word_ends && (flags & WF_NOCOMPAFT))
1513                     continue;
1514
1515                 /* Quickly check if compounding is possible with this flag. */
1516                 if (!byte_in_str(mip->mi_complen == 0
1517                                         ? slang->sl_compstartflags
1518                                         : slang->sl_compallflags,
1519                                             ((unsigned)flags >> 24)))
1520                     continue;
1521
1522                 if (mode == FIND_COMPOUND)
1523                 {
1524                     int     capflags;
1525
1526                     /* Need to check the caps type of the appended compound
1527                      * word. */
1528 #ifdef FEAT_MBYTE
1529                     if (has_mbyte && STRNCMP(ptr, mip->mi_word,
1530                                                         mip->mi_compoff) != 0)
1531                     {
1532                         /* case folding may have changed the length */
1533                         p = mip->mi_word;
1534                         for (s = ptr; s < ptr + mip->mi_compoff; mb_ptr_adv(s))
1535                             mb_ptr_adv(p);
1536                     }
1537                     else
1538 #endif
1539                         p = mip->mi_word + mip->mi_compoff;
1540                     capflags = captype(p, mip->mi_word + wlen);
1541                     if (capflags == WF_KEEPCAP || (capflags == WF_ALLCAP
1542                                                  && (flags & WF_FIXCAP) != 0))
1543                         continue;
1544
1545                     if (capflags != WF_ALLCAP)
1546                     {
1547                         /* When the character before the word is a word
1548                          * character we do not accept a Onecap word.  We do
1549                          * accept a no-caps word, even when the dictionary
1550                          * word specifies ONECAP. */
1551                         mb_ptr_back(mip->mi_word, p);
1552                         if (spell_iswordp_nmw(p)
1553                                 ? capflags == WF_ONECAP
1554                                 : (flags & WF_ONECAP) != 0
1555                                                      && capflags != WF_ONECAP)
1556                             continue;
1557                     }
1558                 }
1559
1560                 /* If the word ends the sequence of compound flags of the
1561                  * words must match with one of the COMPOUNDRULE items and
1562                  * the number of syllables must not be too large. */
1563                 mip->mi_compflags[mip->mi_complen] = ((unsigned)flags >> 24);
1564                 mip->mi_compflags[mip->mi_complen + 1] = NUL;
1565                 if (word_ends)
1566                 {
1567                     char_u      fword[MAXWLEN];
1568
1569                     if (slang->sl_compsylmax < MAXWLEN)
1570                     {
1571                         /* "fword" is only needed for checking syllables. */
1572                         if (ptr == mip->mi_word)
1573                             (void)spell_casefold(ptr, wlen, fword, MAXWLEN);
1574                         else
1575                             vim_strncpy(fword, ptr, endlen[endidxcnt]);
1576                     }
1577                     if (!can_compound(slang, fword, mip->mi_compflags))
1578                         continue;
1579                 }
1580             }
1581
1582             /* Check NEEDCOMPOUND: can't use word without compounding. */
1583             else if (flags & WF_NEEDCOMP)
1584                 continue;
1585
1586             nobreak_result = SP_OK;
1587
1588             if (!word_ends)
1589             {
1590                 int     save_result = mip->mi_result;
1591                 char_u  *save_end = mip->mi_end;
1592                 langp_T *save_lp = mip->mi_lp;
1593                 int     lpi;
1594
1595                 /* Check that a valid word follows.  If there is one and we
1596                  * are compounding, it will set "mi_result", thus we are
1597                  * always finished here.  For NOBREAK we only check that a
1598                  * valid word follows.
1599                  * Recursive! */
1600                 if (slang->sl_nobreak)
1601                     mip->mi_result = SP_BAD;
1602
1603                 /* Find following word in case-folded tree. */
1604                 mip->mi_compoff = endlen[endidxcnt];
1605 #ifdef FEAT_MBYTE
1606                 if (has_mbyte && mode == FIND_KEEPWORD)
1607                 {
1608                     /* Compute byte length in case-folded word from "wlen":
1609                      * byte length in keep-case word.  Length may change when
1610                      * folding case.  This can be slow, take a shortcut when
1611                      * the case-folded word is equal to the keep-case word. */
1612                     p = mip->mi_fword;
1613                     if (STRNCMP(ptr, p, wlen) != 0)
1614                     {
1615                         for (s = ptr; s < ptr + wlen; mb_ptr_adv(s))
1616                             mb_ptr_adv(p);
1617                         mip->mi_compoff = (int)(p - mip->mi_fword);
1618                     }
1619                 }
1620 #endif
1621                 c = mip->mi_compoff;
1622                 ++mip->mi_complen;
1623                 if (flags & WF_COMPROOT)
1624                     ++mip->mi_compextra;
1625
1626                 /* For NOBREAK we need to try all NOBREAK languages, at least
1627                  * to find the ".add" file(s). */
1628                 for (lpi = 0; lpi < mip->mi_buf->b_langp.ga_len; ++lpi)
1629                 {
1630                     if (slang->sl_nobreak)
1631                     {
1632                         mip->mi_lp = LANGP_ENTRY(mip->mi_buf->b_langp, lpi);
1633                         if (mip->mi_lp->lp_slang->sl_fidxs == NULL
1634                                          || !mip->mi_lp->lp_slang->sl_nobreak)
1635                             continue;
1636                     }
1637
1638                     find_word(mip, FIND_COMPOUND);
1639
1640                     /* When NOBREAK any word that matches is OK.  Otherwise we
1641                      * need to find the longest match, thus try with keep-case
1642                      * and prefix too. */
1643                     if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1644                     {
1645                         /* Find following word in keep-case tree. */
1646                         mip->mi_compoff = wlen;
1647                         find_word(mip, FIND_KEEPCOMPOUND);
1648
1649 #if 0       /* Disabled, a prefix must not appear halfway a compound word,
1650                unless the COMPOUNDPERMITFLAG is used and then it can't be a
1651                postponed prefix. */
1652                         if (!slang->sl_nobreak || mip->mi_result == SP_BAD)
1653                         {
1654                             /* Check for following word with prefix. */
1655                             mip->mi_compoff = c;
1656                             find_prefix(mip, FIND_COMPOUND);
1657                         }
1658 #endif
1659                     }
1660
1661                     if (!slang->sl_nobreak)
1662                         break;
1663                 }
1664                 --mip->mi_complen;
1665                 if (flags & WF_COMPROOT)
1666                     --mip->mi_compextra;
1667                 mip->mi_lp = save_lp;
1668
1669                 if (slang->sl_nobreak)
1670                 {
1671                     nobreak_result = mip->mi_result;
1672                     mip->mi_result = save_result;
1673                     mip->mi_end = save_end;
1674                 }
1675                 else
1676                 {
1677                     if (mip->mi_result == SP_OK)
1678                         break;
1679                     continue;
1680                 }
1681             }
1682
1683             if (flags & WF_BANNED)
1684                 res = SP_BANNED;
1685             else if (flags & WF_REGION)
1686             {
1687                 /* Check region. */
1688                 if ((mip->mi_lp->lp_region & (flags >> 16)) != 0)
1689                     res = SP_OK;
1690                 else
1691                     res = SP_LOCAL;
1692             }
1693             else if (flags & WF_RARE)
1694                 res = SP_RARE;
1695             else
1696                 res = SP_OK;
1697
1698             /* Always use the longest match and the best result.  For NOBREAK
1699              * we separately keep the longest match without a following good
1700              * word as a fall-back. */
1701             if (nobreak_result == SP_BAD)
1702             {
1703                 if (mip->mi_result2 > res)
1704                 {
1705                     mip->mi_result2 = res;
1706                     mip->mi_end2 = mip->mi_word + wlen;
1707                 }
1708                 else if (mip->mi_result2 == res
1709                                         && mip->mi_end2 < mip->mi_word + wlen)
1710                     mip->mi_end2 = mip->mi_word + wlen;
1711             }
1712             else if (mip->mi_result > res)
1713             {
1714                 mip->mi_result = res;
1715                 mip->mi_end = mip->mi_word + wlen;
1716             }
1717             else if (mip->mi_result == res && mip->mi_end < mip->mi_word + wlen)
1718                 mip->mi_end = mip->mi_word + wlen;
1719
1720             if (mip->mi_result == SP_OK)
1721                 break;
1722         }
1723
1724         if (mip->mi_result == SP_OK)
1725             break;
1726     }
1727 }
1728
1729 /*
1730  * Return TRUE if "flags" is a valid sequence of compound flags and "word"
1731  * does not have too many syllables.
1732  */
1733     static int
1734 can_compound(slang, word, flags)
1735     slang_T     *slang;
1736     char_u      *word;
1737     char_u      *flags;
1738 {
1739     regmatch_T  regmatch;
1740 #ifdef FEAT_MBYTE
1741     char_u      uflags[MAXWLEN * 2];
1742     int         i;
1743 #endif
1744     char_u      *p;
1745
1746     if (slang->sl_compprog == NULL)
1747         return FALSE;
1748 #ifdef FEAT_MBYTE
1749     if (enc_utf8)
1750     {
1751         /* Need to convert the single byte flags to utf8 characters. */
1752         p = uflags;
1753         for (i = 0; flags[i] != NUL; ++i)
1754             p += mb_char2bytes(flags[i], p);
1755         *p = NUL;
1756         p = uflags;
1757     }
1758     else
1759 #endif
1760         p = flags;
1761     regmatch.regprog = slang->sl_compprog;
1762     regmatch.rm_ic = FALSE;
1763     if (!vim_regexec(&regmatch, p, 0))
1764         return FALSE;
1765
1766     /* Count the number of syllables.  This may be slow, do it last.  If there
1767      * are too many syllables AND the number of compound words is above
1768      * COMPOUNDWORDMAX then compounding is not allowed. */
1769     if (slang->sl_compsylmax < MAXWLEN
1770                        && count_syllables(slang, word) > slang->sl_compsylmax)
1771         return (int)STRLEN(flags) < slang->sl_compmax;
1772     return TRUE;
1773 }
1774
1775 /*
1776  * Return non-zero if the prefix indicated by "arridx" matches with the prefix
1777  * ID in "flags" for the word "word".
1778  * The WF_RAREPFX flag is included in the return value for a rare prefix.
1779  */
1780     static int
1781 valid_word_prefix(totprefcnt, arridx, flags, word, slang, cond_req)
1782     int         totprefcnt;     /* nr of prefix IDs */
1783     int         arridx;         /* idx in sl_pidxs[] */
1784     int         flags;
1785     char_u      *word;
1786     slang_T     *slang;
1787     int         cond_req;       /* only use prefixes with a condition */
1788 {
1789     int         prefcnt;
1790     int         pidx;
1791     regprog_T   *rp;
1792     regmatch_T  regmatch;
1793     int         prefid;
1794
1795     prefid = (unsigned)flags >> 24;
1796     for (prefcnt = totprefcnt - 1; prefcnt >= 0; --prefcnt)
1797     {
1798         pidx = slang->sl_pidxs[arridx + prefcnt];
1799
1800         /* Check the prefix ID. */
1801         if (prefid != (pidx & 0xff))
1802             continue;
1803
1804         /* Check if the prefix doesn't combine and the word already has a
1805          * suffix. */
1806         if ((flags & WF_HAS_AFF) && (pidx & WF_PFX_NC))
1807             continue;
1808
1809         /* Check the condition, if there is one.  The condition index is
1810          * stored in the two bytes above the prefix ID byte.  */
1811         rp = slang->sl_prefprog[((unsigned)pidx >> 8) & 0xffff];
1812         if (rp != NULL)
1813         {
1814             regmatch.regprog = rp;
1815             regmatch.rm_ic = FALSE;
1816             if (!vim_regexec(&regmatch, word, 0))
1817                 continue;
1818         }
1819         else if (cond_req)
1820             continue;
1821
1822         /* It's a match!  Return the WF_ flags. */
1823         return pidx;
1824     }
1825     return 0;
1826 }
1827
1828 /*
1829  * Check if the word at "mip->mi_word" has a matching prefix.
1830  * If it does, then check the following word.
1831  *
1832  * If "mode" is "FIND_COMPOUND" then do the same after another word, find a
1833  * prefix in a compound word.
1834  *
1835  * For a match mip->mi_result is updated.
1836  */
1837     static void
1838 find_prefix(mip, mode)
1839     matchinf_T  *mip;
1840     int         mode;
1841 {
1842     idx_T       arridx = 0;
1843     int         len;
1844     int         wlen = 0;
1845     int         flen;
1846     int         c;
1847     char_u      *ptr;
1848     idx_T       lo, hi, m;
1849     slang_T     *slang = mip->mi_lp->lp_slang;
1850     char_u      *byts;
1851     idx_T       *idxs;
1852
1853     byts = slang->sl_pbyts;
1854     if (byts == NULL)
1855         return;                 /* array is empty */
1856
1857     /* We use the case-folded word here, since prefixes are always
1858      * case-folded. */
1859     ptr = mip->mi_fword;
1860     flen = mip->mi_fwordlen;    /* available case-folded bytes */
1861     if (mode == FIND_COMPOUND)
1862     {
1863         /* Skip over the previously found word(s). */
1864         ptr += mip->mi_compoff;
1865         flen -= mip->mi_compoff;
1866     }
1867     idxs = slang->sl_pidxs;
1868
1869     /*
1870      * Repeat advancing in the tree until:
1871      * - there is a byte that doesn't match,
1872      * - we reach the end of the tree,
1873      * - or we reach the end of the line.
1874      */
1875     for (;;)
1876     {
1877         if (flen == 0 && *mip->mi_fend != NUL)
1878             flen = fold_more(mip);
1879
1880         len = byts[arridx++];
1881
1882         /* If the first possible byte is a zero the prefix could end here.
1883          * Check if the following word matches and supports the prefix. */
1884         if (byts[arridx] == 0)
1885         {
1886             /* There can be several prefixes with different conditions.  We
1887              * try them all, since we don't know which one will give the
1888              * longest match.  The word is the same each time, pass the list
1889              * of possible prefixes to find_word(). */
1890             mip->mi_prefarridx = arridx;
1891             mip->mi_prefcnt = len;
1892             while (len > 0 && byts[arridx] == 0)
1893             {
1894                 ++arridx;
1895                 --len;
1896             }
1897             mip->mi_prefcnt -= len;
1898
1899             /* Find the word that comes after the prefix. */
1900             mip->mi_prefixlen = wlen;
1901             if (mode == FIND_COMPOUND)
1902                 /* Skip over the previously found word(s). */
1903                 mip->mi_prefixlen += mip->mi_compoff;
1904
1905 #ifdef FEAT_MBYTE
1906             if (has_mbyte)
1907             {
1908                 /* Case-folded length may differ from original length. */
1909                 mip->mi_cprefixlen = nofold_len(mip->mi_fword,
1910                                              mip->mi_prefixlen, mip->mi_word);
1911             }
1912             else
1913                 mip->mi_cprefixlen = mip->mi_prefixlen;
1914 #endif
1915             find_word(mip, FIND_PREFIX);
1916
1917
1918             if (len == 0)
1919                 break;      /* no children, word must end here */
1920         }
1921
1922         /* Stop looking at end of the line. */
1923         if (ptr[wlen] == NUL)
1924             break;
1925
1926         /* Perform a binary search in the list of accepted bytes. */
1927         c = ptr[wlen];
1928         lo = arridx;
1929         hi = arridx + len - 1;
1930         while (lo < hi)
1931         {
1932             m = (lo + hi) / 2;
1933             if (byts[m] > c)
1934                 hi = m - 1;
1935             else if (byts[m] < c)
1936                 lo = m + 1;
1937             else
1938             {
1939                 lo = hi = m;
1940                 break;
1941             }
1942         }
1943
1944         /* Stop if there is no matching byte. */
1945         if (hi < lo || byts[lo] != c)
1946             break;
1947
1948         /* Continue at the child (if there is one). */
1949         arridx = idxs[lo];
1950         ++wlen;
1951         --flen;
1952     }
1953 }
1954
1955 /*
1956  * Need to fold at least one more character.  Do until next non-word character
1957  * for efficiency.  Include the non-word character too.
1958  * Return the length of the folded chars in bytes.
1959  */
1960     static int
1961 fold_more(mip)
1962     matchinf_T  *mip;
1963 {
1964     int         flen;
1965     char_u      *p;
1966
1967     p = mip->mi_fend;
1968     do
1969     {
1970         mb_ptr_adv(mip->mi_fend);
1971     } while (*mip->mi_fend != NUL && spell_iswordp(mip->mi_fend, mip->mi_buf));
1972
1973     /* Include the non-word character so that we can check for the word end. */
1974     if (*mip->mi_fend != NUL)
1975         mb_ptr_adv(mip->mi_fend);
1976
1977     (void)spell_casefold(p, (int)(mip->mi_fend - p),
1978                              mip->mi_fword + mip->mi_fwordlen,
1979                              MAXWLEN - mip->mi_fwordlen);
1980     flen = (int)STRLEN(mip->mi_fword + mip->mi_fwordlen);
1981     mip->mi_fwordlen += flen;
1982     return flen;
1983 }
1984
1985 /*
1986  * Check case flags for a word.  Return TRUE if the word has the requested
1987  * case.
1988  */
1989     static int
1990 spell_valid_case(wordflags, treeflags)
1991     int     wordflags;      /* flags for the checked word. */
1992     int     treeflags;      /* flags for the word in the spell tree */
1993 {
1994     return ((wordflags == WF_ALLCAP && (treeflags & WF_FIXCAP) == 0)
1995             || ((treeflags & (WF_ALLCAP | WF_KEEPCAP)) == 0
1996                 && ((treeflags & WF_ONECAP) == 0
1997                                            || (wordflags & WF_ONECAP) != 0)));
1998 }
1999
2000 /*
2001  * Return TRUE if spell checking is not enabled.
2002  */
2003     static int
2004 no_spell_checking(wp)
2005     win_T       *wp;
2006 {
2007     if (!wp->w_p_spell || *wp->w_buffer->b_p_spl == NUL
2008                                          || wp->w_buffer->b_langp.ga_len == 0)
2009     {
2010         EMSG(_("E756: Spell checking is not enabled"));
2011         return TRUE;
2012     }
2013     return FALSE;
2014 }
2015
2016 /*
2017  * Move to next spell error.
2018  * "curline" is FALSE for "[s", "]s", "[S" and "]S".
2019  * "curline" is TRUE to find word under/after cursor in the same line.
2020  * For Insert mode completion "dir" is BACKWARD and "curline" is TRUE: move
2021  * to after badly spelled word before the cursor.
2022  * Return 0 if not found, length of the badly spelled word otherwise.
2023  */
2024     int
2025 spell_move_to(wp, dir, allwords, curline, attrp)
2026     win_T       *wp;
2027     int         dir;            /* FORWARD or BACKWARD */
2028     int         allwords;       /* TRUE for "[s"/"]s", FALSE for "[S"/"]S" */
2029     int         curline;
2030     hlf_T       *attrp;         /* return: attributes of bad word or NULL
2031                                    (only when "dir" is FORWARD) */
2032 {
2033     linenr_T    lnum;
2034     pos_T       found_pos;
2035     int         found_len = 0;
2036     char_u      *line;
2037     char_u      *p;
2038     char_u      *endp;
2039     hlf_T       attr;
2040     int         len;
2041 # ifdef FEAT_SYN_HL
2042     int         has_syntax = syntax_present(wp->w_buffer);
2043 # endif
2044     int         col;
2045     int         can_spell;
2046     char_u      *buf = NULL;
2047     int         buflen = 0;
2048     int         skip = 0;
2049     int         capcol = -1;
2050     int         found_one = FALSE;
2051     int         wrapped = FALSE;
2052
2053     if (no_spell_checking(wp))
2054         return 0;
2055
2056     /*
2057      * Start looking for bad word at the start of the line, because we can't
2058      * start halfway a word, we don't know where it starts or ends.
2059      *
2060      * When searching backwards, we continue in the line to find the last
2061      * bad word (in the cursor line: before the cursor).
2062      *
2063      * We concatenate the start of the next line, so that wrapped words work
2064      * (e.g. "et<line-break>cetera").  Doesn't work when searching backwards
2065      * though...
2066      */
2067     lnum = wp->w_cursor.lnum;
2068     clearpos(&found_pos);
2069
2070     while (!got_int)
2071     {
2072         line = ml_get_buf(wp->w_buffer, lnum, FALSE);
2073
2074         len = (int)STRLEN(line);
2075         if (buflen < len + MAXWLEN + 2)
2076         {
2077             vim_free(buf);
2078             buflen = len + MAXWLEN + 2;
2079             buf = alloc(buflen);
2080             if (buf == NULL)
2081                 break;
2082         }
2083
2084         /* In first line check first word for Capital. */
2085         if (lnum == 1)
2086             capcol = 0;
2087
2088         /* For checking first word with a capital skip white space. */
2089         if (capcol == 0)
2090             capcol = (int)(skipwhite(line) - line);
2091         else if (curline && wp == curwin)
2092         {
2093             /* For spellbadword(): check if first word needs a capital. */
2094             col = (int)(skipwhite(line) - line);
2095             if (check_need_cap(lnum, col))
2096                 capcol = col;
2097
2098             /* Need to get the line again, may have looked at the previous
2099              * one. */
2100             line = ml_get_buf(wp->w_buffer, lnum, FALSE);
2101         }
2102
2103         /* Copy the line into "buf" and append the start of the next line if
2104          * possible. */
2105         STRCPY(buf, line);
2106         if (lnum < wp->w_buffer->b_ml.ml_line_count)
2107             spell_cat_line(buf + STRLEN(buf),
2108                           ml_get_buf(wp->w_buffer, lnum + 1, FALSE), MAXWLEN);
2109
2110         p = buf + skip;
2111         endp = buf + len;
2112         while (p < endp)
2113         {
2114             /* When searching backward don't search after the cursor.  Unless
2115              * we wrapped around the end of the buffer. */
2116             if (dir == BACKWARD
2117                     && lnum == wp->w_cursor.lnum
2118                     && !wrapped
2119                     && (colnr_T)(p - buf) >= wp->w_cursor.col)
2120                 break;
2121
2122             /* start of word */
2123             attr = HLF_COUNT;
2124             len = spell_check(wp, p, &attr, &capcol, FALSE);
2125
2126             if (attr != HLF_COUNT)
2127             {
2128                 /* We found a bad word.  Check the attribute. */
2129                 if (allwords || attr == HLF_SPB)
2130                 {
2131                     /* When searching forward only accept a bad word after
2132                      * the cursor. */
2133                     if (dir == BACKWARD
2134                             || lnum != wp->w_cursor.lnum
2135                             || (lnum == wp->w_cursor.lnum
2136                                 && (wrapped
2137                                     || (colnr_T)(curline ? p - buf + len
2138                                                      : p - buf)
2139                                                   > wp->w_cursor.col)))
2140                     {
2141 # ifdef FEAT_SYN_HL
2142                         if (has_syntax)
2143                         {
2144                             col = (int)(p - buf);
2145                             (void)syn_get_id(wp, lnum, (colnr_T)col,
2146                                                     FALSE, &can_spell, FALSE);
2147                             if (!can_spell)
2148                                 attr = HLF_COUNT;
2149                         }
2150                         else
2151 #endif
2152                             can_spell = TRUE;
2153
2154                         if (can_spell)
2155                         {
2156                             found_one = TRUE;
2157                             found_pos.lnum = lnum;
2158                             found_pos.col = (int)(p - buf);
2159 #ifdef FEAT_VIRTUALEDIT
2160                             found_pos.coladd = 0;
2161 #endif
2162                             if (dir == FORWARD)
2163                             {
2164                                 /* No need to search further. */
2165                                 wp->w_cursor = found_pos;
2166                                 vim_free(buf);
2167                                 if (attrp != NULL)
2168                                     *attrp = attr;
2169                                 return len;
2170                             }
2171                             else if (curline)
2172                                 /* Insert mode completion: put cursor after
2173                                  * the bad word. */
2174                                 found_pos.col += len;
2175                             found_len = len;
2176                         }
2177                     }
2178                     else
2179                         found_one = TRUE;
2180                 }
2181             }
2182
2183             /* advance to character after the word */
2184             p += len;
2185             capcol -= len;
2186         }
2187
2188         if (dir == BACKWARD && found_pos.lnum != 0)
2189         {
2190             /* Use the last match in the line (before the cursor). */
2191             wp->w_cursor = found_pos;
2192             vim_free(buf);
2193             return found_len;
2194         }
2195
2196         if (curline)
2197             break;      /* only check cursor line */
2198
2199         /* Advance to next line. */
2200         if (dir == BACKWARD)
2201         {
2202             /* If we are back at the starting line and searched it again there
2203              * is no match, give up. */
2204             if (lnum == wp->w_cursor.lnum && wrapped)
2205                 break;
2206
2207             if (lnum > 1)
2208                 --lnum;
2209             else if (!p_ws)
2210                 break;      /* at first line and 'nowrapscan' */
2211             else
2212             {
2213                 /* Wrap around to the end of the buffer.  May search the
2214                  * starting line again and accept the last match. */
2215                 lnum = wp->w_buffer->b_ml.ml_line_count;
2216                 wrapped = TRUE;
2217                 if (!shortmess(SHM_SEARCH))
2218                     give_warning((char_u *)_(top_bot_msg), TRUE);
2219             }
2220             capcol = -1;
2221         }
2222         else
2223         {
2224             if (lnum < wp->w_buffer->b_ml.ml_line_count)
2225                 ++lnum;
2226             else if (!p_ws)
2227                 break;      /* at first line and 'nowrapscan' */
2228             else
2229             {
2230                 /* Wrap around to the start of the buffer.  May search the
2231                  * starting line again and accept the first match. */
2232                 lnum = 1;
2233                 wrapped = TRUE;
2234                 if (!shortmess(SHM_SEARCH))
2235                     give_warning((char_u *)_(bot_top_msg), TRUE);
2236             }
2237
2238             /* If we are back at the starting line and there is no match then
2239              * give up. */
2240             if (lnum == wp->w_cursor.lnum && !found_one)
2241                 break;
2242
2243             /* Skip the characters at the start of the next line that were
2244              * included in a match crossing line boundaries. */
2245             if (attr == HLF_COUNT)
2246                 skip = (int)(p - endp);
2247             else
2248                 skip = 0;
2249
2250             /* Capcol skips over the inserted space. */
2251             --capcol;
2252
2253             /* But after empty line check first word in next line */
2254             if (*skipwhite(line) == NUL)
2255                 capcol = 0;
2256         }
2257
2258         line_breakcheck();
2259     }
2260
2261     vim_free(buf);
2262     return 0;
2263 }
2264
2265 /*
2266  * For spell checking: concatenate the start of the following line "line" into
2267  * "buf", blanking-out special characters.  Copy less then "maxlen" bytes.
2268  * Keep the blanks at the start of the next line, this is used in win_line()
2269  * to skip those bytes if the word was OK.
2270  */
2271     void
2272 spell_cat_line(buf, line, maxlen)
2273     char_u      *buf;
2274     char_u      *line;
2275     int         maxlen;
2276 {
2277     char_u      *p;
2278     int         n;
2279
2280     p = skipwhite(line);
2281     while (vim_strchr((char_u *)"*#/\"\t", *p) != NULL)
2282         p = skipwhite(p + 1);
2283
2284     if (*p != NUL)
2285     {
2286         /* Only worth concatenating if there is something else than spaces to
2287          * concatenate. */
2288         n = (int)(p - line) + 1;
2289         if (n < maxlen - 1)
2290         {
2291             vim_memset(buf, ' ', n);
2292             vim_strncpy(buf +  n, p, maxlen - 1 - n);
2293         }
2294     }
2295 }
2296
/*
 * Structure used for the cookie argument of do_in_runtimepath().
 * spell_load_lang() fills it in and spell_load_cb() receives it for every
 * spell file that is found.
 */
typedef struct spelload_S
{
    char_u  sl_lang[MAXWLEN + 1];       /* language name; spell_load_file()
                                         * makes it empty when loading fails */
    slang_T *sl_slang;                  /* resulting slang_T struct, NULL when
                                         * no file was loaded */
    int     sl_nobreak;                 /* NOBREAK language found */
} spelload_T;
2306
/*
 * Load word list(s) for "lang" from Vim spell file(s).
 * "lang" must be the language without the region: e.g., "en".
 *
 * First "spell/{lang}.{encoding}.spl" is searched for in 'runtimepath', then
 * "spell/{lang}.ascii.spl".  With FEAT_AUTOCMD the SpellFileMissing
 * autocommands are triggered when both fail and the search is done once more
 * if they did something.
 * A warning is given when no word list was found.  When one was loaded all
 * matching ".add.spl" files are loaded as well.
 */
    static void
spell_load_lang(lang)
    char_u      *lang;
{
    char_u      fname_enc[85];
    int         r;
    spelload_T  sl;
#ifdef FEAT_AUTOCMD
    int         round;
#endif

    /* Copy the language name to pass it to spell_load_cb() as a cookie.
     * It's truncated when an error is detected. */
    STRCPY(sl.sl_lang, lang);
    sl.sl_slang = NULL;
    sl.sl_nobreak = FALSE;

#ifdef FEAT_AUTOCMD
    /* We may retry when no spell file is found for the language, an
     * autocommand may load it then. */
    for (round = 1; round <= 2; ++round)
#endif
    {
        /*
         * Find the first spell file for "lang" in 'runtimepath' and load it.
         * The size passed is 5 less than the buffer size; presumably this
         * keeps room for turning "spl" into "add.spl" below.
         */
        vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
                                        "spell/%s.%s.spl", lang, spell_enc());
        r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);

        /* An empty sl.sl_lang means spell_load_file() ran into an error,
         * don't try another file then. */
        if (r == FAIL && *sl.sl_lang != NUL)
        {
            /* Try loading the ASCII version. */
            vim_snprintf((char *)fname_enc, sizeof(fname_enc) - 5,
                                                  "spell/%s.ascii.spl", lang);
            r = do_in_runtimepath(fname_enc, FALSE, spell_load_cb, &sl);

#ifdef FEAT_AUTOCMD
            /* Only in the first round: when the autocommands did something
             * search for the files again. */
            if (r == FAIL && *sl.sl_lang != NUL && round == 1
                    && apply_autocmds(EVENT_SPELLFILEMISSING, lang,
                                              curbuf->b_fname, FALSE, curbuf))
                continue;
            break;
#endif
        }
#ifdef FEAT_AUTOCMD
        break;
#endif
    }

    if (r == FAIL)
    {
        smsg((char_u *)_("Warning: Cannot find word list \"%s.%s.spl\" or \"%s.ascii.spl\""),
                                                     lang, spell_enc(), lang);
    }
    else if (sl.sl_slang != NULL)
    {
        /* At least one file was loaded, now load ALL the additions.
         * The trailing "spl" of the name that was used last is replaced by
         * "add.spl". */
        STRCPY(fname_enc + STRLEN(fname_enc) - 3, "add.spl");
        do_in_runtimepath(fname_enc, TRUE, spell_load_cb, &sl);
    }
}
2373
2374 /*
2375  * Return the encoding used for spell checking: Use 'encoding', except that we
2376  * use "latin1" for "latin9".  And limit to 60 characters (just in case).
2377  */
2378     static char_u *
2379 spell_enc()
2380 {
2381
2382 #ifdef FEAT_MBYTE
2383     if (STRLEN(p_enc) < 60 && STRCMP(p_enc, "iso-8859-15") != 0)
2384         return p_enc;
2385 #endif
2386     return (char_u *)"latin1";
2387 }
2388
2389 /*
2390  * Get the name of the .spl file for the internal wordlist into
2391  * "fname[MAXPATHL]".
2392  */
2393     static void
2394 int_wordlist_spl(fname)
2395     char_u          *fname;
2396 {
2397     vim_snprintf((char *)fname, MAXPATHL, "%s.%s.spl",
2398                                                   int_wordlist, spell_enc());
2399 }
2400
2401 /*
2402  * Allocate a new slang_T for language "lang".  "lang" can be NULL.
2403  * Caller must fill "sl_next".
2404  */
2405     static slang_T *
2406 slang_alloc(lang)
2407     char_u      *lang;
2408 {
2409     slang_T *lp;
2410
2411     lp = (slang_T *)alloc_clear(sizeof(slang_T));
2412     if (lp != NULL)
2413     {
2414         if (lang != NULL)
2415             lp->sl_name = vim_strsave(lang);
2416         ga_init2(&lp->sl_rep, sizeof(fromto_T), 10);
2417         ga_init2(&lp->sl_repsal, sizeof(fromto_T), 10);
2418         lp->sl_compmax = MAXWLEN;
2419         lp->sl_compsylmax = MAXWLEN;
2420         hash_init(&lp->sl_wordcount);
2421     }
2422
2423     return lp;
2424 }
2425
2426 /*
2427  * Free the contents of an slang_T and the structure itself.
2428  */
2429     static void
2430 slang_free(lp)
2431     slang_T     *lp;
2432 {
2433     vim_free(lp->sl_name);
2434     vim_free(lp->sl_fname);
2435     slang_clear(lp);
2436     vim_free(lp);
2437 }
2438
2439 /*
2440  * Clear an slang_T so that the file can be reloaded.
2441  */
2442     static void
2443 slang_clear(lp)
2444     slang_T     *lp;
2445 {
2446     garray_T    *gap;
2447     fromto_T    *ftp;
2448     salitem_T   *smp;
2449     int         i;
2450     int         round;
2451
2452     vim_free(lp->sl_fbyts);
2453     lp->sl_fbyts = NULL;
2454     vim_free(lp->sl_kbyts);
2455     lp->sl_kbyts = NULL;
2456     vim_free(lp->sl_pbyts);
2457     lp->sl_pbyts = NULL;
2458
2459     vim_free(lp->sl_fidxs);
2460     lp->sl_fidxs = NULL;
2461     vim_free(lp->sl_kidxs);
2462     lp->sl_kidxs = NULL;
2463     vim_free(lp->sl_pidxs);
2464     lp->sl_pidxs = NULL;
2465
2466     for (round = 1; round <= 2; ++round)
2467     {
2468         gap = round == 1 ? &lp->sl_rep : &lp->sl_repsal;
2469         while (gap->ga_len > 0)
2470         {
2471             ftp = &((fromto_T *)gap->ga_data)[--gap->ga_len];
2472             vim_free(ftp->ft_from);
2473             vim_free(ftp->ft_to);
2474         }
2475         ga_clear(gap);
2476     }
2477
2478     gap = &lp->sl_sal;
2479     if (lp->sl_sofo)
2480     {
2481         /* "ga_len" is set to 1 without adding an item for latin1 */
2482         if (gap->ga_data != NULL)
2483             /* SOFOFROM and SOFOTO items: free lists of wide characters. */
2484             for (i = 0; i < gap->ga_len; ++i)
2485                 vim_free(((int **)gap->ga_data)[i]);
2486     }
2487     else
2488         /* SAL items: free salitem_T items */
2489         while (gap->ga_len > 0)
2490         {
2491             smp = &((salitem_T *)gap->ga_data)[--gap->ga_len];
2492             vim_free(smp->sm_lead);
2493             /* Don't free sm_oneof and sm_rules, they point into sm_lead. */
2494             vim_free(smp->sm_to);
2495 #ifdef FEAT_MBYTE
2496             vim_free(smp->sm_lead_w);
2497             vim_free(smp->sm_oneof_w);
2498             vim_free(smp->sm_to_w);
2499 #endif
2500         }
2501     ga_clear(gap);
2502
2503     for (i = 0; i < lp->sl_prefixcnt; ++i)
2504         vim_free(lp->sl_prefprog[i]);
2505     lp->sl_prefixcnt = 0;
2506     vim_free(lp->sl_prefprog);
2507     lp->sl_prefprog = NULL;
2508
2509     vim_free(lp->sl_info);
2510     lp->sl_info = NULL;
2511
2512     vim_free(lp->sl_midword);
2513     lp->sl_midword = NULL;
2514
2515     vim_free(lp->sl_compprog);
2516     vim_free(lp->sl_compstartflags);
2517     vim_free(lp->sl_compallflags);
2518     lp->sl_compprog = NULL;
2519     lp->sl_compstartflags = NULL;
2520     lp->sl_compallflags = NULL;
2521
2522     vim_free(lp->sl_syllable);
2523     lp->sl_syllable = NULL;
2524     ga_clear(&lp->sl_syl_items);
2525
2526     ga_clear_strings(&lp->sl_comppat);
2527
2528     hash_clear_all(&lp->sl_wordcount, WC_KEY_OFF);
2529     hash_init(&lp->sl_wordcount);
2530
2531 #ifdef FEAT_MBYTE
2532     hash_clear_all(&lp->sl_map_hash, 0);
2533 #endif
2534
2535     /* Clear info from .sug file. */
2536     slang_clear_sug(lp);
2537
2538     lp->sl_compmax = MAXWLEN;
2539     lp->sl_compminlen = 0;
2540     lp->sl_compsylmax = MAXWLEN;
2541     lp->sl_regions[0] = NUL;
2542 }
2543
2544 /*
2545  * Clear the info from the .sug file in "lp".
2546  */
2547     static void
2548 slang_clear_sug(lp)
2549     slang_T     *lp;
2550 {
2551     vim_free(lp->sl_sbyts);
2552     lp->sl_sbyts = NULL;
2553     vim_free(lp->sl_sidxs);
2554     lp->sl_sidxs = NULL;
2555     close_spellbuf(lp->sl_sugbuf);
2556     lp->sl_sugbuf = NULL;
2557     lp->sl_sugloaded = FALSE;
2558     lp->sl_sugtime = 0;
2559 }
2560
2561 /*
2562  * Load one spell file and store the info into a slang_T.
2563  * Invoked through do_in_runtimepath().
2564  */
2565     static void
2566 spell_load_cb(fname, cookie)
2567     char_u      *fname;
2568     void        *cookie;
2569 {
2570     spelload_T  *slp = (spelload_T *)cookie;
2571     slang_T     *slang;
2572
2573     slang = spell_load_file(fname, slp->sl_lang, NULL, FALSE);
2574     if (slang != NULL)
2575     {
2576         /* When a previously loaded file has NOBREAK also use it for the
2577          * ".add" files. */
2578         if (slp->sl_nobreak && slang->sl_add)
2579             slang->sl_nobreak = TRUE;
2580         else if (slang->sl_nobreak)
2581             slp->sl_nobreak = TRUE;
2582
2583         slp->sl_slang = slang;
2584     }
2585 }
2586
/*
 * Load one spell file and store the info into a slang_T.
 *
 * This is invoked in three ways:
 * - From spell_load_cb() to load a spell file for the first time.  "lang" is
 *   the language name, "old_lp" is NULL.  Will allocate an slang_T.
 * - To reload a spell file that was changed.  "lang" is NULL and "old_lp"
 *   points to the existing slang_T.
 * - Just after writing a .spl file; it's read back to produce the .sug file.
 *   "old_lp" is NULL and "lang" is NULL.  Will allocate an slang_T.
 *
 * When "silent" is TRUE no error is given for a file that cannot be opened,
 * only a message when 'verbose' is above 2.
 *
 * The file is read in this order: <fileID>, <versionnr>, the sections up to
 * <sectionend> and then the three word trees.
 *
 * On failure "lang" (when not NULL) is made an empty string and an slang_T
 * that was allocated here is freed; "old_lp" is never freed here.
 * "sourcing_name" and "sourcing_lnum" are set while reading and restored
 * before returning.
 *
 * Returns the slang_T the spell file was loaded into.  NULL for error.
 */
    static slang_T *
spell_load_file(fname, lang, old_lp, silent)
    char_u      *fname;
    char_u      *lang;
    slang_T     *old_lp;
    int         silent;         /* no error if file doesn't exist */
{
    FILE        *fd;
    char_u      buf[VIMSPELLMAGICL];
    char_u      *p;
    int         i;
    int         n;
    int         len;
    char_u      *save_sourcing_name = sourcing_name;
    linenr_T    save_sourcing_lnum = sourcing_lnum;
    slang_T     *lp = NULL;
    int         c = 0;
    int         res;

    fd = mch_fopen((char *)fname, "r");
    if (fd == NULL)
    {
        if (!silent)
            EMSG2(_(e_notopen), fname);
        else if (p_verbose > 2)
        {
            verbose_enter();
            smsg((char_u *)e_notopen, fname);
            verbose_leave();
        }
        goto endFAIL;
    }
    if (p_verbose > 2)
    {
        verbose_enter();
        smsg((char_u *)_("Reading spell file \"%s\""), fname);
        verbose_leave();
    }

    if (old_lp == NULL)
    {
        lp = slang_alloc(lang);
        if (lp == NULL)
            goto endFAIL;

        /* Remember the file name, used to reload the file when it's updated. */
        lp->sl_fname = vim_strsave(fname);
        if (lp->sl_fname == NULL)
            goto endFAIL;

        /* Check for .add.spl. */
        lp->sl_add = strstr((char *)gettail(fname), ".add.") != NULL;
    }
    else
        lp = old_lp;

    /* Set sourcing_name, so that error messages mention the file name. */
    sourcing_name = fname;
    sourcing_lnum = 0;

    /*
     * <HEADER>: <fileID>
     */
    for (i = 0; i < VIMSPELLMAGICL; ++i)
        buf[i] = getc(fd);                              /* <fileID> */
    if (STRNCMP(buf, VIMSPELLMAGIC, VIMSPELLMAGICL) != 0)
    {
        EMSG(_("E757: This does not look like a spell file"));
        goto endFAIL;
    }
    c = getc(fd);                                       /* <versionnr> */
    if (c < VIMSPELLVERSION)
    {
        EMSG(_("E771: Old spell file, needs to be updated"));
        goto endFAIL;
    }
    else if (c > VIMSPELLVERSION)
    {
        EMSG(_("E772: Spell file is for newer version of Vim"));
        goto endFAIL;
    }


    /*
     * <SECTIONS>: <section> ... <sectionend>
     * <section>: <sectionID> <sectionflags> <sectionlen> (section contents)
     */
    for (;;)
    {
        n = getc(fd);                       /* <sectionID> or <sectionend> */
        if (n == SN_END)
            break;
        c = getc(fd);                                   /* <sectionflags> */
        len = get4c(fd);                                /* <sectionlen> */
        if (len < 0)
            goto truncerr;

        /* "res" gets the SP_*ERROR value of the section reader, it is
         * checked below the switch.  Zero means no error. */
        res = 0;
        switch (n)
        {
            case SN_INFO:
                lp->sl_info = read_string(fd, len);     /* <infotext> */
                if (lp->sl_info == NULL)
                    goto endFAIL;
                break;

            case SN_REGION:
                res = read_region_section(fd, lp, len);
                break;

            case SN_CHARFLAGS:
                res = read_charflags_section(fd);
                break;

            case SN_MIDWORD:
                lp->sl_midword = read_string(fd, len);  /* <midword> */
                if (lp->sl_midword == NULL)
                    goto endFAIL;
                break;

            case SN_PREFCOND:
                res = read_prefcond_section(fd, lp);
                break;

            case SN_REP:
                res = read_rep_section(fd, &lp->sl_rep, lp->sl_rep_first);
                break;

            case SN_REPSAL:
                res = read_rep_section(fd, &lp->sl_repsal, lp->sl_repsal_first);
                break;

            case SN_SAL:
                res = read_sal_section(fd, lp);
                break;

            case SN_SOFO:
                res = read_sofo_section(fd, lp);
                break;

            case SN_MAP:
                p = read_string(fd, len);               /* <mapstr> */
                if (p == NULL)
                    goto endFAIL;
                set_map_str(lp, p);
                vim_free(p);
                break;

            case SN_WORDS:
                res = read_words_section(fd, lp, len);
                break;

            case SN_SUGFILE:
                lp->sl_sugtime = get8c(fd);             /* <timestamp> */
                break;

            case SN_NOSPLITSUGS:
                lp->sl_nosplitsugs = TRUE;      /* flag only, nothing is read */
                break;

            case SN_COMPOUND:
                res = read_compound(fd, lp, len);
                break;

            case SN_NOBREAK:
                lp->sl_nobreak = TRUE;          /* flag only, nothing is read */
                break;

            case SN_SYLLABLE:
                lp->sl_syllable = read_string(fd, len); /* <syllable> */
                if (lp->sl_syllable == NULL)
                    goto endFAIL;
                if (init_syl_tab(lp) == FAIL)
                    goto endFAIL;
                break;

            default:
                /* Unsupported section.  When it's required give an error
                 * message.  When it's not required skip the contents. */
                if (c & SNF_REQUIRED)
                {
                    EMSG(_("E770: Unsupported section in spell file"));
                    goto endFAIL;
                }
                while (--len >= 0)
                    if (getc(fd) < 0)
                        goto truncerr;
                break;
        }
        /* Also jumped to from below the loop, when reading one of the word
         * trees failed; "res" is not zero then. */
someerror:
        if (res == SP_FORMERROR)
        {
            EMSG(_(e_format));
            goto endFAIL;
        }
        if (res == SP_TRUNCERROR)
        {
            /* Also jumped to when the section length could not be read or
             * when skipping a section runs into the end of the file. */
truncerr:
            EMSG(_(e_spell_trunc));
            goto endFAIL;
        }
        if (res == SP_OTHERERROR)
            goto endFAIL;
    }

    /* <LWORDTREE> */
    res = spell_read_tree(fd, &lp->sl_fbyts, &lp->sl_fidxs, FALSE, 0);
    if (res != 0)
        goto someerror;

    /* <KWORDTREE> */
    res = spell_read_tree(fd, &lp->sl_kbyts, &lp->sl_kidxs, FALSE, 0);
    if (res != 0)
        goto someerror;

    /* <PREFIXTREE> */
    res = spell_read_tree(fd, &lp->sl_pbyts, &lp->sl_pidxs, TRUE,
                                                            lp->sl_prefixcnt);
    if (res != 0)
        goto someerror;

    /* For a new file link it in the list of spell files.  Not done when
     * "lang" is NULL (file read back after writing it). */
    if (old_lp == NULL && lang != NULL)
    {
        lp->sl_next = first_lang;
        first_lang = lp;
    }

    goto endOK;

endFAIL:
    if (lang != NULL)
        /* truncating the name signals the error to spell_load_lang() */
        *lang = NUL;
    /* Only free what was allocated here, "old_lp" belongs to the caller. */
    if (lp != NULL && old_lp == NULL)
        slang_free(lp);
    lp = NULL;

endOK:
    /* Common exit: "fd" is NULL when opening the file failed. */
    if (fd != NULL)
        fclose(fd);
    sourcing_name = save_sourcing_name;
    sourcing_lnum = save_sourcing_lnum;

    return lp;
}
2846
/*
 * Read 2 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is encountered before
 * both bytes were read.  A valid result is never negative, thus callers can
 * check for "< 0" to detect a truncated file.
 */
    static int
get2c(fd)
    FILE        *fd;
{
    int         hi;
    int         lo;

    /* Check every byte: adding EOF (-1) to the shifted value would give a
     * wrong but positive number when only the second byte is missing. */
    hi = getc(fd);
    if (hi == EOF)
        return -1;
    lo = getc(fd);
    if (lo == EOF)
        return -1;
    return (hi << 8) + lo;
}
2860
/*
 * Read 3 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is encountered before
 * all three bytes were read.  A valid result is never negative.
 */
    static int
get3c(fd)
    FILE        *fd;
{
    int         n = 0;
    int         c;
    int         i;

    for (i = 0; i < 3; ++i)
    {
        /* Check every byte, a missing byte must not end up in the result. */
        c = getc(fd);
        if (c == EOF)
            return -1;
        n = (n << 8) + c;
    }
    return n;
}
2875
/*
 * Read 4 bytes from "fd" and turn them into an int, MSB first.
 * Returns -1 when the end of the file (or a read error) is encountered before
 * all four bytes were read.
 * The bytes are collected in an unsigned number, so that shifting a bit into
 * the top position is well defined.  A value that doesn't fit in an int is
 * converted; on the usual systems that gives a negative number, which callers
 * checking for "< 0" treat as an error as well.
 */
    static int
get4c(fd)
    FILE        *fd;
{
    unsigned    n = 0;
    int         c;
    int         i;

    for (i = 0; i < 4; ++i)
    {
        /* Check every byte, a missing byte must not end up in the result. */
        c = getc(fd);
        if (c == EOF)
            return -1;
        n = (n << 8) + (unsigned)c;
    }
    return (int)n;
}
2891
/*
 * Read 8 bytes from "fd" and turn them into a time_t, MSB first.
 * Returns (time_t)-1 when the end of the file (or a read error) is
 * encountered before all eight bytes were read.
 */
    static time_t
get8c(fd)
    FILE        *fd;
{
    time_t      n = 0;
    int         c;
    int         i;

    for (i = 0; i < 8; ++i)
    {
        /* Check every byte, a missing byte must not end up in the result. */
        c = getc(fd);
        if (c == EOF)
            return (time_t)-1;
        n = (n << 8) + c;
    }
    return n;
}
2906
2907 /*
2908  * Read a length field from "fd" in "cnt_bytes" bytes.
2909  * Allocate memory, read the string into it and add a NUL at the end.
2910  * Returns NULL when the count is zero.
2911  * Sets "*cntp" to SP_*ERROR when there is an error, length of the result
2912  * otherwise.
2913  */
2914     static char_u *
2915 read_cnt_string(fd, cnt_bytes, cntp)
2916     FILE        *fd;
2917     int         cnt_bytes;
2918     int         *cntp;
2919 {
2920     int         cnt = 0;
2921     int         i;
2922     char_u      *str;
2923
2924     /* read the length bytes, MSB first */
2925     for (i = 0; i < cnt_bytes; ++i)
2926         cnt = (cnt << 8) + getc(fd);
2927     if (cnt < 0)
2928     {
2929         *cntp = SP_TRUNCERROR;
2930         return NULL;
2931     }
2932     *cntp = cnt;
2933     if (cnt == 0)
2934         return NULL;        /* nothing to read, return NULL */
2935
2936     str = read_string(fd, cnt);
2937     if (str == NULL)
2938         *cntp = SP_OTHERERROR;
2939     return str;
2940 }
2941
2942 /*
2943  * Read a string of length "cnt" from "fd" into allocated memory.
2944  * Returns NULL when out of memory or unable to read that many bytes.
2945  */
2946     static char_u *
2947 read_string(fd, cnt)
2948     FILE        *fd;
2949     int         cnt;
2950 {
2951     char_u      *str;
2952     int         i;
2953     int         c;
2954
2955     /* allocate memory */
2956     str = alloc((unsigned)cnt + 1);
2957     if (str != NULL)
2958     {
2959         /* Read the string.  Quit when running into the EOF. */
2960         for (i = 0; i < cnt; ++i)
2961         {
2962             c = getc(fd);
2963             if (c == EOF)
2964             {
2965                 vim_free(str);
2966                 return NULL;
2967             }
2968             str[i] = c;
2969         }
2970         str[i] = NUL;
2971     }
2972     return str;
2973 }
2974
2975 /*
2976  * Read SN_REGION: <regionname> ...
2977  * Return SP_*ERROR flags.
2978  */
2979     static int
2980 read_region_section(fd, lp, len)
2981     FILE        *fd;
2982     slang_T     *lp;
2983     int         len;
2984 {
2985     int         i;
2986
2987     if (len > 16)
2988         return SP_FORMERROR;
2989     for (i = 0; i < len; ++i)
2990         lp->sl_regions[i] = getc(fd);                   /* <regionname> */
2991     lp->sl_regions[len] = NUL;
2992     return 0;
2993 }
2994
2995 /*
2996  * Read SN_CHARFLAGS section: <charflagslen> <charflags>
2997  *                              <folcharslen> <folchars>
2998  * Return SP_*ERROR flags.
2999  */
3000     static int
3001 read_charflags_section(fd)
3002     FILE        *fd;
3003 {
3004     char_u      *flags;
3005     char_u      *fol;
3006     int         flagslen, follen;
3007
3008     /* <charflagslen> <charflags> */
3009     flags = read_cnt_string(fd, 1, &flagslen);
3010     if (flagslen < 0)
3011         return flagslen;
3012
3013     /* <folcharslen> <folchars> */
3014     fol = read_cnt_string(fd, 2, &follen);
3015     if (follen < 0)
3016     {
3017         vim_free(flags);
3018         return follen;
3019     }
3020
3021     /* Set the word-char flags and fill SPELL_ISUPPER() table. */
3022     if (flags != NULL && fol != NULL)
3023         set_spell_charflags(flags, flagslen, fol);
3024
3025     vim_free(flags);
3026     vim_free(fol);
3027
3028     /* When <charflagslen> is zero then <fcharlen> must also be zero. */
3029     if ((flags == NULL) != (fol == NULL))
3030         return SP_FORMERROR;
3031     return 0;
3032 }
3033
3034 /*
3035  * Read SN_PREFCOND section.
3036  * Return SP_*ERROR flags.
3037  */
3038     static int
3039 read_prefcond_section(fd, lp)
3040     FILE        *fd;
3041     slang_T     *lp;
3042 {
3043     int         cnt;
3044     int         i;
3045     int         n;
3046     char_u      *p;
3047     char_u      buf[MAXWLEN + 1];
3048
3049     /* <prefcondcnt> <prefcond> ... */
3050     cnt = get2c(fd);                                    /* <prefcondcnt> */
3051     if (cnt <= 0)
3052         return SP_FORMERROR;
3053
3054     lp->sl_prefprog = (regprog_T **)alloc_clear(
3055                                          (unsigned)sizeof(regprog_T *) * cnt);
3056     if (lp->sl_prefprog == NULL)
3057         return SP_OTHERERROR;
3058     lp->sl_prefixcnt = cnt;
3059
3060     for (i = 0; i < cnt; ++i)
3061     {
3062         /* <prefcond> : <condlen> <condstr> */
3063         n = getc(fd);                                   /* <condlen> */
3064         if (n < 0 || n >= MAXWLEN)
3065             return SP_FORMERROR;
3066
3067         /* When <condlen> is zero we have an empty condition.  Otherwise
3068          * compile the regexp program used to check for the condition. */
3069         if (n > 0)
3070         {
3071             buf[0] = '^';           /* always match at one position only */
3072             p = buf + 1;
3073             while (n-- > 0)
3074                 *p++ = getc(fd);                        /* <condstr> */
3075             *p = NUL;
3076             lp->sl_prefprog[i] = vim_regcomp(buf, RE_MAGIC + RE_STRING);
3077         }
3078     }
3079     return 0;
3080 }
3081
3082 /*
3083  * Read REP or REPSAL items section from "fd": <repcount> <rep> ...
3084  * Return SP_*ERROR flags.
3085  */
3086     static int
3087 read_rep_section(fd, gap, first)
3088     FILE        *fd;
3089     garray_T    *gap;
3090     short       *first;
3091 {
3092     int         cnt;
3093     fromto_T    *ftp;
3094     int         i;
3095
3096     cnt = get2c(fd);                                    /* <repcount> */
3097     if (cnt < 0)
3098         return SP_TRUNCERROR;
3099
3100     if (ga_grow(gap, cnt) == FAIL)
3101         return SP_OTHERERROR;
3102
3103     /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
3104     for (; gap->ga_len < cnt; ++gap->ga_len)
3105     {
3106         ftp = &((fromto_T *)gap->ga_data)[gap->ga_len];
3107         ftp->ft_from = read_cnt_string(fd, 1, &i);
3108         if (i < 0)
3109             return i;
3110         if (i == 0)
3111             return SP_FORMERROR;
3112         ftp->ft_to = read_cnt_string(fd, 1, &i);
3113         if (i <= 0)
3114         {
3115             vim_free(ftp->ft_from);
3116             if (i < 0)
3117                 return i;
3118             return SP_FORMERROR;
3119         }
3120     }
3121
3122     /* Fill the first-index table. */
3123     for (i = 0; i < 256; ++i)
3124         first[i] = -1;
3125     for (i = 0; i < gap->ga_len; ++i)
3126     {
3127         ftp = &((fromto_T *)gap->ga_data)[i];
3128         if (first[*ftp->ft_from] == -1)
3129             first[*ftp->ft_from] = i;
3130     }
3131     return 0;
3132 }
3133
3134 /*
3135  * Read SN_SAL section: <salflags> <salcount> <sal> ...
3136  * Return SP_*ERROR flags.
3137  */
3138     static int
3139 read_sal_section(fd, slang)
3140     FILE        *fd;
3141     slang_T     *slang;
3142 {
3143     int         i;
3144     int         cnt;
3145     garray_T    *gap;
3146     salitem_T   *smp;
3147     int         ccnt;
3148     char_u      *p;
3149     int         c = NUL;
3150
3151     slang->sl_sofo = FALSE;
3152
3153     i = getc(fd);                               /* <salflags> */
3154     if (i & SAL_F0LLOWUP)
3155         slang->sl_followup = TRUE;
3156     if (i & SAL_COLLAPSE)
3157         slang->sl_collapse = TRUE;
3158     if (i & SAL_REM_ACCENTS)
3159         slang->sl_rem_accents = TRUE;
3160
3161     cnt = get2c(fd);                            /* <salcount> */
3162     if (cnt < 0)
3163         return SP_TRUNCERROR;
3164
3165     gap = &slang->sl_sal;
3166     ga_init2(gap, sizeof(salitem_T), 10);
3167     if (ga_grow(gap, cnt + 1) == FAIL)
3168         return SP_OTHERERROR;
3169
3170     /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
3171     for (; gap->ga_len < cnt; ++gap->ga_len)
3172     {
3173         smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
3174         ccnt = getc(fd);                        /* <salfromlen> */
3175         if (ccnt < 0)
3176             return SP_TRUNCERROR;
3177         if ((p = alloc(ccnt + 2)) == NULL)
3178             return SP_OTHERERROR;
3179         smp->sm_lead = p;
3180
3181         /* Read up to the first special char into sm_lead. */
3182         for (i = 0; i < ccnt; ++i)
3183         {
3184             c = getc(fd);                       /* <salfrom> */
3185             if (vim_strchr((char_u *)"0123456789(-<^$", c) != NULL)
3186                 break;
3187             *p++ = c;
3188         }
3189         smp->sm_leadlen = (int)(p - smp->sm_lead);
3190         *p++ = NUL;
3191
3192         /* Put (abc) chars in sm_oneof, if any. */
3193         if (c == '(')
3194         {
3195             smp->sm_oneof = p;
3196             for (++i; i < ccnt; ++i)
3197             {
3198                 c = getc(fd);                   /* <salfrom> */
3199                 if (c == ')')
3200                     break;
3201                 *p++ = c;
3202             }
3203             *p++ = NUL;
3204             if (++i < ccnt)
3205                 c = getc(fd);
3206         }
3207         else
3208             smp->sm_oneof = NULL;
3209
3210         /* Any following chars go in sm_rules. */
3211         smp->sm_rules = p;
3212         if (i < ccnt)
3213             /* store the char we got while checking for end of sm_lead */
3214             *p++ = c;
3215         for (++i; i < ccnt; ++i)
3216             *p++ = getc(fd);                    /* <salfrom> */
3217         *p++ = NUL;
3218
3219         /* <saltolen> <salto> */
3220         smp->sm_to = read_cnt_string(fd, 1, &ccnt);
3221         if (ccnt < 0)
3222         {
3223             vim_free(smp->sm_lead);
3224             return ccnt;
3225         }
3226
3227 #ifdef FEAT_MBYTE
3228         if (has_mbyte)
3229         {
3230             /* convert the multi-byte strings to wide char strings */
3231             smp->sm_lead_w = mb_str2wide(smp->sm_lead);
3232             smp->sm_leadlen = mb_charlen(smp->sm_lead);
3233             if (smp->sm_oneof == NULL)
3234                 smp->sm_oneof_w = NULL;
3235             else
3236                 smp->sm_oneof_w = mb_str2wide(smp->sm_oneof);
3237             if (smp->sm_to == NULL)
3238                 smp->sm_to_w = NULL;
3239             else
3240                 smp->sm_to_w = mb_str2wide(smp->sm_to);
3241             if (smp->sm_lead_w == NULL
3242                     || (smp->sm_oneof_w == NULL && smp->sm_oneof != NULL)
3243                     || (smp->sm_to_w == NULL && smp->sm_to != NULL))
3244             {
3245                 vim_free(smp->sm_lead);
3246                 vim_free(smp->sm_to);
3247                 vim_free(smp->sm_lead_w);
3248                 vim_free(smp->sm_oneof_w);
3249                 vim_free(smp->sm_to_w);
3250                 return SP_OTHERERROR;
3251             }
3252         }
3253 #endif
3254     }
3255
3256     if (gap->ga_len > 0)
3257     {
3258         /* Add one extra entry to mark the end with an empty sm_lead.  Avoids
3259          * that we need to check the index every time. */
3260         smp = &((salitem_T *)gap->ga_data)[gap->ga_len];
3261         if ((p = alloc(1)) == NULL)
3262             return SP_OTHERERROR;
3263         p[0] = NUL;
3264         smp->sm_lead = p;
3265         smp->sm_leadlen = 0;
3266         smp->sm_oneof = NULL;
3267         smp->sm_rules = p;
3268         smp->sm_to = NULL;
3269 #ifdef FEAT_MBYTE
3270         if (has_mbyte)
3271         {
3272             smp->sm_lead_w = mb_str2wide(smp->sm_lead);
3273             smp->sm_leadlen = 0;
3274             smp->sm_oneof_w = NULL;
3275             smp->sm_to_w = NULL;
3276         }
3277 #endif
3278         ++gap->ga_len;
3279     }
3280
3281     /* Fill the first-index table. */
3282     set_sal_first(slang);
3283
3284     return 0;
3285 }
3286
3287 /*
3288  * Read SN_WORDS: <word> ...
3289  * Return SP_*ERROR flags.
3290  */
3291     static int
3292 read_words_section(fd, lp, len)
3293     FILE        *fd;
3294     slang_T     *lp;
3295     int         len;
3296 {
3297     int         done = 0;
3298     int         i;
3299     int         c;
3300     char_u      word[MAXWLEN];
3301
3302     while (done < len)
3303     {
3304         /* Read one word at a time. */
3305         for (i = 0; ; ++i)
3306         {
3307             c = getc(fd);
3308             if (c == EOF)
3309                 return SP_TRUNCERROR;
3310             word[i] = c;
3311             if (word[i] == NUL)
3312                 break;
3313             if (i == MAXWLEN - 1)
3314                 return SP_FORMERROR;
3315         }
3316
3317         /* Init the count to 10. */
3318         count_common_word(lp, word, -1, 10);
3319         done += i + 1;
3320     }
3321     return 0;
3322 }
3323
3324 /*
3325  * Add a word to the hashtable of common words.
3326  * If it's already there then the counter is increased.
3327  */
3328     static void
3329 count_common_word(lp, word, len, count)
3330     slang_T     *lp;
3331     char_u      *word;
3332     int         len;        /* word length, -1 for upto NUL */
3333     int         count;      /* 1 to count once, 10 to init */
3334 {
3335     hash_T      hash;
3336     hashitem_T  *hi;
3337     wordcount_T *wc;
3338     char_u      buf[MAXWLEN];
3339     char_u      *p;
3340
3341     if (len == -1)
3342         p = word;
3343     else
3344     {
3345         vim_strncpy(buf, word, len);
3346         p = buf;
3347     }
3348
3349     hash = hash_hash(p);
3350     hi = hash_lookup(&lp->sl_wordcount, p, hash);
3351     if (HASHITEM_EMPTY(hi))
3352     {
3353         wc = (wordcount_T *)alloc((unsigned)(sizeof(wordcount_T) + STRLEN(p)));
3354         if (wc == NULL)
3355             return;
3356         STRCPY(wc->wc_word, p);
3357         wc->wc_count = count;
3358         hash_add_item(&lp->sl_wordcount, hi, wc->wc_word, hash);
3359     }
3360     else
3361     {
3362         wc = HI2WC(hi);
3363         if ((wc->wc_count += count) < (unsigned)count)  /* check for overflow */
3364             wc->wc_count = MAXWORDCOUNT;
3365     }
3366 }
3367
3368 /*
3369  * Adjust the score of common words.
3370  */
3371     static int
3372 score_wordcount_adj(slang, score, word, split)
3373     slang_T     *slang;
3374     int         score;
3375     char_u      *word;
3376     int         split;      /* word was split, less bonus */
3377 {
3378     hashitem_T  *hi;
3379     wordcount_T *wc;
3380     int         bonus;
3381     int         newscore;
3382
3383     hi = hash_find(&slang->sl_wordcount, word);
3384     if (!HASHITEM_EMPTY(hi))
3385     {
3386         wc = HI2WC(hi);
3387         if (wc->wc_count < SCORE_THRES2)
3388             bonus = SCORE_COMMON1;
3389         else if (wc->wc_count < SCORE_THRES3)
3390             bonus = SCORE_COMMON2;
3391         else
3392             bonus = SCORE_COMMON3;
3393         if (split)
3394             newscore = score - bonus / 2;
3395         else
3396             newscore = score - bonus;
3397         if (newscore < 0)
3398             return 0;
3399         return newscore;
3400     }
3401     return score;
3402 }
3403
3404 /*
3405  * SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
3406  * Return SP_*ERROR flags.
3407  */
3408     static int
3409 read_sofo_section(fd, slang)
3410     FILE        *fd;
3411     slang_T     *slang;
3412 {
3413     int         cnt;
3414     char_u      *from, *to;
3415     int         res;
3416
3417     slang->sl_sofo = TRUE;
3418
3419     /* <sofofromlen> <sofofrom> */
3420     from = read_cnt_string(fd, 2, &cnt);
3421     if (cnt < 0)
3422         return cnt;
3423
3424     /* <sofotolen> <sofoto> */
3425     to = read_cnt_string(fd, 2, &cnt);
3426     if (cnt < 0)
3427     {
3428         vim_free(from);
3429         return cnt;
3430     }
3431
3432     /* Store the info in slang->sl_sal and/or slang->sl_sal_first. */
3433     if (from != NULL && to != NULL)
3434         res = set_sofo(slang, from, to);
3435     else if (from != NULL || to != NULL)
3436         res = SP_FORMERROR;    /* only one of two strings is an error */
3437     else
3438         res = 0;
3439
3440     vim_free(from);
3441     vim_free(to);
3442     return res;
3443 }
3444
3445 /*
3446  * Read the compound section from the .spl file:
3447  *      <compmax> <compminlen> <compsylmax> <compoptions> <compflags>
3448  * Returns SP_*ERROR flags.
3449  */
3450     static int
3451 read_compound(fd, slang, len)
3452     FILE        *fd;
3453     slang_T     *slang;
3454     int         len;
3455 {
3456     int         todo = len;
3457     int         c;
3458     int         atstart;
3459     char_u      *pat;
3460     char_u      *pp;
3461     char_u      *cp;
3462     char_u      *ap;
3463     int         cnt;
3464     garray_T    *gap;
3465
3466     if (todo < 2)
3467         return SP_FORMERROR;    /* need at least two bytes */
3468
3469     --todo;
3470     c = getc(fd);                                       /* <compmax> */
3471     if (c < 2)
3472         c = MAXWLEN;
3473     slang->sl_compmax = c;
3474
3475     --todo;
3476     c = getc(fd);                                       /* <compminlen> */
3477     if (c < 1)
3478         c = 0;
3479     slang->sl_compminlen = c;
3480
3481     --todo;
3482     c = getc(fd);                                       /* <compsylmax> */
3483     if (c < 1)
3484         c = MAXWLEN;
3485     slang->sl_compsylmax = c;
3486
3487     c = getc(fd);                                       /* <compoptions> */
3488     if (c != 0)
3489         ungetc(c, fd);      /* be backwards compatible with Vim 7.0b */
3490     else
3491     {
3492         --todo;
3493         c = getc(fd);       /* only use the lower byte for now */
3494         --todo;
3495         slang->sl_compoptions = c;
3496
3497         gap = &slang->sl_comppat;
3498         c = get2c(fd);                                  /* <comppatcount> */
3499         todo -= 2;
3500         ga_init2(gap, sizeof(char_u *), c);
3501         if (ga_grow(gap, c) == OK)
3502             while (--c >= 0)
3503             {
3504                 ((char_u **)(gap->ga_data))[gap->ga_len++] =
3505                                                  read_cnt_string(fd, 1, &cnt);
3506                                             /* <comppatlen> <comppattext> */
3507                 if (cnt < 0)
3508                     return cnt;
3509                 todo -= cnt + 1;
3510             }
3511     }
3512     if (todo < 0)
3513         return SP_FORMERROR;
3514
3515     /* Turn the COMPOUNDRULE items into a regexp pattern:
3516      * "a[bc]/a*b+" -> "^\(a[bc]\|a*b\+\)$".
3517      * Inserting backslashes may double the length, "^\(\)$<Nul>" is 7 bytes.
3518      * Conversion to utf-8 may double the size. */
3519     c = todo * 2 + 7;
3520 #ifdef FEAT_MBYTE
3521     if (enc_utf8)
3522         c += todo * 2;
3523 #endif
3524     pat = alloc((unsigned)c);
3525     if (pat == NULL)
3526         return SP_OTHERERROR;
3527
3528     /* We also need a list of all flags that can appear at the start and one
3529      * for all flags. */
3530     cp = alloc(todo + 1);
3531     if (cp == NULL)
3532     {
3533         vim_free(pat);
3534         return SP_OTHERERROR;
3535     }
3536     slang->sl_compstartflags = cp;
3537     *cp = NUL;
3538
3539     ap = alloc(todo + 1);
3540     if (ap == NULL)
3541     {
3542         vim_free(pat);
3543         return SP_OTHERERROR;
3544     }
3545     slang->sl_compallflags = ap;
3546     *ap = NUL;
3547
3548     pp = pat;
3549     *pp++ = '^';
3550     *pp++ = '\\';
3551     *pp++ = '(';
3552
3553     atstart = 1;
3554     while (todo-- > 0)
3555     {
3556         c = getc(fd);                                   /* <compflags> */
3557         if (c == EOF)
3558         {
3559             vim_free(pat);
3560             return SP_TRUNCERROR;
3561         }
3562
3563         /* Add all flags to "sl_compallflags". */
3564         if (vim_strchr((char_u *)"+*[]/", c) == NULL
3565                 && !byte_in_str(slang->sl_compallflags, c))
3566         {
3567             *ap++ = c;
3568             *ap = NUL;
3569         }
3570
3571         if (atstart != 0)
3572         {
3573             /* At start of item: copy flags to "sl_compstartflags".  For a
3574              * [abc] item set "atstart" to 2 and copy up to the ']'. */
3575             if (c == '[')
3576                 atstart = 2;
3577             else if (c == ']')
3578                 atstart = 0;
3579             else
3580             {
3581                 if (!byte_in_str(slang->sl_compstartflags, c))
3582                 {
3583                     *cp++ = c;
3584                     *cp = NUL;
3585                 }
3586                 if (atstart == 1)
3587                     atstart = 0;
3588             }
3589         }
3590         if (c == '/')       /* slash separates two items */
3591         {
3592             *pp++ = '\\';
3593             *pp++ = '|';
3594             atstart = 1;
3595         }
3596         else                /* normal char, "[abc]" and '*' are copied as-is */
3597         {
3598             if (c == '+' || c == '~')
3599                 *pp++ = '\\';       /* "a+" becomes "a\+" */
3600 #ifdef FEAT_MBYTE
3601             if (enc_utf8)
3602                 pp += mb_char2bytes(c, pp);
3603             else
3604 #endif
3605                 *pp++ = c;
3606         }
3607     }
3608
3609     *pp++ = '\\';
3610     *pp++ = ')';
3611     *pp++ = '$';
3612     *pp = NUL;
3613
3614     slang->sl_compprog = vim_regcomp(pat, RE_MAGIC + RE_STRING + RE_STRICT);
3615     vim_free(pat);
3616     if (slang->sl_compprog == NULL)
3617         return SP_FORMERROR;
3618
3619     return 0;
3620 }
3621
3622 /*
3623  * Return TRUE if byte "n" appears in "str".
3624  * Like strchr() but independent of locale.
3625  */
3626     static int
3627 byte_in_str(str, n)
3628     char_u      *str;
3629     int         n;
3630 {
3631     char_u      *p;
3632
3633     for (p = str; *p != NUL; ++p)
3634         if (*p == n)
3635             return TRUE;
3636     return FALSE;
3637 }
3638
3639 #define SY_MAXLEN   30
3640 typedef struct syl_item_S
3641 {
3642     char_u      sy_chars[SY_MAXLEN];        /* the sequence of chars */
3643     int         sy_len;
3644 } syl_item_T;
3645
3646 /*
3647  * Truncate "slang->sl_syllable" at the first slash and put the following items
3648  * in "slang->sl_syl_items".
3649  */
3650     static int
3651 init_syl_tab(slang)
3652     slang_T     *slang;
3653 {
3654     char_u      *p;
3655     char_u      *s;
3656     int         l;
3657     syl_item_T  *syl;
3658
3659     ga_init2(&slang->sl_syl_items, sizeof(syl_item_T), 4);
3660     p = vim_strchr(slang->sl_syllable, '/');
3661     while (p != NULL)
3662     {
3663         *p++ = NUL;
3664         if (*p == NUL)      /* trailing slash */
3665             break;
3666         s = p;
3667         p = vim_strchr(p, '/');
3668         if (p == NULL)
3669             l = (int)STRLEN(s);
3670         else
3671             l = (int)(p - s);
3672         if (l >= SY_MAXLEN)
3673             return SP_FORMERROR;
3674         if (ga_grow(&slang->sl_syl_items, 1) == FAIL)
3675             return SP_OTHERERROR;
3676         syl = ((syl_item_T *)slang->sl_syl_items.ga_data)
3677                                                + slang->sl_syl_items.ga_len++;
3678         vim_strncpy(syl->sy_chars, s, l);
3679         syl->sy_len = l;
3680     }
3681     return OK;
3682 }
3683
3684 /*
3685  * Count the number of syllables in "word".
3686  * When "word" contains spaces the syllables after the last space are counted.
3687  * Returns zero if syllables are not defines.
3688  */
3689     static int
3690 count_syllables(slang, word)
3691     slang_T     *slang;
3692     char_u      *word;
3693 {
3694     int         cnt = 0;
3695     int         skip = FALSE;
3696     char_u      *p;
3697     int         len;
3698     int         i;
3699     syl_item_T  *syl;
3700     int         c;
3701
3702     if (slang->sl_syllable == NULL)
3703         return 0;
3704
3705     for (p = word; *p != NUL; p += len)
3706     {
3707         /* When running into a space reset counter. */
3708         if (*p == ' ')
3709         {
3710             len = 1;
3711             cnt = 0;
3712             continue;
3713         }
3714
3715         /* Find longest match of syllable items. */
3716         len = 0;
3717         for (i = 0; i < slang->sl_syl_items.ga_len; ++i)
3718         {
3719             syl = ((syl_item_T *)slang->sl_syl_items.ga_data) + i;
3720             if (syl->sy_len > len
3721                                && STRNCMP(p, syl->sy_chars, syl->sy_len) == 0)
3722                 len = syl->sy_len;
3723         }
3724         if (len != 0)   /* found a match, count syllable  */
3725         {
3726             ++cnt;
3727             skip = FALSE;
3728         }
3729         else
3730         {
3731             /* No recognized syllable item, at least a syllable char then? */
3732 #ifdef FEAT_MBYTE
3733             c = mb_ptr2char(p);
3734             len = (*mb_ptr2len)(p);
3735 #else
3736             c = *p;
3737             len = 1;
3738 #endif
3739             if (vim_strchr(slang->sl_syllable, c) == NULL)
3740                 skip = FALSE;       /* No, search for next syllable */
3741             else if (!skip)
3742             {
3743                 ++cnt;              /* Yes, count it */
3744                 skip = TRUE;        /* don't count following syllable chars */
3745             }
3746         }
3747     }
3748     return cnt;
3749 }
3750
3751 /*
3752  * Set the SOFOFROM and SOFOTO items in language "lp".
3753  * Returns SP_*ERROR flags when there is something wrong.
3754  */
3755     static int
3756 set_sofo(lp, from, to)
3757     slang_T     *lp;
3758     char_u      *from;
3759     char_u      *to;
3760 {
3761     int         i;
3762
3763 #ifdef FEAT_MBYTE
3764     garray_T    *gap;
3765     char_u      *s;
3766     char_u      *p;
3767     int         c;
3768     int         *inp;
3769
3770     if (has_mbyte)
3771     {
3772         /* Use "sl_sal" as an array with 256 pointers to a list of wide
3773          * characters.  The index is the low byte of the character.
3774          * The list contains from-to pairs with a terminating NUL.
3775          * sl_sal_first[] is used for latin1 "from" characters. */
3776         gap = &lp->sl_sal;
3777         ga_init2(gap, sizeof(int *), 1);
3778         if (ga_grow(gap, 256) == FAIL)
3779             return SP_OTHERERROR;
3780         vim_memset(gap->ga_data, 0, sizeof(int *) * 256);
3781         gap->ga_len = 256;
3782
3783         /* First count the number of items for each list.  Temporarily use
3784          * sl_sal_first[] for this. */
3785         for (p = from, s = to; *p != NUL && *s != NUL; )
3786         {
3787             c = mb_cptr2char_adv(&p);
3788             mb_cptr_adv(s);
3789             if (c >= 256)
3790                 ++lp->sl_sal_first[c & 0xff];
3791         }
3792         if (*p != NUL || *s != NUL)         /* lengths differ */
3793             return SP_FORMERROR;
3794
3795         /* Allocate the lists. */
3796         for (i = 0; i < 256; ++i)
3797             if (lp->sl_sal_first[i] > 0)
3798             {
3799                 p = alloc(sizeof(int) * (lp->sl_sal_first[i] * 2 + 1));
3800                 if (p == NULL)
3801                     return SP_OTHERERROR;
3802                 ((int **)gap->ga_data)[i] = (int *)p;
3803                 *(int *)p = 0;
3804             }
3805
3806         /* Put the characters up to 255 in sl_sal_first[] the rest in a sl_sal
3807          * list. */
3808         vim_memset(lp->sl_sal_first, 0, sizeof(salfirst_T) * 256);
3809         for (p = from, s = to; *p != NUL && *s != NUL; )
3810         {
3811             c = mb_cptr2char_adv(&p);
3812             i = mb_cptr2char_adv(&s);
3813             if (c >= 256)
3814             {
3815                 /* Append the from-to chars at the end of the list with
3816                  * the low byte. */
3817                 inp = ((int **)gap->ga_data)[c & 0xff];
3818                 while (*inp != 0)
3819                     ++inp;
3820                 *inp++ = c;             /* from char */
3821                 *inp++ = i;             /* to char */
3822                 *inp++ = NUL;           /* NUL at the end */
3823             }
3824             else
3825                 /* mapping byte to char is done in sl_sal_first[] */
3826                 lp->sl_sal_first[c] = i;
3827         }
3828     }
3829     else
3830 #endif
3831     {
3832         /* mapping bytes to bytes is done in sl_sal_first[] */
3833         if (STRLEN(from) != STRLEN(to))
3834             return SP_FORMERROR;
3835
3836         for (i = 0; to[i] != NUL; ++i)
3837             lp->sl_sal_first[from[i]] = to[i];
3838         lp->sl_sal.ga_len = 1;          /* indicates we have soundfolding */
3839     }
3840
3841     return 0;
3842 }
3843
3844 /*
3845  * Fill the first-index table for "lp".
3846  */
3847     static void
3848 set_sal_first(lp)
3849     slang_T     *lp;
3850 {
3851     salfirst_T  *sfirst;
3852     int         i;
3853     salitem_T   *smp;
3854     int         c;
3855     garray_T    *gap = &lp->sl_sal;
3856
3857     sfirst = lp->sl_sal_first;
3858     for (i = 0; i < 256; ++i)
3859         sfirst[i] = -1;
3860     smp = (salitem_T *)gap->ga_data;
3861     for (i = 0; i < gap->ga_len; ++i)
3862     {
3863 #ifdef FEAT_MBYTE
3864         if (has_mbyte)
3865             /* Use the lowest byte of the first character.  For latin1 it's
3866              * the character, for other encodings it should differ for most
3867              * characters. */
3868             c = *smp[i].sm_lead_w & 0xff;
3869         else
3870 #endif
3871             c = *smp[i].sm_lead;
3872         if (sfirst[c] == -1)
3873         {
3874             sfirst[c] = i;
3875 #ifdef FEAT_MBYTE
3876             if (has_mbyte)
3877             {
3878                 int             n;
3879
3880                 /* Make sure all entries with this byte are following each
3881                  * other.  Move the ones that are in the wrong position.  Do
3882                  * keep the same ordering! */
3883                 while (i + 1 < gap->ga_len
3884                                        && (*smp[i + 1].sm_lead_w & 0xff) == c)
3885                     /* Skip over entry with same index byte. */
3886                     ++i;
3887
3888                 for (n = 1; i + n < gap->ga_len; ++n)
3889                     if ((*smp[i + n].sm_lead_w & 0xff) == c)
3890                     {
3891                         salitem_T  tsal;
3892
3893                         /* Move entry with same index byte after the entries
3894                          * we already found. */
3895                         ++i;
3896                         --n;
3897                         tsal = smp[i + n];
3898                         mch_memmove(smp + i + 1, smp + i,
3899                                                        sizeof(salitem_T) * n);
3900                         smp[i] = tsal;
3901                     }
3902             }
3903 #endif
3904         }
3905     }
3906 }
3907
#ifdef FEAT_MBYTE
/*
 * Turn the multi-byte string "s" into a wide character string: an array of
 * int with one entry for each character and a terminating NUL.
 * Return it in allocated memory (NULL for out-of-memory)
 */
    static int *
mb_str2wide(s)
    char_u      *s;
{
    int         *wide;
    int         *wp;
    char_u      *src = s;

    wide = (int *)alloc(sizeof(int) * (mb_charlen(s) + 1));
    if (wide == NULL)
        return NULL;

    wp = wide;
    while (*src != NUL)
        *wp++ = mb_ptr2char_adv(&src);
    *wp = NUL;
    return wide;
}
#endif
3931
3932 /*
3933  * Read a tree from the .spl or .sug file.
3934  * Allocates the memory and stores pointers in "bytsp" and "idxsp".
3935  * This is skipped when the tree has zero length.
3936  * Returns zero when OK, SP_ value for an error.
3937  */
3938     static int
3939 spell_read_tree(fd, bytsp, idxsp, prefixtree, prefixcnt)
3940     FILE        *fd;
3941     char_u      **bytsp;
3942     idx_T       **idxsp;
3943     int         prefixtree;     /* TRUE for the prefix tree */
3944     int         prefixcnt;      /* when "prefixtree" is TRUE: prefix count */
3945 {
3946     int         len;
3947     int         idx;
3948     char_u      *bp;
3949     idx_T       *ip;
3950
3951     /* The tree size was computed when writing the file, so that we can
3952      * allocate it as one long block. <nodecount> */
3953     len = get4c(fd);
3954     if (len < 0)
3955         return SP_TRUNCERROR;
3956     if (len > 0)
3957     {
3958         /* Allocate the byte array. */
3959         bp = lalloc((long_u)len, TRUE);
3960         if (bp == NULL)
3961             return SP_OTHERERROR;
3962         *bytsp = bp;
3963
3964         /* Allocate the index array. */
3965         ip = (idx_T *)lalloc_clear((long_u)(len * sizeof(int)), TRUE);
3966         if (ip == NULL)
3967             return SP_OTHERERROR;
3968         *idxsp = ip;
3969
3970         /* Recursively read the tree and store it in the array. */
3971         idx = read_tree_node(fd, bp, ip, len, 0, prefixtree, prefixcnt);
3972         if (idx < 0)
3973             return idx;
3974     }
3975     return 0;
3976 }
3977
3978 /*
3979  * Read one row of siblings from the spell file and store it in the byte array
3980  * "byts" and index array "idxs".  Recursively read the children.
3981  *
3982  * NOTE: The code here must match put_node()!
3983  *
3984  * Returns the index (>= 0) following the siblings.
3985  * Returns SP_TRUNCERROR if the file is shorter than expected.
3986  * Returns SP_FORMERROR if there is a format error.
3987  */
3988     static idx_T
3989 read_tree_node(fd, byts, idxs, maxidx, startidx, prefixtree, maxprefcondnr)
3990     FILE        *fd;
3991     char_u      *byts;
3992     idx_T       *idxs;
3993     int         maxidx;             /* size of arrays */
3994     idx_T       startidx;           /* current index in "byts" and "idxs" */
3995     int         prefixtree;         /* TRUE for reading PREFIXTREE */
3996     int         maxprefcondnr;      /* maximum for <prefcondnr> */
3997 {
3998     int         len;
3999     int         i;
4000     int         n;
4001     idx_T       idx = startidx;
4002     int         c;
4003     int         c2;
4004 #define SHARED_MASK     0x8000000
4005
4006     len = getc(fd);                                     /* <siblingcount> */
4007     if (len <= 0)
4008         return SP_TRUNCERROR;
4009
4010     if (startidx + len >= maxidx)
4011         return SP_FORMERROR;
4012     byts[idx++] = len;
4013
4014     /* Read the byte values, flag/region bytes and shared indexes. */
4015     for (i = 1; i <= len; ++i)
4016     {
4017         c = getc(fd);                                   /* <byte> */
4018         if (c < 0)
4019             return SP_TRUNCERROR;
4020         if (c <= BY_SPECIAL)
4021         {
4022             if (c == BY_NOFLAGS && !prefixtree)
4023             {
4024                 /* No flags, all regions. */
4025                 idxs[idx] = 0;
4026                 c = 0;
4027             }
4028             else if (c != BY_INDEX)
4029             {
4030                 if (prefixtree)
4031                 {
4032                     /* Read the optional pflags byte, the prefix ID and the
4033                      * condition nr.  In idxs[] store the prefix ID in the low
4034                      * byte, the condition index shifted up 8 bits, the flags
4035                      * shifted up 24 bits. */
4036                     if (c == BY_FLAGS)
4037                         c = getc(fd) << 24;             /* <pflags> */
4038                     else
4039                         c = 0;
4040
4041                     c |= getc(fd);                      /* <affixID> */
4042
4043                     n = get2c(fd);                      /* <prefcondnr> */
4044                     if (n >= maxprefcondnr)
4045                         return SP_FORMERROR;
4046                     c |= (n << 8);
4047                 }
4048                 else /* c must be BY_FLAGS or BY_FLAGS2 */
4049                 {
4050                     /* Read flags and optional region and prefix ID.  In
4051                      * idxs[] the flags go in the low two bytes, region above
4052                      * that and prefix ID above the region. */
4053                     c2 = c;
4054                     c = getc(fd);                       /* <flags> */
4055                     if (c2 == BY_FLAGS2)
4056                         c = (getc(fd) << 8) + c;        /* <flags2> */
4057                     if (c & WF_REGION)
4058                         c = (getc(fd) << 16) + c;       /* <region> */
4059                     if (c & WF_AFX)
4060                         c = (getc(fd) << 24) + c;       /* <affixID> */
4061                 }
4062
4063                 idxs[idx] = c;
4064                 c = 0;
4065             }
4066             else /* c == BY_INDEX */
4067             {
4068                                                         /* <nodeidx> */
4069                 n = get3c(fd);
4070                 if (n < 0 || n >= maxidx)
4071                     return SP_FORMERROR;
4072                 idxs[idx] = n + SHARED_MASK;
4073                 c = getc(fd);                           /* <xbyte> */
4074             }
4075         }
4076         byts[idx++] = c;
4077     }
4078
4079     /* Recursively read the children for non-shared siblings.
4080      * Skip the end-of-word ones (zero byte value) and the shared ones (and
4081      * remove SHARED_MASK) */
4082     for (i = 1; i <= len; ++i)
4083         if (byts[startidx + i] != 0)
4084         {
4085             if (idxs[startidx + i] & SHARED_MASK)
4086                 idxs[startidx + i] &= ~SHARED_MASK;
4087             else
4088             {
4089                 idxs[startidx + i] = idx;
4090                 idx = read_tree_node(fd, byts, idxs, maxidx, idx,
4091                                                      prefixtree, maxprefcondnr);
4092                 if (idx < 0)
4093                     break;
4094             }
4095         }
4096
4097     return idx;
4098 }
4099
4100 /*
4101  * Parse 'spelllang' and set buf->b_langp accordingly.
4102  * Returns NULL if it's OK, an error message otherwise.
4103  */
4104     char_u *
4105 did_set_spelllang(buf)
4106     buf_T       *buf;
4107 {
4108     garray_T    ga;
4109     char_u      *splp;
4110     char_u      *region;
4111     char_u      region_cp[3];
4112     int         filename;
4113     int         region_mask;
4114     slang_T     *slang;
4115     int         c;
4116     char_u      lang[MAXWLEN + 1];
4117     char_u      spf_name[MAXPATHL];
4118     int         len;
4119     char_u      *p;
4120     int         round;
4121     char_u      *spf;
4122     char_u      *use_region = NULL;
4123     int         dont_use_region = FALSE;
4124     int         nobreak = FALSE;
4125     int         i, j;
4126     langp_T     *lp, *lp2;
4127     static int  recursive = FALSE;
4128     char_u      *ret_msg = NULL;
4129     char_u      *spl_copy;
4130
4131     /* We don't want to do this recursively.  May happen when a language is
4132      * not available and the SpellFileMissing autocommand opens a new buffer
4133      * in which 'spell' is set. */
4134     if (recursive)
4135         return NULL;
4136     recursive = TRUE;
4137
4138     ga_init2(&ga, sizeof(langp_T), 2);
4139     clear_midword(buf);
4140
4141     /* Make a copy of 'spellang', the SpellFileMissing autocommands may change
4142      * it under our fingers. */
4143     spl_copy = vim_strsave(buf->b_p_spl);
4144     if (spl_copy == NULL)
4145         goto theend;
4146
4147     /* loop over comma separated language names. */
4148     for (splp = spl_copy; *splp != NUL; )
4149     {
4150         /* Get one language name. */
4151         copy_option_part(&splp, lang, MAXWLEN, ",");
4152
4153         region = NULL;
4154         len = (int)STRLEN(lang);
4155
4156         /* If the name ends in ".spl" use it as the name of the spell file.
4157          * If there is a region name let "region" point to it and remove it
4158          * from the name. */
4159         if (len > 4 && fnamecmp(lang + len - 4, ".spl") == 0)
4160         {
4161             filename = TRUE;
4162
4163             /* Locate a region and remove it from the file name. */
4164             p = vim_strchr(gettail(lang), '_');
4165             if (p != NULL && ASCII_ISALPHA(p[1]) && ASCII_ISALPHA(p[2])
4166                                                       && !ASCII_ISALPHA(p[3]))
4167             {
4168                 vim_strncpy(region_cp, p + 1, 2);
4169                 mch_memmove(p, p + 3, len - (p - lang) - 2);
4170                 len -= 3;
4171                 region = region_cp;
4172             }
4173             else
4174                 dont_use_region = TRUE;
4175
4176             /* Check if we loaded this language before. */
4177             for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4178                 if (fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME)
4179                     break;
4180         }
4181         else
4182         {
4183             filename = FALSE;
4184             if (len > 3 && lang[len - 3] == '_')
4185             {
4186                 region = lang + len - 2;
4187                 len -= 3;
4188                 lang[len] = NUL;
4189             }
4190             else
4191                 dont_use_region = TRUE;
4192
4193             /* Check if we loaded this language before. */
4194             for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4195                 if (STRICMP(lang, slang->sl_name) == 0)
4196                     break;
4197         }
4198
4199         if (region != NULL)
4200         {
4201             /* If the region differs from what was used before then don't
4202              * use it for 'spellfile'. */
4203             if (use_region != NULL && STRCMP(region, use_region) != 0)
4204                 dont_use_region = TRUE;
4205             use_region = region;
4206         }
4207
4208         /* If not found try loading the language now. */
4209         if (slang == NULL)
4210         {
4211             if (filename)
4212                 (void)spell_load_file(lang, lang, NULL, FALSE);
4213             else
4214             {
4215                 spell_load_lang(lang);
4216 #ifdef FEAT_AUTOCMD
4217                 /* SpellFileMissing autocommands may do anything, including
4218                  * destroying the buffer we are using... */
4219                 if (!buf_valid(buf))
4220                 {
4221                     ret_msg = (char_u *)"E797: SpellFileMissing autocommand deleted buffer";
4222                     goto theend;
4223                 }
4224 #endif
4225             }
4226         }
4227
4228         /*
4229          * Loop over the languages, there can be several files for "lang".
4230          */
4231         for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4232             if (filename ? fullpathcmp(lang, slang->sl_fname, FALSE) == FPC_SAME
4233                          : STRICMP(lang, slang->sl_name) == 0)
4234             {
4235                 region_mask = REGION_ALL;
4236                 if (!filename && region != NULL)
4237                 {
4238                     /* find region in sl_regions */
4239                     c = find_region(slang->sl_regions, region);
4240                     if (c == REGION_ALL)
4241                     {
4242                         if (slang->sl_add)
4243                         {
4244                             if (*slang->sl_regions != NUL)
4245                                 /* This addition file is for other regions. */
4246                                 region_mask = 0;
4247                         }
4248                         else
4249                             /* This is probably an error.  Give a warning and
4250                              * accept the words anyway. */
4251                             smsg((char_u *)
4252                                     _("Warning: region %s not supported"),
4253                                                                       region);
4254                     }
4255                     else
4256                         region_mask = 1 << c;
4257                 }
4258
4259                 if (region_mask != 0)
4260                 {
4261                     if (ga_grow(&ga, 1) == FAIL)
4262                     {
4263                         ga_clear(&ga);
4264                         ret_msg = e_outofmem;
4265                         goto theend;
4266                     }
4267                     LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
4268                     LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
4269                     ++ga.ga_len;
4270                     use_midword(slang, buf);
4271                     if (slang->sl_nobreak)
4272                         nobreak = TRUE;
4273                 }
4274             }
4275     }
4276
4277     /* round 0: load int_wordlist, if possible.
4278      * round 1: load first name in 'spellfile'.
4279      * round 2: load second name in 'spellfile.
4280      * etc. */
4281     spf = buf->b_p_spf;
4282     for (round = 0; round == 0 || *spf != NUL; ++round)
4283     {
4284         if (round == 0)
4285         {
4286             /* Internal wordlist, if there is one. */
4287             if (int_wordlist == NULL)
4288                 continue;
4289             int_wordlist_spl(spf_name);
4290         }
4291         else
4292         {
4293             /* One entry in 'spellfile'. */
4294             copy_option_part(&spf, spf_name, MAXPATHL - 5, ",");
4295             STRCAT(spf_name, ".spl");
4296
4297             /* If it was already found above then skip it. */
4298             for (c = 0; c < ga.ga_len; ++c)
4299             {
4300                 p = LANGP_ENTRY(ga, c)->lp_slang->sl_fname;
4301                 if (p != NULL && fullpathcmp(spf_name, p, FALSE) == FPC_SAME)
4302                     break;
4303             }
4304             if (c < ga.ga_len)
4305                 continue;
4306         }
4307
4308         /* Check if it was loaded already. */
4309         for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4310             if (fullpathcmp(spf_name, slang->sl_fname, FALSE) == FPC_SAME)
4311                 break;
4312         if (slang == NULL)
4313         {
4314             /* Not loaded, try loading it now.  The language name includes the
4315              * region name, the region is ignored otherwise.  for int_wordlist
4316              * use an arbitrary name. */
4317             if (round == 0)
4318                 STRCPY(lang, "internal wordlist");
4319             else
4320             {
4321                 vim_strncpy(lang, gettail(spf_name), MAXWLEN);
4322                 p = vim_strchr(lang, '.');
4323                 if (p != NULL)
4324                     *p = NUL;   /* truncate at ".encoding.add" */
4325             }
4326             slang = spell_load_file(spf_name, lang, NULL, TRUE);
4327
4328             /* If one of the languages has NOBREAK we assume the addition
4329              * files also have this. */
4330             if (slang != NULL && nobreak)
4331                 slang->sl_nobreak = TRUE;
4332         }
4333         if (slang != NULL && ga_grow(&ga, 1) == OK)
4334         {
4335             region_mask = REGION_ALL;
4336             if (use_region != NULL && !dont_use_region)
4337             {
4338                 /* find region in sl_regions */
4339                 c = find_region(slang->sl_regions, use_region);
4340                 if (c != REGION_ALL)
4341                     region_mask = 1 << c;
4342                 else if (*slang->sl_regions != NUL)
4343                     /* This spell file is for other regions. */
4344                     region_mask = 0;
4345             }
4346
4347             if (region_mask != 0)
4348             {
4349                 LANGP_ENTRY(ga, ga.ga_len)->lp_slang = slang;
4350                 LANGP_ENTRY(ga, ga.ga_len)->lp_sallang = NULL;
4351                 LANGP_ENTRY(ga, ga.ga_len)->lp_replang = NULL;
4352                 LANGP_ENTRY(ga, ga.ga_len)->lp_region = region_mask;
4353                 ++ga.ga_len;
4354                 use_midword(slang, buf);
4355             }
4356         }
4357     }
4358
4359     /* Everything is fine, store the new b_langp value. */
4360     ga_clear(&buf->b_langp);
4361     buf->b_langp = ga;
4362
4363     /* For each language figure out what language to use for sound folding and
4364      * REP items.  If the language doesn't support it itself use another one
4365      * with the same name.  E.g. for "en-math" use "en". */
4366     for (i = 0; i < ga.ga_len; ++i)
4367     {
4368         lp = LANGP_ENTRY(ga, i);
4369
4370         /* sound folding */
4371         if (lp->lp_slang->sl_sal.ga_len > 0)
4372             /* language does sound folding itself */
4373             lp->lp_sallang = lp->lp_slang;
4374         else
4375             /* find first similar language that does sound folding */
4376             for (j = 0; j < ga.ga_len; ++j)
4377             {
4378                 lp2 = LANGP_ENTRY(ga, j);
4379                 if (lp2->lp_slang->sl_sal.ga_len > 0
4380                         && STRNCMP(lp->lp_slang->sl_name,
4381                                               lp2->lp_slang->sl_name, 2) == 0)
4382                 {
4383                     lp->lp_sallang = lp2->lp_slang;
4384                     break;
4385                 }
4386             }
4387
4388         /* REP items */
4389         if (lp->lp_slang->sl_rep.ga_len > 0)
4390             /* language has REP items itself */
4391             lp->lp_replang = lp->lp_slang;
4392         else
4393             /* find first similar language that has REP items */
4394             for (j = 0; j < ga.ga_len; ++j)
4395             {
4396                 lp2 = LANGP_ENTRY(ga, j);
4397                 if (lp2->lp_slang->sl_rep.ga_len > 0
4398                         && STRNCMP(lp->lp_slang->sl_name,
4399                                               lp2->lp_slang->sl_name, 2) == 0)
4400                 {
4401                     lp->lp_replang = lp2->lp_slang;
4402                     break;
4403                 }
4404             }
4405     }
4406
4407 theend:
4408     vim_free(spl_copy);
4409     recursive = FALSE;
4410     return ret_msg;
4411 }
4412
4413 /*
4414  * Clear the midword characters for buffer "buf".
4415  */
4416     static void
4417 clear_midword(buf)
4418     buf_T       *buf;
4419 {
4420     vim_memset(buf->b_spell_ismw, 0, 256);
4421 #ifdef FEAT_MBYTE
4422     vim_free(buf->b_spell_ismw_mb);
4423     buf->b_spell_ismw_mb = NULL;
4424 #endif
4425 }
4426
4427 /*
4428  * Use the "sl_midword" field of language "lp" for buffer "buf".
4429  * They add up to any currently used midword characters.
4430  */
4431     static void
4432 use_midword(lp, buf)
4433     slang_T     *lp;
4434     buf_T       *buf;
4435 {
4436     char_u      *p;
4437
4438     if (lp->sl_midword == NULL)     /* there aren't any */
4439         return;
4440
4441     for (p = lp->sl_midword; *p != NUL; )
4442 #ifdef FEAT_MBYTE
4443         if (has_mbyte)
4444         {
4445             int     c, l, n;
4446             char_u  *bp;
4447
4448             c = mb_ptr2char(p);
4449             l = (*mb_ptr2len)(p);
4450             if (c < 256 && l <= 2)
4451                 buf->b_spell_ismw[c] = TRUE;
4452             else if (buf->b_spell_ismw_mb == NULL)
4453                 /* First multi-byte char in "b_spell_ismw_mb". */
4454                 buf->b_spell_ismw_mb = vim_strnsave(p, l);
4455             else
4456             {
4457                 /* Append multi-byte chars to "b_spell_ismw_mb". */
4458                 n = (int)STRLEN(buf->b_spell_ismw_mb);
4459                 bp = vim_strnsave(buf->b_spell_ismw_mb, n + l);
4460                 if (bp != NULL)
4461                 {
4462                     vim_free(buf->b_spell_ismw_mb);
4463                     buf->b_spell_ismw_mb = bp;
4464                     vim_strncpy(bp + n, p, l);
4465                 }
4466             }
4467             p += l;
4468         }
4469         else
4470 #endif
4471             buf->b_spell_ismw[*p++] = TRUE;
4472 }
4473
4474 /*
4475  * Find the region "region[2]" in "rp" (points to "sl_regions").
4476  * Each region is simply stored as the two characters of it's name.
4477  * Returns the index if found (first is 0), REGION_ALL if not found.
4478  */
4479     static int
4480 find_region(rp, region)
4481     char_u      *rp;
4482     char_u      *region;
4483 {
4484     int         i;
4485
4486     for (i = 0; ; i += 2)
4487     {
4488         if (rp[i] == NUL)
4489             return REGION_ALL;
4490         if (rp[i] == region[0] && rp[i + 1] == region[1])
4491             break;
4492     }
4493     return i / 2;
4494 }
4495
4496 /*
4497  * Return case type of word:
4498  * w word       0
4499  * Word         WF_ONECAP
4500  * W WORD       WF_ALLCAP
4501  * WoRd wOrd    WF_KEEPCAP
4502  */
4503     static int
4504 captype(word, end)
4505     char_u      *word;
4506     char_u      *end;       /* When NULL use up to NUL byte. */
4507 {
4508     char_u      *p;
4509     int         c;
4510     int         firstcap;
4511     int         allcap;
4512     int         past_second = FALSE;    /* past second word char */
4513
4514     /* find first letter */
4515     for (p = word; !spell_iswordp_nmw(p); mb_ptr_adv(p))
4516         if (end == NULL ? *p == NUL : p >= end)
4517             return 0;       /* only non-word characters, illegal word */
4518 #ifdef FEAT_MBYTE
4519     if (has_mbyte)
4520         c = mb_ptr2char_adv(&p);
4521     else
4522 #endif
4523         c = *p++;
4524     firstcap = allcap = SPELL_ISUPPER(c);
4525
4526     /*
4527      * Need to check all letters to find a word with mixed upper/lower.
4528      * But a word with an upper char only at start is a ONECAP.
4529      */
4530     for ( ; end == NULL ? *p != NUL : p < end; mb_ptr_adv(p))
4531         if (spell_iswordp_nmw(p))
4532         {
4533             c = PTR2CHAR(p);
4534             if (!SPELL_ISUPPER(c))
4535             {
4536                 /* UUl -> KEEPCAP */
4537                 if (past_second && allcap)
4538                     return WF_KEEPCAP;
4539                 allcap = FALSE;
4540             }
4541             else if (!allcap)
4542                 /* UlU -> KEEPCAP */
4543                 return WF_KEEPCAP;
4544             past_second = TRUE;
4545         }
4546
4547     if (allcap)
4548         return WF_ALLCAP;
4549     if (firstcap)
4550         return WF_ONECAP;
4551     return 0;
4552 }
4553
4554 /*
4555  * Like captype() but for a KEEPCAP word add ONECAP if the word starts with a
4556  * capital.  So that make_case_word() can turn WOrd into Word.
4557  * Add ALLCAP for "WOrD".
4558  */
4559     static int
4560 badword_captype(word, end)
4561     char_u      *word;
4562     char_u      *end;
4563 {
4564     int         flags = captype(word, end);
4565     int         c;
4566     int         l, u;
4567     int         first;
4568     char_u      *p;
4569
4570     if (flags & WF_KEEPCAP)
4571     {
4572         /* Count the number of UPPER and lower case letters. */
4573         l = u = 0;
4574         first = FALSE;
4575         for (p = word; p < end; mb_ptr_adv(p))
4576         {
4577             c = PTR2CHAR(p);
4578             if (SPELL_ISUPPER(c))
4579             {
4580                 ++u;
4581                 if (p == word)
4582                     first = TRUE;
4583             }
4584             else
4585                 ++l;
4586         }
4587
4588         /* If there are more UPPER than lower case letters suggest an
4589          * ALLCAP word.  Otherwise, if the first letter is UPPER then
4590          * suggest ONECAP.  Exception: "ALl" most likely should be "All",
4591          * require three upper case letters. */
4592         if (u > l && u > 2)
4593             flags |= WF_ALLCAP;
4594         else if (first)
4595             flags |= WF_ONECAP;
4596
4597         if (u >= 2 && l >= 2)   /* maCARONI maCAroni */
4598             flags |= WF_MIXCAP;
4599     }
4600     return flags;
4601 }
4602
4603 # if defined(FEAT_MBYTE) || defined(EXITFREE) || defined(PROTO)
4604 /*
4605  * Free all languages.
4606  */
4607     void
4608 spell_free_all()
4609 {
4610     slang_T     *slang;
4611     buf_T       *buf;
4612     char_u      fname[MAXPATHL];
4613
4614     /* Go through all buffers and handle 'spelllang'. */
4615     for (buf = firstbuf; buf != NULL; buf = buf->b_next)
4616         ga_clear(&buf->b_langp);
4617
4618     while (first_lang != NULL)
4619     {
4620         slang = first_lang;
4621         first_lang = slang->sl_next;
4622         slang_free(slang);
4623     }
4624
4625     if (int_wordlist != NULL)
4626     {
4627         /* Delete the internal wordlist and its .spl file */
4628         mch_remove(int_wordlist);
4629         int_wordlist_spl(fname);
4630         mch_remove(fname);
4631         vim_free(int_wordlist);
4632         int_wordlist = NULL;
4633     }
4634
4635     init_spell_chartab();
4636
4637     vim_free(repl_to);
4638     repl_to = NULL;
4639     vim_free(repl_from);
4640     repl_from = NULL;
4641 }
4642 # endif
4643
4644 # if defined(FEAT_MBYTE) || defined(PROTO)
4645 /*
4646  * Clear all spelling tables and reload them.
4647  * Used after 'encoding' is set and when ":mkspell" was used.
4648  */
4649     void
4650 spell_reload()
4651 {
4652     buf_T       *buf;
4653     win_T       *wp;
4654
4655     /* Initialize the table for spell_iswordp(). */
4656     init_spell_chartab();
4657
4658     /* Unload all allocated memory. */
4659     spell_free_all();
4660
4661     /* Go through all buffers and handle 'spelllang'. */
4662     for (buf = firstbuf; buf != NULL; buf = buf->b_next)
4663     {
4664         /* Only load the wordlists when 'spelllang' is set and there is a
4665          * window for this buffer in which 'spell' is set. */
4666         if (*buf->b_p_spl != NUL)
4667         {
4668             FOR_ALL_WINDOWS(wp)
4669                 if (wp->w_buffer == buf && wp->w_p_spell)
4670                 {
4671                     (void)did_set_spelllang(buf);
4672 # ifdef FEAT_WINDOWS
4673                     break;
4674 # endif
4675                 }
4676         }
4677     }
4678 }
4679 # endif
4680
4681 /*
4682  * Reload the spell file "fname" if it's loaded.
4683  */
4684     static void
4685 spell_reload_one(fname, added_word)
4686     char_u      *fname;
4687     int         added_word;     /* invoked through "zg" */
4688 {
4689     slang_T     *slang;
4690     int         didit = FALSE;
4691
4692     for (slang = first_lang; slang != NULL; slang = slang->sl_next)
4693     {
4694         if (fullpathcmp(fname, slang->sl_fname, FALSE) == FPC_SAME)
4695         {
4696             slang_clear(slang);
4697             if (spell_load_file(fname, NULL, slang, FALSE) == NULL)
4698                 /* reloading failed, clear the language */
4699                 slang_clear(slang);
4700             redraw_all_later(SOME_VALID);
4701             didit = TRUE;
4702         }
4703     }
4704
4705     /* When "zg" was used and the file wasn't loaded yet, should redo
4706      * 'spelllang' to load it now. */
4707     if (added_word && !didit)
4708         did_set_spelllang(curbuf);
4709 }
4710
4711
4712 /*
4713  * Functions for ":mkspell".
4714  */
4715
4716 #define MAXLINELEN  500         /* Maximum length in bytes of a line in a .aff
4717                                    and .dic file. */
4718 /*
4719  * Main structure to store the contents of a ".aff" file.
4720  */
4721 typedef struct afffile_S
4722 {
4723     char_u      *af_enc;        /* "SET", normalized, alloc'ed string or NULL */
4724     int         af_flagtype;    /* AFT_CHAR, AFT_LONG, AFT_NUM or AFT_CAPLONG */
4725     unsigned    af_rare;        /* RARE ID for rare word */
4726     unsigned    af_keepcase;    /* KEEPCASE ID for keep-case word */
4727     unsigned    af_bad;         /* BAD ID for banned word */
4728     unsigned    af_needaffix;   /* NEEDAFFIX ID */
4729     unsigned    af_circumfix;   /* CIRCUMFIX ID */
4730     unsigned    af_needcomp;    /* NEEDCOMPOUND ID */
4731     unsigned    af_comproot;    /* COMPOUNDROOT ID */
4732     unsigned    af_compforbid;  /* COMPOUNDFORBIDFLAG ID */
4733     unsigned    af_comppermit;  /* COMPOUNDPERMITFLAG ID */
4734     unsigned    af_nosuggest;   /* NOSUGGEST ID */
4735     int         af_pfxpostpone; /* postpone prefixes without chop string and
4736                                    without flags */
4737     hashtab_T   af_pref;        /* hashtable for prefixes, affheader_T */
4738     hashtab_T   af_suff;        /* hashtable for suffixes, affheader_T */
4739     hashtab_T   af_comp;        /* hashtable for compound flags, compitem_T */
4740 } afffile_T;
4741
4742 #define AFT_CHAR        0       /* flags are one character */
4743 #define AFT_LONG        1       /* flags are two characters */
4744 #define AFT_CAPLONG     2       /* flags are one or two characters */
4745 #define AFT_NUM         3       /* flags are numbers, comma separated */
4746
4747 typedef struct affentry_S affentry_T;
4748 /* Affix entry from ".aff" file.  Used for prefixes and suffixes. */
4749 struct affentry_S
4750 {
4751     affentry_T  *ae_next;       /* next affix with same name/number */
4752     char_u      *ae_chop;       /* text to chop off basic word (can be NULL) */
4753     char_u      *ae_add;        /* text to add to basic word (can be NULL) */
4754     char_u      *ae_flags;      /* flags on the affix (can be NULL) */
4755     char_u      *ae_cond;       /* condition (NULL for ".") */
4756     regprog_T   *ae_prog;       /* regexp program for ae_cond or NULL */
4757     char        ae_compforbid;  /* COMPOUNDFORBIDFLAG found */
4758     char        ae_comppermit;  /* COMPOUNDPERMITFLAG found */
4759 };
4760
4761 #ifdef FEAT_MBYTE
4762 # define AH_KEY_LEN 17          /* 2 x 8 bytes + NUL */
4763 #else
4764 # define AH_KEY_LEN 7           /* 6 digits + NUL */
4765 #endif
4766
4767 /* Affix header from ".aff" file.  Used for af_pref and af_suff. */
4768 typedef struct affheader_S
4769 {
4770     char_u      ah_key[AH_KEY_LEN]; /* key for hashtab == name of affix */
4771     unsigned    ah_flag;        /* affix name as number, uses "af_flagtype" */
4772     int         ah_newID;       /* prefix ID after renumbering; 0 if not used */
4773     int         ah_combine;     /* suffix may combine with prefix */
4774     int         ah_follows;     /* another affix block should be following */
4775     affentry_T  *ah_first;      /* first affix entry */
4776 } affheader_T;
4777
4778 #define HI2AH(hi)   ((affheader_T *)(hi)->hi_key)
4779
4780 /* Flag used in compound items. */
4781 typedef struct compitem_S
4782 {
4783     char_u      ci_key[AH_KEY_LEN]; /* key for hashtab == name of compound */
4784     unsigned    ci_flag;        /* affix name as number, uses "af_flagtype" */
4785     int         ci_newID;       /* affix ID after renumbering. */
4786 } compitem_T;
4787
4788 #define HI2CI(hi)   ((compitem_T *)(hi)->hi_key)
4789
4790 /*
4791  * Structure that is used to store the items in the word tree.  This avoids
4792  * the need to keep track of each allocated thing, everything is freed all at
4793  * once after ":mkspell" is done.
4794  */
4795 #define  SBLOCKSIZE 16000       /* size of sb_data */
4796 typedef struct sblock_S sblock_T;
4797 struct sblock_S
4798 {
4799     sblock_T    *sb_next;       /* next block in list */
4800     int         sb_used;        /* nr of bytes already in use */
4801     char_u      sb_data[1];     /* data, actually longer */
4802 };
4803
4804 /*
4805  * A node in the tree.
4806  */
4807 typedef struct wordnode_S wordnode_T;
4808 struct wordnode_S
4809 {
4810     union   /* shared to save space */
4811     {
4812         char_u  hashkey[6];     /* the hash key, only used while compressing */
4813         int     index;          /* index in written nodes (valid after first
4814                                    round) */
4815     } wn_u1;
4816     union   /* shared to save space */
4817     {
4818         wordnode_T *next;       /* next node with same hash key */
4819         wordnode_T *wnode;      /* parent node that will write this node */
4820     } wn_u2;
4821     wordnode_T  *wn_child;      /* child (next byte in word) */
4822     wordnode_T  *wn_sibling;    /* next sibling (alternate byte in word,
4823                                    always sorted) */
4824     int         wn_refs;        /* Nr. of references to this node.  Only
4825                                    relevant for first node in a list of
4826                                    siblings, in following siblings it is
4827                                    always one. */
4828     char_u      wn_byte;        /* Byte for this node. NUL for word end */
4829
4830     /* Info for when "wn_byte" is NUL.
4831      * In PREFIXTREE "wn_region" is used for the prefcondnr.
4832      * In the soundfolded word tree "wn_flags" has the MSW of the wordnr and
4833      * "wn_region" the LSW of the wordnr. */
4834     char_u      wn_affixID;     /* supported/required prefix ID or 0 */
4835     short_u     wn_flags;       /* WF_ flags */
4836     short       wn_region;      /* region mask */
4837
4838 #ifdef SPELL_PRINTTREE
4839     int         wn_nr;          /* sequence nr for printing */
4840 #endif
4841 };
4842
#define WN_MASK  0xffff         /* mask relevant bits of "wn_flags" */

/* Get the wordnode_T from a hashtab item; "hi_key" is cast to it.  The whole
 * expansion is parenthesized, like HI2AH() and HI2CI(), so that the cast
 * stays bound to "hi_key" whatever expression the macro is used in. */
#define HI2WN(hi)    ((wordnode_T *)((hi)->hi_key))
4846
4847 /*
4848  * Info used while reading the spell files.
4849  */
4850 typedef struct spellinfo_S
4851 {
4852     wordnode_T  *si_foldroot;   /* tree with case-folded words */
4853     long        si_foldwcount;  /* nr of words in si_foldroot */
4854
4855     wordnode_T  *si_keeproot;   /* tree with keep-case words */
4856     long        si_keepwcount;  /* nr of words in si_keeproot */
4857
4858     wordnode_T  *si_prefroot;   /* tree with postponed prefixes */
4859
4860     long        si_sugtree;     /* creating the soundfolding trie */
4861
4862     sblock_T    *si_blocks;     /* memory blocks used */
4863     long        si_blocks_cnt;  /* memory blocks allocated */
4864     long        si_compress_cnt;    /* words to add before lowering
4865                                        compression limit */
4866     wordnode_T  *si_first_free; /* List of nodes that have been freed during
4867                                    compression, linked by "wn_child" field. */
4868     long        si_free_count;  /* number of nodes in si_first_free */
4869 #ifdef SPELL_PRINTTREE
4870     int         si_wordnode_nr; /* sequence nr for nodes */
4871 #endif
4872     buf_T       *si_spellbuf;   /* buffer used to store soundfold word table */
4873
4874     int         si_ascii;       /* handling only ASCII words */
4875     int         si_add;         /* addition file */
4876     int         si_clear_chartab;   /* when TRUE clear char tables */
4877     int         si_region;      /* region mask */
4878     vimconv_T   si_conv;        /* for conversion to 'encoding' */
4879     int         si_memtot;      /* runtime memory used */
4880     int         si_verbose;     /* verbose messages */
4881     int         si_msg_count;   /* number of words added since last message */
4882     char_u      *si_info;       /* info text chars or NULL  */
4883     int         si_region_count; /* number of regions supported (1 when there
4884                                     are no regions) */
4885     char_u      si_region_name[16]; /* region names; used only if
4886                                      * si_region_count > 1) */
4887
4888     garray_T    si_rep;         /* list of fromto_T entries from REP lines */
4889     garray_T    si_repsal;      /* list of fromto_T entries from REPSAL lines */
4890     garray_T    si_sal;         /* list of fromto_T entries from SAL lines */
4891     char_u      *si_sofofr;     /* SOFOFROM text */
4892     char_u      *si_sofoto;     /* SOFOTO text */
4893     int         si_nosugfile;   /* NOSUGFILE item found */
4894     int         si_nosplitsugs; /* NOSPLITSUGS item found */
4895     int         si_followup;    /* soundsalike: ? */
4896     int         si_collapse;    /* soundsalike: ? */
4897     hashtab_T   si_commonwords; /* hashtable for common words */
4898     time_t      si_sugtime;     /* timestamp for .sug file */
4899     int         si_rem_accents; /* soundsalike: remove accents */
4900     garray_T    si_map;         /* MAP info concatenated */
4901     char_u      *si_midword;    /* MIDWORD chars or NULL  */
4902     int         si_compmax;     /* max nr of words for compounding */
4903     int         si_compminlen;  /* minimal length for compounding */
4904     int         si_compsylmax;  /* max nr of syllables for compounding */
4905     int         si_compoptions; /* COMP_ flags */
4906     garray_T    si_comppat;     /* CHECKCOMPOUNDPATTERN items, each stored as
4907                                    a string */
4908     char_u      *si_compflags;  /* flags used for compounding */
4909     char_u      si_nobreak;     /* NOBREAK */
4910     char_u      *si_syllable;   /* syllable string */
4911     garray_T    si_prefcond;    /* table with conditions for postponed
4912                                  * prefixes, each stored as a string */
4913     int         si_newprefID;   /* current value for ah_newID */
4914     int         si_newcompID;   /* current value for compound ID */
4915 } spellinfo_T;
4916
     /*
      * Forward declarations of file-local (static) functions, so that they can
      * be called before their definitions appear.
      * Each parameter list is wrapped in __ARGS((...)); presumably that macro
      * lets compilers without prototype support drop the parameter types --
      * confirm against its definition.
      * Judging by the names, the list runs from .aff/.dic/word-file reading,
      * over block allocation and word-tree building/compression, to writing the
      * spell and suggestion files.
      */
4917 static afffile_T *spell_read_aff __ARGS((spellinfo_T *spin, char_u *fname));
4918 static void aff_process_flags __ARGS((afffile_T *affile, affentry_T *entry));
4919 static int spell_info_item __ARGS((char_u *s));
4920 static unsigned affitem2flag __ARGS((int flagtype, char_u *item, char_u *fname, int lnum));
4921 static unsigned get_affitem __ARGS((int flagtype, char_u **pp));
4922 static void process_compflags __ARGS((spellinfo_T *spin, afffile_T *aff, char_u *compflags));
4923 static void check_renumber __ARGS((spellinfo_T *spin));
4924 static int flag_in_afflist __ARGS((int flagtype, char_u *afflist, unsigned flag));
4925 static void aff_check_number __ARGS((int spinval, int affval, char *name));
4926 static void aff_check_string __ARGS((char_u *spinval, char_u *affval, char *name));
4927 static int str_equal __ARGS((char_u *s1, char_u *s2));
4928 static void add_fromto __ARGS((spellinfo_T *spin, garray_T *gap, char_u *from, char_u *to));
4929 static int sal_to_bool __ARGS((char_u *s));
4930 static int has_non_ascii __ARGS((char_u *s));
4931 static void spell_free_aff __ARGS((afffile_T *aff));
4932 static int spell_read_dic __ARGS((spellinfo_T *spin, char_u *fname, afffile_T *affile));
4933 static int get_affix_flags __ARGS((afffile_T *affile, char_u *afflist));
4934 static int get_pfxlist __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
4935 static void get_compflags __ARGS((afffile_T *affile, char_u *afflist, char_u *store_afflist));
4936 static int store_aff_word __ARGS((spellinfo_T *spin, char_u *word, char_u *afflist, afffile_T *affile, hashtab_T *ht, hashtab_T *xht, int condit, int flags, char_u *pfxlist, int pfxlen));
4937 static int spell_read_wordfile __ARGS((spellinfo_T *spin, char_u *fname));
4938 static void *getroom __ARGS((spellinfo_T *spin, size_t len, int align));
4939 static char_u *getroom_save __ARGS((spellinfo_T *spin, char_u *s));
4940 static void free_blocks __ARGS((sblock_T *bl));
4941 static wordnode_T *wordtree_alloc __ARGS((spellinfo_T *spin));
4942 static int store_word __ARGS((spellinfo_T *spin, char_u *word, int flags, int region, char_u *pfxlist, int need_affix));
4943 static int tree_add_word __ARGS((spellinfo_T *spin, char_u *word, wordnode_T *tree, int flags, int region, int affixID));
4944 static wordnode_T *get_wordnode __ARGS((spellinfo_T *spin));
4945 static int deref_wordnode __ARGS((spellinfo_T *spin, wordnode_T *node));
4946 static void free_wordnode __ARGS((spellinfo_T *spin, wordnode_T *n));
4947 static void wordtree_compress __ARGS((spellinfo_T *spin, wordnode_T *root));
4948 static int node_compress __ARGS((spellinfo_T *spin, wordnode_T *node, hashtab_T *ht, int *tot));
4949 static int node_equal __ARGS((wordnode_T *n1, wordnode_T *n2));
4950 static void put_sugtime __ARGS((spellinfo_T *spin, FILE *fd));
4951 static int write_vim_spell __ARGS((spellinfo_T *spin, char_u *fname));
4952 static void clear_node __ARGS((wordnode_T *node));
4953 static int put_node __ARGS((FILE *fd, wordnode_T *node, int index, int regionmask, int prefixtree));
4954 static void spell_make_sugfile __ARGS((spellinfo_T *spin, char_u *wfname));
4955 static int sug_filltree __ARGS((spellinfo_T *spin, slang_T *slang));
4956 static int sug_maketable __ARGS((spellinfo_T *spin));
4957 static int sug_filltable __ARGS((spellinfo_T *spin, wordnode_T *node, int startwordnr, garray_T *gap));
4958 static int offset2bytes __ARGS((int nr, char_u *buf));
4959 static int bytes2offset __ARGS((char_u **pp));
4960 static void sug_write __ARGS((spellinfo_T *spin, char_u *fname));
4961 static void mkspell __ARGS((int fcount, char_u **fnames, int ascii, int overwrite, int added_word));
4962 static void spell_message __ARGS((spellinfo_T *spin, char_u *str));
4963 static void init_spellfile __ARGS((void));
4964
4965 /* In the postponed prefixes tree wn_flags is used to store the WFP_ flags,
4966  * but it must be negative to indicate the prefix tree to tree_add_word().
4967  * Use a negative number with the lower 8 bits zero. */
     /* On two's complement targets -256 is ...ffffff00: negative, with the low
      * byte clear.  Presumably the WFP_ flags go into that low byte -- confirm
      * at the use sites.
      * NOTE(review): the replacement text is a bare negative literal without
      * parentheses; check that no use site relies on operator precedence. */
4968 #define PFX_FLAGS       -256
4969
4970 /* flags for "condit" argument of store_aff_word() */
     /* Each value is a distinct single bit, so several of them can be combined
      * in one "condit" value. */
4971 #define CONDIT_COMB     1       /* affix must combine */
4972 #define CONDIT_CFIX     2       /* affix must have CIRCUMFIX flag */
4973 #define CONDIT_SUF      4       /* add a suffix for matching flags */
4974 #define CONDIT_AFF      8       /* word already has an affix */
4975
4976 /*
4977  * Tunable parameters for when the tree is compressed.  See 'mkspellmem'.
4978  */
     /* These are writable file-scope variables holding the built-in defaults;
      * presumably they are overwritten from the 'mkspellmem' option value --
      * confirm where that option is parsed. */
4979 static long compress_start = 30000;     /* memory / SBLOCKSIZE */
4980 static long compress_inc = 100;         /* memory / SBLOCKSIZE */
4981 static long compress_added = 500000;    /* word count */
4982
4983 #ifdef SPELL_PRINTTREE
4984 /*
4985  * For debugging the tree code: print the current tree in a (more or less)
4986  * readable format, so that we can see what happens when adding a word and/or
4987  * compressing the tree.
4988  * Based on code from Olaf Seibert.
4989  */
     /* PRINTLINESIZE is the size of each output row buffer, PRINTWIDTH the
      * number of columns used per tree level. */
4990 #define PRINTLINESIZE   1000
4991 #define PRINTWIDTH      6
4992
     /*
      * PRINTSOME() formats "fmt" with the two arguments "a1" and "a2" into
      * buffer "l", starting PRINTWIDTH * depth bytes into the buffer and passing
      * the remaining PRINTLINESIZE - PRINTWIDTH * depth bytes as the size limit.
      * NOTE(review): "l" and "depth" are not parenthesized in the expansion, so
      * only simple expressions are safe as arguments.
      * NOTE(review): the size limit becomes negative once depth reaches 167
      * (167 * 6 = 1002 > 1000) and the macro does not cap the depth.  If
      * vim_snprintf() takes an unsigned size like snprintf() does, a negative
      * value would turn into a huge limit -- confirm and consider a guard.
      */
4993 #define PRINTSOME(l, depth, fmt, a1, a2) vim_snprintf(l + depth * PRINTWIDTH, \
4994             PRINTLINESIZE - PRINTWIDTH * depth, fmt, a1, a2)
4995
     /* The three text rows that together display one line of nodes; they are
      * filled through PRINTSOME() and handed to msg() by spell_print_node(). */
4996 static char line1[PRINTLINESIZE];
4997 static char line2[PRINTLINESIZE];
4998 static char line3[PRINTLINESIZE];
4999
     /*
      * Set the "wn_u1.index" field to FALSE in "node", in every node on its
      * sibling list and, recursively, in everything reachable through
      * "wn_child".  spell_print_node() uses that field as its "already printed"
      * marker.
      * "node" may be NULL: the loop body is then never entered.
      */
5000     static void
5001 spell_clear_flags(wordnode_T *node)
5002 {
5003     wordnode_T  *np;
5004
     /* Walk the sibling list iteratively, descend into children recursively. */
5005     for (np = node; np != NULL; np = np->wn_sibling)
5006     {
5007         np->wn_u1.index = FALSE;
         /* A NULL wn_child is fine, the callee's loop handles it. */
5008         spell_clear_flags(np->wn_child);
5009     }
5010 }
5011
     /*
      * Print "node", its children and its siblings as three rows of text
      * (line1, line2 and line3), writing this node's cell at column
      * depth * PRINTWIDTH.
      *   line1: the node's byte followed by " -> ", or " $" for a NUL byte
      *          (end of word), or "(nr)" for a node that was printed before
      *   line2: "wn_nr/wn_refs"
      *   line3: " |" when a sibling follows, blanks otherwise
      * "node" must not be NULL, it is dereferenced right away.
      * "node->wn_u1.index" is used as the "already printed" marker and is set to
      * TRUE here; spell_clear_flags() resets it.
      * NOTE(review): the recursion adds one to "depth" per child level without
      * an upper bound, while PRINTSOME() computes its size limit as
      * PRINTLINESIZE - PRINTWIDTH * depth, which is negative for depth >= 167.
      */
5012     static void
5013 spell_print_node(wordnode_T *node, int depth)
5014 {
5015     if (node->wn_u1.index)
5016     {
5017         /* Done this node before, print the reference. */
5018         PRINTSOME(line1, depth, "(%d)", node->wn_nr, 0);
5019         PRINTSOME(line2, depth, "    ", 0, 0);
5020         PRINTSOME(line3, depth, "    ", 0, 0);
5021         msg(line1);
5022         msg(line2);
5023         msg(line3);
5024     }
5025     else
5026     {
         /* Mark the node before recursing, so that reaching it again through
          * another path prints only the reference above. */
5027         node->wn_u1.index = TRUE;
5028
5029         if (node->wn_byte != NUL)
5030         {
5031             if (node->wn_child != NULL)
5032                 PRINTSOME(line1, depth, " %c -> ", node->wn_byte, 0);
5033             else
5034                 /* Cannot happen? */
5035                 PRINTSOME(line1, depth, " %c ???", node->wn_byte, 0);
5036         }
5037         else
5038             PRINTSOME(line1, depth, " $    ", 0, 0);
5039
5040         PRINTSOME(line2, depth, "%d/%d    ", node->wn_nr, node->wn_refs);
5041
5042         if (node->wn_sibling != NULL)
5043             PRINTSOME(line3, depth, " |    ", 0, 0);
5044         else
5045             PRINTSOME(line3, depth, "      ", 0, 0);
5046
         /* The rows are only output at a NUL byte node; for other nodes the
          * cell stays in the buffers and the child is added to its right.
          * In the "???" case above nothing is output by this call. */
5047         if (node->wn_byte == NUL)
5048         {
5049             msg(line1);
5050             msg(line2);
5051             msg(line3);
5052         }
5053
5054         /* do the children */
5055         if (node->wn_byte != NUL && node->wn_child != NULL)
5056             spell_print_node(node->wn_child, depth + 1);
5057
5058         /* do the siblings */
5059         if (node->wn_sibling != NULL)
5060         {
5061             /* get rid of all parent details except | */
             /* line3 holds only blanks and "|" markers; copying it over
              * line1 and line2 keeps those markers for the columns to the
              * left, and the sibling's own cell is rewritten by the call. */
5062             STRCPY(line1, line3);
5063             STRCPY(line2, line3);
5064             spell_print_node(node->wn_sibling, depth);
5065         }
5066     }
5067 }
5068
     /*
      * Print the whole word tree starting at "root", using msg() for output.
      * Does nothing when "root" is NULL.
      * Side effect: the "wn_u1.index" field of every node reachable from "root"
      * is first reset and then set to TRUE for each node that gets printed; it
      * is not restored afterwards.
      * NOTE(review): "wn_u1" looks like a union; presumably this is only safe
      * while no other member of it is in use -- confirm before calling.
      */
5069     static void
5070 spell_print_tree(wordnode_T *root)
5071 {
5072     if (root != NULL)
5073     {
5074         /* Clear the "wn_u1.index" fields, used to remember what has been
5075          * done. */
5076         spell_clear_flags(root);
5077
5078         /* Recursively print the tree. */
         /* The root is printed at depth 0, i.e. in the leftmost column. */
5079         spell_print_node(root, 0);
5080     }
5081 }
5082 #endif /* SPELL_PRINTTREE */
5083
5084 /*
5085  * Read the affix file "fname".
5086  * Returns an afffile_T, NULL for complete failure.
5087  */
5088     static afffile_T *
5089 spell_read_aff(spin, fname)
5090     spellinfo_T *spin;
5091     char_u      *fname;
5092 {
5093     FILE        *fd;
5094     afffile_T   *aff;
5095     char_u      rline[MAXLINELEN];
5096     char_u      *line;
5097     char_u      *pc = NULL;
5098 #define MAXITEMCNT  30
5099     char_u      *(items[MAXITEMCNT]);
5100     int         itemcnt;
5101     char_u      *p;
5102     int         lnum = 0;
5103     affheader_T *cur_aff = NULL;
5104     int         did_postpone_prefix = FALSE;
5105     int         aff_todo = 0;
5106     hashtab_T   *tp;
5107     char_u      *low = NULL;
5108     char_u      *fol = NULL;
5109     char_u      *upp = NULL;
5110     int         do_rep;
5111     int         do_repsal;
5112     int         do_sal;
5113     int         do_mapline;
5114     int         found_map = FALSE;
5115     hashitem_T  *hi;
5116     int         l;
5117     int         compminlen = 0;         /* COMPOUNDMIN value */
5118     int         compsylmax = 0;         /* COMPOUNDSYLMAX value */
5119     int         compoptions = 0;        /* COMP_ flags */
5120     int         compmax = 0;            /* COMPOUNDWORDMAX value */
5121     char_u      *compflags = NULL;      /* COMPOUNDFLAG and COMPOUNDRULE
5122                                            concatenated */
5123     char_u      *midword = NULL;        /* MIDWORD value */
5124     char_u      *syllable = NULL;       /* SYLLABLE value */
5125     char_u      *sofofrom = NULL;       /* SOFOFROM value */
5126     char_u      *sofoto = NULL;         /* SOFOTO value */
5127
5128     /*
5129      * Open the file.
5130      */
5131     fd = mch_fopen((char *)fname, "r");
5132     if (fd == NULL)
5133     {
5134         EMSG2(_(e_notopen), fname);
5135         return NULL;
5136     }
5137
5138     vim_snprintf((char *)IObuff, IOSIZE, _("Reading affix file %s ..."), fname);
5139     spell_message(spin, IObuff);
5140
5141     /* Only do REP lines when not done in another .aff file already. */
5142     do_rep = spin->si_rep.ga_len == 0;
5143
5144     /* Only do REPSAL lines when not done in another .aff file already. */
5145     do_repsal = spin->si_repsal.ga_len == 0;
5146
5147     /* Only do SAL lines when not done in another .aff file already. */
5148     do_sal = spin->si_sal.ga_len == 0;
5149
5150     /* Only do MAP lines when not done in another .aff file already. */
5151     do_mapline = spin->si_map.ga_len == 0;
5152
5153     /*
5154      * Allocate and init the afffile_T structure.
5155      */
5156     aff = (afffile_T *)getroom(spin, sizeof(afffile_T), TRUE);
5157     if (aff == NULL)
5158     {
5159         fclose(fd);
5160         return NULL;
5161     }
5162     hash_init(&aff->af_pref);
5163     hash_init(&aff->af_suff);
5164     hash_init(&aff->af_comp);
5165
5166     /*
5167      * Read all the lines in the file one by one.
5168      */
5169     while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
5170     {
5171         line_breakcheck();
5172         ++lnum;
5173
5174         /* Skip comment lines. */
5175         if (*rline == '#')
5176             continue;
5177
5178         /* Convert from "SET" to 'encoding' when needed. */
5179         vim_free(pc);
5180 #ifdef FEAT_MBYTE
5181         if (spin->si_conv.vc_type != CONV_NONE)
5182         {
5183             pc = string_convert(&spin->si_conv, rline, NULL);
5184             if (pc == NULL)
5185             {
5186                 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
5187                                                            fname, lnum, rline);
5188                 continue;
5189             }
5190             line = pc;
5191         }
5192         else
5193 #endif
5194         {
5195             pc = NULL;
5196             line = rline;
5197         }
5198
5199         /* Split the line up in white separated items.  Put a NUL after each
5200          * item. */
5201         itemcnt = 0;
5202         for (p = line; ; )
5203         {
5204             while (*p != NUL && *p <= ' ')  /* skip white space and CR/NL */
5205                 ++p;
5206             if (*p == NUL)
5207                 break;
5208             if (itemcnt == MAXITEMCNT)      /* too many items */
5209                 break;
5210             items[itemcnt++] = p;
5211             /* A few items have arbitrary text argument, don't split them. */
5212             if (itemcnt == 2 && spell_info_item(items[0]))
5213                 while (*p >= ' ' || *p == TAB)    /* skip until CR/NL */
5214                     ++p;
5215             else
5216                 while (*p > ' ')    /* skip until white space or CR/NL */
5217                     ++p;
5218             if (*p == NUL)
5219                 break;
5220             *p++ = NUL;
5221         }
5222
5223         /* Handle non-empty lines. */
5224         if (itemcnt > 0)
5225         {
5226             if (STRCMP(items[0], "SET") == 0 && itemcnt == 2
5227                                                        && aff->af_enc == NULL)
5228             {
5229 #ifdef FEAT_MBYTE
5230                 /* Setup for conversion from "ENC" to 'encoding'. */
5231                 aff->af_enc = enc_canonize(items[1]);
5232                 if (aff->af_enc != NULL && !spin->si_ascii
5233                         && convert_setup(&spin->si_conv, aff->af_enc,
5234                                                                p_enc) == FAIL)
5235                     smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
5236                                                fname, aff->af_enc, p_enc);
5237                 spin->si_conv.vc_fail = TRUE;
5238 #else
5239                     smsg((char_u *)_("Conversion in %s not supported"), fname);
5240 #endif
5241             }
5242             else if (STRCMP(items[0], "FLAG") == 0 && itemcnt == 2
5243                                               && aff->af_flagtype == AFT_CHAR)
5244             {
5245                 if (STRCMP(items[1], "long") == 0)
5246                     aff->af_flagtype = AFT_LONG;
5247                 else if (STRCMP(items[1], "num") == 0)
5248                     aff->af_flagtype = AFT_NUM;
5249                 else if (STRCMP(items[1], "caplong") == 0)
5250                     aff->af_flagtype = AFT_CAPLONG;
5251                 else
5252                     smsg((char_u *)_("Invalid value for FLAG in %s line %d: %s"),
5253                             fname, lnum, items[1]);
5254                 if (aff->af_rare != 0
5255                         || aff->af_keepcase != 0
5256                         || aff->af_bad != 0
5257                         || aff->af_needaffix != 0
5258                         || aff->af_circumfix != 0
5259                         || aff->af_needcomp != 0
5260                         || aff->af_comproot != 0
5261                         || aff->af_nosuggest != 0
5262                         || compflags != NULL
5263                         || aff->af_suff.ht_used > 0
5264                         || aff->af_pref.ht_used > 0)
5265                     smsg((char_u *)_("FLAG after using flags in %s line %d: %s"),
5266                             fname, lnum, items[1]);
5267             }
5268             else if (spell_info_item(items[0]))
5269             {
5270                     p = (char_u *)getroom(spin,
5271                             (spin->si_info == NULL ? 0 : STRLEN(spin->si_info))
5272                             + STRLEN(items[0])
5273                             + STRLEN(items[1]) + 3, FALSE);
5274                     if (p != NULL)
5275                     {
5276                         if (spin->si_info != NULL)
5277                         {
5278                             STRCPY(p, spin->si_info);
5279                             STRCAT(p, "\n");
5280                         }
5281                         STRCAT(p, items[0]);
5282                         STRCAT(p, " ");
5283                         STRCAT(p, items[1]);
5284                         spin->si_info = p;
5285                     }
5286             }
5287             else if (STRCMP(items[0], "MIDWORD") == 0 && itemcnt == 2
5288                                                            && midword == NULL)
5289             {
5290                 midword = getroom_save(spin, items[1]);
5291             }
5292             else if (STRCMP(items[0], "TRY") == 0 && itemcnt == 2)
5293             {
5294                 /* ignored, we look in the tree for what chars may appear */
5295             }
5296             /* TODO: remove "RAR" later */
5297             else if ((STRCMP(items[0], "RAR") == 0
5298                         || STRCMP(items[0], "RARE") == 0) && itemcnt == 2
5299                                                        && aff->af_rare == 0)
5300             {
5301                 aff->af_rare = affitem2flag(aff->af_flagtype, items[1],
5302                                                                  fname, lnum);
5303             }
5304             /* TODO: remove "KEP" later */
5305             else if ((STRCMP(items[0], "KEP") == 0
5306                     || STRCMP(items[0], "KEEPCASE") == 0) && itemcnt == 2
5307                                                      && aff->af_keepcase == 0)
5308             {
5309                 aff->af_keepcase = affitem2flag(aff->af_flagtype, items[1],
5310                                                                  fname, lnum);
5311             }
5312             else if (STRCMP(items[0], "BAD") == 0 && itemcnt == 2
5313                                                        && aff->af_bad == 0)
5314             {
5315                 aff->af_bad = affitem2flag(aff->af_flagtype, items[1],
5316                                                                  fname, lnum);
5317             }
5318             else if (STRCMP(items[0], "NEEDAFFIX") == 0 && itemcnt == 2
5319                                                     && aff->af_needaffix == 0)
5320             {
5321                 aff->af_needaffix = affitem2flag(aff->af_flagtype, items[1],
5322                                                                  fname, lnum);
5323             }
5324             else if (STRCMP(items[0], "CIRCUMFIX") == 0 && itemcnt == 2
5325                                                     && aff->af_circumfix == 0)
5326             {
5327                 aff->af_circumfix = affitem2flag(aff->af_flagtype, items[1],
5328                                                                  fname, lnum);
5329             }
5330             else if (STRCMP(items[0], "NOSUGGEST") == 0 && itemcnt == 2
5331                                                     && aff->af_nosuggest == 0)
5332             {
5333                 aff->af_nosuggest = affitem2flag(aff->af_flagtype, items[1],
5334                                                                  fname, lnum);
5335             }
5336             else if (STRCMP(items[0], "NEEDCOMPOUND") == 0 && itemcnt == 2
5337                                                      && aff->af_needcomp == 0)
5338             {
5339                 aff->af_needcomp = affitem2flag(aff->af_flagtype, items[1],
5340                                                                  fname, lnum);
5341             }
5342             else if (STRCMP(items[0], "COMPOUNDROOT") == 0 && itemcnt == 2
5343                                                      && aff->af_comproot == 0)
5344             {
5345                 aff->af_comproot = affitem2flag(aff->af_flagtype, items[1],
5346                                                                  fname, lnum);
5347             }
5348             else if (STRCMP(items[0], "COMPOUNDFORBIDFLAG") == 0
5349                                    && itemcnt == 2 && aff->af_compforbid == 0)
5350             {
5351                 aff->af_compforbid = affitem2flag(aff->af_flagtype, items[1],
5352                                                                  fname, lnum);
5353                 if (aff->af_pref.ht_used > 0)
5354                     smsg((char_u *)_("Defining COMPOUNDFORBIDFLAG after PFX item may give wrong results in %s line %d"),
5355                             fname, lnum);
5356             }
5357             else if (STRCMP(items[0], "COMPOUNDPERMITFLAG") == 0
5358                                    && itemcnt == 2 && aff->af_comppermit == 0)
5359             {
5360                 aff->af_comppermit = affitem2flag(aff->af_flagtype, items[1],
5361                                                                  fname, lnum);
5362                 if (aff->af_pref.ht_used > 0)
5363                     smsg((char_u *)_("Defining COMPOUNDPERMITFLAG after PFX item may give wrong results in %s line %d"),
5364                             fname, lnum);
5365             }
5366             else if (STRCMP(items[0], "COMPOUNDFLAG") == 0 && itemcnt == 2
5367                                                          && compflags == NULL)
5368             {
5369                 /* Turn flag "c" into COMPOUNDRULE compatible string "c+",
5370                  * "Na" into "Na+", "1234" into "1234+". */
5371                 p = getroom(spin, STRLEN(items[1]) + 2, FALSE);
5372                 if (p != NULL)
5373                 {
5374                     STRCPY(p, items[1]);
5375                     STRCAT(p, "+");
5376                     compflags = p;
5377                 }
5378             }
5379             else if (STRCMP(items[0], "COMPOUNDRULE") == 0 && itemcnt == 2)
5380             {
5381                 /* Concatenate this string to previously defined ones, using a
5382                  * slash to separate them. */
5383                 l = (int)STRLEN(items[1]) + 1;
5384                 if (compflags != NULL)
5385                     l += (int)STRLEN(compflags) + 1;
5386                 p = getroom(spin, l, FALSE);
5387                 if (p != NULL)
5388                 {
5389                     if (compflags != NULL)
5390                     {
5391                         STRCPY(p, compflags);
5392                         STRCAT(p, "/");
5393                     }
5394                     STRCAT(p, items[1]);
5395                     compflags = p;
5396                 }
5397             }
5398             else if (STRCMP(items[0], "COMPOUNDWORDMAX") == 0 && itemcnt == 2
5399                                                               && compmax == 0)
5400             {
5401                 compmax = atoi((char *)items[1]);
5402                 if (compmax == 0)
5403                     smsg((char_u *)_("Wrong COMPOUNDWORDMAX value in %s line %d: %s"),
5404                                                        fname, lnum, items[1]);
5405             }
5406             else if (STRCMP(items[0], "COMPOUNDMIN") == 0 && itemcnt == 2
5407                                                            && compminlen == 0)
5408             {
5409                 compminlen = atoi((char *)items[1]);
5410                 if (compminlen == 0)
5411                     smsg((char_u *)_("Wrong COMPOUNDMIN value in %s line %d: %s"),
5412                                                        fname, lnum, items[1]);
5413             }
5414             else if (STRCMP(items[0], "COMPOUNDSYLMAX") == 0 && itemcnt == 2
5415                                                            && compsylmax == 0)
5416             {
5417                 compsylmax = atoi((char *)items[1]);
5418                 if (compsylmax == 0)
5419                     smsg((char_u *)_("Wrong COMPOUNDSYLMAX value in %s line %d: %s"),
5420                                                        fname, lnum, items[1]);
5421             }
5422             else if (STRCMP(items[0], "CHECKCOMPOUNDDUP") == 0 && itemcnt == 1)
5423             {
5424                 compoptions |= COMP_CHECKDUP;
5425             }
5426             else if (STRCMP(items[0], "CHECKCOMPOUNDREP") == 0 && itemcnt == 1)
5427             {
5428                 compoptions |= COMP_CHECKREP;
5429             }
5430             else if (STRCMP(items[0], "CHECKCOMPOUNDCASE") == 0 && itemcnt == 1)
5431             {
5432                 compoptions |= COMP_CHECKCASE;
5433             }
5434             else if (STRCMP(items[0], "CHECKCOMPOUNDTRIPLE") == 0
5435                                                               && itemcnt == 1)
5436             {
5437                 compoptions |= COMP_CHECKTRIPLE;
5438             }
5439             else if (STRCMP(items[0], "CHECKCOMPOUNDPATTERN") == 0
5440                                                               && itemcnt == 2)
5441             {
5442                 if (atoi((char *)items[1]) == 0)
5443                     smsg((char_u *)_("Wrong CHECKCOMPOUNDPATTERN value in %s line %d: %s"),
5444                                                        fname, lnum, items[1]);
5445             }
5446             else if (STRCMP(items[0], "CHECKCOMPOUNDPATTERN") == 0
5447                                                               && itemcnt == 3)
5448             {
5449                 garray_T    *gap = &spin->si_comppat;
5450                 int         i;
5451
5452                 /* Only add the couple if it isn't already there. */
5453                 for (i = 0; i < gap->ga_len - 1; i += 2)
5454                     if (STRCMP(((char_u **)(gap->ga_data))[i], items[1]) == 0
5455                             && STRCMP(((char_u **)(gap->ga_data))[i + 1],
5456                                                                items[2]) == 0)
5457                         break;
5458                 if (i >= gap->ga_len && ga_grow(gap, 2) == OK)
5459                 {
5460                     ((char_u **)(gap->ga_data))[gap->ga_len++]
5461                                                = getroom_save(spin, items[1]);
5462                     ((char_u **)(gap->ga_data))[gap->ga_len++]
5463                                                = getroom_save(spin, items[2]);
5464                 }
5465             }
5466             else if (STRCMP(items[0], "SYLLABLE") == 0 && itemcnt == 2
5467                                                           && syllable == NULL)
5468             {
5469                 syllable = getroom_save(spin, items[1]);
5470             }
5471             else if (STRCMP(items[0], "NOBREAK") == 0 && itemcnt == 1)
5472             {
5473                 spin->si_nobreak = TRUE;
5474             }
5475             else if (STRCMP(items[0], "NOSPLITSUGS") == 0 && itemcnt == 1)
5476             {
5477                 spin->si_nosplitsugs = TRUE;
5478             }
5479             else if (STRCMP(items[0], "NOSUGFILE") == 0 && itemcnt == 1)
5480             {
5481                 spin->si_nosugfile = TRUE;
5482             }
5483             else if (STRCMP(items[0], "PFXPOSTPONE") == 0 && itemcnt == 1)
5484             {
5485                 aff->af_pfxpostpone = TRUE;
5486             }
5487             else if ((STRCMP(items[0], "PFX") == 0
5488                                               || STRCMP(items[0], "SFX") == 0)
5489                     && aff_todo == 0
5490                     && itemcnt >= 4)
5491             {
5492                 int     lasti = 4;
5493                 char_u  key[AH_KEY_LEN];
5494
5495                 if (*items[0] == 'P')
5496                     tp = &aff->af_pref;
5497                 else
5498                     tp = &aff->af_suff;
5499
5500                 /* Myspell allows the same affix name to be used multiple
5501                  * times.  The affix files that do this have an undocumented
5502                  * "S" flag on all but the last block, thus we check for that
5503                  * and store it in ah_follows. */
5504                 vim_strncpy(key, items[1], AH_KEY_LEN - 1);
5505                 hi = hash_find(tp, key);
5506                 if (!HASHITEM_EMPTY(hi))
5507                 {
5508                     cur_aff = HI2AH(hi);
5509                     if (cur_aff->ah_combine != (*items[2] == 'Y'))
5510                         smsg((char_u *)_("Different combining flag in continued affix block in %s line %d: %s"),
5511                                                    fname, lnum, items[1]);
5512                     if (!cur_aff->ah_follows)
5513                         smsg((char_u *)_("Duplicate affix in %s line %d: %s"),
5514                                                        fname, lnum, items[1]);
5515                 }
5516                 else
5517                 {
5518                     /* New affix letter. */
5519                     cur_aff = (affheader_T *)getroom(spin,
5520                                                    sizeof(affheader_T), TRUE);
5521                     if (cur_aff == NULL)
5522                         break;
5523                     cur_aff->ah_flag = affitem2flag(aff->af_flagtype, items[1],
5524                                                                  fname, lnum);
5525                     if (cur_aff->ah_flag == 0 || STRLEN(items[1]) >= AH_KEY_LEN)
5526                         break;
5527                     if (cur_aff->ah_flag == aff->af_bad
5528                             || cur_aff->ah_flag == aff->af_rare
5529                             || cur_aff->ah_flag == aff->af_keepcase
5530                             || cur_aff->ah_flag == aff->af_needaffix
5531                             || cur_aff->ah_flag == aff->af_circumfix
5532                             || cur_aff->ah_flag == aff->af_nosuggest
5533                             || cur_aff->ah_flag == aff->af_needcomp
5534                             || cur_aff->ah_flag == aff->af_comproot)
5535                         smsg((char_u *)_("Affix also used for BAD/RARE/KEEPCASE/NEEDAFFIX/NEEDCOMPOUND/NOSUGGEST in %s line %d: %s"),
5536                                                        fname, lnum, items[1]);
5537                     STRCPY(cur_aff->ah_key, items[1]);
5538                     hash_add(tp, cur_aff->ah_key);
5539
5540                     cur_aff->ah_combine = (*items[2] == 'Y');
5541                 }
5542
5543                 /* Check for the "S" flag, which apparently means that another
5544                  * block with the same affix name is following. */
5545                 if (itemcnt > lasti && STRCMP(items[lasti], "S") == 0)
5546                 {
5547                     ++lasti;
5548                     cur_aff->ah_follows = TRUE;
5549                 }
5550                 else
5551                     cur_aff->ah_follows = FALSE;
5552
5553                 /* Myspell allows extra text after the item, but that might
5554                  * mean mistakes go unnoticed.  Require a comment-starter. */
5555                 if (itemcnt > lasti && *items[lasti] != '#')
5556                     smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
5557
5558                 if (STRCMP(items[2], "Y") != 0 && STRCMP(items[2], "N") != 0)
5559                     smsg((char_u *)_("Expected Y or N in %s line %d: %s"),
5560                                                        fname, lnum, items[2]);
5561
5562                 if (*items[0] == 'P' && aff->af_pfxpostpone)
5563                 {
5564                     if (cur_aff->ah_newID == 0)
5565                     {
5566                         /* Use a new number in the .spl file later, to be able
5567                          * to handle multiple .aff files. */
5568                         check_renumber(spin);
5569                         cur_aff->ah_newID = ++spin->si_newprefID;
5570
5571                         /* We only really use ah_newID if the prefix is
5572                          * postponed.  We know that only after handling all
5573                          * the items. */
5574                         did_postpone_prefix = FALSE;
5575                     }
5576                     else
5577                         /* Did use the ID in a previous block. */
5578                         did_postpone_prefix = TRUE;
5579                 }
5580
5581                 aff_todo = atoi((char *)items[3]);
5582             }
5583             else if ((STRCMP(items[0], "PFX") == 0
5584                                               || STRCMP(items[0], "SFX") == 0)
5585                     && aff_todo > 0
5586                     && STRCMP(cur_aff->ah_key, items[1]) == 0
5587                     && itemcnt >= 5)
5588             {
5589                 affentry_T      *aff_entry;
5590                 int             upper = FALSE;
5591                 int             lasti = 5;
5592
5593                 /* Myspell allows extra text after the item, but that might
5594                  * mean mistakes go unnoticed.  Require a comment-starter.
5595                  * Hunspell uses a "-" item. */
5596                 if (itemcnt > lasti && *items[lasti] != '#'
5597                         && (STRCMP(items[lasti], "-") != 0
5598                                                      || itemcnt != lasti + 1))
5599                     smsg((char_u *)_(e_afftrailing), fname, lnum, items[lasti]);
5600
5601                 /* New item for an affix letter. */
5602                 --aff_todo;
5603                 aff_entry = (affentry_T *)getroom(spin,
5604                                                     sizeof(affentry_T), TRUE);
5605                 if (aff_entry == NULL)
5606                     break;
5607
5608                 if (STRCMP(items[2], "0") != 0)
5609                     aff_entry->ae_chop = getroom_save(spin, items[2]);
5610                 if (STRCMP(items[3], "0") != 0)
5611                 {
5612                     aff_entry->ae_add = getroom_save(spin, items[3]);
5613
5614                     /* Recognize flags on the affix: abcd/XYZ */
5615                     aff_entry->ae_flags = vim_strchr(aff_entry->ae_add, '/');
5616                     if (aff_entry->ae_flags != NULL)
5617                     {
5618                         *aff_entry->ae_flags++ = NUL;
5619                         aff_process_flags(aff, aff_entry);
5620                     }
5621                 }
5622
5623                 /* Don't use an affix entry with non-ASCII characters when
5624                  * "spin->si_ascii" is TRUE. */
5625                 if (!spin->si_ascii || !(has_non_ascii(aff_entry->ae_chop)
5626                                           || has_non_ascii(aff_entry->ae_add)))
5627                 {
5628                     aff_entry->ae_next = cur_aff->ah_first;
5629                     cur_aff->ah_first = aff_entry;
5630
5631                     if (STRCMP(items[4], ".") != 0)
5632                     {
5633                         char_u  buf[MAXLINELEN];
5634
5635                         aff_entry->ae_cond = getroom_save(spin, items[4]);
5636                         if (*items[0] == 'P')
5637                             sprintf((char *)buf, "^%s", items[4]);
5638                         else
5639                             sprintf((char *)buf, "%s$", items[4]);
5640                         aff_entry->ae_prog = vim_regcomp(buf,
5641                                             RE_MAGIC + RE_STRING + RE_STRICT);
5642                         if (aff_entry->ae_prog == NULL)
5643                             smsg((char_u *)_("Broken condition in %s line %d: %s"),
5644                                                        fname, lnum, items[4]);
5645                     }
5646
5647                     /* For postponed prefixes we need an entry in si_prefcond
5648                      * for the condition.  Use an existing one if possible.
5649                      * Can't be done for an affix with flags, ignoring
5650                      * COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG. */
5651                     if (*items[0] == 'P' && aff->af_pfxpostpone
5652                                                && aff_entry->ae_flags == NULL)
5653                     {
5654                         /* When the chop string is one lower-case letter and
5655                          * the add string ends in the upper-case letter we set
5656                          * the "upper" flag, clear "ae_chop" and remove the
5657                          * letters from "ae_add".  The condition must either
5658                          * be empty or start with the same letter. */
5659                         if (aff_entry->ae_chop != NULL
5660                                 && aff_entry->ae_add != NULL
5661 #ifdef FEAT_MBYTE
5662                                 && aff_entry->ae_chop[(*mb_ptr2len)(
5663                                                    aff_entry->ae_chop)] == NUL
5664 #else
5665                                 && aff_entry->ae_chop[1] == NUL
5666 #endif
5667                                 )
5668                         {
5669                             int         c, c_up;
5670
5671                             c = PTR2CHAR(aff_entry->ae_chop);
5672                             c_up = SPELL_TOUPPER(c);
5673                             if (c_up != c
5674                                     && (aff_entry->ae_cond == NULL
5675                                         || PTR2CHAR(aff_entry->ae_cond) == c))
5676                             {
5677                                 p = aff_entry->ae_add
5678                                                   + STRLEN(aff_entry->ae_add);
5679                                 mb_ptr_back(aff_entry->ae_add, p);
5680                                 if (PTR2CHAR(p) == c_up)
5681                                 {
5682                                     upper = TRUE;
5683                                     aff_entry->ae_chop = NULL;
5684                                     *p = NUL;
5685
5686                                     /* The condition is matched with the
5687                                      * actual word, thus must check for the
5688                                      * upper-case letter. */
5689                                     if (aff_entry->ae_cond != NULL)
5690                                     {
5691                                         char_u  buf[MAXLINELEN];
5692 #ifdef FEAT_MBYTE
5693                                         if (has_mbyte)
5694                                         {
5695                                             onecap_copy(items[4], buf, TRUE);
5696                                             aff_entry->ae_cond = getroom_save(
5697                                                                    spin, buf);
5698                                         }
5699                                         else
5700 #endif
5701                                             *aff_entry->ae_cond = c_up;
5702                                         if (aff_entry->ae_cond != NULL)
5703                                         {
5704                                             sprintf((char *)buf, "^%s",
5705                                                           aff_entry->ae_cond);
5706                                             vim_free(aff_entry->ae_prog);
5707                                             aff_entry->ae_prog = vim_regcomp(
5708                                                     buf, RE_MAGIC + RE_STRING);
5709                                         }
5710                                     }
5711                                 }
5712                             }
5713                         }
5714
5715                         if (aff_entry->ae_chop == NULL
5716                                                && aff_entry->ae_flags == NULL)
5717                         {
5718                             int         idx;
5719                             char_u      **pp;
5720                             int         n;
5721
5722                             /* Find a previously used condition. */
5723                             for (idx = spin->si_prefcond.ga_len - 1; idx >= 0;
5724                                                                         --idx)
5725                             {
5726                                 p = ((char_u **)spin->si_prefcond.ga_data)[idx];
5727                                 if (str_equal(p, aff_entry->ae_cond))
5728                                     break;
5729                             }
5730                             if (idx < 0 && ga_grow(&spin->si_prefcond, 1) == OK)
5731                             {
5732                                 /* Not found, add a new condition. */
5733                                 idx = spin->si_prefcond.ga_len++;
5734                                 pp = ((char_u **)spin->si_prefcond.ga_data)
5735                                                                         + idx;
5736                                 if (aff_entry->ae_cond == NULL)
5737                                     *pp = NULL;
5738                                 else
5739                                     *pp = getroom_save(spin,
5740                                                           aff_entry->ae_cond);
5741                             }
5742
5743                             /* Add the prefix to the prefix tree. */
5744                             if (aff_entry->ae_add == NULL)
5745                                 p = (char_u *)"";
5746                             else
5747                                 p = aff_entry->ae_add;
5748
5749                             /* PFX_FLAGS is a negative number, so that
5750                              * tree_add_word() knows this is the prefix tree. */
5751                             n = PFX_FLAGS;
5752                             if (!cur_aff->ah_combine)
5753                                 n |= WFP_NC;
5754                             if (upper)
5755                                 n |= WFP_UP;
5756                             if (aff_entry->ae_comppermit)
5757                                 n |= WFP_COMPPERMIT;
5758                             if (aff_entry->ae_compforbid)
5759                                 n |= WFP_COMPFORBID;
5760                             tree_add_word(spin, p, spin->si_prefroot, n,
5761                                                       idx, cur_aff->ah_newID);
5762                             did_postpone_prefix = TRUE;
5763                         }
5764
5765                         /* Didn't actually use ah_newID, backup si_newprefID. */
5766                         if (aff_todo == 0 && !did_postpone_prefix)
5767                         {
5768                             --spin->si_newprefID;
5769                             cur_aff->ah_newID = 0;
5770                         }
5771                     }
5772                 }
5773             }
5774             else if (STRCMP(items[0], "FOL") == 0 && itemcnt == 2
5775                                                                && fol == NULL)
5776             {
5777                 fol = vim_strsave(items[1]);
5778             }
5779             else if (STRCMP(items[0], "LOW") == 0 && itemcnt == 2
5780                                                                && low == NULL)
5781             {
5782                 low = vim_strsave(items[1]);
5783             }
5784             else if (STRCMP(items[0], "UPP") == 0 && itemcnt == 2
5785                                                                && upp == NULL)
5786             {
5787                 upp = vim_strsave(items[1]);
5788             }
5789             else if ((STRCMP(items[0], "REP") == 0
5790                         || STRCMP(items[0], "REPSAL") == 0)
5791                     && itemcnt == 2)
5792             {
5793                 /* Ignore REP/REPSAL count */;
5794                 if (!isdigit(*items[1]))
5795                     smsg((char_u *)_("Expected REP(SAL) count in %s line %d"),
5796                                                                  fname, lnum);
5797             }
5798             else if ((STRCMP(items[0], "REP") == 0
5799                         || STRCMP(items[0], "REPSAL") == 0)
5800                     && itemcnt >= 3)
5801             {
5802                 /* REP/REPSAL item */
5803                 /* Myspell ignores extra arguments, we require it starts with
5804                  * # to detect mistakes. */
5805                 if (itemcnt > 3 && items[3][0] != '#')
5806                     smsg((char_u *)_(e_afftrailing), fname, lnum, items[3]);
5807                 if (items[0][3] == 'S' ? do_repsal : do_rep)
5808                 {
5809                     /* Replace underscore with space (can't include a space
5810                      * directly). */
5811                     for (p = items[1]; *p != NUL; mb_ptr_adv(p))
5812                         if (*p == '_')
5813                             *p = ' ';
5814                     for (p = items[2]; *p != NUL; mb_ptr_adv(p))
5815                         if (*p == '_')
5816                             *p = ' ';
5817                     add_fromto(spin, items[0][3] == 'S'
5818                                          ? &spin->si_repsal
5819                                          : &spin->si_rep, items[1], items[2]);
5820                 }
5821             }
5822             else if (STRCMP(items[0], "MAP") == 0 && itemcnt == 2)
5823             {
5824                 /* MAP item or count */
5825                 if (!found_map)
5826                 {
5827                     /* First line contains the count. */
5828                     found_map = TRUE;
5829                     if (!isdigit(*items[1]))
5830                         smsg((char_u *)_("Expected MAP count in %s line %d"),
5831                                                                  fname, lnum);
5832                 }
5833                 else if (do_mapline)
5834                 {
5835                     int         c;
5836
5837                     /* Check that every character appears only once. */
5838                     for (p = items[1]; *p != NUL; )
5839                     {
5840 #ifdef FEAT_MBYTE
5841                         c = mb_ptr2char_adv(&p);
5842 #else
5843                         c = *p++;
5844 #endif
5845                         if ((spin->si_map.ga_len > 0
5846                                     && vim_strchr(spin->si_map.ga_data, c)
5847                                                                       != NULL)
5848                                 || vim_strchr(p, c) != NULL)
5849                             smsg((char_u *)_("Duplicate character in MAP in %s line %d"),
5850                                                                  fname, lnum);
5851                     }
5852
5853                     /* We simply concatenate all the MAP strings, separated by
5854                      * slashes. */
5855                     ga_concat(&spin->si_map, items[1]);
5856                     ga_append(&spin->si_map, '/');
5857                 }
5858             }
5859             /* Accept "SAL from to" and "SAL from to # comment". */
5860             else if (STRCMP(items[0], "SAL") == 0
5861                     && (itemcnt == 3 || (itemcnt > 3 && items[3][0] == '#')))
5862             {
5863                 if (do_sal)
5864                 {
5865                     /* SAL item (sounds-a-like)
5866                      * Either one of the known keys or a from-to pair. */
5867                     if (STRCMP(items[1], "followup") == 0)
5868                         spin->si_followup = sal_to_bool(items[2]);
5869                     else if (STRCMP(items[1], "collapse_result") == 0)
5870                         spin->si_collapse = sal_to_bool(items[2]);
5871                     else if (STRCMP(items[1], "remove_accents") == 0)
5872                         spin->si_rem_accents = sal_to_bool(items[2]);
5873                     else
5874                         /* when "to" is "_" it means empty */
5875                         add_fromto(spin, &spin->si_sal, items[1],
5876                                      STRCMP(items[2], "_") == 0 ? (char_u *)""
5877                                                                 : items[2]);
5878                 }
5879             }
5880             else if (STRCMP(items[0], "SOFOFROM") == 0 && itemcnt == 2
5881                                                           && sofofrom == NULL)
5882             {
5883                 sofofrom = getroom_save(spin, items[1]);
5884             }
5885             else if (STRCMP(items[0], "SOFOTO") == 0 && itemcnt == 2
5886                                                             && sofoto == NULL)
5887             {
5888                 sofoto = getroom_save(spin, items[1]);
5889             }
5890             else if (STRCMP(items[0], "COMMON") == 0)
5891             {
5892                 int     i;
5893
5894                 for (i = 1; i < itemcnt; ++i)
5895                 {
5896                     if (HASHITEM_EMPTY(hash_find(&spin->si_commonwords,
5897                                                                    items[i])))
5898                     {
5899                         p = vim_strsave(items[i]);
5900                         if (p == NULL)
5901                             break;
5902                         hash_add(&spin->si_commonwords, p);
5903                     }
5904                 }
5905             }
5906             else
5907                 smsg((char_u *)_("Unrecognized or duplicate item in %s line %d: %s"),
5908                                                        fname, lnum, items[0]);
5909         }
5910     }
5911
5912     if (fol != NULL || low != NULL || upp != NULL)
5913     {
5914         if (spin->si_clear_chartab)
5915         {
5916             /* Clear the char type tables, don't want to use any of the
5917              * currently used spell properties. */
5918             init_spell_chartab();
5919             spin->si_clear_chartab = FALSE;
5920         }
5921
5922         /*
5923          * Don't write a word table for an ASCII file, so that we don't check
5924          * for conflicts with a word table that matches 'encoding'.
5925          * Don't write one for utf-8 either, we use utf_*() and
5926          * mb_get_class(), the list of chars in the file will be incomplete.
5927          */
5928         if (!spin->si_ascii
5929 #ifdef FEAT_MBYTE
5930                 && !enc_utf8
5931 #endif
5932                 )
5933         {
5934             if (fol == NULL || low == NULL || upp == NULL)
5935                 smsg((char_u *)_("Missing FOL/LOW/UPP line in %s"), fname);
5936             else
5937                 (void)set_spell_chartab(fol, low, upp);
5938         }
5939
5940         vim_free(fol);
5941         vim_free(low);
5942         vim_free(upp);
5943     }
5944
5945     /* Use compound specifications of the .aff file for the spell info. */
5946     if (compmax != 0)
5947     {
5948         aff_check_number(spin->si_compmax, compmax, "COMPOUNDWORDMAX");
5949         spin->si_compmax = compmax;
5950     }
5951
5952     if (compminlen != 0)
5953     {
5954         aff_check_number(spin->si_compminlen, compminlen, "COMPOUNDMIN");
5955         spin->si_compminlen = compminlen;
5956     }
5957
5958     if (compsylmax != 0)
5959     {
5960         if (syllable == NULL)
5961             smsg((char_u *)_("COMPOUNDSYLMAX used without SYLLABLE"));
5962         aff_check_number(spin->si_compsylmax, compsylmax, "COMPOUNDSYLMAX");
5963         spin->si_compsylmax = compsylmax;
5964     }
5965
5966     if (compoptions != 0)
5967     {
5968         aff_check_number(spin->si_compoptions, compoptions, "COMPOUND options");
5969         spin->si_compoptions |= compoptions;
5970     }
5971
5972     if (compflags != NULL)
5973         process_compflags(spin, aff, compflags);
5974
5975     /* Check that we didn't use too many renumbered flags. */
5976     if (spin->si_newcompID < spin->si_newprefID)
5977     {
5978         if (spin->si_newcompID == 127 || spin->si_newcompID == 255)
5979             MSG(_("Too many postponed prefixes"));
5980         else if (spin->si_newprefID == 0 || spin->si_newprefID == 127)
5981             MSG(_("Too many compound flags"));
5982         else
5983             MSG(_("Too many postponed prefixes and/or compound flags"));
5984     }
5985
5986     if (syllable != NULL)
5987     {
5988         aff_check_string(spin->si_syllable, syllable, "SYLLABLE");
5989         spin->si_syllable = syllable;
5990     }
5991
5992     if (sofofrom != NULL || sofoto != NULL)
5993     {
5994         if (sofofrom == NULL || sofoto == NULL)
5995             smsg((char_u *)_("Missing SOFO%s line in %s"),
5996                                      sofofrom == NULL ? "FROM" : "TO", fname);
5997         else if (spin->si_sal.ga_len > 0)
5998             smsg((char_u *)_("Both SAL and SOFO lines in %s"), fname);
5999         else
6000         {
6001             aff_check_string(spin->si_sofofr, sofofrom, "SOFOFROM");
6002             aff_check_string(spin->si_sofoto, sofoto, "SOFOTO");
6003             spin->si_sofofr = sofofrom;
6004             spin->si_sofoto = sofoto;
6005         }
6006     }
6007
6008     if (midword != NULL)
6009     {
6010         aff_check_string(spin->si_midword, midword, "MIDWORD");
6011         spin->si_midword = midword;
6012     }
6013
6014     vim_free(pc);
6015     fclose(fd);
6016     return aff;
6017 }
6018
6019 /*
6020  * For affix "entry" move COMPOUNDFORBIDFLAG and COMPOUNDPERMITFLAG from
6021  * ae_flags to ae_comppermit and ae_compforbid.
6022  */
6023     static void
6024 aff_process_flags(affile, entry)
6025     afffile_T   *affile;
6026     affentry_T  *entry;
6027 {
6028     char_u      *p;
6029     char_u      *prevp;
6030     unsigned    flag;
6031
6032     if (entry->ae_flags != NULL
6033                 && (affile->af_compforbid != 0 || affile->af_comppermit != 0))
6034     {
6035         for (p = entry->ae_flags; *p != NUL; )
6036         {
6037             prevp = p;
6038             flag = get_affitem(affile->af_flagtype, &p);
6039             if (flag == affile->af_comppermit || flag == affile->af_compforbid)
6040             {
6041                 STRMOVE(prevp, p);
6042                 p = prevp;
6043                 if (flag == affile->af_comppermit)
6044                     entry->ae_comppermit = TRUE;
6045                 else
6046                     entry->ae_compforbid = TRUE;
6047             }
6048             if (affile->af_flagtype == AFT_NUM && *p == ',')
6049                 ++p;
6050         }
6051         if (*entry->ae_flags == NUL)
6052             entry->ae_flags = NULL;     /* nothing left */
6053     }
6054 }
6055
6056 /*
6057  * Return TRUE if "s" is the name of an info item in the affix file.
6058  */
6059     static int
6060 spell_info_item(s)
6061     char_u      *s;
6062 {
6063     return STRCMP(s, "NAME") == 0
6064         || STRCMP(s, "HOME") == 0
6065         || STRCMP(s, "VERSION") == 0
6066         || STRCMP(s, "AUTHOR") == 0
6067         || STRCMP(s, "EMAIL") == 0
6068         || STRCMP(s, "COPYRIGHT") == 0;
6069 }
6070
6071 /*
6072  * Turn an affix flag name into a number, according to the FLAG type.
6073  * returns zero for failure.
6074  */
6075     static unsigned
6076 affitem2flag(flagtype, item, fname, lnum)
6077     int         flagtype;
6078     char_u      *item;
6079     char_u      *fname;
6080     int         lnum;
6081 {
6082     unsigned    res;
6083     char_u      *p = item;
6084
6085     res = get_affitem(flagtype, &p);
6086     if (res == 0)
6087     {
6088         if (flagtype == AFT_NUM)
6089             smsg((char_u *)_("Flag is not a number in %s line %d: %s"),
6090                                                            fname, lnum, item);
6091         else
6092             smsg((char_u *)_("Illegal flag in %s line %d: %s"),
6093                                                            fname, lnum, item);
6094     }
6095     if (*p != NUL)
6096     {
6097         smsg((char_u *)_(e_affname), fname, lnum, item);
6098         return 0;
6099     }
6100
6101     return res;
6102 }
6103
6104 /*
6105  * Get one affix name from "*pp" and advance the pointer.
6106  * Returns zero for an error, still advances the pointer then.
6107  */
6108     static unsigned
6109 get_affitem(flagtype, pp)
6110     int         flagtype;
6111     char_u      **pp;
6112 {
6113     int         res;
6114
6115     if (flagtype == AFT_NUM)
6116     {
6117         if (!VIM_ISDIGIT(**pp))
6118         {
6119             ++*pp;      /* always advance, avoid getting stuck */
6120             return 0;
6121         }
6122         res = getdigits(pp);
6123     }
6124     else
6125     {
6126 #ifdef FEAT_MBYTE
6127         res = mb_ptr2char_adv(pp);
6128 #else
6129         res = *(*pp)++;
6130 #endif
6131         if (flagtype == AFT_LONG || (flagtype == AFT_CAPLONG
6132                                                  && res >= 'A' && res <= 'Z'))
6133         {
6134             if (**pp == NUL)
6135                 return 0;
6136 #ifdef FEAT_MBYTE
6137             res = mb_ptr2char_adv(pp) + (res << 16);
6138 #else
6139             res = *(*pp)++ + (res << 16);
6140 #endif
6141         }
6142     }
6143     return res;
6144 }
6145
6146 /*
6147  * Process the "compflags" string used in an affix file and append it to
6148  * spin->si_compflags.
6149  * The processing involves changing the affix names to ID numbers, so that
6150  * they fit in one byte.
6151  */
6152     static void
6153 process_compflags(spin, aff, compflags)
6154     spellinfo_T *spin;
6155     afffile_T   *aff;
6156     char_u      *compflags;
6157 {
6158     char_u      *p;
6159     char_u      *prevp;
6160     unsigned    flag;
6161     compitem_T  *ci;
6162     int         id;
6163     int         len;
6164     char_u      *tp;
6165     char_u      key[AH_KEY_LEN];
6166     hashitem_T  *hi;
6167
6168     /* Make room for the old and the new compflags, concatenated with a / in
6169      * between.  Processing it makes it shorter, but we don't know by how
6170      * much, thus allocate the maximum. */
6171     len = (int)STRLEN(compflags) + 1;
6172     if (spin->si_compflags != NULL)
6173         len += (int)STRLEN(spin->si_compflags) + 1;
6174     p = getroom(spin, len, FALSE);
6175     if (p == NULL)
6176         return;
6177     if (spin->si_compflags != NULL)
6178     {
6179         STRCPY(p, spin->si_compflags);
6180         STRCAT(p, "/");
6181     }
6182     spin->si_compflags = p;
6183     tp = p + STRLEN(p);
6184
6185     for (p = compflags; *p != NUL; )
6186     {
6187         if (vim_strchr((char_u *)"/*+[]", *p) != NULL)
6188             /* Copy non-flag characters directly. */
6189             *tp++ = *p++;
6190         else
6191         {
6192             /* First get the flag number, also checks validity. */
6193             prevp = p;
6194             flag = get_affitem(aff->af_flagtype, &p);
6195             if (flag != 0)
6196             {
6197                 /* Find the flag in the hashtable.  If it was used before, use
6198                  * the existing ID.  Otherwise add a new entry. */
6199                 vim_strncpy(key, prevp, p - prevp);
6200                 hi = hash_find(&aff->af_comp, key);
6201                 if (!HASHITEM_EMPTY(hi))
6202                     id = HI2CI(hi)->ci_newID;
6203                 else
6204                 {
6205                     ci = (compitem_T *)getroom(spin, sizeof(compitem_T), TRUE);
6206                     if (ci == NULL)
6207                         break;
6208                     STRCPY(ci->ci_key, key);
6209                     ci->ci_flag = flag;
6210                     /* Avoid using a flag ID that has a special meaning in a
6211                      * regexp (also inside []). */
6212                     do
6213                     {
6214                         check_renumber(spin);
6215                         id = spin->si_newcompID--;
6216                     } while (vim_strchr((char_u *)"/+*[]\\-^", id) != NULL);
6217                     ci->ci_newID = id;
6218                     hash_add(&aff->af_comp, ci->ci_key);
6219                 }
6220                 *tp++ = id;
6221             }
6222             if (aff->af_flagtype == AFT_NUM && *p == ',')
6223                 ++p;
6224         }
6225     }
6226
6227     *tp = NUL;
6228 }
6229
6230 /*
6231  * Check that the new IDs for postponed affixes and compounding don't overrun
6232  * each other.  We have almost 255 available, but start at 0-127 to avoid
6233  * using two bytes for utf-8.  When the 0-127 range is used up go to 128-255.
6234  * When that is used up an error message is given.
6235  */
6236     static void
6237 check_renumber(spin)
6238     spellinfo_T *spin;
6239 {
6240     if (spin->si_newprefID == spin->si_newcompID && spin->si_newcompID < 128)
6241     {
6242         spin->si_newprefID = 127;
6243         spin->si_newcompID = 255;
6244     }
6245 }
6246
6247 /*
6248  * Return TRUE if flag "flag" appears in affix list "afflist".
6249  */
6250     static int
6251 flag_in_afflist(flagtype, afflist, flag)
6252     int         flagtype;
6253     char_u      *afflist;
6254     unsigned    flag;
6255 {
6256     char_u      *p;
6257     unsigned    n;
6258
6259     switch (flagtype)
6260     {
6261         case AFT_CHAR:
6262             return vim_strchr(afflist, flag) != NULL;
6263
6264         case AFT_CAPLONG:
6265         case AFT_LONG:
6266             for (p = afflist; *p != NUL; )
6267             {
6268 #ifdef FEAT_MBYTE
6269                 n = mb_ptr2char_adv(&p);
6270 #else
6271                 n = *p++;
6272 #endif
6273                 if ((flagtype == AFT_LONG || (n >= 'A' && n <= 'Z'))
6274                                                                  && *p != NUL)
6275 #ifdef FEAT_MBYTE
6276                     n = mb_ptr2char_adv(&p) + (n << 16);
6277 #else
6278                     n = *p++ + (n << 16);
6279 #endif
6280                 if (n == flag)
6281                     return TRUE;
6282             }
6283             break;
6284
6285         case AFT_NUM:
6286             for (p = afflist; *p != NUL; )
6287             {
6288                 n = getdigits(&p);
6289                 if (n == flag)
6290                     return TRUE;
6291                 if (*p != NUL)  /* skip over comma */
6292                     ++p;
6293             }
6294             break;
6295     }
6296     return FALSE;
6297 }
6298
6299 /*
6300  * Give a warning when "spinval" and "affval" numbers are set and not the same.
6301  */
6302     static void
6303 aff_check_number(spinval, affval, name)
6304     int     spinval;
6305     int     affval;
6306     char    *name;
6307 {
6308     if (spinval != 0 && spinval != affval)
6309         smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
6310 }
6311
6312 /*
6313  * Give a warning when "spinval" and "affval" strings are set and not the same.
6314  */
6315     static void
6316 aff_check_string(spinval, affval, name)
6317     char_u      *spinval;
6318     char_u      *affval;
6319     char        *name;
6320 {
6321     if (spinval != NULL && STRCMP(spinval, affval) != 0)
6322         smsg((char_u *)_("%s value differs from what is used in another .aff file"), name);
6323 }
6324
6325 /*
6326  * Return TRUE if strings "s1" and "s2" are equal.  Also consider both being
6327  * NULL as equal.
6328  */
6329     static int
6330 str_equal(s1, s2)
6331     char_u      *s1;
6332     char_u      *s2;
6333 {
6334     if (s1 == NULL || s2 == NULL)
6335         return s1 == s2;
6336     return STRCMP(s1, s2) == 0;
6337 }
6338
6339 /*
6340  * Add a from-to item to "gap".  Used for REP and SAL items.
6341  * They are stored case-folded.
6342  */
6343     static void
6344 add_fromto(spin, gap, from, to)
6345     spellinfo_T *spin;
6346     garray_T    *gap;
6347     char_u      *from;
6348     char_u      *to;
6349 {
6350     fromto_T    *ftp;
6351     char_u      word[MAXWLEN];
6352
6353     if (ga_grow(gap, 1) == OK)
6354     {
6355         ftp = ((fromto_T *)gap->ga_data) + gap->ga_len;
6356         (void)spell_casefold(from, (int)STRLEN(from), word, MAXWLEN);
6357         ftp->ft_from = getroom_save(spin, word);
6358         (void)spell_casefold(to, (int)STRLEN(to), word, MAXWLEN);
6359         ftp->ft_to = getroom_save(spin, word);
6360         ++gap->ga_len;
6361     }
6362 }
6363
6364 /*
6365  * Convert a boolean argument in a SAL line to TRUE or FALSE;
6366  */
6367     static int
6368 sal_to_bool(s)
6369     char_u      *s;
6370 {
6371     return STRCMP(s, "1") == 0 || STRCMP(s, "true") == 0;
6372 }
6373
6374 /*
6375  * Return TRUE if string "s" contains a non-ASCII character (128 or higher).
6376  * When "s" is NULL FALSE is returned.
6377  */
6378     static int
6379 has_non_ascii(s)
6380     char_u      *s;
6381 {
6382     char_u      *p;
6383
6384     if (s != NULL)
6385         for (p = s; *p != NUL; ++p)
6386             if (*p >= 128)
6387                 return TRUE;
6388     return FALSE;
6389 }
6390
6391 /*
6392  * Free the structure filled by spell_read_aff().
6393  */
6394     static void
6395 spell_free_aff(aff)
6396     afffile_T   *aff;
6397 {
6398     hashtab_T   *ht;
6399     hashitem_T  *hi;
6400     int         todo;
6401     affheader_T *ah;
6402     affentry_T  *ae;
6403
6404     vim_free(aff->af_enc);
6405
6406     /* All this trouble to free the "ae_prog" items... */
6407     for (ht = &aff->af_pref; ; ht = &aff->af_suff)
6408     {
6409         todo = (int)ht->ht_used;
6410         for (hi = ht->ht_array; todo > 0; ++hi)
6411         {
6412             if (!HASHITEM_EMPTY(hi))
6413             {
6414                 --todo;
6415                 ah = HI2AH(hi);
6416                 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
6417                     vim_free(ae->ae_prog);
6418             }
6419         }
6420         if (ht == &aff->af_suff)
6421             break;
6422     }
6423
6424     hash_clear(&aff->af_pref);
6425     hash_clear(&aff->af_suff);
6426     hash_clear(&aff->af_comp);
6427 }
6428
6429 /*
6430  * Read dictionary file "fname".
6431  * Returns OK or FAIL;
6432  */
6433     static int
6434 spell_read_dic(spin, fname, affile)
6435     spellinfo_T *spin;
6436     char_u      *fname;
6437     afffile_T   *affile;
6438 {
6439     hashtab_T   ht;
6440     char_u      line[MAXLINELEN];
6441     char_u      *p;
6442     char_u      *afflist;
6443     char_u      store_afflist[MAXWLEN];
6444     int         pfxlen;
6445     int         need_affix;
6446     char_u      *dw;
6447     char_u      *pc;
6448     char_u      *w;
6449     int         l;
6450     hash_T      hash;
6451     hashitem_T  *hi;
6452     FILE        *fd;
6453     int         lnum = 1;
6454     int         non_ascii = 0;
6455     int         retval = OK;
6456     char_u      message[MAXLINELEN + MAXWLEN];
6457     int         flags;
6458     int         duplicate = 0;
6459
6460     /*
6461      * Open the file.
6462      */
6463     fd = mch_fopen((char *)fname, "r");
6464     if (fd == NULL)
6465     {
6466         EMSG2(_(e_notopen), fname);
6467         return FAIL;
6468     }
6469
6470     /* The hashtable is only used to detect duplicated words. */
6471     hash_init(&ht);
6472
6473     vim_snprintf((char *)IObuff, IOSIZE,
6474                                   _("Reading dictionary file %s ..."), fname);
6475     spell_message(spin, IObuff);
6476
6477     /* start with a message for the first line */
6478     spin->si_msg_count = 999999;
6479
6480     /* Read and ignore the first line: word count. */
6481     (void)vim_fgets(line, MAXLINELEN, fd);
6482     if (!vim_isdigit(*skipwhite(line)))
6483         EMSG2(_("E760: No word count in %s"), fname);
6484
6485     /*
6486      * Read all the lines in the file one by one.
6487      * The words are converted to 'encoding' here, before being added to
6488      * the hashtable.
6489      */
6490     while (!vim_fgets(line, MAXLINELEN, fd) && !got_int)
6491     {
6492         line_breakcheck();
6493         ++lnum;
6494         if (line[0] == '#' || line[0] == '/')
6495             continue;   /* comment line */
6496
6497         /* Remove CR, LF and white space from the end.  White space halfway
6498          * the word is kept to allow e.g., "et al.". */
6499         l = (int)STRLEN(line);
6500         while (l > 0 && line[l - 1] <= ' ')
6501             --l;
6502         if (l == 0)
6503             continue;   /* empty line */
6504         line[l] = NUL;
6505
6506 #ifdef FEAT_MBYTE
6507         /* Convert from "SET" to 'encoding' when needed. */
6508         if (spin->si_conv.vc_type != CONV_NONE)
6509         {
6510             pc = string_convert(&spin->si_conv, line, NULL);
6511             if (pc == NULL)
6512             {
6513                 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
6514                                                        fname, lnum, line);
6515                 continue;
6516             }
6517             w = pc;
6518         }
6519         else
6520 #endif
6521         {
6522             pc = NULL;
6523             w = line;
6524         }
6525
6526         /* Truncate the word at the "/", set "afflist" to what follows.
6527          * Replace "\/" by "/" and "\\" by "\". */
6528         afflist = NULL;
6529         for (p = w; *p != NUL; mb_ptr_adv(p))
6530         {
6531             if (*p == '\\' && (p[1] == '\\' || p[1] == '/'))
6532                 STRMOVE(p, p + 1);
6533             else if (*p == '/')
6534             {
6535                 *p = NUL;
6536                 afflist = p + 1;
6537                 break;
6538             }
6539         }
6540
6541         /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
6542         if (spin->si_ascii && has_non_ascii(w))
6543         {
6544             ++non_ascii;
6545             vim_free(pc);
6546             continue;
6547         }
6548
6549         /* This takes time, print a message every 10000 words. */
6550         if (spin->si_verbose && spin->si_msg_count > 10000)
6551         {
6552             spin->si_msg_count = 0;
6553             vim_snprintf((char *)message, sizeof(message),
6554                     _("line %6d, word %6d - %s"),
6555                        lnum, spin->si_foldwcount + spin->si_keepwcount, w);
6556             msg_start();
6557             msg_puts_long_attr(message, 0);
6558             msg_clr_eos();
6559             msg_didout = FALSE;
6560             msg_col = 0;
6561             out_flush();
6562         }
6563
6564         /* Store the word in the hashtable to be able to find duplicates. */
6565         dw = (char_u *)getroom_save(spin, w);
6566         if (dw == NULL)
6567         {
6568             retval = FAIL;
6569             vim_free(pc);
6570             break;
6571         }
6572
6573         hash = hash_hash(dw);
6574         hi = hash_lookup(&ht, dw, hash);
6575         if (!HASHITEM_EMPTY(hi))
6576         {
6577             if (p_verbose > 0)
6578                 smsg((char_u *)_("Duplicate word in %s line %d: %s"),
6579                                                              fname, lnum, dw);
6580             else if (duplicate == 0)
6581                 smsg((char_u *)_("First duplicate word in %s line %d: %s"),
6582                                                              fname, lnum, dw);
6583             ++duplicate;
6584         }
6585         else
6586             hash_add_item(&ht, hi, dw, hash);
6587
6588         flags = 0;
6589         store_afflist[0] = NUL;
6590         pfxlen = 0;
6591         need_affix = FALSE;
6592         if (afflist != NULL)
6593         {
6594             /* Extract flags from the affix list. */
6595             flags |= get_affix_flags(affile, afflist);
6596
6597             if (affile->af_needaffix != 0 && flag_in_afflist(
6598                           affile->af_flagtype, afflist, affile->af_needaffix))
6599                 need_affix = TRUE;
6600
6601             if (affile->af_pfxpostpone)
6602                 /* Need to store the list of prefix IDs with the word. */
6603                 pfxlen = get_pfxlist(affile, afflist, store_afflist);
6604
6605             if (spin->si_compflags != NULL)
6606                 /* Need to store the list of compound flags with the word.
6607                  * Concatenate them to the list of prefix IDs. */
6608                 get_compflags(affile, afflist, store_afflist + pfxlen);
6609         }
6610
6611         /* Add the word to the word tree(s). */
6612         if (store_word(spin, dw, flags, spin->si_region,
6613                                            store_afflist, need_affix) == FAIL)
6614             retval = FAIL;
6615
6616         if (afflist != NULL)
6617         {
6618             /* Find all matching suffixes and add the resulting words.
6619              * Additionally do matching prefixes that combine. */
6620             if (store_aff_word(spin, dw, afflist, affile,
6621                            &affile->af_suff, &affile->af_pref,
6622                             CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
6623                 retval = FAIL;
6624
6625             /* Find all matching prefixes and add the resulting words. */
6626             if (store_aff_word(spin, dw, afflist, affile,
6627                           &affile->af_pref, NULL,
6628                             CONDIT_SUF, flags, store_afflist, pfxlen) == FAIL)
6629                 retval = FAIL;
6630         }
6631
6632         vim_free(pc);
6633     }
6634
6635     if (duplicate > 0)
6636         smsg((char_u *)_("%d duplicate word(s) in %s"), duplicate, fname);
6637     if (spin->si_ascii && non_ascii > 0)
6638         smsg((char_u *)_("Ignored %d word(s) with non-ASCII characters in %s"),
6639                                                             non_ascii, fname);
6640     hash_clear(&ht);
6641
6642     fclose(fd);
6643     return retval;
6644 }
6645
6646 /*
6647  * Check for affix flags in "afflist" that are turned into word flags.
6648  * Return WF_ flags.
6649  */
6650     static int
6651 get_affix_flags(affile, afflist)
6652     afffile_T   *affile;
6653     char_u      *afflist;
6654 {
6655     int         flags = 0;
6656
6657     if (affile->af_keepcase != 0 && flag_in_afflist(
6658                            affile->af_flagtype, afflist, affile->af_keepcase))
6659         flags |= WF_KEEPCAP | WF_FIXCAP;
6660     if (affile->af_rare != 0 && flag_in_afflist(
6661                                affile->af_flagtype, afflist, affile->af_rare))
6662         flags |= WF_RARE;
6663     if (affile->af_bad != 0 && flag_in_afflist(
6664                                 affile->af_flagtype, afflist, affile->af_bad))
6665         flags |= WF_BANNED;
6666     if (affile->af_needcomp != 0 && flag_in_afflist(
6667                            affile->af_flagtype, afflist, affile->af_needcomp))
6668         flags |= WF_NEEDCOMP;
6669     if (affile->af_comproot != 0 && flag_in_afflist(
6670                            affile->af_flagtype, afflist, affile->af_comproot))
6671         flags |= WF_COMPROOT;
6672     if (affile->af_nosuggest != 0 && flag_in_afflist(
6673                           affile->af_flagtype, afflist, affile->af_nosuggest))
6674         flags |= WF_NOSUGGEST;
6675     return flags;
6676 }
6677
6678 /*
6679  * Get the list of prefix IDs from the affix list "afflist".
6680  * Used for PFXPOSTPONE.
6681  * Put the resulting flags in "store_afflist[MAXWLEN]" with a terminating NUL
6682  * and return the number of affixes.
6683  */
6684     static int
6685 get_pfxlist(affile, afflist, store_afflist)
6686     afffile_T   *affile;
6687     char_u      *afflist;
6688     char_u      *store_afflist;
6689 {
6690     char_u      *p;
6691     char_u      *prevp;
6692     int         cnt = 0;
6693     int         id;
6694     char_u      key[AH_KEY_LEN];
6695     hashitem_T  *hi;
6696
6697     for (p = afflist; *p != NUL; )
6698     {
6699         prevp = p;
6700         if (get_affitem(affile->af_flagtype, &p) != 0)
6701         {
6702             /* A flag is a postponed prefix flag if it appears in "af_pref"
6703              * and it's ID is not zero. */
6704             vim_strncpy(key, prevp, p - prevp);
6705             hi = hash_find(&affile->af_pref, key);
6706             if (!HASHITEM_EMPTY(hi))
6707             {
6708                 id = HI2AH(hi)->ah_newID;
6709                 if (id != 0)
6710                     store_afflist[cnt++] = id;
6711             }
6712         }
6713         if (affile->af_flagtype == AFT_NUM && *p == ',')
6714             ++p;
6715     }
6716
6717     store_afflist[cnt] = NUL;
6718     return cnt;
6719 }
6720
6721 /*
6722  * Get the list of compound IDs from the affix list "afflist" that are used
6723  * for compound words.
6724  * Puts the flags in "store_afflist[]".
6725  */
6726     static void
6727 get_compflags(affile, afflist, store_afflist)
6728     afffile_T   *affile;
6729     char_u      *afflist;
6730     char_u      *store_afflist;
6731 {
6732     char_u      *p;
6733     char_u      *prevp;
6734     int         cnt = 0;
6735     char_u      key[AH_KEY_LEN];
6736     hashitem_T  *hi;
6737
6738     for (p = afflist; *p != NUL; )
6739     {
6740         prevp = p;
6741         if (get_affitem(affile->af_flagtype, &p) != 0)
6742         {
6743             /* A flag is a compound flag if it appears in "af_comp". */
6744             vim_strncpy(key, prevp, p - prevp);
6745             hi = hash_find(&affile->af_comp, key);
6746             if (!HASHITEM_EMPTY(hi))
6747                 store_afflist[cnt++] = HI2CI(hi)->ci_newID;
6748         }
6749         if (affile->af_flagtype == AFT_NUM && *p == ',')
6750             ++p;
6751     }
6752
6753     store_afflist[cnt] = NUL;
6754 }
6755
6756 /*
6757  * Apply affixes to a word and store the resulting words.
6758  * "ht" is the hashtable with affentry_T that need to be applied, either
6759  * prefixes or suffixes.
6760  * "xht", when not NULL, is the prefix hashtable, to be used additionally on
6761  * the resulting words for combining affixes.
6762  *
6763  * Returns FAIL when out of memory.
6764  */
6765     static int
6766 store_aff_word(spin, word, afflist, affile, ht, xht, condit, flags,
6767                                                               pfxlist, pfxlen)
6768     spellinfo_T *spin;          /* spell info */
6769     char_u      *word;          /* basic word start */
6770     char_u      *afflist;       /* list of names of supported affixes */
6771     afffile_T   *affile;
6772     hashtab_T   *ht;
6773     hashtab_T   *xht;
6774     int         condit;         /* CONDIT_SUF et al. */
6775     int         flags;          /* flags for the word */
6776     char_u      *pfxlist;       /* list of prefix IDs */
6777     int         pfxlen;         /* nr of flags in "pfxlist" for prefixes, rest
6778                                  * is compound flags */
6779 {
6780     int         todo;
6781     hashitem_T  *hi;
6782     affheader_T *ah;
6783     affentry_T  *ae;
6784     regmatch_T  regmatch;
6785     char_u      newword[MAXWLEN];
6786     int         retval = OK;
6787     int         i, j;
6788     char_u      *p;
6789     int         use_flags;
6790     char_u      *use_pfxlist;
6791     int         use_pfxlen;
6792     int         need_affix;
6793     char_u      store_afflist[MAXWLEN];
6794     char_u      pfx_pfxlist[MAXWLEN];
6795     size_t      wordlen = STRLEN(word);
6796     int         use_condit;
6797
6798     todo = (int)ht->ht_used;
6799     for (hi = ht->ht_array; todo > 0 && retval == OK; ++hi)
6800     {
6801         if (!HASHITEM_EMPTY(hi))
6802         {
6803             --todo;
6804             ah = HI2AH(hi);
6805
6806             /* Check that the affix combines, if required, and that the word
6807              * supports this affix. */
6808             if (((condit & CONDIT_COMB) == 0 || ah->ah_combine)
6809                     && flag_in_afflist(affile->af_flagtype, afflist,
6810                                                                  ah->ah_flag))
6811             {
6812                 /* Loop over all affix entries with this name. */
6813                 for (ae = ah->ah_first; ae != NULL; ae = ae->ae_next)
6814                 {
6815                     /* Check the condition.  It's not logical to match case
6816                      * here, but it is required for compatibility with
6817                      * Myspell.
6818                      * Another requirement from Myspell is that the chop
6819                      * string is shorter than the word itself.
6820                      * For prefixes, when "PFXPOSTPONE" was used, only do
6821                      * prefixes with a chop string and/or flags.
6822                      * When a previously added affix had CIRCUMFIX this one
6823                      * must have it too, if it had not then this one must not
6824                      * have one either. */
6825                     regmatch.regprog = ae->ae_prog;
6826                     regmatch.rm_ic = FALSE;
6827                     if ((xht != NULL || !affile->af_pfxpostpone
6828                                 || ae->ae_chop != NULL
6829                                 || ae->ae_flags != NULL)
6830                             && (ae->ae_chop == NULL
6831                                 || STRLEN(ae->ae_chop) < wordlen)
6832                             && (ae->ae_prog == NULL
6833                                 || vim_regexec(&regmatch, word, (colnr_T)0))
6834                             && (((condit & CONDIT_CFIX) == 0)
6835                                 == ((condit & CONDIT_AFF) == 0
6836                                     || ae->ae_flags == NULL
6837                                     || !flag_in_afflist(affile->af_flagtype,
6838                                         ae->ae_flags, affile->af_circumfix))))
6839                     {
6840                         /* Match.  Remove the chop and add the affix. */
6841                         if (xht == NULL)
6842                         {
6843                             /* prefix: chop/add at the start of the word */
6844                             if (ae->ae_add == NULL)
6845                                 *newword = NUL;
6846                             else
6847                                 STRCPY(newword, ae->ae_add);
6848                             p = word;
6849                             if (ae->ae_chop != NULL)
6850                             {
6851                                 /* Skip chop string. */
6852 #ifdef FEAT_MBYTE
6853                                 if (has_mbyte)
6854                                 {
6855                                     i = mb_charlen(ae->ae_chop);
6856                                     for ( ; i > 0; --i)
6857                                         mb_ptr_adv(p);
6858                                 }
6859                                 else
6860 #endif
6861                                     p += STRLEN(ae->ae_chop);
6862                             }
6863                             STRCAT(newword, p);
6864                         }
6865                         else
6866                         {
6867                             /* suffix: chop/add at the end of the word */
6868                             STRCPY(newword, word);
6869                             if (ae->ae_chop != NULL)
6870                             {
6871                                 /* Remove chop string. */
6872                                 p = newword + STRLEN(newword);
6873                                 i = (int)MB_CHARLEN(ae->ae_chop);
6874                                 for ( ; i > 0; --i)
6875                                     mb_ptr_back(newword, p);
6876                                 *p = NUL;
6877                             }
6878                             if (ae->ae_add != NULL)
6879                                 STRCAT(newword, ae->ae_add);
6880                         }
6881
6882                         use_flags = flags;
6883                         use_pfxlist = pfxlist;
6884                         use_pfxlen = pfxlen;
6885                         need_affix = FALSE;
6886                         use_condit = condit | CONDIT_COMB | CONDIT_AFF;
6887                         if (ae->ae_flags != NULL)
6888                         {
6889                             /* Extract flags from the affix list. */
6890                             use_flags |= get_affix_flags(affile, ae->ae_flags);
6891
6892                             if (affile->af_needaffix != 0 && flag_in_afflist(
6893                                         affile->af_flagtype, ae->ae_flags,
6894                                                         affile->af_needaffix))
6895                                 need_affix = TRUE;
6896
6897                             /* When there is a CIRCUMFIX flag the other affix
6898                              * must also have it and we don't add the word
6899                              * with one affix. */
6900                             if (affile->af_circumfix != 0 && flag_in_afflist(
6901                                         affile->af_flagtype, ae->ae_flags,
6902                                                         affile->af_circumfix))
6903                             {
6904                                 use_condit |= CONDIT_CFIX;
6905                                 if ((condit & CONDIT_CFIX) == 0)
6906                                     need_affix = TRUE;
6907                             }
6908
6909                             if (affile->af_pfxpostpone
6910                                                 || spin->si_compflags != NULL)
6911                             {
6912                                 if (affile->af_pfxpostpone)
6913                                     /* Get prefix IDS from the affix list. */
6914                                     use_pfxlen = get_pfxlist(affile,
6915                                                  ae->ae_flags, store_afflist);
6916                                 else
6917                                     use_pfxlen = 0;
6918                                 use_pfxlist = store_afflist;
6919
6920                                 /* Combine the prefix IDs. Avoid adding the
6921                                  * same ID twice. */
6922                                 for (i = 0; i < pfxlen; ++i)
6923                                 {
6924                                     for (j = 0; j < use_pfxlen; ++j)
6925                                         if (pfxlist[i] == use_pfxlist[j])
6926                                             break;
6927                                     if (j == use_pfxlen)
6928                                         use_pfxlist[use_pfxlen++] = pfxlist[i];
6929                                 }
6930
6931                                 if (spin->si_compflags != NULL)
6932                                     /* Get compound IDS from the affix list. */
6933                                     get_compflags(affile, ae->ae_flags,
6934                                                   use_pfxlist + use_pfxlen);
6935
6936                                 /* Combine the list of compound flags.
6937                                  * Concatenate them to the prefix IDs list.
6938                                  * Avoid adding the same ID twice. */
6939                                 for (i = pfxlen; pfxlist[i] != NUL; ++i)
6940                                 {
6941                                     for (j = use_pfxlen;
6942                                                    use_pfxlist[j] != NUL; ++j)
6943                                         if (pfxlist[i] == use_pfxlist[j])
6944                                             break;
6945                                     if (use_pfxlist[j] == NUL)
6946                                     {
6947                                         use_pfxlist[j++] = pfxlist[i];
6948                                         use_pfxlist[j] = NUL;
6949                                     }
6950                                 }
6951                             }
6952                         }
6953
6954                         /* Obey a "COMPOUNDFORBIDFLAG" of the affix: don't
6955                          * use the compound flags. */
6956                         if (use_pfxlist != NULL && ae->ae_compforbid)
6957                         {
6958                             vim_strncpy(pfx_pfxlist, use_pfxlist, use_pfxlen);
6959                             use_pfxlist = pfx_pfxlist;
6960                         }
6961
6962                         /* When there are postponed prefixes... */
6963                         if (spin->si_prefroot != NULL
6964                                 && spin->si_prefroot->wn_sibling != NULL)
6965                         {
6966                             /* ... add a flag to indicate an affix was used. */
6967                             use_flags |= WF_HAS_AFF;
6968
6969                             /* ... don't use a prefix list if combining
6970                              * affixes is not allowed.  But do use the
6971                              * compound flags after them. */
6972                             if (!ah->ah_combine && use_pfxlist != NULL)
6973                                 use_pfxlist += use_pfxlen;
6974                         }
6975
6976                         /* When compounding is supported and there is no
6977                          * "COMPOUNDPERMITFLAG" then forbid compounding on the
6978                          * side where the affix is applied. */
6979                         if (spin->si_compflags != NULL && !ae->ae_comppermit)
6980                         {
6981                             if (xht != NULL)
6982                                 use_flags |= WF_NOCOMPAFT;
6983                             else
6984                                 use_flags |= WF_NOCOMPBEF;
6985                         }
6986
6987                         /* Store the modified word. */
6988                         if (store_word(spin, newword, use_flags,
6989                                                  spin->si_region, use_pfxlist,
6990                                                           need_affix) == FAIL)
6991                             retval = FAIL;
6992
6993                         /* When added a prefix or a first suffix and the affix
6994                          * has flags may add a(nother) suffix.  RECURSIVE! */
6995                         if ((condit & CONDIT_SUF) && ae->ae_flags != NULL)
6996                             if (store_aff_word(spin, newword, ae->ae_flags,
6997                                         affile, &affile->af_suff, xht,
6998                                            use_condit & (xht == NULL
6999                                                         ? ~0 :  ~CONDIT_SUF),
7000                                       use_flags, use_pfxlist, pfxlen) == FAIL)
7001                                 retval = FAIL;
7002
7003                         /* When added a suffix and combining is allowed also
7004                          * try adding a prefix additionally.  Both for the
7005                          * word flags and for the affix flags.  RECURSIVE! */
7006                         if (xht != NULL && ah->ah_combine)
7007                         {
7008                             if (store_aff_word(spin, newword,
7009                                         afflist, affile,
7010                                         xht, NULL, use_condit,
7011                                         use_flags, use_pfxlist,
7012                                         pfxlen) == FAIL
7013                                     || (ae->ae_flags != NULL
7014                                         && store_aff_word(spin, newword,
7015                                             ae->ae_flags, affile,
7016                                             xht, NULL, use_condit,
7017                                             use_flags, use_pfxlist,
7018                                             pfxlen) == FAIL))
7019                                 retval = FAIL;
7020                         }
7021                     }
7022                 }
7023             }
7024         }
7025     }
7026
7027     return retval;
7028 }
7029
7030 /*
7031  * Read a file with a list of words.
7032  */
7033     static int
7034 spell_read_wordfile(spin, fname)
7035     spellinfo_T *spin;
7036     char_u      *fname;
7037 {
7038     FILE        *fd;
7039     long        lnum = 0;
7040     char_u      rline[MAXLINELEN];
7041     char_u      *line;
7042     char_u      *pc = NULL;
7043     char_u      *p;
7044     int         l;
7045     int         retval = OK;
7046     int         did_word = FALSE;
7047     int         non_ascii = 0;
7048     int         flags;
7049     int         regionmask;
7050
7051     /*
7052      * Open the file.
7053      */
7054     fd = mch_fopen((char *)fname, "r");
7055     if (fd == NULL)
7056     {
7057         EMSG2(_(e_notopen), fname);
7058         return FAIL;
7059     }
7060
7061     vim_snprintf((char *)IObuff, IOSIZE, _("Reading word file %s ..."), fname);
7062     spell_message(spin, IObuff);
7063
7064     /*
7065      * Read all the lines in the file one by one.
7066      */
7067     while (!vim_fgets(rline, MAXLINELEN, fd) && !got_int)
7068     {
7069         line_breakcheck();
7070         ++lnum;
7071
7072         /* Skip comment lines. */
7073         if (*rline == '#')
7074             continue;
7075
7076         /* Remove CR, LF and white space from the end. */
7077         l = (int)STRLEN(rline);
7078         while (l > 0 && rline[l - 1] <= ' ')
7079             --l;
7080         if (l == 0)
7081             continue;   /* empty or blank line */
7082         rline[l] = NUL;
7083
7084         /* Convert from "/encoding={encoding}" to 'encoding' when needed. */
7085         vim_free(pc);
7086 #ifdef FEAT_MBYTE
7087         if (spin->si_conv.vc_type != CONV_NONE)
7088         {
7089             pc = string_convert(&spin->si_conv, rline, NULL);
7090             if (pc == NULL)
7091             {
7092                 smsg((char_u *)_("Conversion failure for word in %s line %d: %s"),
7093                                                            fname, lnum, rline);
7094                 continue;
7095             }
7096             line = pc;
7097         }
7098         else
7099 #endif
7100         {
7101             pc = NULL;
7102             line = rline;
7103         }
7104
7105         if (*line == '/')
7106         {
7107             ++line;
7108             if (STRNCMP(line, "encoding=", 9) == 0)
7109             {
7110                 if (spin->si_conv.vc_type != CONV_NONE)
7111                     smsg((char_u *)_("Duplicate /encoding= line ignored in %s line %d: %s"),
7112                                                        fname, lnum, line - 1);
7113                 else if (did_word)
7114                     smsg((char_u *)_("/encoding= line after word ignored in %s line %d: %s"),
7115                                                        fname, lnum, line - 1);
7116                 else
7117                 {
7118 #ifdef FEAT_MBYTE
7119                     char_u      *enc;
7120
7121                     /* Setup for conversion to 'encoding'. */
7122                     line += 9;
7123                     enc = enc_canonize(line);
7124                     if (enc != NULL && !spin->si_ascii
7125                             && convert_setup(&spin->si_conv, enc,
7126                                                                p_enc) == FAIL)
7127                         smsg((char_u *)_("Conversion in %s not supported: from %s to %s"),
7128                                                           fname, line, p_enc);
7129                     vim_free(enc);
7130                     spin->si_conv.vc_fail = TRUE;
7131 #else
7132                     smsg((char_u *)_("Conversion in %s not supported"), fname);
7133 #endif
7134                 }
7135                 continue;
7136             }
7137
7138             if (STRNCMP(line, "regions=", 8) == 0)
7139             {
7140                 if (spin->si_region_count > 1)
7141                     smsg((char_u *)_("Duplicate /regions= line ignored in %s line %d: %s"),
7142                                                        fname, lnum, line);
7143                 else
7144                 {
7145                     line += 8;
7146                     if (STRLEN(line) > 16)
7147                         smsg((char_u *)_("Too many regions in %s line %d: %s"),
7148                                                        fname, lnum, line);
7149                     else
7150                     {
7151                         spin->si_region_count = (int)STRLEN(line) / 2;
7152                         STRCPY(spin->si_region_name, line);
7153
7154                         /* Adjust the mask for a word valid in all regions. */
7155                         spin->si_region = (1 << spin->si_region_count) - 1;
7156                     }
7157                 }
7158                 continue;
7159             }
7160
7161             smsg((char_u *)_("/ line ignored in %s line %d: %s"),
7162                                                        fname, lnum, line - 1);
7163             continue;
7164         }
7165
7166         flags = 0;
7167         regionmask = spin->si_region;
7168
7169         /* Check for flags and region after a slash. */
7170         p = vim_strchr(line, '/');
7171         if (p != NULL)
7172         {
7173             *p++ = NUL;
7174             while (*p != NUL)
7175             {
7176                 if (*p == '=')          /* keep-case word */
7177                     flags |= WF_KEEPCAP | WF_FIXCAP;
7178                 else if (*p == '!')     /* Bad, bad, wicked word. */
7179                     flags |= WF_BANNED;
7180                 else if (*p == '?')     /* Rare word. */
7181                     flags |= WF_RARE;
7182                 else if (VIM_ISDIGIT(*p)) /* region number(s) */
7183                 {
7184                     if ((flags & WF_REGION) == 0)   /* first one */
7185                         regionmask = 0;
7186                     flags |= WF_REGION;
7187
7188                     l = *p - '0';
7189                     if (l > spin->si_region_count)
7190                     {
7191                         smsg((char_u *)_("Invalid region nr in %s line %d: %s"),
7192                                                           fname, lnum, p);
7193                         break;
7194                     }
7195                     regionmask |= 1 << (l - 1);
7196                 }
7197                 else
7198                 {
7199                     smsg((char_u *)_("Unrecognized flags in %s line %d: %s"),
7200                                                               fname, lnum, p);
7201                     break;
7202                 }
7203                 ++p;
7204             }
7205         }
7206
7207         /* Skip non-ASCII words when "spin->si_ascii" is TRUE. */
7208         if (spin->si_ascii && has_non_ascii(line))
7209         {
7210             ++non_ascii;
7211             continue;
7212         }
7213
7214         /* Normal word: store it. */
7215         if (store_word(spin, line, flags, regionmask, NULL, FALSE) == FAIL)
7216         {
7217             retval = FAIL;
7218             break;
7219         }
7220         did_word = TRUE;
7221     }
7222
7223     vim_free(pc);
7224     fclose(fd);
7225
7226     if (spin->si_ascii && non_ascii > 0)
7227     {
7228         vim_snprintf((char *)IObuff, IOSIZE,
7229                   _("Ignored %d words with non-ASCII characters"), non_ascii);
7230         spell_message(spin, IObuff);
7231     }
7232
7233     return retval;
7234 }
7235
7236 /*
7237  * Get part of an sblock_T, "len" bytes long.
7238  * This avoids calling free() for every little struct we use (and keeping
7239  * track of them).
7240  * The memory is cleared to all zeros.
7241  * Returns NULL when out of memory.
7242  */
7243     static void *
7244 getroom(spin, len, align)
7245     spellinfo_T *spin;
7246     size_t      len;            /* length needed */
7247     int         align;          /* align for pointer */
7248 {
7249     char_u      *p;
7250     sblock_T    *bl = spin->si_blocks;
7251
7252     if (align && bl != NULL)
7253         /* Round size up for alignment.  On some systems structures need to be
7254          * aligned to the size of a pointer (e.g., SPARC). */
7255         bl->sb_used = (bl->sb_used + sizeof(char *) - 1)
7256                                                       & ~(sizeof(char *) - 1);
7257
7258     if (bl == NULL || bl->sb_used + len > SBLOCKSIZE)
7259     {
7260         /* Allocate a block of memory. This is not freed until much later. */
7261         bl = (sblock_T *)alloc_clear((unsigned)(sizeof(sblock_T) + SBLOCKSIZE));
7262         if (bl == NULL)
7263             return NULL;
7264         bl->sb_next = spin->si_blocks;
7265         spin->si_blocks = bl;
7266         bl->sb_used = 0;
7267         ++spin->si_blocks_cnt;
7268     }
7269
7270     p = bl->sb_data + bl->sb_used;
7271     bl->sb_used += (int)len;
7272
7273     return p;
7274 }
7275
7276 /*
7277  * Make a copy of a string into memory allocated with getroom().
7278  */
7279     static char_u *
7280 getroom_save(spin, s)
7281     spellinfo_T *spin;
7282     char_u      *s;
7283 {
7284     char_u      *sc;
7285
7286     sc = (char_u *)getroom(spin, STRLEN(s) + 1, FALSE);
7287     if (sc != NULL)
7288         STRCPY(sc, s);
7289     return sc;
7290 }
7291
7292
7293 /*
7294  * Free the list of allocated sblock_T.
7295  */
7296     static void
7297 free_blocks(bl)
7298     sblock_T    *bl;
7299 {
7300     sblock_T    *next;
7301
7302     while (bl != NULL)
7303     {
7304         next = bl->sb_next;
7305         vim_free(bl);
7306         bl = next;
7307     }
7308 }
7309
7310 /*
7311  * Allocate the root of a word tree.
7312  */
7313     static wordnode_T *
7314 wordtree_alloc(spin)
7315     spellinfo_T *spin;
7316 {
7317     return (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
7318 }
7319
7320 /*
7321  * Store a word in the tree(s).
7322  * Always store it in the case-folded tree.  For a keep-case word this is
7323  * useful when the word can also be used with all caps (no WF_FIXCAP flag) and
7324  * used to find suggestions.
7325  * For a keep-case word also store it in the keep-case tree.
7326  * When "pfxlist" is not NULL store the word for each postponed prefix ID and
7327  * compound flag.
7328  */
7329     static int
7330 store_word(spin, word, flags, region, pfxlist, need_affix)
7331     spellinfo_T *spin;
7332     char_u      *word;
7333     int         flags;          /* extra flags, WF_BANNED */
7334     int         region;         /* supported region(s) */
7335     char_u      *pfxlist;       /* list of prefix IDs or NULL */
7336     int         need_affix;     /* only store word with affix ID */
7337 {
7338     int         len = (int)STRLEN(word);
7339     int         ct = captype(word, word + len);
7340     char_u      foldword[MAXWLEN];
7341     int         res = OK;
7342     char_u      *p;
7343
7344     (void)spell_casefold(word, len, foldword, MAXWLEN);
7345     for (p = pfxlist; res == OK; ++p)
7346     {
7347         if (!need_affix || (p != NULL && *p != NUL))
7348             res = tree_add_word(spin, foldword, spin->si_foldroot, ct | flags,
7349                                                   region, p == NULL ? 0 : *p);
7350         if (p == NULL || *p == NUL)
7351             break;
7352     }
7353     ++spin->si_foldwcount;
7354
7355     if (res == OK && (ct == WF_KEEPCAP || (flags & WF_KEEPCAP)))
7356     {
7357         for (p = pfxlist; res == OK; ++p)
7358         {
7359             if (!need_affix || (p != NULL && *p != NUL))
7360                 res = tree_add_word(spin, word, spin->si_keeproot, flags,
7361                                                   region, p == NULL ? 0 : *p);
7362             if (p == NULL || *p == NUL)
7363                 break;
7364         }
7365         ++spin->si_keepwcount;
7366     }
7367     return res;
7368 }
7369
7370 /*
7371  * Add word "word" to a word tree at "root".
7372  * When "flags" < 0 we are adding to the prefix tree where "flags" is used for
7373  * "rare" and "region" is the condition nr.
7374  * Returns FAIL when out of memory.
7375  */
7376     static int
7377 tree_add_word(spin, word, root, flags, region, affixID)
7378     spellinfo_T *spin;
7379     char_u      *word;
7380     wordnode_T  *root;
7381     int         flags;
7382     int         region;
7383     int         affixID;
7384 {
7385     wordnode_T  *node = root;
7386     wordnode_T  *np;
7387     wordnode_T  *copyp, **copyprev;
7388     wordnode_T  **prev = NULL;
7389     int         i;
7390
7391     /* Add each byte of the word to the tree, including the NUL at the end. */
7392     for (i = 0; ; ++i)
7393     {
7394         /* When there is more than one reference to this node we need to make
7395          * a copy, so that we can modify it.  Copy the whole list of siblings
7396          * (we don't optimize for a partly shared list of siblings). */
7397         if (node != NULL && node->wn_refs > 1)
7398         {
7399             --node->wn_refs;
7400             copyprev = prev;
7401             for (copyp = node; copyp != NULL; copyp = copyp->wn_sibling)
7402             {
7403                 /* Allocate a new node and copy the info. */
7404                 np = get_wordnode(spin);
7405                 if (np == NULL)
7406                     return FAIL;
7407                 np->wn_child = copyp->wn_child;
7408                 if (np->wn_child != NULL)
7409                     ++np->wn_child->wn_refs;    /* child gets extra ref */
7410                 np->wn_byte = copyp->wn_byte;
7411                 if (np->wn_byte == NUL)
7412                 {
7413                     np->wn_flags = copyp->wn_flags;
7414                     np->wn_region = copyp->wn_region;
7415                     np->wn_affixID = copyp->wn_affixID;
7416                 }
7417
7418                 /* Link the new node in the list, there will be one ref. */
7419                 np->wn_refs = 1;
7420                 if (copyprev != NULL)
7421                     *copyprev = np;
7422                 copyprev = &np->wn_sibling;
7423
7424                 /* Let "node" point to the head of the copied list. */
7425                 if (copyp == node)
7426                     node = np;
7427             }
7428         }
7429
7430         /* Look for the sibling that has the same character.  They are sorted
7431          * on byte value, thus stop searching when a sibling is found with a
7432          * higher byte value.  For zero bytes (end of word) the sorting is
7433          * done on flags and then on affixID. */
7434         while (node != NULL
7435                 && (node->wn_byte < word[i]
7436                     || (node->wn_byte == NUL
7437                         && (flags < 0
7438                             ? node->wn_affixID < (unsigned)affixID
7439                             : (node->wn_flags < (unsigned)(flags & WN_MASK)
7440                                 || (node->wn_flags == (flags & WN_MASK)
7441                                     && (spin->si_sugtree
7442                                         ? (node->wn_region & 0xffff) < region
7443                                         : node->wn_affixID
7444                                                     < (unsigned)affixID)))))))
7445         {
7446             prev = &node->wn_sibling;
7447             node = *prev;
7448         }
7449         if (node == NULL
7450                 || node->wn_byte != word[i]
7451                 || (word[i] == NUL
7452                     && (flags < 0
7453                         || spin->si_sugtree
7454                         || node->wn_flags != (flags & WN_MASK)
7455                         || node->wn_affixID != affixID)))
7456         {
7457             /* Allocate a new node. */
7458             np = get_wordnode(spin);
7459             if (np == NULL)
7460                 return FAIL;
7461             np->wn_byte = word[i];
7462
7463             /* If "node" is NULL this is a new child or the end of the sibling
7464              * list: ref count is one.  Otherwise use ref count of sibling and
7465              * make ref count of sibling one (matters when inserting in front
7466              * of the list of siblings). */
7467             if (node == NULL)
7468                 np->wn_refs = 1;
7469             else
7470             {
7471                 np->wn_refs = node->wn_refs;
7472                 node->wn_refs = 1;
7473             }
7474             *prev = np;
7475             np->wn_sibling = node;
7476             node = np;
7477         }
7478
7479         if (word[i] == NUL)
7480         {
7481             node->wn_flags = flags;
7482             node->wn_region |= region;
7483             node->wn_affixID = affixID;
7484             break;
7485         }
7486         prev = &node->wn_child;
7487         node = *prev;
7488     }
7489 #ifdef SPELL_PRINTTREE
7490     smsg("Added \"%s\"", word);
7491     spell_print_tree(root->wn_sibling);
7492 #endif
7493
7494     /* count nr of words added since last message */
7495     ++spin->si_msg_count;
7496
7497     if (spin->si_compress_cnt > 1)
7498     {
7499         if (--spin->si_compress_cnt == 1)
7500             /* Did enough words to lower the block count limit. */
7501             spin->si_blocks_cnt += compress_inc;
7502     }
7503
7504     /*
7505      * When we have allocated lots of memory we need to compress the word tree
7506      * to free up some room.  But compression is slow, and we might actually
7507      * need that room, thus only compress in the following situations:
7508      * 1. When not compressed before (si_compress_cnt == 0): when using
7509      *    "compress_start" blocks.
7510      * 2. When compressed before and used "compress_inc" blocks before
7511      *    adding "compress_added" words (si_compress_cnt > 1).
7512      * 3. When compressed before, added "compress_added" words
7513      *    (si_compress_cnt == 1) and the number of free nodes drops below the
7514      *    maximum word length.
7515      */
7516 #ifndef SPELL_PRINTTREE
7517     if (spin->si_compress_cnt == 1
7518             ? spin->si_free_count < MAXWLEN
7519             : spin->si_blocks_cnt >= compress_start)
7520 #endif
7521     {
7522         /* Decrement the block counter.  The effect is that we compress again
7523          * when the freed up room has been used and another "compress_inc"
7524          * blocks have been allocated.  Unless "compress_added" words have
7525          * been added, then the limit is put back again. */
7526         spin->si_blocks_cnt -= compress_inc;
7527         spin->si_compress_cnt = compress_added;
7528
7529         if (spin->si_verbose)
7530         {
7531             msg_start();
7532             msg_puts((char_u *)_(msg_compressing));
7533             msg_clr_eos();
7534             msg_didout = FALSE;
7535             msg_col = 0;
7536             out_flush();
7537         }
7538
7539         /* Compress both trees.  Either they both have many nodes, which makes
7540          * compression useful, or one of them is small, which means
7541          * compression goes fast.  But when filling the souldfold word tree
7542          * there is no keep-case tree. */
7543         wordtree_compress(spin, spin->si_foldroot);
7544         if (affixID >= 0)
7545             wordtree_compress(spin, spin->si_keeproot);
7546     }
7547
7548     return OK;
7549 }
7550
7551 /*
7552  * Check the 'mkspellmem' option.  Return FAIL if it's wrong.
7553  * Sets "sps_flags".
7554  */
7555     int
7556 spell_check_msm()
7557 {
7558     char_u      *p = p_msm;
7559     long        start = 0;
7560     long        incr = 0;
7561     long        added = 0;
7562
7563     if (!VIM_ISDIGIT(*p))
7564         return FAIL;
7565     /* block count = (value * 1024) / SBLOCKSIZE (but avoid overflow)*/
7566     start = (getdigits(&p) * 10) / (SBLOCKSIZE / 102);
7567     if (*p != ',')
7568         return FAIL;
7569     ++p;
7570     if (!VIM_ISDIGIT(*p))
7571         return FAIL;
7572     incr = (getdigits(&p) * 102) / (SBLOCKSIZE / 10);
7573     if (*p != ',')
7574         return FAIL;
7575     ++p;
7576     if (!VIM_ISDIGIT(*p))
7577         return FAIL;
7578     added = getdigits(&p) * 1024;
7579     if (*p != NUL)
7580         return FAIL;
7581
7582     if (start == 0 || incr == 0 || added == 0 || incr > start)
7583         return FAIL;
7584
7585     compress_start = start;
7586     compress_inc = incr;
7587     compress_added = added;
7588     return OK;
7589 }
7590
7591
7592 /*
7593  * Get a wordnode_T, either from the list of previously freed nodes or
7594  * allocate a new one.
7595  */
7596     static wordnode_T *
7597 get_wordnode(spin)
7598     spellinfo_T     *spin;
7599 {
7600     wordnode_T *n;
7601
7602     if (spin->si_first_free == NULL)
7603         n = (wordnode_T *)getroom(spin, sizeof(wordnode_T), TRUE);
7604     else
7605     {
7606         n = spin->si_first_free;
7607         spin->si_first_free = n->wn_child;
7608         vim_memset(n, 0, sizeof(wordnode_T));
7609         --spin->si_free_count;
7610     }
7611 #ifdef SPELL_PRINTTREE
7612     n->wn_nr = ++spin->si_wordnode_nr;
7613 #endif
7614     return n;
7615 }
7616
7617 /*
7618  * Decrement the reference count on a node (which is the head of a list of
7619  * siblings).  If the reference count becomes zero free the node and its
7620  * siblings.
7621  * Returns the number of nodes actually freed.
7622  */
7623     static int
7624 deref_wordnode(spin, node)
7625     spellinfo_T *spin;
7626     wordnode_T  *node;
7627 {
7628     wordnode_T  *np;
7629     int         cnt = 0;
7630
7631     if (--node->wn_refs == 0)
7632     {
7633         for (np = node; np != NULL; np = np->wn_sibling)
7634         {
7635             if (np->wn_child != NULL)
7636                 cnt += deref_wordnode(spin, np->wn_child);
7637             free_wordnode(spin, np);
7638             ++cnt;
7639         }
7640         ++cnt;      /* length field */
7641     }
7642     return cnt;
7643 }
7644
7645 /*
7646  * Free a wordnode_T for re-use later.
7647  * Only the "wn_child" field becomes invalid.
7648  */
7649     static void
7650 free_wordnode(spin, n)
7651     spellinfo_T *spin;
7652     wordnode_T  *n;
7653 {
7654     n->wn_child = spin->si_first_free;
7655     spin->si_first_free = n;
7656     ++spin->si_free_count;
7657 }
7658
7659 /*
7660  * Compress a tree: find tails that are identical and can be shared.
7661  */
7662     static void
7663 wordtree_compress(spin, root)
7664     spellinfo_T     *spin;
7665     wordnode_T      *root;
7666 {
7667     hashtab_T       ht;
7668     int             n;
7669     int             tot = 0;
7670     int             perc;
7671
7672     /* Skip the root itself, it's not actually used.  The first sibling is the
7673      * start of the tree. */
7674     if (root->wn_sibling != NULL)
7675     {
7676         hash_init(&ht);
7677         n = node_compress(spin, root->wn_sibling, &ht, &tot);
7678
7679 #ifndef SPELL_PRINTTREE
7680         if (spin->si_verbose || p_verbose > 2)
7681 #endif
7682         {
7683             if (tot > 1000000)
7684                 perc = (tot - n) / (tot / 100);
7685             else if (tot == 0)
7686                 perc = 0;
7687             else
7688                 perc = (tot - n) * 100 / tot;
7689             vim_snprintf((char *)IObuff, IOSIZE,
7690                           _("Compressed %d of %d nodes; %d (%d%%) remaining"),
7691                                                        n, tot, tot - n, perc);
7692             spell_message(spin, IObuff);
7693         }
7694 #ifdef SPELL_PRINTTREE
7695         spell_print_tree(root->wn_sibling);
7696 #endif
7697         hash_clear(&ht);
7698     }
7699 }
7700
7701 /*
7702  * Compress a node, its siblings and its children, depth first.
7703  * Returns the number of compressed nodes.
7704  */
7705     static int
7706 node_compress(spin, node, ht, tot)
7707     spellinfo_T *spin;
7708     wordnode_T  *node;
7709     hashtab_T   *ht;
7710     int         *tot;       /* total count of nodes before compressing,
7711                                incremented while going through the tree */
7712 {
7713     wordnode_T  *np;
7714     wordnode_T  *tp;
7715     wordnode_T  *child;
7716     hash_T      hash;
7717     hashitem_T  *hi;
7718     int         len = 0;
7719     unsigned    nr, n;
7720     int         compressed = 0;
7721
7722     /*
7723      * Go through the list of siblings.  Compress each child and then try
7724      * finding an identical child to replace it.
7725      * Note that with "child" we mean not just the node that is pointed to,
7726      * but the whole list of siblings of which the child node is the first.
7727      */
7728     for (np = node; np != NULL && !got_int; np = np->wn_sibling)
7729     {
7730         ++len;
7731         if ((child = np->wn_child) != NULL)
7732         {
7733             /* Compress the child first.  This fills hashkey. */
7734             compressed += node_compress(spin, child, ht, tot);
7735
7736             /* Try to find an identical child. */
7737             hash = hash_hash(child->wn_u1.hashkey);
7738             hi = hash_lookup(ht, child->wn_u1.hashkey, hash);
7739             if (!HASHITEM_EMPTY(hi))
7740             {
7741                 /* There are children we encountered before with a hash value
7742                  * identical to the current child.  Now check if there is one
7743                  * that is really identical. */
7744                 for (tp = HI2WN(hi); tp != NULL; tp = tp->wn_u2.next)
7745                     if (node_equal(child, tp))
7746                     {
7747                         /* Found one!  Now use that child in place of the
7748                          * current one.  This means the current child and all
7749                          * its siblings is unlinked from the tree. */
7750                         ++tp->wn_refs;
7751                         compressed += deref_wordnode(spin, child);
7752                         np->wn_child = tp;
7753                         break;
7754                     }
7755                 if (tp == NULL)
7756                 {
7757                     /* No other child with this hash value equals the child of
7758                      * the node, add it to the linked list after the first
7759                      * item. */
7760                     tp = HI2WN(hi);
7761                     child->wn_u2.next = tp->wn_u2.next;
7762                     tp->wn_u2.next = child;
7763                 }
7764             }
7765             else
7766                 /* No other child has this hash value, add it to the
7767                  * hashtable. */
7768                 hash_add_item(ht, hi, child->wn_u1.hashkey, hash);
7769         }
7770     }
7771     *tot += len + 1;    /* add one for the node that stores the length */
7772
7773     /*
7774      * Make a hash key for the node and its siblings, so that we can quickly
7775      * find a lookalike node.  This must be done after compressing the sibling
7776      * list, otherwise the hash key would become invalid by the compression.
7777      */
7778     node->wn_u1.hashkey[0] = len;
7779     nr = 0;
7780     for (np = node; np != NULL; np = np->wn_sibling)
7781     {
7782         if (np->wn_byte == NUL)
7783             /* end node: use wn_flags, wn_region and wn_affixID */
7784             n = np->wn_flags + (np->wn_region << 8) + (np->wn_affixID << 16);
7785         else
7786             /* byte node: use the byte value and the child pointer */
7787             n = (unsigned)(np->wn_byte + ((long_u)np->wn_child << 8));
7788         nr = nr * 101 + n;
7789     }
7790
7791     /* Avoid NUL bytes, it terminates the hash key. */
7792     n = nr & 0xff;
7793     node->wn_u1.hashkey[1] = n == 0 ? 1 : n;
7794     n = (nr >> 8) & 0xff;
7795     node->wn_u1.hashkey[2] = n == 0 ? 1 : n;
7796     n = (nr >> 16) & 0xff;
7797     node->wn_u1.hashkey[3] = n == 0 ? 1 : n;
7798     n = (nr >> 24) & 0xff;
7799     node->wn_u1.hashkey[4] = n == 0 ? 1 : n;
7800     node->wn_u1.hashkey[5] = NUL;
7801
7802     /* Check for CTRL-C pressed now and then. */
7803     fast_breakcheck();
7804
7805     return compressed;
7806 }
7807
7808 /*
7809  * Return TRUE when two nodes have identical siblings and children.
7810  */
7811     static int
7812 node_equal(n1, n2)
7813     wordnode_T  *n1;
7814     wordnode_T  *n2;
7815 {
7816     wordnode_T  *p1;
7817     wordnode_T  *p2;
7818
7819     for (p1 = n1, p2 = n2; p1 != NULL && p2 != NULL;
7820                                      p1 = p1->wn_sibling, p2 = p2->wn_sibling)
7821         if (p1->wn_byte != p2->wn_byte
7822                 || (p1->wn_byte == NUL
7823                     ? (p1->wn_flags != p2->wn_flags
7824                         || p1->wn_region != p2->wn_region
7825                         || p1->wn_affixID != p2->wn_affixID)
7826                     : (p1->wn_child != p2->wn_child)))
7827             break;
7828
7829     return p1 == NULL && p2 == NULL;
7830 }
7831
7832 /*
7833  * Write a number to file "fd", MSB first, in "len" bytes.
7834  */
7835     void
7836 put_bytes(fd, nr, len)
7837     FILE    *fd;
7838     long_u  nr;
7839     int     len;
7840 {
7841     int     i;
7842
7843     for (i = len - 1; i >= 0; --i)
7844         putc((int)(nr >> (i * 8)), fd);
7845 }
7846
7847 #ifdef _MSC_VER
7848 # if (_MSC_VER <= 1200)
7849 /* This line is required for VC6 without the service pack.  Also see the
7850  * matching #pragma below. */
7851  #  pragma optimize("", off)
7852 # endif
7853 #endif
7854
7855 /*
7856  * Write spin->si_sugtime to file "fd".
7857  */
7858     static void
7859 put_sugtime(spin, fd)
7860     spellinfo_T *spin;
7861     FILE        *fd;
7862 {
7863     int         c;
7864     int         i;
7865
7866     /* time_t can be up to 8 bytes in size, more than long_u, thus we
7867      * can't use put_bytes() here. */
7868     for (i = 7; i >= 0; --i)
7869         if (i + 1 > sizeof(time_t))
7870             /* ">>" doesn't work well when shifting more bits than avail */
7871             putc(0, fd);
7872         else
7873         {
7874             c = (unsigned)spin->si_sugtime >> (i * 8);
7875             putc(c, fd);
7876         }
7877 }
7878
7879 #ifdef _MSC_VER
7880 # if (_MSC_VER <= 1200)
7881  #  pragma optimize("", on)
7882 # endif
7883 #endif
7884
7885 static int
7886 #ifdef __BORLANDC__
7887 _RTLENTRYF
7888 #endif
7889 rep_compare __ARGS((const void *s1, const void *s2));
7890
7891 /*
7892  * Function given to qsort() to sort the REP items on "from" string.
7893  */
7894     static int
7895 #ifdef __BORLANDC__
7896 _RTLENTRYF
7897 #endif
7898 rep_compare(s1, s2)
7899     const void  *s1;
7900     const void  *s2;
7901 {
7902     fromto_T    *p1 = (fromto_T *)s1;
7903     fromto_T    *p2 = (fromto_T *)s2;
7904
7905     return STRCMP(p1->ft_from, p2->ft_from);
7906 }
7907
7908 /*
7909  * Write the Vim .spl file "fname".
7910  * Return FAIL or OK;
7911  */
7912     static int
7913 write_vim_spell(spin, fname)
7914     spellinfo_T *spin;
7915     char_u      *fname;
7916 {
7917     FILE        *fd;
7918     int         regionmask;
7919     int         round;
7920     wordnode_T  *tree;
7921     int         nodecount;
7922     int         i;
7923     int         l;
7924     garray_T    *gap;
7925     fromto_T    *ftp;
7926     char_u      *p;
7927     int         rr;
7928     int         retval = OK;
7929
7930     fd = mch_fopen((char *)fname, "w");
7931     if (fd == NULL)
7932     {
7933         EMSG2(_(e_notopen), fname);
7934         return FAIL;
7935     }
7936
7937     /* <HEADER>: <fileID> <versionnr> */
7938                                                             /* <fileID> */
7939     if (fwrite(VIMSPELLMAGIC, VIMSPELLMAGICL, (size_t)1, fd) != 1)
7940     {
7941         EMSG(_(e_write));
7942         retval = FAIL;
7943     }
7944     putc(VIMSPELLVERSION, fd);                              /* <versionnr> */
7945
7946     /*
7947      * <SECTIONS>: <section> ... <sectionend>
7948      */
7949
7950     /* SN_INFO: <infotext> */
7951     if (spin->si_info != NULL)
7952     {
7953         putc(SN_INFO, fd);                              /* <sectionID> */
7954         putc(0, fd);                                    /* <sectionflags> */
7955
7956         i = (int)STRLEN(spin->si_info);
7957         put_bytes(fd, (long_u)i, 4);                    /* <sectionlen> */
7958         fwrite(spin->si_info, (size_t)i, (size_t)1, fd); /* <infotext> */
7959     }
7960
7961     /* SN_REGION: <regionname> ...
7962      * Write the region names only if there is more than one. */
7963     if (spin->si_region_count > 1)
7964     {
7965         putc(SN_REGION, fd);                            /* <sectionID> */
7966         putc(SNF_REQUIRED, fd);                         /* <sectionflags> */
7967         l = spin->si_region_count * 2;
7968         put_bytes(fd, (long_u)l, 4);                    /* <sectionlen> */
7969         fwrite(spin->si_region_name, (size_t)l, (size_t)1, fd);
7970                                                         /* <regionname> ... */
7971         regionmask = (1 << spin->si_region_count) - 1;
7972     }
7973     else
7974         regionmask = 0;
7975
7976     /* SN_CHARFLAGS: <charflagslen> <charflags> <folcharslen> <folchars>
7977      *
7978      * The table with character flags and the table for case folding.
7979      * This makes sure the same characters are recognized as word characters
7980      * when generating an when using a spell file.
7981      * Skip this for ASCII, the table may conflict with the one used for
7982      * 'encoding'.
7983      * Also skip this for an .add.spl file, the main spell file must contain
7984      * the table (avoids that it conflicts).  File is shorter too.
7985      */
7986     if (!spin->si_ascii && !spin->si_add)
7987     {
7988         char_u  folchars[128 * 8];
7989         int     flags;
7990
7991         putc(SN_CHARFLAGS, fd);                         /* <sectionID> */
7992         putc(SNF_REQUIRED, fd);                         /* <sectionflags> */
7993
7994         /* Form the <folchars> string first, we need to know its length. */
7995         l = 0;
7996         for (i = 128; i < 256; ++i)
7997         {
7998 #ifdef FEAT_MBYTE
7999             if (has_mbyte)
8000                 l += mb_char2bytes(spelltab.st_fold[i], folchars + l);
8001             else
8002 #endif
8003                 folchars[l++] = spelltab.st_fold[i];
8004         }
8005         put_bytes(fd, (long_u)(1 + 128 + 2 + l), 4);    /* <sectionlen> */
8006
8007         fputc(128, fd);                                 /* <charflagslen> */
8008         for (i = 128; i < 256; ++i)
8009         {
8010             flags = 0;
8011             if (spelltab.st_isw[i])
8012                 flags |= CF_WORD;
8013             if (spelltab.st_isu[i])
8014                 flags |= CF_UPPER;
8015             fputc(flags, fd);                           /* <charflags> */
8016         }
8017
8018         put_bytes(fd, (long_u)l, 2);                    /* <folcharslen> */
8019         fwrite(folchars, (size_t)l, (size_t)1, fd);     /* <folchars> */
8020     }
8021
8022     /* SN_MIDWORD: <midword> */
8023     if (spin->si_midword != NULL)
8024     {
8025         putc(SN_MIDWORD, fd);                           /* <sectionID> */
8026         putc(SNF_REQUIRED, fd);                         /* <sectionflags> */
8027
8028         i = (int)STRLEN(spin->si_midword);
8029         put_bytes(fd, (long_u)i, 4);                    /* <sectionlen> */
8030         fwrite(spin->si_midword, (size_t)i, (size_t)1, fd); /* <midword> */
8031     }
8032
8033     /* SN_PREFCOND: <prefcondcnt> <prefcond> ... */
8034     if (spin->si_prefcond.ga_len > 0)
8035     {
8036         putc(SN_PREFCOND, fd);                          /* <sectionID> */
8037         putc(SNF_REQUIRED, fd);                         /* <sectionflags> */
8038
8039         l = write_spell_prefcond(NULL, &spin->si_prefcond);
8040         put_bytes(fd, (long_u)l, 4);                    /* <sectionlen> */
8041
8042         write_spell_prefcond(fd, &spin->si_prefcond);
8043     }
8044
8045     /* SN_REP: <repcount> <rep> ...
8046      * SN_SAL: <salflags> <salcount> <sal> ...
8047      * SN_REPSAL: <repcount> <rep> ... */
8048
8049     /* round 1: SN_REP section
8050      * round 2: SN_SAL section (unless SN_SOFO is used)
8051      * round 3: SN_REPSAL section */
8052     for (round = 1; round <= 3; ++round)
8053     {
8054         if (round == 1)
8055             gap = &spin->si_rep;
8056         else if (round == 2)
8057         {
8058             /* Don't write SN_SAL when using a SN_SOFO section */
8059             if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
8060                 continue;
8061             gap = &spin->si_sal;
8062         }
8063         else
8064             gap = &spin->si_repsal;
8065
8066         /* Don't write the section if there are no items. */
8067         if (gap->ga_len == 0)
8068             continue;
8069
8070         /* Sort the REP/REPSAL items. */
8071         if (round != 2)
8072             qsort(gap->ga_data, (size_t)gap->ga_len,
8073                                                sizeof(fromto_T), rep_compare);
8074
8075         i = round == 1 ? SN_REP : (round == 2 ? SN_SAL : SN_REPSAL);
8076         putc(i, fd);                                    /* <sectionID> */
8077
8078         /* This is for making suggestions, section is not required. */
8079         putc(0, fd);                                    /* <sectionflags> */
8080
8081         /* Compute the length of what follows. */
8082         l = 2;      /* count <repcount> or <salcount> */
8083         for (i = 0; i < gap->ga_len; ++i)
8084         {
8085             ftp = &((fromto_T *)gap->ga_data)[i];
8086             l += 1 + (int)STRLEN(ftp->ft_from);  /* count <*fromlen> and <*from> */
8087             l += 1 + (int)STRLEN(ftp->ft_to);    /* count <*tolen> and <*to> */
8088         }
8089         if (round == 2)
8090             ++l;        /* count <salflags> */
8091         put_bytes(fd, (long_u)l, 4);                    /* <sectionlen> */
8092
8093         if (round == 2)
8094         {
8095             i = 0;
8096             if (spin->si_followup)
8097                 i |= SAL_F0LLOWUP;
8098             if (spin->si_collapse)
8099                 i |= SAL_COLLAPSE;
8100             if (spin->si_rem_accents)
8101                 i |= SAL_REM_ACCENTS;
8102             putc(i, fd);                        /* <salflags> */
8103         }
8104
8105         put_bytes(fd, (long_u)gap->ga_len, 2);  /* <repcount> or <salcount> */
8106         for (i = 0; i < gap->ga_len; ++i)
8107         {
8108             /* <rep> : <repfromlen> <repfrom> <reptolen> <repto> */
8109             /* <sal> : <salfromlen> <salfrom> <saltolen> <salto> */
8110             ftp = &((fromto_T *)gap->ga_data)[i];
8111             for (rr = 1; rr <= 2; ++rr)
8112             {
8113                 p = rr == 1 ? ftp->ft_from : ftp->ft_to;
8114                 l = (int)STRLEN(p);
8115                 putc(l, fd);
8116                 fwrite(p, l, (size_t)1, fd);
8117             }
8118         }
8119
8120     }
8121
8122     /* SN_SOFO: <sofofromlen> <sofofrom> <sofotolen> <sofoto>
8123      * This is for making suggestions, section is not required. */
8124     if (spin->si_sofofr != NULL && spin->si_sofoto != NULL)
8125     {
8126         putc(SN_SOFO, fd);                              /* <sectionID> */
8127         putc(0, fd);                                    /* <sectionflags> */
8128
8129         l = (int)STRLEN(spin->si_sofofr);
8130         put_bytes(fd, (long_u)(l + STRLEN(spin->si_sofoto) + 4), 4);
8131                                                         /* <sectionlen> */
8132
8133         put_bytes(fd, (long_u)l, 2);                    /* <sofofromlen> */
8134         fwrite(spin->si_sofofr, l, (size_t)1, fd);      /* <sofofrom> */
8135
8136         l = (int)STRLEN(spin->si_sofoto);
8137         put_bytes(fd, (long_u)l, 2);                    /* <sofotolen> */
8138         fwrite(spin->si_sofoto, l, (size_t)1, fd);      /* <sofoto> */
8139     }
8140
8141     /* SN_WORDS: <word> ...
8142      * This is for making suggestions, section is not required. */
8143     if (spin->si_commonwords.ht_used > 0)
8144     {
8145         putc(SN_WORDS, fd);                             /* <sectionID> */
8146         putc(0, fd);                                    /* <sectionflags> */
8147
8148         /* round 1: count the bytes
8149          * round 2: write the bytes */
8150         for (round = 1; round <= 2; ++round)
8151         {
8152             int         todo;
8153             int         len = 0;
8154             hashitem_T  *hi;
8155
8156             todo = (int)spin->si_commonwords.ht_used;
8157             for (hi = spin->si_commonwords.ht_array; todo > 0; ++hi)
8158                 if (!HASHITEM_EMPTY(hi))
8159                 {
8160                     l = (int)STRLEN(hi->hi_key) + 1;
8161                     len += l;
8162                     if (round == 2)                     /* <word> */
8163                         fwrite(hi->hi_key, (size_t)l, (size_t)1, fd);
8164                     --todo;
8165                 }
8166             if (round == 1)
8167                 put_bytes(fd, (long_u)len, 4);          /* <sectionlen> */
8168         }
8169     }
8170
8171     /* SN_MAP: <mapstr>
8172      * This is for making suggestions, section is not required. */
8173     if (spin->si_map.ga_len > 0)
8174     {
8175         putc(SN_MAP, fd);                               /* <sectionID> */
8176         putc(0, fd);                                    /* <sectionflags> */
8177         l = spin->si_map.ga_len;
8178         put_bytes(fd, (long_u)l, 4);                    /* <sectionlen> */
8179         fwrite(spin->si_map.ga_data, (size_t)l, (size_t)1, fd);
8180                                                         /* <mapstr> */
8181     }
8182
8183     /* SN_SUGFILE: <timestamp>
8184      * This is used to notify that a .sug file may be available and at the
8185      * same time allows for checking that a .sug file that is found matches
8186      * with this .spl file.  That's because the word numbers must be exactly
8187      * right. */
8188     if (!spin->si_nosugfile
8189             && (spin->si_sal.ga_len > 0
8190                      || (spin->si_sofofr != NULL && spin->si_sofoto != NULL)))
8191     {
8192         putc(SN_SUGFILE, fd);                           /* <sectionID> */
8193         putc(0, fd);                                    /* <sectionflags> */
8194         put_bytes(fd, (long_u)8, 4);                    /* <sectionlen> */
8195
8196         /* Set si_sugtime and write it to the file. */
8197         spin->si_sugtime = time(NULL);
8198         put_sugtime(spin, fd);                          /* <timestamp> */
8199     }
8200
8201     /* SN_NOSPLITSUGS: nothing
8202      * This is used to notify that no suggestions with word splits are to be
8203      * made. */
8204     if (spin->si_nosplitsugs)
8205     {
8206         putc(SN_NOSPLITSUGS, fd);                       /* <sectionID> */
8207         putc(0, fd);                                    /* <sectionflags> */
8208         put_bytes(fd, (long_u)0, 4);                    /* <sectionlen> */
8209     }
8210
8211     /* SN_COMPOUND: compound info.
8212      * We don't mark it required, when not supported all compound words will
8213      * be bad words. */
8214     if (spin->si_compflags != NULL)
8215     {
8216         putc(SN_COMPOUND, fd);                          /* <sectionID> */
8217         putc(0, fd);                                    /* <sectionflags> */
8218
8219         l = (int)STRLEN(spin->si_compflags);
8220         for (i = 0; i < spin->si_comppat.ga_len; ++i)
8221             l += (int)STRLEN(((char_u **)(spin->si_comppat.ga_data))[i]) + 1;
8222         put_bytes(fd, (long_u)(l + 7), 4);              /* <sectionlen> */
8223
8224         putc(spin->si_compmax, fd);                     /* <compmax> */
8225         putc(spin->si_compminlen, fd);                  /* <compminlen> */
8226         putc(spin->si_compsylmax, fd);                  /* <compsylmax> */
8227         putc(0, fd);            /* for Vim 7.0b compatibility */
8228         putc(spin->si_compoptions, fd);                 /* <compoptions> */
8229         put_bytes(fd, (long_u)spin->si_comppat.ga_len, 2);
8230                                                         /* <comppatcount> */
8231         for (i = 0; i < spin->si_comppat.ga_len; ++i)
8232         {
8233             p = ((char_u **)(spin->si_comppat.ga_data))[i];
8234             putc((int)STRLEN(p), fd);                   /* <comppatlen> */
8235             fwrite(p, (size_t)STRLEN(p), (size_t)1, fd);/* <comppattext> */
8236         }
8237                                                         /* <compflags> */
8238         fwrite(spin->si_compflags, (size_t)STRLEN(spin->si_compflags),
8239                                                                (size_t)1, fd);
8240     }
8241
8242     /* SN_NOBREAK: NOBREAK flag */
8243     if (spin->si_nobreak)
8244     {
8245         putc(SN_NOBREAK, fd);                           /* <sectionID> */
8246         putc(0, fd);                                    /* <sectionflags> */
8247
8248         /* It's empty, the presence of the section flags the feature. */
8249         put_bytes(fd, (long_u)0, 4);                    /* <sectionlen> */
8250     }
8251
8252     /* SN_SYLLABLE: syllable info.
8253      * We don't mark it required, when not supported syllables will not be
8254      * counted. */
8255     if (spin->si_syllable != NULL)
8256     {
8257         putc(SN_SYLLABLE, fd);                          /* <sectionID> */
8258         putc(0, fd);                                    /* <sectionflags> */
8259
8260         l = (int)STRLEN(spin->si_syllable);
8261         put_bytes(fd, (long_u)l, 4);                    /* <sectionlen> */
8262         fwrite(spin->si_syllable, (size_t)l, (size_t)1, fd); /* <syllable> */
8263     }
8264
8265     /* end of <SECTIONS> */
8266     putc(SN_END, fd);                                   /* <sectionend> */
8267
8268
8269     /*
8270      * <LWORDTREE>  <KWORDTREE>  <PREFIXTREE>
8271      */
8272     spin->si_memtot = 0;
8273     for (round = 1; round <= 3; ++round)
8274     {
8275         if (round == 1)
8276             tree = spin->si_foldroot->wn_sibling;
8277         else if (round == 2)
8278             tree = spin->si_keeproot->wn_sibling;
8279         else
8280             tree = spin->si_prefroot->wn_sibling;
8281
8282         /* Clear the index and wnode fields in the tree. */
8283         clear_node(tree);
8284
8285         /* Count the number of nodes.  Needed to be able to allocate the
8286          * memory when reading the nodes.  Also fills in index for shared
8287          * nodes. */
8288         nodecount = put_node(NULL, tree, 0, regionmask, round == 3);
8289
8290         /* number of nodes in 4 bytes */
8291         put_bytes(fd, (long_u)nodecount, 4);    /* <nodecount> */
8292         spin->si_memtot += nodecount + nodecount * sizeof(int);
8293
8294         /* Write the nodes. */
8295         (void)put_node(fd, tree, 0, regionmask, round == 3);
8296     }
8297
8298     /* Write another byte to check for errors. */
8299     if (putc(0, fd) == EOF)
8300         retval = FAIL;
8301
8302     if (fclose(fd) == EOF)
8303         retval = FAIL;
8304
8305     return retval;
8306 }
8307
8308 /*
8309  * Clear the index and wnode fields of "node", it siblings and its
8310  * children.  This is needed because they are a union with other items to save
8311  * space.
8312  */
8313     static void
8314 clear_node(node)
8315     wordnode_T  *node;
8316 {
8317     wordnode_T  *np;
8318
8319     if (node != NULL)
8320         for (np = node; np != NULL; np = np->wn_sibling)
8321         {
8322             np->wn_u1.index = 0;
8323             np->wn_u2.wnode = NULL;
8324
8325             if (np->wn_byte != NUL)
8326                 clear_node(np->wn_child);
8327         }
8328 }
8329
8330
/*
 * Dump a word tree at node "node".
 *
 * This first writes the list of possible bytes (siblings).  Then for each
 * byte recursively write the children.
 *
 * NOTE: The code here must match the code in read_tree_node(), since
 * assumptions are made about the indexes (so that we don't have to write them
 * in the file).
 *
 * "fd" may be NULL: nothing is written then, but the index and owner fields
 * in the nodes are still filled in, so that the result can be used as the
 * node count.
 * "idx" is the index given to "node"; it is stored in node->wn_u1.index.
 * "regionmask" is compared with wn_region to decide whether a <region> byte
 * is needed for a word (only when "regionmask" is not zero).
 * "prefixtree" selects the PREFIXTREE layout for the end-of-word entries.
 *
 * Returns the index following the last slot used by "node" and its
 * descendants; when "idx" is zero that is the number of nodes used.
 * Returns 0 when "node" is NULL and when writing a sibling byte fails (an
 * error message is given in the latter case).
 */
    static int
put_node(fd, node, idx, regionmask, prefixtree)
    FILE        *fd;            /* NULL when only counting */
    wordnode_T  *node;
    int         idx;
    int         regionmask;
    int         prefixtree;     /* TRUE for PREFIXTREE */
{
    int         newindex = idx;
    int         siblingcount = 0;
    wordnode_T  *np;
    int         flags;

    /* If "node" is zero the tree is empty. */
    if (node == NULL)
        return 0;

    /* Store the index where this node is written. */
    node->wn_u1.index = idx;

    /* Count the number of siblings. */
    for (np = node; np != NULL; np = np->wn_sibling)
        ++siblingcount;

    /* Write the sibling count. */
    if (fd != NULL)
        putc(siblingcount, fd);                         /* <siblingcount> */

    /* Write each sibling byte and optionally extra info. */
    for (np = node; np != NULL; np = np->wn_sibling)
    {
        if (np->wn_byte == 0)
        {
            /* End-of-word entry: it has no child, so there is nothing to do
             * when only counting. */
            if (fd != NULL)
            {
                /* For a NUL byte (end of word) write the flags etc. */
                if (prefixtree)
                {
                    /* In PREFIXTREE write the required affixID and the
                     * associated condition nr (stored in wn_region).  The
                     * byte value is misused to store the "rare" and "not
                     * combining" flags */
                    if (np->wn_flags == (short_u)PFX_FLAGS)
                        putc(BY_NOFLAGS, fd);           /* <byte> */
                    else
                    {
                        putc(BY_FLAGS, fd);             /* <byte> */
                        putc(np->wn_flags, fd);         /* <pflags> */
                    }
                    putc(np->wn_affixID, fd);           /* <affixID> */
                    put_bytes(fd, (long_u)np->wn_region, 2); /* <prefcondnr> */
                }
                else
                {
                    /* For word trees we write the flag/region items. */
                    flags = np->wn_flags;
                    if (regionmask != 0 && np->wn_region != regionmask)
                        flags |= WF_REGION;
                    if (np->wn_affixID != 0)
                        flags |= WF_AFX;
                    if (flags == 0)
                    {
                        /* word without flags or region */
                        putc(BY_NOFLAGS, fd);                   /* <byte> */
                    }
                    else
                    {
                        /* The stored flags decide between the one-byte and
                         * the two-byte form; the bytes written come from
                         * "flags", which includes the bits added above. */
                        if (np->wn_flags >= 0x100)
                        {
                            putc(BY_FLAGS2, fd);                /* <byte> */
                            putc(flags, fd);                    /* <flags> */
                            putc((unsigned)flags >> 8, fd);     /* <flags2> */
                        }
                        else
                        {
                            putc(BY_FLAGS, fd);                 /* <byte> */
                            putc(flags, fd);                    /* <flags> */
                        }
                        if (flags & WF_REGION)
                            putc(np->wn_region, fd);            /* <region> */
                        if (flags & WF_AFX)
                            putc(np->wn_affixID, fd);           /* <affixID> */
                    }
                }
            }
        }
        else
        {
            /* A child that already has an index and is owned by another node
             * is shared: only a reference to it is written here. */
            if (np->wn_child->wn_u1.index != 0
                                         && np->wn_child->wn_u2.wnode != node)
            {
                /* The child is written elsewhere, write the reference. */
                if (fd != NULL)
                {
                    putc(BY_INDEX, fd);                 /* <byte> */
                                                        /* <nodeidx> */
                    put_bytes(fd, (long_u)np->wn_child->wn_u1.index, 3);
                }
            }
            else if (np->wn_child->wn_u2.wnode == NULL)
                /* We will write the child below and give it an index. */
                np->wn_child->wn_u2.wnode = node;

            /* The byte itself is written in both cases; after BY_INDEX it
             * is the <xbyte>. */
            if (fd != NULL)
                if (putc(np->wn_byte, fd) == EOF) /* <byte> or <xbyte> */
                {
                    EMSG(_(e_write));
                    return 0;
                }
        }
    }

    /* Space used in the array when reading: one for each sibling and one for
     * the count. */
    newindex += siblingcount + 1;

    /* Recursively dump the children of each sibling.  Only children that
     * were marked above as owned by "node" are written here. */
    for (np = node; np != NULL; np = np->wn_sibling)
        if (np->wn_byte != 0 && np->wn_child->wn_u2.wnode == node)
            newindex = put_node(fd, np->wn_child, newindex, regionmask,
                                                                  prefixtree);

    return newindex;
}
8467
8468
8469 /*
8470  * ":mkspell [-ascii] outfile  infile ..."
8471  * ":mkspell [-ascii] addfile"
8472  */
8473     void
8474 ex_mkspell(eap)
8475     exarg_T *eap;
8476 {
8477     int         fcount;
8478     char_u      **fnames;
8479     char_u      *arg = eap->arg;
8480     int         ascii = FALSE;
8481
8482     if (STRNCMP(arg, "-ascii", 6) == 0)
8483     {
8484         ascii = TRUE;
8485         arg = skipwhite(arg + 6);
8486     }
8487
8488     /* Expand all the remaining arguments (e.g., $VIMRUNTIME). */
8489     if (get_arglist_exp(arg, &fcount, &fnames) == OK)
8490     {
8491         mkspell(fcount, fnames, ascii, eap->forceit, FALSE);
8492         FreeWild(fcount, fnames);
8493     }
8494 }
8495
8496 /*
8497  * Create the .sug file.
8498  * Uses the soundfold info in "spin".
8499  * Writes the file with the name "wfname", with ".spl" changed to ".sug".
8500  */
8501     static void
8502 spell_make_sugfile(spin, wfname)
8503     spellinfo_T *spin;
8504     char_u      *wfname;
8505 {
8506     char_u      fname[MAXPATHL];
8507     int         len;
8508     slang_T     *slang;
8509     int         free_slang = FALSE;
8510
8511     /*
8512      * Read back the .spl file that was written.  This fills the required
8513      * info for soundfolding.  This also uses less memory than the
8514      * pointer-linked version of the trie.  And it avoids having two versions
8515      * of the code for the soundfolding stuff.
8516      * It might have been done already by spell_reload_one().
8517      */
8518     for (slang = first_lang; slang != NULL; slang = slang->sl_next)
8519         if (fullpathcmp(wfname, slang->sl_fname, FALSE) == FPC_SAME)
8520             break;
8521     if (slang == NULL)
8522     {
8523         spell_message(spin, (char_u *)_("Reading back spell file..."));
8524         slang = spell_load_file(wfname, NULL, NULL, FALSE);
8525         if (slang == NULL)
8526             return;
8527         free_slang = TRUE;
8528     }
8529
8530     /*
8531      * Clear the info in "spin" that is used.
8532      */
8533     spin->si_blocks = NULL;
8534     spin->si_blocks_cnt = 0;
8535     spin->si_compress_cnt = 0;      /* will stay at 0 all the time*/
8536     spin->si_free_count = 0;
8537     spin->si_first_free = NULL;
8538     spin->si_foldwcount = 0;
8539
8540     /*
8541      * Go through the trie of good words, soundfold each word and add it to
8542      * the soundfold trie.
8543      */
8544     spell_message(spin, (char_u *)_("Performing soundfolding..."));
8545     if (sug_filltree(spin, slang) == FAIL)
8546         goto theend;
8547
8548     /*
8549      * Create the table which links each soundfold word with a list of the
8550      * good words it may come from.  Creates buffer "spin->si_spellbuf".
8551      * This also removes the wordnr from the NUL byte entries to make
8552      * compression possible.
8553      */
8554     if (sug_maketable(spin) == FAIL)
8555         goto theend;
8556
8557     smsg((char_u *)_("Number of words after soundfolding: %ld"),
8558                                  (long)spin->si_spellbuf->b_ml.ml_line_count);
8559
8560     /*
8561      * Compress the soundfold trie.
8562      */
8563     spell_message(spin, (char_u *)_(msg_compressing));
8564     wordtree_compress(spin, spin->si_foldroot);
8565
8566     /*
8567      * Write the .sug file.
8568      * Make the file name by changing ".spl" to ".sug".
8569      */
8570     STRCPY(fname, wfname);
8571     len = (int)STRLEN(fname);
8572     fname[len - 2] = 'u';
8573     fname[len - 1] = 'g';
8574     sug_write(spin, fname);
8575
8576 theend:
8577     if (free_slang)
8578         slang_free(slang);
8579     free_blocks(spin->si_blocks);
8580     close_spellbuf(spin->si_spellbuf);
8581 }
8582
/*
 * Build the soundfold trie for language "slang".
 *
 * Walks the case-folded word tree of "slang" ("sl_fbyts" / "sl_fidxs")
 * without recursion, using "arridx", "curi" and "wordcount" as a stack
 * indexed by "depth".  Every word found is sound-folded and added to a new
 * tree that is stored in "spin->si_foldroot".
 *
 * Side effect: when all bytes of a node have been handled, the number of
 * words counted for that node is stored in "slang->sl_fidxs" at the index
 * where the node starts.
 *
 * Returns FAIL when the root of the tree cannot be allocated or when adding
 * a word fails.  Returns OK otherwise, also when the loop was stopped
 * because "got_int" was set.
 */
    static int
sug_filltree(spin, slang)
    spellinfo_T *spin;
    slang_T     *slang;
{
    char_u      *byts;
    idx_T       *idxs;
    int         depth;
    idx_T       arridx[MAXWLEN];    /* index in byts[] where the node starts */
    int         curi[MAXWLEN];      /* next sibling to handle in the node */
    char_u      tword[MAXWLEN];     /* bytes of the word collected so far */
    char_u      tsalword[MAXWLEN];  /* result of sound-folding "tword" */
    int         c;
    idx_T       n;
    unsigned    words_done = 0;
    int         wordcount[MAXWLEN]; /* words counted for the node */

    /* We use si_foldroot for the soundfolded trie. */
    spin->si_foldroot = wordtree_alloc(spin);
    if (spin->si_foldroot == NULL)
        return FAIL;

    /* let tree_add_word() know we're adding to the soundfolded tree */
    spin->si_sugtree = TRUE;

    /*
     * Go through the whole case-folded tree, soundfold each word and put it
     * in the trie.
     */
    byts = slang->sl_fbyts;
    idxs = slang->sl_fidxs;

    /* Start at the root node; sibling bytes start at offset 1, offset 0
     * holds the sibling count. */
    arridx[0] = 0;
    curi[0] = 1;
    wordcount[0] = 0;

    depth = 0;
    while (depth >= 0 && !got_int)
    {
        if (curi[depth] > byts[arridx[depth]])
        {
            /* Done all bytes at this node, go up one level. */
            idxs[arridx[depth]] = wordcount[depth];
            if (depth > 0)
                wordcount[depth - 1] += wordcount[depth];

            --depth;
            line_breakcheck();
        }
        else
        {

            /* Do one more byte at this node. */
            n = arridx[depth] + curi[depth];
            ++curi[depth];

            c = byts[n];
            if (c == 0)
            {
                /* Sound-fold the word. */
                tword[depth] = NUL;
                spell_soundfold(slang, tword, TRUE, tsalword);

                /* We use the "flags" field for the MSB of the wordnr,
                 * "region" for the LSB of the wordnr.  */
                if (tree_add_word(spin, tsalword, spin->si_foldroot,
                                words_done >> 16, words_done & 0xffff,
                                                           0) == FAIL)
                    return FAIL;

                ++words_done;
                ++wordcount[depth];

                /* Reset the block count each time to avoid compression
                 * kicking in. */
                spin->si_blocks_cnt = 0;

                /* Skip over any other NUL bytes (same word with different
                 * flags). */
                while (byts[n + 1] == 0)
                {
                    ++n;
                    ++curi[depth];
                }
            }
            else
            {
                /* Normal char, go one level deeper. */
                tword[depth++] = c;
                arridx[depth] = idxs[n];
                curi[depth] = 1;
                wordcount[depth] = 0;
            }
        }
    }

    smsg((char_u *)_("Total number of words: %d"), words_done);

    return OK;
}
8686
8687 /*
8688  * Make the table that links each word in the soundfold trie to the words it
8689  * can be produced from.
8690  * This is not unlike lines in a file, thus use a memfile to be able to access
8691  * the table efficiently.
8692  * Returns FAIL when out of memory.
8693  */
8694     static int
8695 sug_maketable(spin)
8696     spellinfo_T *spin;
8697 {
8698     garray_T    ga;
8699     int         res = OK;
8700
8701     /* Allocate a buffer, open a memline for it and create the swap file
8702      * (uses a temp file, not a .swp file). */
8703     spin->si_spellbuf = open_spellbuf();
8704     if (spin->si_spellbuf == NULL)
8705         return FAIL;
8706
8707     /* Use a buffer to store the line info, avoids allocating many small
8708      * pieces of memory. */
8709     ga_init2(&ga, 1, 100);
8710
8711     /* recursively go through the tree */
8712     if (sug_filltable(spin, spin->si_foldroot->wn_sibling, 0, &ga) == -1)
8713         res = FAIL;
8714
8715     ga_clear(&ga);
8716     return res;
8717 }
8718
8719 /*
8720  * Fill the table for one node and its children.
8721  * Returns the wordnr at the start of the node.
8722  * Returns -1 when out of memory.
8723  */
8724     static int
8725 sug_filltable(spin, node, startwordnr, gap)
8726     spellinfo_T *spin;
8727     wordnode_T  *node;
8728     int         startwordnr;
8729     garray_T    *gap;       /* place to store line of numbers */
8730 {
8731     wordnode_T  *p, *np;
8732     int         wordnr = startwordnr;
8733     int         nr;
8734     int         prev_nr;
8735
8736     for (p = node; p != NULL; p = p->wn_sibling)
8737     {
8738         if (p->wn_byte == NUL)
8739         {
8740             gap->ga_len = 0;
8741             prev_nr = 0;
8742             for (np = p; np != NULL && np->wn_byte == NUL; np = np->wn_sibling)
8743             {
8744                 if (ga_grow(gap, 10) == FAIL)
8745                     return -1;
8746
8747                 nr = (np->wn_flags << 16) + (np->wn_region & 0xffff);
8748                 /* Compute the offset from the previous nr and store the
8749                  * offset in a way that it takes a minimum number of bytes.
8750                  * It's a bit like utf-8, but without the need to mark
8751                  * following bytes. */
8752                 nr -= prev_nr;
8753                 prev_nr += nr;
8754                 gap->ga_len += offset2bytes(nr,
8755                                          (char_u *)gap->ga_data + gap->ga_len);
8756             }
8757
8758             /* add the NUL byte */
8759             ((char_u *)gap->ga_data)[gap->ga_len++] = NUL;
8760
8761             if (ml_append_buf(spin->si_spellbuf, (linenr_T)wordnr,
8762                                      gap->ga_data, gap->ga_len, TRUE) == FAIL)
8763                 return -1;
8764             ++wordnr;
8765
8766             /* Remove extra NUL entries, we no longer need them. We don't
8767              * bother freeing the nodes, the won't be reused anyway. */
8768             while (p->wn_sibling != NULL && p->wn_sibling->wn_byte == NUL)
8769                 p->wn_sibling = p->wn_sibling->wn_sibling;
8770
8771             /* Clear the flags on the remaining NUL node, so that compression
8772              * works a lot better. */
8773             p->wn_flags = 0;
8774             p->wn_region = 0;
8775         }
8776         else
8777         {
8778             wordnr = sug_filltable(spin, p->wn_child, wordnr, gap);
8779             if (wordnr == -1)
8780                 return -1;
8781         }
8782     }
8783     return wordnr;
8784 }
8785
8786 /*
8787  * Convert an offset into a minimal number of bytes.
8788  * Similar to utf_char2byters, but use 8 bits in followup bytes and avoid NUL
8789  * bytes.
8790  */
8791     static int
8792 offset2bytes(nr, buf)
8793     int     nr;
8794     char_u  *buf;
8795 {
8796     int     rem;
8797     int     b1, b2, b3, b4;
8798
8799     /* Split the number in parts of base 255.  We need to avoid NUL bytes. */
8800     b1 = nr % 255 + 1;
8801     rem = nr / 255;
8802     b2 = rem % 255 + 1;
8803     rem = rem / 255;
8804     b3 = rem % 255 + 1;
8805     b4 = rem / 255 + 1;
8806
8807     if (b4 > 1 || b3 > 0x1f)    /* 4 bytes */
8808     {
8809         buf[0] = 0xe0 + b4;
8810         buf[1] = b3;
8811         buf[2] = b2;
8812         buf[3] = b1;
8813         return 4;
8814     }
8815     if (b3 > 1 || b2 > 0x3f )   /* 3 bytes */
8816     {
8817         buf[0] = 0xc0 + b3;
8818         buf[1] = b2;
8819         buf[2] = b1;
8820         return 3;
8821     }
8822     if (b2 > 1 || b1 > 0x7f )   /* 2 bytes */
8823     {
8824         buf[0] = 0x80 + b2;
8825         buf[1] = b1;
8826         return 2;
8827     }
8828                                 /* 1 byte */
8829     buf[0] = b1;
8830     return 1;
8831 }
8832
8833 /*
8834  * Opposite of offset2bytes().
8835  * "pp" points to the bytes and is advanced over it.
8836  * Returns the offset.
8837  */
8838     static int
8839 bytes2offset(pp)
8840     char_u      **pp;
8841 {
8842     char_u      *p = *pp;
8843     int         nr;
8844     int         c;
8845
8846     c = *p++;
8847     if ((c & 0x80) == 0x00)             /* 1 byte */
8848     {
8849         nr = c - 1;
8850     }
8851     else if ((c & 0xc0) == 0x80)        /* 2 bytes */
8852     {
8853         nr = (c & 0x3f) - 1;
8854         nr = nr * 255 + (*p++ - 1);
8855     }
8856     else if ((c & 0xe0) == 0xc0)        /* 3 bytes */
8857     {
8858         nr = (c & 0x1f) - 1;
8859         nr = nr * 255 + (*p++ - 1);
8860         nr = nr * 255 + (*p++ - 1);
8861     }
8862     else                                /* 4 bytes */
8863     {
8864         nr = (c & 0x0f) - 1;
8865         nr = nr * 255 + (*p++ - 1);
8866         nr = nr * 255 + (*p++ - 1);
8867         nr = nr * 255 + (*p++ - 1);
8868     }
8869
8870     *pp = p;
8871     return nr;
8872 }
8873
8874 /*
8875  * Write the .sug file in "fname".
8876  */
8877     static void
8878 sug_write(spin, fname)
8879     spellinfo_T *spin;
8880     char_u      *fname;
8881 {
8882     FILE        *fd;
8883     wordnode_T  *tree;
8884     int         nodecount;
8885     int         wcount;
8886     char_u      *line;
8887     linenr_T    lnum;
8888     int         len;
8889
8890     /* Create the file.  Note that an existing file is silently overwritten! */
8891     fd = mch_fopen((char *)fname, "w");
8892     if (fd == NULL)
8893     {
8894         EMSG2(_(e_notopen), fname);
8895         return;
8896     }
8897
8898     vim_snprintf((char *)IObuff, IOSIZE,
8899                                   _("Writing suggestion file %s ..."), fname);
8900     spell_message(spin, IObuff);
8901
8902     /*
8903      * <SUGHEADER>: <fileID> <versionnr> <timestamp>
8904      */
8905     if (fwrite(VIMSUGMAGIC, VIMSUGMAGICL, (size_t)1, fd) != 1) /* <fileID> */
8906     {
8907         EMSG(_(e_write));
8908         goto theend;
8909     }
8910     putc(VIMSUGVERSION, fd);                            /* <versionnr> */
8911
8912     /* Write si_sugtime to the file. */
8913     put_sugtime(spin, fd);                              /* <timestamp> */
8914
8915     /*
8916      * <SUGWORDTREE>
8917      */
8918     spin->si_memtot = 0;
8919     tree = spin->si_foldroot->wn_sibling;
8920
8921     /* Clear the index and wnode fields in the tree. */
8922     clear_node(tree);
8923
8924     /* Count the number of nodes.  Needed to be able to allocate the
8925      * memory when reading the nodes.  Also fills in index for shared
8926      * nodes. */
8927     nodecount = put_node(NULL, tree, 0, 0, FALSE);
8928
8929     /* number of nodes in 4 bytes */
8930     put_bytes(fd, (long_u)nodecount, 4);        /* <nodecount> */
8931     spin->si_memtot += nodecount + nodecount * sizeof(int);
8932
8933     /* Write the nodes. */
8934     (void)put_node(fd, tree, 0, 0, FALSE);
8935
8936     /*
8937      * <SUGTABLE>: <sugwcount> <sugline> ...
8938      */
8939     wcount = spin->si_spellbuf->b_ml.ml_line_count;
8940     put_bytes(fd, (long_u)wcount, 4);   /* <sugwcount> */
8941
8942     for (lnum = 1; lnum <= (linenr_T)wcount; ++lnum)
8943     {
8944         /* <sugline>: <sugnr> ... NUL */
8945         line = ml_get_buf(spin->si_spellbuf, lnum, FALSE);
8946         len = (int)STRLEN(line) + 1;
8947         if (fwrite(line, (size_t)len, (size_t)1, fd) == 0)
8948         {
8949             EMSG(_(e_write));
8950             goto theend;
8951         }
8952         spin->si_memtot += len;
8953     }
8954
8955     /* Write another byte to check for errors. */
8956     if (putc(0, fd) == EOF)
8957         EMSG(_(e_write));
8958
8959     vim_snprintf((char *)IObuff, IOSIZE,
8960                  _("Estimated runtime memory use: %d bytes"), spin->si_memtot);
8961     spell_message(spin, IObuff);
8962
8963 theend:
8964     /* close the file */
8965     fclose(fd);
8966 }
8967
8968 /*
8969  * Open a spell buffer.  This is a nameless buffer that is not in the buffer
8970  * list and only contains text lines.  Can use a swapfile to reduce memory
8971  * use.
8972  * Most other fields are invalid!  Esp. watch out for string options being
8973  * NULL and there is no undo info.
8974  * Returns NULL when out of memory.
8975  */
8976     static buf_T *
8977 open_spellbuf()
8978 {
8979     buf_T       *buf;
8980
8981     buf = (buf_T *)alloc_clear(sizeof(buf_T));
8982     if (buf != NULL)
8983     {
8984         buf->b_spell = TRUE;
8985         buf->b_p_swf = TRUE;    /* may create a swap file */
8986         ml_open(buf);
8987         ml_open_file(buf);      /* create swap file now */
8988     }
8989     return buf;
8990 }
8991
8992 /*
8993  * Close the buffer used for spell info.
8994  */
8995     static void
8996 close_spellbuf(buf)
8997     buf_T       *buf;
8998 {
8999     if (buf != NULL)
9000     {
9001         ml_close(buf, TRUE);
9002         vim_free(buf);
9003     }
9004 }
9005
9006
/*
 * Create a Vim spell file from one or more word lists.
 * "fnames[0]" is the output file name.
 * "fnames[fcount - 1]" is the last input file name.
 * Exception: when "fnames[0]" ends in ".add" it's used as the input file name
 * and ".spl" is appended to make the output file name.
 * When "fcount" is one and the name does not end in ".add", "fnames[0]" is
 * also used as the input file name and the output file name is made by
 * appending ".ascii.spl" or ".{spell_enc()}.spl".
 * At most 8 input files are accepted, one per region.
 * Problems are reported with EMSG(); there is no return value.
 */
    static void
mkspell(fcount, fnames, ascii, overwrite, added_word)
    int         fcount;
    char_u      **fnames;
    int         ascii;              /* -ascii argument given */
    int         overwrite;          /* overwrite existing output file */
    int         added_word;         /* invoked through "zg" */
{
    char_u      fname[MAXPATHL];    /* name of the input file being read */
    char_u      wfname[MAXPATHL];   /* name of the .spl file to write */
    char_u      **innames;          /* first of the input file names */
    int         incount;            /* number of input file names */
    afffile_T   *(afile[8]);        /* .aff info for each input file */
    int         i;
    int         len;
    struct stat st;
    int         error = FALSE;
    spellinfo_T spin;

    vim_memset(&spin, 0, sizeof(spin));
    spin.si_verbose = !added_word;  /* be quiet when adding a single word */
    spin.si_ascii = ascii;
    spin.si_followup = TRUE;
    spin.si_rem_accents = TRUE;
    ga_init2(&spin.si_rep, (int)sizeof(fromto_T), 20);
    ga_init2(&spin.si_repsal, (int)sizeof(fromto_T), 20);
    ga_init2(&spin.si_sal, (int)sizeof(fromto_T), 20);
    ga_init2(&spin.si_map, (int)sizeof(char_u), 100);
    ga_init2(&spin.si_comppat, (int)sizeof(char_u *), 20);
    ga_init2(&spin.si_prefcond, (int)sizeof(char_u *), 50);
    hash_init(&spin.si_commonwords);
    spin.si_newcompID = 127;    /* start compound ID at first maximum */

    /* default: fnames[0] is output file, following are input files */
    innames = &fnames[1];
    incount = fcount - 1;

    /* Work out the output file name "wfname".  It is only filled in when
     * there is at least one name; "incount" is checked before it is used. */
    if (fcount >= 1)
    {
        len = (int)STRLEN(fnames[0]);
        if (fcount == 1 && len > 4 && STRCMP(fnames[0] + len - 4, ".add") == 0)
        {
            /* For ":mkspell path/en.latin1.add" output file is
             * "path/en.latin1.add.spl". */
            innames = &fnames[0];
            incount = 1;
            vim_snprintf((char *)wfname, sizeof(wfname), "%s.spl", fnames[0]);
        }
        else if (fcount == 1)
        {
            /* For ":mkspell path/vim" output file is "path/vim.latin1.spl". */
            innames = &fnames[0];
            incount = 1;
            vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
                             spin.si_ascii ? (char_u *)"ascii" : spell_enc());
        }
        else if (len > 4 && STRCMP(fnames[0] + len - 4, ".spl") == 0)
        {
            /* Name ends in ".spl", use as the file name. */
            vim_strncpy(wfname, fnames[0], sizeof(wfname) - 1);
        }
        else
            /* Name should be language, make the file name from it. */
            vim_snprintf((char *)wfname, sizeof(wfname), "%s.%s.spl", fnames[0],
                             spin.si_ascii ? (char_u *)"ascii" : spell_enc());

        /* Check for .ascii.spl. */
        if (strstr((char *)gettail(wfname), ".ascii.") != NULL)
            spin.si_ascii = TRUE;

        /* Check for .add.spl. */
        if (strstr((char *)gettail(wfname), ".add.") != NULL)
            spin.si_add = TRUE;
    }

    if (incount <= 0)
        EMSG(_(e_invarg));      /* need at least output and input names */
    else if (vim_strchr(gettail(wfname), '_') != NULL)
        EMSG(_("E751: Output file name must not have region name"));
    else if (incount > 8)
        /* "afile" has room for 8 entries only. */
        EMSG(_("E754: Only up to 8 regions supported"));
    else
    {
        /* Check for overwriting before doing things that may take a lot of
         * time. */
        if (!overwrite && mch_stat((char *)wfname, &st) >= 0)
        {
            EMSG(_(e_exists));
            return;
        }
        if (mch_isdir(wfname))
        {
            EMSG2(_(e_isadir2), wfname);
            return;
        }

        /*
         * Init the aff and dic pointers.
         * Get the region names if there is more than one input file: each
         * name must then end in "_xx", where "xx" is the region name.
         */
        for (i = 0; i < incount; ++i)
        {
            afile[i] = NULL;

            if (incount > 1)
            {
                len = (int)STRLEN(innames[i]);
                if (STRLEN(gettail(innames[i])) < 5
                                                || innames[i][len - 3] != '_')
                {
                    EMSG2(_("E755: Invalid region in %s"), innames[i]);
                    return;
                }
                /* Store the two region characters in lower case. */
                spin.si_region_name[i * 2] = TOLOWER_ASC(innames[i][len - 2]);
                spin.si_region_name[i * 2 + 1] =
                                             TOLOWER_ASC(innames[i][len - 1]);
            }
        }
        spin.si_region_count = incount;

        /* Allocate the roots of the three word trees. */
        spin.si_foldroot = wordtree_alloc(&spin);
        spin.si_keeproot = wordtree_alloc(&spin);
        spin.si_prefroot = wordtree_alloc(&spin);
        if (spin.si_foldroot == NULL
                || spin.si_keeproot == NULL
                || spin.si_prefroot == NULL)
        {
            free_blocks(spin.si_blocks);
            return;
        }

        /* When not producing a .add.spl file clear the character table when
         * we encounter one in the .aff file.  This means we dump the current
         * one in the .spl file if the .aff file doesn't define one.  That's
         * better than guessing the contents, the table will match a
         * previously loaded spell file. */
        if (!spin.si_add)
            spin.si_clear_chartab = TRUE;

        /*
         * Read all the .aff and .dic files.
         * Text is converted to 'encoding'.
         * Words are stored in the case-folded and keep-case trees.
         * Reading stops at the first file that fails.
         */
        for (i = 0; i < incount && !error; ++i)
        {
            spin.si_conv.vc_type = CONV_NONE;
            spin.si_region = 1 << i;    /* one bit per input file */

            vim_snprintf((char *)fname, sizeof(fname), "%s.aff", innames[i]);
            if (mch_stat((char *)fname, &st) >= 0)
            {
                /* Read the .aff file.  Will init "spin->si_conv" based on the
                 * "SET" line. */
                afile[i] = spell_read_aff(&spin, fname);
                if (afile[i] == NULL)
                    error = TRUE;
                else
                {
                    /* Read the .dic file and store the words in the trees. */
                    vim_snprintf((char *)fname, sizeof(fname), "%s.dic",
                                                                  innames[i]);
                    if (spell_read_dic(&spin, fname, afile[i]) == FAIL)
                        error = TRUE;
                }
            }
            else
            {
                /* No .aff file, try reading the file as a word list.  Store
                 * the words in the trees. */
                if (spell_read_wordfile(&spin, innames[i]) == FAIL)
                    error = TRUE;
            }

#ifdef FEAT_MBYTE
            /* Free any conversion stuff. */
            convert_setup(&spin.si_conv, NULL, NULL);
#endif
        }

        if (spin.si_compflags != NULL && spin.si_nobreak)
            MSG(_("Warning: both compounding and NOBREAK specified"));

        if (!error && !got_int)
        {
            /*
             * Combine tails in the tree.
             */
            spell_message(&spin, (char_u *)_(msg_compressing));
            wordtree_compress(&spin, spin.si_foldroot);
            wordtree_compress(&spin, spin.si_keeproot);
            wordtree_compress(&spin, spin.si_prefroot);
        }

        if (!error && !got_int)
        {
            /*
             * Write the info in the spell file.
             */
            vim_snprintf((char *)IObuff, IOSIZE,
                                      _("Writing spell file %s ..."), wfname);
            spell_message(&spin, IObuff);

            error = write_vim_spell(&spin, wfname) == FAIL;

            /* Note: these messages are given even when writing failed. */
            spell_message(&spin, (char_u *)_("Done!"));
            vim_snprintf((char *)IObuff, IOSIZE,
                 _("Estimated runtime memory use: %d bytes"), spin.si_memtot);
            spell_message(&spin, IObuff);

            /*
             * If the file is loaded need to reload it.
             */
            if (!error)
                spell_reload_one(wfname, added_word);
        }

        /* Free the allocated memory. */
        ga_clear(&spin.si_rep);
        ga_clear(&spin.si_repsal);
        ga_clear(&spin.si_sal);
        ga_clear(&spin.si_map);
        ga_clear(&spin.si_comppat);
        ga_clear(&spin.si_prefcond);
        hash_clear_all(&spin.si_commonwords, 0);

        /* Free the .aff file structures. */
        for (i = 0; i < incount; ++i)
            if (afile[i] != NULL)
                spell_free_aff(afile[i]);

        /* Free all the bits and pieces at once. */
        free_blocks(spin.si_blocks);

        /*
         * If there is soundfolding info and no NOSUGFILE item create the
         * .sug file with the soundfolded word trie.
         * This is done after the blocks were freed, only "spin" itself and
         * "wfname" are passed on.
         */
        if (spin.si_sugtime != 0 && !error && !got_int)
            spell_make_sugfile(&spin, wfname);

    }
}
9257
9258 /*
9259  * Display a message for spell file processing when 'verbose' is set or using
9260  * ":mkspell".  "str" can be IObuff.
9261  */
9262     static void
9263 spell_message(spin, str)
9264     spellinfo_T *spin;
9265     char_u      *str;
9266 {
9267     if (spin->si_verbose || p_verbose > 2)
9268     {
9269         if (!spin->si_verbose)
9270             verbose_enter();
9271         MSG(str);
9272         out_flush();
9273         if (!spin->si_verbose)
9274             verbose_leave();
9275     }
9276 }
9277
9278 /*
9279  * ":[count]spellgood  {word}"
9280  * ":[count]spellwrong  {word}"
9281  * ":[count]spellundo  {word}"
9282  */
9283     void
9284 ex_spell(eap)
9285     exarg_T *eap;
9286 {
9287     spell_add_word(eap->arg, (int)STRLEN(eap->arg), eap->cmdidx == CMD_spellwrong,
9288                                    eap->forceit ? 0 : (int)eap->line2,
9289                                    eap->cmdidx == CMD_spellundo);
9290 }
9291
9292 /*
9293  * Add "word[len]" to 'spellfile' as a good or bad word.
9294  */
9295     void
9296 spell_add_word(word, len, bad, idx, undo)
9297     char_u      *word;
9298     int         len;
9299     int         bad;
9300     int         idx;        /* "zG" and "zW": zero, otherwise index in
9301                                'spellfile' */
9302     int         undo;       /* TRUE for "zug", "zuG", "zuw" and "zuW" */
9303 {
9304     FILE        *fd = NULL;
9305     buf_T       *buf = NULL;
9306     int         new_spf = FALSE;
9307     char_u      *fname;
9308     char_u      fnamebuf[MAXPATHL];
9309     char_u      line[MAXWLEN * 2];
9310     long        fpos, fpos_next = 0;
9311     int         i;
9312     char_u      *spf;
9313
9314     if (idx == 0)           /* use internal wordlist */
9315     {
9316         if (int_wordlist == NULL)
9317         {
9318             int_wordlist = vim_tempname('s');
9319             if (int_wordlist == NULL)
9320                 return;
9321         }
9322         fname = int_wordlist;
9323     }
9324     else
9325     {
9326         /* If 'spellfile' isn't set figure out a good default value. */
9327         if (*curbuf->b_p_spf == NUL)
9328         {
9329             init_spellfile();
9330             new_spf = TRUE;
9331         }
9332
9333         if (*curbuf->b_p_spf == NUL)
9334         {
9335             EMSG2(_(e_notset), "spellfile");
9336             return;
9337         }
9338
9339         for (spf = curbuf->b_p_spf, i = 1; *spf != NUL; ++i)
9340         {
9341             copy_option_part(&spf, fnamebuf, MAXPATHL, ",");
9342             if (i == idx)
9343                 break;
9344             if (*spf == NUL)
9345             {
9346                 EMSGN(_("E765: 'spellfile' does not have %ld entries"), idx);
9347                 return;
9348             }
9349         }
9350
9351         /* Check that the user isn't editing the .add file somewhere. */
9352         buf = buflist_findname_exp(fnamebuf);
9353         if (buf != NULL && buf->b_ml.ml_mfp == NULL)
9354             buf = NULL;
9355         if (buf != NULL && bufIsChanged(buf))
9356         {
9357             EMSG(_(e_bufloaded));
9358             return;
9359         }
9360
9361         fname = fnamebuf;
9362     }
9363
9364     if (bad || undo)
9365     {
9366         /* When the word appears as good word we need to remove that one,
9367          * since its flags sort before the one with WF_BANNED. */
9368         fd = mch_fopen((char *)fname, "r");
9369         if (fd != NULL)
9370         {
9371             while (!vim_fgets(line, MAXWLEN * 2, fd))
9372             {
9373                 fpos = fpos_next;
9374                 fpos_next = ftell(fd);
9375                 if (STRNCMP(word, line, len) == 0
9376                         && (line[len] == '/' || line[len] < ' '))
9377                 {
9378                     /* Found duplicate word.  Remove it by writing a '#' at
9379                      * the start of the line.  Mixing reading and writing
9380                      * doesn't work for all systems, close the file first. */
9381                     fclose(fd);
9382                     fd = mch_fopen((char *)fname, "r+");
9383                     if (fd == NULL)
9384                         break;
9385                     if (fseek(fd, fpos, SEEK_SET) == 0)
9386                     {
9387                         fputc('#', fd);
9388                         if (undo)
9389                         {
9390                             home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
9391                             smsg((char_u *)_("Word removed from %s"), NameBuff);
9392                         }
9393                     }
9394                     fseek(fd, fpos_next, SEEK_SET);
9395                 }
9396             }
9397             fclose(fd);
9398         }
9399     }
9400
9401     if (!undo)
9402     {
9403         fd = mch_fopen((char *)fname, "a");
9404         if (fd == NULL && new_spf)
9405         {
9406             char_u *p;
9407
9408             /* We just initialized the 'spellfile' option and can't open the
9409              * file.  We may need to create the "spell" directory first.  We
9410              * already checked the runtime directory is writable in
9411              * init_spellfile(). */
9412             if (!dir_of_file_exists(fname) && (p = gettail_sep(fname)) != fname)
9413             {
9414                 int c = *p;
9415
9416                 /* The directory doesn't exist.  Try creating it and opening
9417                  * the file again. */
9418                 *p = NUL;
9419                 vim_mkdir(fname, 0755);
9420                 *p = c;
9421                 fd = mch_fopen((char *)fname, "a");
9422             }
9423         }
9424
9425         if (fd == NULL)
9426             EMSG2(_(e_notopen), fname);
9427         else
9428         {
9429             if (bad)
9430                 fprintf(fd, "%.*s/!\n", len, word);
9431             else
9432                 fprintf(fd, "%.*s\n", len, word);
9433             fclose(fd);
9434
9435             home_replace(NULL, fname, NameBuff, MAXPATHL, TRUE);
9436             smsg((char_u *)_("Word added to %s"), NameBuff);
9437         }
9438     }
9439
9440     if (fd != NULL)
9441     {
9442         /* Update the .add.spl file. */
9443         mkspell(1, &fname, FALSE, TRUE, TRUE);
9444
9445         /* If the .add file is edited somewhere, reload it. */
9446         if (buf != NULL)
9447             buf_reload(buf, buf->b_orig_mode);
9448
9449         redraw_all_later(SOME_VALID);
9450     }
9451 }
9452
9453 /*
9454  * Initialize 'spellfile' for the current buffer.
9455  */
9456     static void
9457 init_spellfile()
9458 {
9459     char_u      buf[MAXPATHL];
9460     int         l;
9461     char_u      *fname;
9462     char_u      *rtp;
9463     char_u      *lend;
9464     int         aspath = FALSE;
9465     char_u      *lstart = curbuf->b_p_spl;
9466
9467     if (*curbuf->b_p_spl != NUL && curbuf->b_langp.ga_len > 0)
9468     {
9469         /* Find the end of the language name.  Exclude the region.  If there
9470          * is a path separator remember the start of the tail. */
9471         for (lend = curbuf->b_p_spl; *lend != NUL
9472                         && vim_strchr((char_u *)",._", *lend) == NULL; ++lend)
9473             if (vim_ispathsep(*lend))
9474             {
9475                 aspath = TRUE;
9476                 lstart = lend + 1;
9477             }
9478
9479         /* Loop over all entries in 'runtimepath'.  Use the first one where we
9480          * are allowed to write. */
9481         rtp = p_rtp;
9482         while (*rtp != NUL)
9483         {
9484             if (aspath)
9485                 /* Use directory of an entry with path, e.g., for
9486                  * "/dir/lg.utf-8.spl" use "/dir". */
9487                 vim_strncpy(buf, curbuf->b_p_spl, lstart - curbuf->b_p_spl - 1);
9488             else
9489                 /* Copy the path from 'runtimepath' to buf[]. */
9490                 copy_option_part(&rtp, buf, MAXPATHL, ",");
9491             if (filewritable(buf) == 2)
9492             {
9493                 /* Use the first language name from 'spelllang' and the
9494                  * encoding used in the first loaded .spl file. */
9495                 if (aspath)
9496                     vim_strncpy(buf, curbuf->b_p_spl, lend - curbuf->b_p_spl);
9497                 else
9498                 {
9499                     /* Create the "spell" directory if it doesn't exist yet. */
9500                     l = (int)STRLEN(buf);
9501                     vim_snprintf((char *)buf + l, MAXPATHL - l, "/spell");
9502                     if (!filewritable(buf) != 2)
9503                         vim_mkdir(buf, 0755);
9504
9505                     l = (int)STRLEN(buf);
9506                     vim_snprintf((char *)buf + l, MAXPATHL - l,
9507                                  "/%.*s", (int)(lend - lstart), lstart);
9508                 }
9509                 l = (int)STRLEN(buf);
9510                 fname = LANGP_ENTRY(curbuf->b_langp, 0)->lp_slang->sl_fname;
9511                 vim_snprintf((char *)buf + l, MAXPATHL - l, ".%s.add",
9512                         fname != NULL
9513                           && strstr((char *)gettail(fname), ".ascii.") != NULL
9514                                        ? (char_u *)"ascii" : spell_enc());
9515                 set_option_value((char_u *)"spellfile", 0L, buf, OPT_LOCAL);
9516                 break;
9517             }
9518             aspath = FALSE;
9519         }
9520     }
9521 }
9522
9523
9524 /*
9525  * Init the chartab used for spelling for ASCII.
9526  * EBCDIC is not supported!
9527  */
9528     static void
9529 clear_spell_chartab(sp)
9530     spelltab_T  *sp;
9531 {
9532     int         i;
9533
9534     /* Init everything to FALSE. */
9535     vim_memset(sp->st_isw, FALSE, sizeof(sp->st_isw));
9536     vim_memset(sp->st_isu, FALSE, sizeof(sp->st_isu));
9537     for (i = 0; i < 256; ++i)
9538     {
9539         sp->st_fold[i] = i;
9540         sp->st_upper[i] = i;
9541     }
9542
9543     /* We include digits.  A word shouldn't start with a digit, but handling
9544      * that is done separately. */
9545     for (i = '0'; i <= '9'; ++i)
9546         sp->st_isw[i] = TRUE;
9547     for (i = 'A'; i <= 'Z'; ++i)
9548     {
9549         sp->st_isw[i] = TRUE;
9550         sp->st_isu[i] = TRUE;
9551         sp->st_fold[i] = i + 0x20;
9552     }
9553     for (i = 'a'; i <= 'z'; ++i)
9554     {
9555         sp->st_isw[i] = TRUE;
9556         sp->st_upper[i] = i - 0x20;
9557     }
9558 }
9559
9560 /*
9561  * Init the chartab used for spelling.  Only depends on 'encoding'.
9562  * Called once while starting up and when 'encoding' changes.
9563  * The default is to use isalpha(), but the spell file should define the word
9564  * characters to make it possible that 'encoding' differs from the current
9565  * locale.  For utf-8 we don't use isalpha() but our own functions.
9566  */
9567     void
9568 init_spell_chartab()
9569 {
9570     int     i;
9571
9572     did_set_spelltab = FALSE;
9573     clear_spell_chartab(&spelltab);
9574 #ifdef FEAT_MBYTE
9575     if (enc_dbcs)
9576     {
9577         /* DBCS: assume double-wide characters are word characters. */
9578         for (i = 128; i <= 255; ++i)
9579             if (MB_BYTE2LEN(i) == 2)
9580                 spelltab.st_isw[i] = TRUE;
9581     }
9582     else if (enc_utf8)
9583     {
9584         for (i = 128; i < 256; ++i)
9585         {
9586             spelltab.st_isu[i] = utf_isupper(i);
9587             spelltab.st_isw[i] = spelltab.st_isu[i] || utf_islower(i);
9588             spelltab.st_fold[i] = utf_fold(i);
9589             spelltab.st_upper[i] = utf_toupper(i);
9590         }
9591     }
9592     else
9593 #endif
9594     {
9595         /* Rough guess: use locale-dependent library functions. */
9596         for (i = 128; i < 256; ++i)
9597         {
9598             if (MB_ISUPPER(i))
9599             {
9600                 spelltab.st_isw[i] = TRUE;
9601                 spelltab.st_isu[i] = TRUE;
9602                 spelltab.st_fold[i] = MB_TOLOWER(i);
9603             }
9604             else if (MB_ISLOWER(i))
9605             {
9606                 spelltab.st_isw[i] = TRUE;
9607                 spelltab.st_upper[i] = MB_TOUPPER(i);
9608             }
9609         }
9610     }
9611 }
9612
9613 /*
9614  * Set the spell character tables from strings in the affix file.
9615  */
9616     static int
9617 set_spell_chartab(fol, low, upp)
9618     char_u      *fol;
9619     char_u      *low;
9620     char_u      *upp;
9621 {
9622     /* We build the new tables here first, so that we can compare with the
9623      * previous one. */
9624     spelltab_T  new_st;
9625     char_u      *pf = fol, *pl = low, *pu = upp;
9626     int         f, l, u;
9627
9628     clear_spell_chartab(&new_st);
9629
9630     while (*pf != NUL)
9631     {
9632         if (*pl == NUL || *pu == NUL)
9633         {
9634             EMSG(_(e_affform));
9635             return FAIL;
9636         }
9637 #ifdef FEAT_MBYTE
9638         f = mb_ptr2char_adv(&pf);
9639         l = mb_ptr2char_adv(&pl);
9640         u = mb_ptr2char_adv(&pu);
9641 #else
9642         f = *pf++;
9643         l = *pl++;
9644         u = *pu++;
9645 #endif
9646         /* Every character that appears is a word character. */
9647         if (f < 256)
9648             new_st.st_isw[f] = TRUE;
9649         if (l < 256)
9650             new_st.st_isw[l] = TRUE;
9651         if (u < 256)
9652             new_st.st_isw[u] = TRUE;
9653
9654         /* if "LOW" and "FOL" are not the same the "LOW" char needs
9655          * case-folding */
9656         if (l < 256 && l != f)
9657         {
9658             if (f >= 256)
9659             {
9660                 EMSG(_(e_affrange));
9661                 return FAIL;
9662             }
9663             new_st.st_fold[l] = f;
9664         }
9665
9666         /* if "UPP" and "FOL" are not the same the "UPP" char needs
9667          * case-folding, it's upper case and the "UPP" is the upper case of
9668          * "FOL" . */
9669         if (u < 256 && u != f)
9670         {
9671             if (f >= 256)
9672             {
9673                 EMSG(_(e_affrange));
9674                 return FAIL;
9675             }
9676             new_st.st_fold[u] = f;
9677             new_st.st_isu[u] = TRUE;
9678             new_st.st_upper[f] = u;
9679         }
9680     }
9681
9682     if (*pl != NUL || *pu != NUL)
9683     {
9684         EMSG(_(e_affform));
9685         return FAIL;
9686     }
9687
9688     return set_spell_finish(&new_st);
9689 }
9690
9691 /*
9692  * Set the spell character tables from strings in the .spl file.
9693  */
9694     static void
9695 set_spell_charflags(flags, cnt, fol)
9696     char_u      *flags;
9697     int         cnt;        /* length of "flags" */
9698     char_u      *fol;
9699 {
9700     /* We build the new tables here first, so that we can compare with the
9701      * previous one. */
9702     spelltab_T  new_st;
9703     int         i;
9704     char_u      *p = fol;
9705     int         c;
9706
9707     clear_spell_chartab(&new_st);
9708
9709     for (i = 0; i < 128; ++i)
9710     {
9711         if (i < cnt)
9712         {
9713             new_st.st_isw[i + 128] = (flags[i] & CF_WORD) != 0;
9714             new_st.st_isu[i + 128] = (flags[i] & CF_UPPER) != 0;
9715         }
9716
9717         if (*p != NUL)
9718         {
9719 #ifdef FEAT_MBYTE
9720             c = mb_ptr2char_adv(&p);
9721 #else
9722             c = *p++;
9723 #endif
9724             new_st.st_fold[i + 128] = c;
9725             if (i + 128 != c && new_st.st_isu[i + 128] && c < 256)
9726                 new_st.st_upper[c] = i + 128;
9727         }
9728     }
9729
9730     (void)set_spell_finish(&new_st);
9731 }
9732
9733     static int
9734 set_spell_finish(new_st)
9735     spelltab_T  *new_st;
9736 {
9737     int         i;
9738
9739     if (did_set_spelltab)
9740     {
9741         /* check that it's the same table */
9742         for (i = 0; i < 256; ++i)
9743         {
9744             if (spelltab.st_isw[i] != new_st->st_isw[i]
9745                     || spelltab.st_isu[i] != new_st->st_isu[i]
9746                     || spelltab.st_fold[i] != new_st->st_fold[i]
9747                     || spelltab.st_upper[i] != new_st->st_upper[i])
9748             {
9749                 EMSG(_("E763: Word characters differ between spell files"));
9750                 return FAIL;
9751             }
9752         }
9753     }
9754     else
9755     {
9756         /* copy the new spelltab into the one being used */
9757         spelltab = *new_st;
9758         did_set_spelltab = TRUE;
9759     }
9760
9761     return OK;
9762 }
9763
9764 /*
9765  * Return TRUE if "p" points to a word character.
9766  * As a special case we see "midword" characters as word character when it is
9767  * followed by a word character.  This finds they'there but not 'they there'.
9768  * Thus this only works properly when past the first character of the word.
9769  */
9770     static int
9771 spell_iswordp(p, buf)
9772     char_u      *p;
9773     buf_T       *buf;       /* buffer used */
9774 {
9775 #ifdef FEAT_MBYTE
9776     char_u      *s;
9777     int         l;
9778     int         c;
9779
9780     if (has_mbyte)
9781     {
9782         l = MB_BYTE2LEN(*p);
9783         s = p;
9784         if (l == 1)
9785         {
9786             /* be quick for ASCII */
9787             if (buf->b_spell_ismw[*p])
9788             {
9789                 s = p + 1;              /* skip a mid-word character */
9790                 l = MB_BYTE2LEN(*s);
9791             }
9792         }
9793         else
9794         {
9795             c = mb_ptr2char(p);
9796             if (c < 256 ? buf->b_spell_ismw[c]
9797                     : (buf->b_spell_ismw_mb != NULL
9798                            && vim_strchr(buf->b_spell_ismw_mb, c) != NULL))
9799             {
9800                 s = p + l;
9801                 l = MB_BYTE2LEN(*s);
9802             }
9803         }
9804
9805         c = mb_ptr2char(s);
9806         if (c > 255)
9807             return spell_mb_isword_class(mb_get_class(s));
9808         return spelltab.st_isw[c];
9809     }
9810 #endif
9811
9812     return spelltab.st_isw[buf->b_spell_ismw[*p] ? p[1] : p[0]];
9813 }
9814
9815 /*
9816  * Return TRUE if "p" points to a word character.
9817  * Unlike spell_iswordp() this doesn't check for "midword" characters.
9818  */
9819     static int
9820 spell_iswordp_nmw(p)
9821     char_u      *p;
9822 {
9823 #ifdef FEAT_MBYTE
9824     int         c;
9825
9826     if (has_mbyte)
9827     {
9828         c = mb_ptr2char(p);
9829         if (c > 255)
9830             return spell_mb_isword_class(mb_get_class(p));
9831         return spelltab.st_isw[c];
9832     }
9833 #endif
9834     return spelltab.st_isw[*p];
9835 }
9836
9837 #ifdef FEAT_MBYTE
/*
 * Return TRUE if character class "cl" indicates a word character.
 * Only for characters above 255.
 * Classes below 2 are not word characters.  The classes 0x2070 and 0x2080
 * (Unicode superscript and subscript) are not considered word characters
 * either.
 */
    static int
spell_mb_isword_class(cl)
    int cl;
{
    if (cl < 2)
        return 0;
    return !(cl == 0x2070 || cl == 0x2080);
}
9849
9850 /*
9851  * Return TRUE if "p" points to a word character.
9852  * Wide version of spell_iswordp().
9853  */
9854     static int
9855 spell_iswordp_w(p, buf)
9856     int         *p;
9857     buf_T       *buf;
9858 {
9859     int         *s;
9860
9861     if (*p < 256 ? buf->b_spell_ismw[*p]
9862                  : (buf->b_spell_ismw_mb != NULL
9863                              && vim_strchr(buf->b_spell_ismw_mb, *p) != NULL))
9864         s = p + 1;
9865     else
9866         s = p;
9867
9868     if (*s > 255)
9869     {
9870         if (enc_utf8)
9871             return spell_mb_isword_class(utf_class(*s));
9872         if (enc_dbcs)
9873             return dbcs_class((unsigned)*s >> 8, *s & 0xff) >= 2;
9874         return 0;
9875     }
9876     return spelltab.st_isw[*s];
9877 }
9878 #endif
9879
9880 /*
9881  * Write the table with prefix conditions to the .spl file.
9882  * When "fd" is NULL only count the length of what is written.
9883  */
9884     static int
9885 write_spell_prefcond(fd, gap)
9886     FILE        *fd;
9887     garray_T    *gap;
9888 {
9889     int         i;
9890     char_u      *p;
9891     int         len;
9892     int         totlen;
9893
9894     if (fd != NULL)
9895         put_bytes(fd, (long_u)gap->ga_len, 2);      /* <prefcondcnt> */
9896
9897     totlen = 2 + gap->ga_len; /* length of <prefcondcnt> and <condlen> bytes */
9898
9899     for (i = 0; i < gap->ga_len; ++i)
9900     {
9901         /* <prefcond> : <condlen> <condstr> */
9902         p = ((char_u **)gap->ga_data)[i];
9903         if (p != NULL)
9904         {
9905             len = (int)STRLEN(p);
9906             if (fd != NULL)
9907             {
9908                 fputc(len, fd);
9909                 fwrite(p, (size_t)len, (size_t)1, fd);
9910             }
9911             totlen += len;
9912         }
9913         else if (fd != NULL)
9914             fputc(0, fd);
9915     }
9916
9917     return totlen;
9918 }
9919
9920 /*
9921  * Case-fold "str[len]" into "buf[buflen]".  The result is NUL terminated.
9922  * Uses the character definitions from the .spl file.
9923  * When using a multi-byte 'encoding' the length may change!
9924  * Returns FAIL when something wrong.
9925  */
9926     static int
9927 spell_casefold(str, len, buf, buflen)
9928     char_u      *str;
9929     int         len;
9930     char_u      *buf;
9931     int         buflen;
9932 {
9933     int         i;
9934
9935     if (len >= buflen)
9936     {
9937         buf[0] = NUL;
9938         return FAIL;            /* result will not fit */
9939     }
9940
9941 #ifdef FEAT_MBYTE
9942     if (has_mbyte)
9943     {
9944         int     outi = 0;
9945         char_u  *p;
9946         int     c;
9947
9948         /* Fold one character at a time. */
9949         for (p = str; p < str + len; )
9950         {
9951             if (outi + MB_MAXBYTES > buflen)
9952             {
9953                 buf[outi] = NUL;
9954                 return FAIL;
9955             }
9956             c = mb_cptr2char_adv(&p);
9957             outi += mb_char2bytes(SPELL_TOFOLD(c), buf + outi);
9958         }
9959         buf[outi] = NUL;
9960     }
9961     else
9962 #endif
9963     {
9964         /* Be quick for non-multibyte encodings. */
9965         for (i = 0; i < len; ++i)
9966             buf[i] = spelltab.st_fold[str[i]];
9967         buf[i] = NUL;
9968     }
9969
9970     return OK;
9971 }
9972
/* Values for "sps_flags": the suggestion method named in 'spellsuggest'. */
#define SPS_BEST    0x01
#define SPS_FAST    0x02
#define SPS_DOUBLE  0x04

/* Settings taken from 'spellsuggest', set by spell_check_sps(). */
static int sps_flags = SPS_BEST;        /* method, one of the SPS_ values */
static int sps_limit = 9999;            /* maximum number of suggestions */
9980
9981 /*
9982  * Check the 'spellsuggest' option.  Return FAIL if it's wrong.
9983  * Sets "sps_flags" and "sps_limit".
9984  */
9985     int
9986 spell_check_sps()
9987 {
9988     char_u      *p;
9989     char_u      *s;
9990     char_u      buf[MAXPATHL];
9991     int         f;
9992
9993     sps_flags = 0;
9994     sps_limit = 9999;
9995
9996     for (p = p_sps; *p != NUL; )
9997     {
9998         copy_option_part(&p, buf, MAXPATHL, ",");
9999
10000         f = 0;
10001         if (VIM_ISDIGIT(*buf))
10002         {
10003             s = buf;
10004             sps_limit = getdigits(&s);
10005             if (*s != NUL && !VIM_ISDIGIT(*s))
10006                 f = -1;
10007         }
10008         else if (STRCMP(buf, "best") == 0)
10009             f = SPS_BEST;
10010         else if (STRCMP(buf, "fast") == 0)
10011             f = SPS_FAST;
10012         else if (STRCMP(buf, "double") == 0)
10013             f = SPS_DOUBLE;
10014         else if (STRNCMP(buf, "expr:", 5) != 0
10015                 && STRNCMP(buf, "file:", 5) != 0)
10016             f = -1;
10017
10018         if (f == -1 || (sps_flags != 0 && f != 0))
10019         {
10020             sps_flags = SPS_BEST;
10021             sps_limit = 9999;
10022             return FAIL;
10023         }
10024         if (f != 0)
10025             sps_flags = f;
10026     }
10027
10028     if (sps_flags == 0)
10029         sps_flags = SPS_BEST;
10030
10031     return OK;
10032 }
10033
10034 /*
10035  * "z?": Find badly spelled word under or after the cursor.
10036  * Give suggestions for the properly spelled word.
10037  * In Visual mode use the highlighted word as the bad word.
10038  * When "count" is non-zero use that suggestion.
10039  */
10040     void
10041 spell_suggest(count)
10042     int         count;
10043 {
10044     char_u      *line;
10045     pos_T       prev_cursor = curwin->w_cursor;
10046     char_u      wcopy[MAXWLEN + 2];
10047     char_u      *p;
10048     int         i;
10049     int         c;
10050     suginfo_T   sug;
10051     suggest_T   *stp;
10052     int         mouse_used;
10053     int         need_cap;
10054     int         limit;
10055     int         selected = count;
10056     int         badlen = 0;
10057
10058     if (no_spell_checking(curwin))
10059         return;
10060
10061 #ifdef FEAT_VISUAL
10062     if (VIsual_active)
10063     {
10064         /* Use the Visually selected text as the bad word.  But reject
10065          * a multi-line selection. */
10066         if (curwin->w_cursor.lnum != VIsual.lnum)
10067         {
10068             vim_beep();
10069             return;
10070         }
10071         badlen = (int)curwin->w_cursor.col - (int)VIsual.col;
10072         if (badlen < 0)
10073             badlen = -badlen;
10074         else
10075             curwin->w_cursor.col = VIsual.col;
10076         ++badlen;
10077         end_visual_mode();
10078     }
10079     else
10080 #endif
10081         /* Find the start of the badly spelled word. */
10082         if (spell_move_to(curwin, FORWARD, TRUE, TRUE, NULL) == 0
10083             || curwin->w_cursor.col > prev_cursor.col)
10084     {
10085         /* No bad word or it starts after the cursor: use the word under the
10086          * cursor. */
10087         curwin->w_cursor = prev_cursor;
10088         line = ml_get_curline();
10089         p = line + curwin->w_cursor.col;
10090         /* Backup to before start of word. */
10091         while (p > line && spell_iswordp_nmw(p))
10092             mb_ptr_back(line, p);
10093         /* Forward to start of word. */
10094         while (*p != NUL && !spell_iswordp_nmw(p))
10095             mb_ptr_adv(p);
10096
10097         if (!spell_iswordp_nmw(p))              /* No word found. */
10098         {
10099             beep_flush();
10100             return;
10101         }
10102         curwin->w_cursor.col = (colnr_T)(p - line);
10103     }
10104
10105     /* Get the word and its length. */
10106
10107     /* Figure out if the word should be capitalised. */
10108     need_cap = check_need_cap(curwin->w_cursor.lnum, curwin->w_cursor.col);
10109
10110     line = ml_get_curline();
10111
10112     /* Get the list of suggestions.  Limit to 'lines' - 2 or the number in
10113      * 'spellsuggest', whatever is smaller. */
10114     if (sps_limit > (int)Rows - 2)
10115         limit = (int)Rows - 2;
10116     else
10117         limit = sps_limit;
10118     spell_find_suggest(line + curwin->w_cursor.col, badlen, &sug, limit,
10119                                                         TRUE, need_cap, TRUE);
10120
10121     if (sug.su_ga.ga_len == 0)
10122         MSG(_("Sorry, no suggestions"));
10123     else if (count > 0)
10124     {
10125         if (count > sug.su_ga.ga_len)
10126             smsg((char_u *)_("Sorry, only %ld suggestions"),
10127                                                       (long)sug.su_ga.ga_len);
10128     }
10129     else
10130     {
10131         vim_free(repl_from);
10132         repl_from = NULL;
10133         vim_free(repl_to);
10134         repl_to = NULL;
10135
10136 #ifdef FEAT_RIGHTLEFT
10137         /* When 'rightleft' is set the list is drawn right-left. */
10138         cmdmsg_rl = curwin->w_p_rl;
10139         if (cmdmsg_rl)
10140             msg_col = Columns - 1;
10141 #endif
10142
10143         /* List the suggestions. */
10144         msg_start();
10145         msg_row = Rows - 1;     /* for when 'cmdheight' > 1 */
10146         lines_left = Rows;      /* avoid more prompt */
10147         vim_snprintf((char *)IObuff, IOSIZE, _("Change \"%.*s\" to:"),
10148                                                 sug.su_badlen, sug.su_badptr);
10149 #ifdef FEAT_RIGHTLEFT
10150         if (cmdmsg_rl && STRNCMP(IObuff, "Change", 6) == 0)
10151         {
10152             /* And now the rabbit from the high hat: Avoid showing the
10153              * untranslated message rightleft. */
10154             vim_snprintf((char *)IObuff, IOSIZE, ":ot \"%.*s\" egnahC",
10155                                                 sug.su_badlen, sug.su_badptr);
10156         }
10157 #endif
10158         msg_puts(IObuff);
10159         msg_clr_eos();
10160         msg_putchar('\n');
10161
10162         msg_scroll = TRUE;
10163         for (i = 0; i < sug.su_ga.ga_len; ++i)
10164         {
10165             stp = &SUG(sug.su_ga, i);
10166
10167             /* The suggested word may replace only part of the bad word, add
10168              * the not replaced part. */
10169             STRCPY(wcopy, stp->st_word);
10170             if (sug.su_badlen > stp->st_orglen)
10171                 vim_strncpy(wcopy + stp->st_wordlen,
10172                                                sug.su_badptr + stp->st_orglen,
10173                                               sug.su_badlen - stp->st_orglen);
10174             vim_snprintf((char *)IObuff, IOSIZE, "%2d", i + 1);
10175 #ifdef FEAT_RIGHTLEFT
10176             if (cmdmsg_rl)
10177                 rl_mirror(IObuff);
10178 #endif
10179             msg_puts(IObuff);
10180
10181             vim_snprintf((char *)IObuff, IOSIZE, " \"%s\"", wcopy);
10182             msg_puts(IObuff);
10183
10184             /* The word may replace more than "su_badlen". */
10185             if (sug.su_badlen < stp->st_orglen)
10186             {
10187                 vim_snprintf((char *)IObuff, IOSIZE, _(" < \"%.*s\""),
10188                                                stp->st_orglen, sug.su_badptr);
10189                 msg_puts(IObuff);
10190             }
10191
10192             if (p_verbose > 0)
10193             {
10194                 /* Add the score. */
10195                 if (sps_flags & (SPS_DOUBLE | SPS_BEST))
10196                     vim_snprintf((char *)IObuff, IOSIZE, " (%s%d - %d)",
10197                         stp->st_salscore ? "s " : "",
10198                         stp->st_score, stp->st_altscore);
10199                 else
10200                     vim_snprintf((char *)IObuff, IOSIZE, " (%d)",
10201                             stp->st_score);
10202 #ifdef FEAT_RIGHTLEFT
10203                 if (cmdmsg_rl)
10204                     /* Mirror the numbers, but keep the leading space. */
10205                     rl_mirror(IObuff + 1);
10206 #endif
10207                 msg_advance(30);
10208                 msg_puts(IObuff);
10209             }
10210             msg_putchar('\n');
10211         }
10212
10213 #ifdef FEAT_RIGHTLEFT
10214         cmdmsg_rl = FALSE;
10215         msg_col = 0;
10216 #endif
10217         /* Ask for choice. */
10218         selected = prompt_for_number(&mouse_used);
10219         if (mouse_used)
10220             selected -= lines_left;
10221         lines_left = Rows;      /* avoid more prompt */
10222     }
10223
10224     if (selected > 0 && selected <= sug.su_ga.ga_len && u_save_cursor() == OK)
10225     {
10226         /* Save the from and to text for :spellrepall. */
10227         stp = &SUG(sug.su_ga, selected - 1);
10228         if (sug.su_badlen > stp->st_orglen)
10229         {
10230             /* Replacing less than "su_badlen", append the remainder to
10231              * repl_to. */
10232             repl_from = vim_strnsave(sug.su_badptr, sug.su_badlen);
10233             vim_snprintf((char *)IObuff, IOSIZE, "%s%.*s", stp->st_word,
10234                     sug.su_badlen - stp->st_orglen,
10235                                               sug.su_badptr + stp->st_orglen);
10236             repl_to = vim_strsave(IObuff);
10237         }
10238         else
10239         {
10240             /* Replacing su_badlen or more, use the whole word. */
10241             repl_from = vim_strnsave(sug.su_badptr, stp->st_orglen);
10242             repl_to = vim_strsave(stp->st_word);
10243         }
10244
10245         /* Replace the word. */
10246         p = alloc((unsigned)STRLEN(line) - stp->st_orglen + stp->st_wordlen + 1);
10247         if (p != NULL)
10248         {
10249             c = (int)(sug.su_badptr - line);
10250             mch_memmove(p, line, c);
10251             STRCPY(p + c, stp->st_word);
10252             STRCAT(p, sug.su_badptr + stp->st_orglen);
10253             ml_replace(curwin->w_cursor.lnum, p, FALSE);
10254             curwin->w_cursor.col = c;
10255
10256             /* For redo we use a change-word command. */
10257             ResetRedobuff();
10258             AppendToRedobuff((char_u *)"ciw");
10259             AppendToRedobuffLit(p + c,
10260                             stp->st_wordlen + sug.su_badlen - stp->st_orglen);
10261             AppendCharToRedobuff(ESC);
10262
10263             /* After this "p" may be invalid. */
10264             changed_bytes(curwin->w_cursor.lnum, c);
10265         }
10266     }
10267     else
10268         curwin->w_cursor = prev_cursor;
10269
10270     spell_find_cleanup(&sug);
10271 }
10272
10273 /*
10274  * Check if the word at line "lnum" column "col" is required to start with a
10275  * capital.  This uses 'spellcapcheck' of the current buffer.
10276  */
10277     static int
10278 check_need_cap(lnum, col)
10279     linenr_T    lnum;
10280     colnr_T     col;
10281 {
10282     int         need_cap = FALSE;
10283     char_u      *line;
10284     char_u      *line_copy = NULL;
10285     char_u      *p;
10286     colnr_T     endcol;
10287     regmatch_T  regmatch;
10288
10289     if (curbuf->b_cap_prog == NULL)
10290         return FALSE;
10291
10292     line = ml_get_curline();
10293     endcol = 0;
10294     if ((int)(skipwhite(line) - line) >= (int)col)
10295     {
10296         /* At start of line, check if previous line is empty or sentence
10297          * ends there. */
10298         if (lnum == 1)
10299             need_cap = TRUE;
10300         else
10301         {
10302             line = ml_get(lnum - 1);
10303             if (*skipwhite(line) == NUL)
10304                 need_cap = TRUE;
10305             else
10306             {
10307                 /* Append a space in place of the line break. */
10308                 line_copy = concat_str(line, (char_u *)" ");
10309                 line = line_copy;
10310                 endcol = (colnr_T)STRLEN(line);
10311             }
10312         }
10313     }
10314     else
10315         endcol = col;
10316
10317     if (endcol > 0)
10318     {
10319         /* Check if sentence ends before the bad word. */
10320         regmatch.regprog = curbuf->b_cap_prog;
10321         regmatch.rm_ic = FALSE;
10322         p = line + endcol;
10323         for (;;)
10324         {
10325             mb_ptr_back(line, p);
10326             if (p == line || spell_iswordp_nmw(p))
10327                 break;
10328             if (vim_regexec(&regmatch, p, 0)
10329                                          && regmatch.endp[0] == line + endcol)
10330             {
10331                 need_cap = TRUE;
10332                 break;
10333             }
10334         }
10335     }
10336
10337     vim_free(line_copy);
10338
10339     return need_cap;
10340 }
10341
10342
10343 /*
10344  * ":spellrepall"
10345  */
10346 /*ARGSUSED*/
10347     void
10348 ex_spellrepall(eap)
10349     exarg_T *eap;
10350 {
10351     pos_T       pos = curwin->w_cursor;
10352     char_u      *frompat;
10353     int         addlen;
10354     char_u      *line;
10355     char_u      *p;
10356     int         save_ws = p_ws;
10357     linenr_T    prev_lnum = 0;
10358
10359     if (repl_from == NULL || repl_to == NULL)
10360     {
10361         EMSG(_("E752: No previous spell replacement"));
10362         return;
10363     }
10364     addlen = (int)(STRLEN(repl_to) - STRLEN(repl_from));
10365
10366     frompat = alloc((unsigned)STRLEN(repl_from) + 7);
10367     if (frompat == NULL)
10368         return;
10369     sprintf((char *)frompat, "\\V\\<%s\\>", repl_from);
10370     p_ws = FALSE;
10371
10372     sub_nsubs = 0;
10373     sub_nlines = 0;
10374     curwin->w_cursor.lnum = 0;
10375     while (!got_int)
10376     {
10377         if (do_search(NULL, '/', frompat, 1L, SEARCH_KEEP, NULL) == 0
10378                                                    || u_save_cursor() == FAIL)
10379             break;
10380
10381         /* Only replace when the right word isn't there yet.  This happens
10382          * when changing "etc" to "etc.". */
10383         line = ml_get_curline();
10384         if (addlen <= 0 || STRNCMP(line + curwin->w_cursor.col,
10385                                                repl_to, STRLEN(repl_to)) != 0)
10386         {
10387             p = alloc((unsigned)STRLEN(line) + addlen + 1);
10388             if (p == NULL)
10389                 break;
10390             mch_memmove(p, line, curwin->w_cursor.col);
10391             STRCPY(p + curwin->w_cursor.col, repl_to);
10392             STRCAT(p, line + curwin->w_cursor.col + STRLEN(repl_from));
10393             ml_replace(curwin->w_cursor.lnum, p, FALSE);
10394             changed_bytes(curwin->w_cursor.lnum, curwin->w_cursor.col);
10395
10396             if (curwin->w_cursor.lnum != prev_lnum)
10397             {
10398                 ++sub_nlines;
10399                 prev_lnum = curwin->w_cursor.lnum;
10400             }
10401             ++sub_nsubs;
10402         }
10403         curwin->w_cursor.col += (colnr_T)STRLEN(repl_to);
10404     }
10405
10406     p_ws = save_ws;
10407     curwin->w_cursor = pos;
10408     vim_free(frompat);
10409
10410     if (sub_nsubs == 0)
10411         EMSG2(_("E753: Not found: %s"), repl_from);
10412     else
10413         do_sub_msg(FALSE);
10414 }
10415
10416 /*
10417  * Find spell suggestions for "word".  Return them in the growarray "*gap" as
10418  * a list of allocated strings.
10419  */
10420     void
10421 spell_suggest_list(gap, word, maxcount, need_cap, interactive)
10422     garray_T    *gap;
10423     char_u      *word;
10424     int         maxcount;       /* maximum nr of suggestions */
10425     int         need_cap;       /* 'spellcapcheck' matched */
10426     int         interactive;
10427 {
10428     suginfo_T   sug;
10429     int         i;
10430     suggest_T   *stp;
10431     char_u      *wcopy;
10432
10433     spell_find_suggest(word, 0, &sug, maxcount, FALSE, need_cap, interactive);
10434
10435     /* Make room in "gap". */
10436     ga_init2(gap, sizeof(char_u *), sug.su_ga.ga_len + 1);
10437     if (ga_grow(gap, sug.su_ga.ga_len) == OK)
10438     {
10439         for (i = 0; i < sug.su_ga.ga_len; ++i)
10440         {
10441             stp = &SUG(sug.su_ga, i);
10442
10443             /* The suggested word may replace only part of "word", add the not
10444              * replaced part. */
10445             wcopy = alloc(stp->st_wordlen
10446                       + (unsigned)STRLEN(sug.su_badptr + stp->st_orglen) + 1);
10447             if (wcopy == NULL)
10448                 break;
10449             STRCPY(wcopy, stp->st_word);
10450             STRCPY(wcopy + stp->st_wordlen, sug.su_badptr + stp->st_orglen);
10451             ((char_u **)gap->ga_data)[gap->ga_len++] = wcopy;
10452         }
10453     }
10454
10455     spell_find_cleanup(&sug);
10456 }
10457
10458 /*
10459  * Find spell suggestions for the word at the start of "badptr".
10460  * Return the suggestions in "su->su_ga".
10461  * The maximum number of suggestions is "maxcount".
10462  * Note: does use info for the current window.
10463  * This is based on the mechanisms of Aspell, but completely reimplemented.
10464  */
10465     static void
10466 spell_find_suggest(badptr, badlen, su, maxcount, banbadword, need_cap, interactive)
10467     char_u      *badptr;
10468     int         badlen;         /* length of bad word or 0 if unknown */
10469     suginfo_T   *su;
10470     int         maxcount;
10471     int         banbadword;     /* don't include badword in suggestions */
10472     int         need_cap;       /* word should start with capital */
10473     int         interactive;
10474 {
10475     hlf_T       attr = HLF_COUNT;
10476     char_u      buf[MAXPATHL];
10477     char_u      *p;
10478     int         do_combine = FALSE;
10479     char_u      *sps_copy;
10480 #ifdef FEAT_EVAL
10481     static int  expr_busy = FALSE;
10482 #endif
10483     int         c;
10484     int         i;
10485     langp_T     *lp;
10486
10487     /*
10488      * Set the info in "*su".
10489      */
10490     vim_memset(su, 0, sizeof(suginfo_T));
10491     ga_init2(&su->su_ga, (int)sizeof(suggest_T), 10);
10492     ga_init2(&su->su_sga, (int)sizeof(suggest_T), 10);
10493     if (*badptr == NUL)
10494         return;
10495     hash_init(&su->su_banned);
10496
10497     su->su_badptr = badptr;
10498     if (badlen != 0)
10499         su->su_badlen = badlen;
10500     else
10501         su->su_badlen = spell_check(curwin, su->su_badptr, &attr, NULL, FALSE);
10502     su->su_maxcount = maxcount;
10503     su->su_maxscore = SCORE_MAXINIT;
10504
10505     if (su->su_badlen >= MAXWLEN)
10506         su->su_badlen = MAXWLEN - 1;    /* just in case */
10507     vim_strncpy(su->su_badword, su->su_badptr, su->su_badlen);
10508     (void)spell_casefold(su->su_badptr, su->su_badlen,
10509                                                     su->su_fbadword, MAXWLEN);
10510     /* get caps flags for bad word */
10511     su->su_badflags = badword_captype(su->su_badptr,
10512                                                su->su_badptr + su->su_badlen);
10513     if (need_cap)
10514         su->su_badflags |= WF_ONECAP;
10515
10516     /* Find the default language for sound folding.  We simply use the first
10517      * one in 'spelllang' that supports sound folding.  That's good for when
10518      * using multiple files for one language, it's not that bad when mixing
10519      * languages (e.g., "pl,en"). */
10520     for (i = 0; i < curbuf->b_langp.ga_len; ++i)
10521     {
10522         lp = LANGP_ENTRY(curbuf->b_langp, i);
10523         if (lp->lp_sallang != NULL)
10524         {
10525             su->su_sallang = lp->lp_sallang;
10526             break;
10527         }
10528     }
10529
10530     /* Soundfold the bad word with the default sound folding, so that we don't
10531      * have to do this many times. */
10532     if (su->su_sallang != NULL)
10533         spell_soundfold(su->su_sallang, su->su_fbadword, TRUE,
10534                                                           su->su_sal_badword);
10535
10536     /* If the word is not capitalised and spell_check() doesn't consider the
10537      * word to be bad then it might need to be capitalised.  Add a suggestion
10538      * for that. */
10539     c = PTR2CHAR(su->su_badptr);
10540     if (!SPELL_ISUPPER(c) && attr == HLF_COUNT)
10541     {
10542         make_case_word(su->su_badword, buf, WF_ONECAP);
10543         add_suggestion(su, &su->su_ga, buf, su->su_badlen, SCORE_ICASE,
10544                                               0, TRUE, su->su_sallang, FALSE);
10545     }
10546
10547     /* Ban the bad word itself.  It may appear in another region. */
10548     if (banbadword)
10549         add_banned(su, su->su_badword);
10550
10551     /* Make a copy of 'spellsuggest', because the expression may change it. */
10552     sps_copy = vim_strsave(p_sps);
10553     if (sps_copy == NULL)
10554         return;
10555
10556     /* Loop over the items in 'spellsuggest'. */
10557     for (p = sps_copy; *p != NUL; )
10558     {
10559         copy_option_part(&p, buf, MAXPATHL, ",");
10560
10561         if (STRNCMP(buf, "expr:", 5) == 0)
10562         {
10563 #ifdef FEAT_EVAL
10564             /* Evaluate an expression.  Skip this when called recursively,
10565              * when using spellsuggest() in the expression. */
10566             if (!expr_busy)
10567             {
10568                 expr_busy = TRUE;
10569                 spell_suggest_expr(su, buf + 5);
10570                 expr_busy = FALSE;
10571             }
10572 #endif
10573         }
10574         else if (STRNCMP(buf, "file:", 5) == 0)
10575             /* Use list of suggestions in a file. */
10576             spell_suggest_file(su, buf + 5);
10577         else
10578         {
10579             /* Use internal method. */
10580             spell_suggest_intern(su, interactive);
10581             if (sps_flags & SPS_DOUBLE)
10582                 do_combine = TRUE;
10583         }
10584     }
10585
10586     vim_free(sps_copy);
10587
10588     if (do_combine)
10589         /* Combine the two list of suggestions.  This must be done last,
10590          * because sorting changes the order again. */
10591         score_combine(su);
10592 }
10593
10594 #ifdef FEAT_EVAL
10595 /*
10596  * Find suggestions by evaluating expression "expr".
10597  */
10598     static void
10599 spell_suggest_expr(su, expr)
10600     suginfo_T   *su;
10601     char_u      *expr;
10602 {
10603     list_T      *list;
10604     listitem_T  *li;
10605     int         score;
10606     char_u      *p;
10607
10608     /* The work is split up in a few parts to avoid having to export
10609      * suginfo_T.
10610      * First evaluate the expression and get the resulting list. */
10611     list = eval_spell_expr(su->su_badword, expr);
10612     if (list != NULL)
10613     {
10614         /* Loop over the items in the list. */
10615         for (li = list->lv_first; li != NULL; li = li->li_next)
10616             if (li->li_tv.v_type == VAR_LIST)
10617             {
10618                 /* Get the word and the score from the items. */
10619                 score = get_spellword(li->li_tv.vval.v_list, &p);
10620                 if (score >= 0 && score <= su->su_maxscore)
10621                     add_suggestion(su, &su->su_ga, p, su->su_badlen,
10622                                        score, 0, TRUE, su->su_sallang, FALSE);
10623             }
10624         list_unref(list);
10625     }
10626
10627     /* Remove bogus suggestions, sort and truncate at "maxcount". */
10628     check_suggestions(su, &su->su_ga);
10629     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
10630 }
10631 #endif
10632
10633 /*
10634  * Find suggestions in file "fname".  Used for "file:" in 'spellsuggest'.
10635  */
10636     static void
10637 spell_suggest_file(su, fname)
10638     suginfo_T   *su;
10639     char_u      *fname;
10640 {
10641     FILE        *fd;
10642     char_u      line[MAXWLEN * 2];
10643     char_u      *p;
10644     int         len;
10645     char_u      cword[MAXWLEN];
10646
10647     /* Open the file. */
10648     fd = mch_fopen((char *)fname, "r");
10649     if (fd == NULL)
10650     {
10651         EMSG2(_(e_notopen), fname);
10652         return;
10653     }
10654
10655     /* Read it line by line. */
10656     while (!vim_fgets(line, MAXWLEN * 2, fd) && !got_int)
10657     {
10658         line_breakcheck();
10659
10660         p = vim_strchr(line, '/');
10661         if (p == NULL)
10662             continue;       /* No Tab found, just skip the line. */
10663         *p++ = NUL;
10664         if (STRICMP(su->su_badword, line) == 0)
10665         {
10666             /* Match!  Isolate the good word, until CR or NL. */
10667             for (len = 0; p[len] >= ' '; ++len)
10668                 ;
10669             p[len] = NUL;
10670
10671             /* If the suggestion doesn't have specific case duplicate the case
10672              * of the bad word. */
10673             if (captype(p, NULL) == 0)
10674             {
10675                 make_case_word(p, cword, su->su_badflags);
10676                 p = cword;
10677             }
10678
10679             add_suggestion(su, &su->su_ga, p, su->su_badlen,
10680                                   SCORE_FILE, 0, TRUE, su->su_sallang, FALSE);
10681         }
10682     }
10683
10684     fclose(fd);
10685
10686     /* Remove bogus suggestions, sort and truncate at "maxcount". */
10687     check_suggestions(su, &su->su_ga);
10688     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
10689 }
10690
10691 /*
10692  * Find suggestions for the internal method indicated by "sps_flags".
10693  */
10694     static void
10695 spell_suggest_intern(su, interactive)
10696     suginfo_T   *su;
10697     int         interactive;
10698 {
10699     /*
10700      * Load the .sug file(s) that are available and not done yet.
10701      */
10702     suggest_load_files();
10703
10704     /*
10705      * 1. Try special cases, such as repeating a word: "the the" -> "the".
10706      *
10707      * Set a maximum score to limit the combination of operations that is
10708      * tried.
10709      */
10710     suggest_try_special(su);
10711
10712     /*
10713      * 2. Try inserting/deleting/swapping/changing a letter, use REP entries
10714      *    from the .aff file and inserting a space (split the word).
10715      */
10716     suggest_try_change(su);
10717
10718     /* For the resulting top-scorers compute the sound-a-like score. */
10719     if (sps_flags & SPS_DOUBLE)
10720         score_comp_sal(su);
10721
10722     /*
10723      * 3. Try finding sound-a-like words.
10724      */
10725     if ((sps_flags & SPS_FAST) == 0)
10726     {
10727         if (sps_flags & SPS_BEST)
10728             /* Adjust the word score for the suggestions found so far for how
10729              * they sounds like. */
10730             rescore_suggestions(su);
10731
10732         /*
10733          * While going throught the soundfold tree "su_maxscore" is the score
10734          * for the soundfold word, limits the changes that are being tried,
10735          * and "su_sfmaxscore" the rescored score, which is set by
10736          * cleanup_suggestions().
10737          * First find words with a small edit distance, because this is much
10738          * faster and often already finds the top-N suggestions.  If we didn't
10739          * find many suggestions try again with a higher edit distance.
10740          * "sl_sounddone" is used to avoid doing the same word twice.
10741          */
10742         suggest_try_soundalike_prep();
10743         su->su_maxscore = SCORE_SFMAX1;
10744         su->su_sfmaxscore = SCORE_MAXINIT * 3;
10745         suggest_try_soundalike(su);
10746         if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
10747         {
10748             /* We didn't find enough matches, try again, allowing more
10749              * changes to the soundfold word. */
10750             su->su_maxscore = SCORE_SFMAX2;
10751             suggest_try_soundalike(su);
10752             if (su->su_ga.ga_len < SUG_CLEAN_COUNT(su))
10753             {
10754                 /* Still didn't find enough matches, try again, allowing even
10755                  * more changes to the soundfold word. */
10756                 su->su_maxscore = SCORE_SFMAX3;
10757                 suggest_try_soundalike(su);
10758             }
10759         }
10760         su->su_maxscore = su->su_sfmaxscore;
10761         suggest_try_soundalike_finish();
10762     }
10763
10764     /* When CTRL-C was hit while searching do show the results.  Only clear
10765      * got_int when using a command, not for spellsuggest(). */
10766     ui_breakcheck();
10767     if (interactive && got_int)
10768     {
10769         (void)vgetc();
10770         got_int = FALSE;
10771     }
10772
10773     if ((sps_flags & SPS_DOUBLE) == 0 && su->su_ga.ga_len != 0)
10774     {
10775         if (sps_flags & SPS_BEST)
10776             /* Adjust the word score for how it sounds like. */
10777             rescore_suggestions(su);
10778
10779         /* Remove bogus suggestions, sort and truncate at "maxcount". */
10780         check_suggestions(su, &su->su_ga);
10781         (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
10782     }
10783 }
10784
10785 /*
10786  * Load the .sug files for languages that have one and weren't loaded yet.
10787  */
10788     static void
10789 suggest_load_files()
10790 {
10791     langp_T     *lp;
10792     int         lpi;
10793     slang_T     *slang;
10794     char_u      *dotp;
10795     FILE        *fd;
10796     char_u      buf[MAXWLEN];
10797     int         i;
10798     time_t      timestamp;
10799     int         wcount;
10800     int         wordnr;
10801     garray_T    ga;
10802     int         c;
10803
10804     /* Do this for all languages that support sound folding. */
10805     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
10806     {
10807         lp = LANGP_ENTRY(curbuf->b_langp, lpi);
10808         slang = lp->lp_slang;
10809         if (slang->sl_sugtime != 0 && !slang->sl_sugloaded)
10810         {
10811             /* Change ".spl" to ".sug" and open the file.  When the file isn't
10812              * found silently skip it.  Do set "sl_sugloaded" so that we
10813              * don't try again and again. */
10814             slang->sl_sugloaded = TRUE;
10815
10816             dotp = vim_strrchr(slang->sl_fname, '.');
10817             if (dotp == NULL || fnamecmp(dotp, ".spl") != 0)
10818                 continue;
10819             STRCPY(dotp, ".sug");
10820             fd = mch_fopen((char *)slang->sl_fname, "r");
10821             if (fd == NULL)
10822                 goto nextone;
10823
10824             /*
10825              * <SUGHEADER>: <fileID> <versionnr> <timestamp>
10826              */
10827             for (i = 0; i < VIMSUGMAGICL; ++i)
10828                 buf[i] = getc(fd);                      /* <fileID> */
10829             if (STRNCMP(buf, VIMSUGMAGIC, VIMSUGMAGICL) != 0)
10830             {
10831                 EMSG2(_("E778: This does not look like a .sug file: %s"),
10832                                                              slang->sl_fname);
10833                 goto nextone;
10834             }
10835             c = getc(fd);                               /* <versionnr> */
10836             if (c < VIMSUGVERSION)
10837             {
10838                 EMSG2(_("E779: Old .sug file, needs to be updated: %s"),
10839                                                              slang->sl_fname);
10840                 goto nextone;
10841             }
10842             else if (c > VIMSUGVERSION)
10843             {
10844                 EMSG2(_("E780: .sug file is for newer version of Vim: %s"),
10845                                                              slang->sl_fname);
10846                 goto nextone;
10847             }
10848
10849             /* Check the timestamp, it must be exactly the same as the one in
10850              * the .spl file.  Otherwise the word numbers won't match. */
10851             timestamp = get8c(fd);                      /* <timestamp> */
10852             if (timestamp != slang->sl_sugtime)
10853             {
10854                 EMSG2(_("E781: .sug file doesn't match .spl file: %s"),
10855                                                              slang->sl_fname);
10856                 goto nextone;
10857             }
10858
10859             /*
10860              * <SUGWORDTREE>: <wordtree>
10861              * Read the trie with the soundfolded words.
10862              */
10863             if (spell_read_tree(fd, &slang->sl_sbyts, &slang->sl_sidxs,
10864                                                                FALSE, 0) != 0)
10865             {
10866 someerror:
10867                 EMSG2(_("E782: error while reading .sug file: %s"),
10868                                                              slang->sl_fname);
10869                 slang_clear_sug(slang);
10870                 goto nextone;
10871             }
10872
10873             /*
10874              * <SUGTABLE>: <sugwcount> <sugline> ...
10875              *
10876              * Read the table with word numbers.  We use a file buffer for
10877              * this, because it's so much like a file with lines.  Makes it
10878              * possible to swap the info and save on memory use.
10879              */
10880             slang->sl_sugbuf = open_spellbuf();
10881             if (slang->sl_sugbuf == NULL)
10882                 goto someerror;
10883                                                             /* <sugwcount> */
10884             wcount = get4c(fd);
10885             if (wcount < 0)
10886                 goto someerror;
10887
10888             /* Read all the wordnr lists into the buffer, one NUL terminated
10889              * list per line. */
10890             ga_init2(&ga, 1, 100);
10891             for (wordnr = 0; wordnr < wcount; ++wordnr)
10892             {
10893                 ga.ga_len = 0;
10894                 for (;;)
10895                 {
10896                     c = getc(fd);                           /* <sugline> */
10897                     if (c < 0 || ga_grow(&ga, 1) == FAIL)
10898                         goto someerror;
10899                     ((char_u *)ga.ga_data)[ga.ga_len++] = c;
10900                     if (c == NUL)
10901                         break;
10902                 }
10903                 if (ml_append_buf(slang->sl_sugbuf, (linenr_T)wordnr,
10904                                          ga.ga_data, ga.ga_len, TRUE) == FAIL)
10905                     goto someerror;
10906             }
10907             ga_clear(&ga);
10908
10909             /*
10910              * Need to put word counts in the word tries, so that we can find
10911              * a word by its number.
10912              */
10913             tree_count_words(slang->sl_fbyts, slang->sl_fidxs);
10914             tree_count_words(slang->sl_sbyts, slang->sl_sidxs);
10915
10916 nextone:
10917             if (fd != NULL)
10918                 fclose(fd);
10919             STRCPY(dotp, ".spl");
10920         }
10921     }
10922 }
10923
10924
10925 /*
10926  * Fill in the wordcount fields for a trie.
10927  * Returns the total number of words.
10928  */
10929     static void
10930 tree_count_words(byts, idxs)
10931     char_u      *byts;
10932     idx_T       *idxs;
10933 {
10934     int         depth;
10935     idx_T       arridx[MAXWLEN];
10936     int         curi[MAXWLEN];
10937     int         c;
10938     idx_T       n;
10939     int         wordcount[MAXWLEN];
10940
10941     arridx[0] = 0;
10942     curi[0] = 1;
10943     wordcount[0] = 0;
10944     depth = 0;
10945     while (depth >= 0 && !got_int)
10946     {
10947         if (curi[depth] > byts[arridx[depth]])
10948         {
10949             /* Done all bytes at this node, go up one level. */
10950             idxs[arridx[depth]] = wordcount[depth];
10951             if (depth > 0)
10952                 wordcount[depth - 1] += wordcount[depth];
10953
10954             --depth;
10955             fast_breakcheck();
10956         }
10957         else
10958         {
10959             /* Do one more byte at this node. */
10960             n = arridx[depth] + curi[depth];
10961             ++curi[depth];
10962
10963             c = byts[n];
10964             if (c == 0)
10965             {
10966                 /* End of word, count it. */
10967                 ++wordcount[depth];
10968
10969                 /* Skip over any other NUL bytes (same word with different
10970                  * flags). */
10971                 while (byts[n + 1] == 0)
10972                 {
10973                     ++n;
10974                     ++curi[depth];
10975                 }
10976             }
10977             else
10978             {
10979                 /* Normal char, go one level deeper to count the words. */
10980                 ++depth;
10981                 arridx[depth] = idxs[n];
10982                 curi[depth] = 1;
10983                 wordcount[depth] = 0;
10984             }
10985         }
10986     }
10987 }
10988
10989 /*
10990  * Free the info put in "*su" by spell_find_suggest().
10991  */
10992     static void
10993 spell_find_cleanup(su)
10994     suginfo_T   *su;
10995 {
10996     int         i;
10997
10998     /* Free the suggestions. */
10999     for (i = 0; i < su->su_ga.ga_len; ++i)
11000         vim_free(SUG(su->su_ga, i).st_word);
11001     ga_clear(&su->su_ga);
11002     for (i = 0; i < su->su_sga.ga_len; ++i)
11003         vim_free(SUG(su->su_sga, i).st_word);
11004     ga_clear(&su->su_sga);
11005
11006     /* Free the banned words. */
11007     hash_clear_all(&su->su_banned, 0);
11008 }
11009
11010 /*
11011  * Make a copy of "word", with the first letter upper or lower cased, to
11012  * "wcopy[MAXWLEN]".  "word" must not be empty.
11013  * The result is NUL terminated.
11014  */
11015     static void
11016 onecap_copy(word, wcopy, upper)
11017     char_u      *word;
11018     char_u      *wcopy;
11019     int         upper;      /* TRUE: first letter made upper case */
11020 {
11021     char_u      *p;
11022     int         c;
11023     int         l;
11024
11025     p = word;
11026 #ifdef FEAT_MBYTE
11027     if (has_mbyte)
11028         c = mb_cptr2char_adv(&p);
11029     else
11030 #endif
11031         c = *p++;
11032     if (upper)
11033         c = SPELL_TOUPPER(c);
11034     else
11035         c = SPELL_TOFOLD(c);
11036 #ifdef FEAT_MBYTE
11037     if (has_mbyte)
11038         l = mb_char2bytes(c, wcopy);
11039     else
11040 #endif
11041     {
11042         l = 1;
11043         wcopy[0] = c;
11044     }
11045     vim_strncpy(wcopy + l, p, MAXWLEN - l - 1);
11046 }
11047
11048 /*
11049  * Make a copy of "word" with all the letters upper cased into
11050  * "wcopy[MAXWLEN]".  The result is NUL terminated.
11051  */
11052     static void
11053 allcap_copy(word, wcopy)
11054     char_u      *word;
11055     char_u      *wcopy;
11056 {
11057     char_u      *s;
11058     char_u      *d;
11059     int         c;
11060
11061     d = wcopy;
11062     for (s = word; *s != NUL; )
11063     {
11064 #ifdef FEAT_MBYTE
11065         if (has_mbyte)
11066             c = mb_cptr2char_adv(&s);
11067         else
11068 #endif
11069             c = *s++;
11070
11071 #ifdef FEAT_MBYTE
11072         /* We only change ß to SS when we are certain latin1 is used.  It
11073          * would cause weird errors in other 8-bit encodings. */
11074         if (enc_latin1like && c == 0xdf)
11075         {
11076             c = 'S';
11077             if (d - wcopy >= MAXWLEN - 1)
11078                 break;
11079             *d++ = c;
11080         }
11081         else
11082 #endif
11083             c = SPELL_TOUPPER(c);
11084
11085 #ifdef FEAT_MBYTE
11086         if (has_mbyte)
11087         {
11088             if (d - wcopy >= MAXWLEN - MB_MAXBYTES)
11089                 break;
11090             d += mb_char2bytes(c, d);
11091         }
11092         else
11093 #endif
11094         {
11095             if (d - wcopy >= MAXWLEN - 1)
11096                 break;
11097             *d++ = c;
11098         }
11099     }
11100     *d = NUL;
11101 }
11102
11103 /*
11104  * Try finding suggestions by recognizing specific situations.
11105  */
11106     static void
11107 suggest_try_special(su)
11108     suginfo_T   *su;
11109 {
11110     char_u      *p;
11111     size_t      len;
11112     int         c;
11113     char_u      word[MAXWLEN];
11114
11115     /*
11116      * Recognize a word that is repeated: "the the".
11117      */
11118     p = skiptowhite(su->su_fbadword);
11119     len = p - su->su_fbadword;
11120     p = skipwhite(p);
11121     if (STRLEN(p) == len && STRNCMP(su->su_fbadword, p, len) == 0)
11122     {
11123         /* Include badflags: if the badword is onecap or allcap
11124          * use that for the goodword too: "The the" -> "The". */
11125         c = su->su_fbadword[len];
11126         su->su_fbadword[len] = NUL;
11127         make_case_word(su->su_fbadword, word, su->su_badflags);
11128         su->su_fbadword[len] = c;
11129
11130         /* Give a soundalike score of 0, compute the score as if deleting one
11131          * character. */
11132         add_suggestion(su, &su->su_ga, word, su->su_badlen,
11133                        RESCORE(SCORE_REP, 0), 0, TRUE, su->su_sallang, FALSE);
11134     }
11135 }
11136
11137 /*
11138  * Try finding suggestions by adding/removing/swapping letters.
11139  */
11140     static void
11141 suggest_try_change(su)
11142     suginfo_T   *su;
11143 {
11144     char_u      fword[MAXWLEN];     /* copy of the bad word, case-folded */
11145     int         n;
11146     char_u      *p;
11147     int         lpi;
11148     langp_T     *lp;
11149
11150     /* We make a copy of the case-folded bad word, so that we can modify it
11151      * to find matches (esp. REP items).  Append some more text, changing
11152      * chars after the bad word may help. */
11153     STRCPY(fword, su->su_fbadword);
11154     n = (int)STRLEN(fword);
11155     p = su->su_badptr + su->su_badlen;
11156     (void)spell_casefold(p, (int)STRLEN(p), fword + n, MAXWLEN - n);
11157
11158     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
11159     {
11160         lp = LANGP_ENTRY(curbuf->b_langp, lpi);
11161
11162         /* If reloading a spell file fails it's still in the list but
11163          * everything has been cleared. */
11164         if (lp->lp_slang->sl_fbyts == NULL)
11165             continue;
11166
11167         /* Try it for this language.  Will add possible suggestions. */
11168         suggest_trie_walk(su, lp, fword, FALSE);
11169     }
11170 }
11171
/*
 * Check the maximum score: evaluates to non-zero when the score of
 * "stack[depth]" with "add" added is still below "su->su_maxscore".  When it
 * would reach or go over the maximum we won't try this change.
 * All arguments are parenthesized, so that expressions such as "&info" or
 * "stack + 1" can be passed.
 */
#define TRY_DEEPER(su, stack, depth, add) \
                ((stack)[(depth)].ts_score + (add) < (su)->su_maxscore)
11175
11176 /*
11177  * Try finding suggestions by adding/removing/swapping letters.
11178  *
11179  * This uses a state machine.  At each node in the tree we try various
11180  * operations.  When trying if an operation works "depth" is increased and the
11181  * stack[] is used to store info.  This allows combinations, thus insert one
11182  * character, replace one and delete another.  The number of changes is
11183  * limited by su->su_maxscore.
11184  *
11185  * After implementing this I noticed an article by Kemal Oflazer that
11186  * describes something similar: "Error-tolerant Finite State Recognition with
11187  * Applications to Morphological Analysis and Spelling Correction" (1996).
11188  * The implementation in the article is simplified and requires a stack of
11189  * unknown depth.  The implementation here only needs a stack depth equal to
11190  * the length of the word.
11191  *
11192  * This is also used for the sound-folded word, "soundfold" is TRUE then.
11193  * The mechanism is the same, but we find a match with a sound-folded word
11194  * that comes from one or more original words.  Each of these words may be
11195  * added, this is done by add_sound_suggest().
11196  * Don't use:
11197  *      the prefix tree or the keep-case tree
11198  *      "su->su_badlen"
11199  *      anything to do with upper and lower case
11200  *      anything to do with word or non-word characters ("spell_iswordp()")
11201  *      banned words
11202  *      word flags (rare, region, compounding)
11203  *      word splitting for now
11204  *      "similar_chars()"
11205  *      use "slang->sl_repsal" instead of "lp->lp_replang->sl_rep"
11206  */
11207     static void
11208 suggest_trie_walk(su, lp, fword, soundfold)
11209     suginfo_T   *su;
11210     langp_T     *lp;
11211     char_u      *fword;
11212     int         soundfold;
11213 {
11214     char_u      tword[MAXWLEN];     /* good word collected so far */
11215     trystate_T  stack[MAXWLEN];
11216     char_u      preword[MAXWLEN * 3]; /* word found with proper case;
11217                                        * concatenation of prefix compound
11218                                        * words and split word.  NUL terminated
11219                                        * when going deeper but not when coming
11220                                        * back. */
11221     char_u      compflags[MAXWLEN];     /* compound flags, one for each word */
11222     trystate_T  *sp;
11223     int         newscore;
11224     int         score;
11225     char_u      *byts, *fbyts, *pbyts;
11226     idx_T       *idxs, *fidxs, *pidxs;
11227     int         depth;
11228     int         c, c2, c3;
11229     int         n = 0;
11230     int         flags;
11231     garray_T    *gap;
11232     idx_T       arridx;
11233     int         len;
11234     char_u      *p;
11235     fromto_T    *ftp;
11236     int         fl = 0, tl;
11237     int         repextra = 0;       /* extra bytes in fword[] from REP item */
11238     slang_T     *slang = lp->lp_slang;
11239     int         fword_ends;
11240     int         goodword_ends;
11241 #ifdef DEBUG_TRIEWALK
11242     /* Stores the name of the change made at each level. */
11243     char_u      changename[MAXWLEN][80];
11244 #endif
11245     int         breakcheckcount = 1000;
11246     int         compound_ok;
11247
11248     /*
11249      * Go through the whole case-fold tree, try changes at each node.
11250      * "tword[]" contains the word collected from nodes in the tree.
11251      * "fword[]" the word we are trying to match with (initially the bad
11252      * word).
11253      */
11254     depth = 0;
11255     sp = &stack[0];
11256     vim_memset(sp, 0, sizeof(trystate_T));
11257     sp->ts_curi = 1;
11258
11259     if (soundfold)
11260     {
11261         /* Going through the soundfold tree. */
11262         byts = fbyts = slang->sl_sbyts;
11263         idxs = fidxs = slang->sl_sidxs;
11264         pbyts = NULL;
11265         pidxs = NULL;
11266         sp->ts_prefixdepth = PFD_NOPREFIX;
11267         sp->ts_state = STATE_START;
11268     }
11269     else
11270     {
11271         /*
11272          * When there are postponed prefixes we need to use these first.  At
11273          * the end of the prefix we continue in the case-fold tree.
11274          */
11275         fbyts = slang->sl_fbyts;
11276         fidxs = slang->sl_fidxs;
11277         pbyts = slang->sl_pbyts;
11278         pidxs = slang->sl_pidxs;
11279         if (pbyts != NULL)
11280         {
11281             byts = pbyts;
11282             idxs = pidxs;
11283             sp->ts_prefixdepth = PFD_PREFIXTREE;
11284             sp->ts_state = STATE_NOPREFIX;      /* try without prefix first */
11285         }
11286         else
11287         {
11288             byts = fbyts;
11289             idxs = fidxs;
11290             sp->ts_prefixdepth = PFD_NOPREFIX;
11291             sp->ts_state = STATE_START;
11292         }
11293     }
11294
11295     /*
11296      * Loop to find all suggestions.  At each round we either:
11297      * - For the current state try one operation, advance "ts_curi",
11298      *   increase "depth".
11299      * - When a state is done go to the next, set "ts_state".
11300      * - When all states are tried decrease "depth".
11301      */
11302     while (depth >= 0 && !got_int)
11303     {
11304         sp = &stack[depth];
11305         switch (sp->ts_state)
11306         {
11307         case STATE_START:
11308         case STATE_NOPREFIX:
11309             /*
11310              * Start of node: Deal with NUL bytes, which means
11311              * tword[] may end here.
11312              */
11313             arridx = sp->ts_arridx;         /* current node in the tree */
11314             len = byts[arridx];             /* bytes in this node */
11315             arridx += sp->ts_curi;          /* index of current byte */
11316
11317             if (sp->ts_prefixdepth == PFD_PREFIXTREE)
11318             {
11319                 /* Skip over the NUL bytes, we use them later. */
11320                 for (n = 0; n < len && byts[arridx + n] == 0; ++n)
11321                     ;
11322                 sp->ts_curi += n;
11323
11324                 /* Always past NUL bytes now. */
11325                 n = (int)sp->ts_state;
11326                 sp->ts_state = STATE_ENDNUL;
11327                 sp->ts_save_badflags = su->su_badflags;
11328
11329                 /* At end of a prefix or at start of prefixtree: check for
11330                  * following word. */
11331                 if (byts[arridx] == 0 || n == (int)STATE_NOPREFIX)
11332                 {
11333                     /* Set su->su_badflags to the caps type at this position.
11334                      * Use the caps type until here for the prefix itself. */
11335 #ifdef FEAT_MBYTE
11336                     if (has_mbyte)
11337                         n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
11338                     else
11339 #endif
11340                         n = sp->ts_fidx;
11341                     flags = badword_captype(su->su_badptr, su->su_badptr + n);
11342                     su->su_badflags = badword_captype(su->su_badptr + n,
11343                                                su->su_badptr + su->su_badlen);
11344 #ifdef DEBUG_TRIEWALK
11345                     sprintf(changename[depth], "prefix");
11346 #endif
11347                     go_deeper(stack, depth, 0);
11348                     ++depth;
11349                     sp = &stack[depth];
11350                     sp->ts_prefixdepth = depth - 1;
11351                     byts = fbyts;
11352                     idxs = fidxs;
11353                     sp->ts_arridx = 0;
11354
11355                     /* Move the prefix to preword[] with the right case
11356                      * and make find_keepcap_word() work. */
11357                     tword[sp->ts_twordlen] = NUL;
11358                     make_case_word(tword + sp->ts_splitoff,
11359                                           preword + sp->ts_prewordlen, flags);
11360                     sp->ts_prewordlen = (char_u)STRLEN(preword);
11361                     sp->ts_splitoff = sp->ts_twordlen;
11362                 }
11363                 break;
11364             }
11365
11366             if (sp->ts_curi > len || byts[arridx] != 0)
11367             {
11368                 /* Past bytes in node and/or past NUL bytes. */
11369                 sp->ts_state = STATE_ENDNUL;
11370                 sp->ts_save_badflags = su->su_badflags;
11371                 break;
11372             }
11373
11374             /*
11375              * End of word in tree.
11376              */
11377             ++sp->ts_curi;              /* eat one NUL byte */
11378
11379             flags = (int)idxs[arridx];
11380
11381             /* Skip words with the NOSUGGEST flag. */
11382             if (flags & WF_NOSUGGEST)
11383                 break;
11384
11385             fword_ends = (fword[sp->ts_fidx] == NUL
11386                            || (soundfold
11387                                ? vim_iswhite(fword[sp->ts_fidx])
11388                                : !spell_iswordp(fword + sp->ts_fidx, curbuf)));
11389             tword[sp->ts_twordlen] = NUL;
11390
11391             if (sp->ts_prefixdepth <= PFD_NOTSPECIAL
11392                                         && (sp->ts_flags & TSF_PREFIXOK) == 0)
11393             {
11394                 /* There was a prefix before the word.  Check that the prefix
11395                  * can be used with this word. */
11396                 /* Count the length of the NULs in the prefix.  If there are
11397                  * none this must be the first try without a prefix.  */
11398                 n = stack[sp->ts_prefixdepth].ts_arridx;
11399                 len = pbyts[n++];
11400                 for (c = 0; c < len && pbyts[n + c] == 0; ++c)
11401                     ;
11402                 if (c > 0)
11403                 {
11404                     c = valid_word_prefix(c, n, flags,
11405                                        tword + sp->ts_splitoff, slang, FALSE);
11406                     if (c == 0)
11407                         break;
11408
11409                     /* Use the WF_RARE flag for a rare prefix. */
11410                     if (c & WF_RAREPFX)
11411                         flags |= WF_RARE;
11412
11413                     /* Tricky: when checking for both prefix and compounding
11414                      * we run into the prefix flag first.
11415                      * Remember that it's OK, so that we accept the prefix
11416                      * when arriving at a compound flag. */
11417                     sp->ts_flags |= TSF_PREFIXOK;
11418                 }
11419             }
11420
11421             /* Check NEEDCOMPOUND: can't use word without compounding.  Do try
11422              * appending another compound word below. */
11423             if (sp->ts_complen == sp->ts_compsplit && fword_ends
11424                                                      && (flags & WF_NEEDCOMP))
11425                 goodword_ends = FALSE;
11426             else
11427                 goodword_ends = TRUE;
11428
11429             p = NULL;
11430             compound_ok = TRUE;
11431             if (sp->ts_complen > sp->ts_compsplit)
11432             {
11433                 if (slang->sl_nobreak)
11434                 {
11435                     /* There was a word before this word.  When there was no
11436                      * change in this word (it was correct) add the first word
11437                      * as a suggestion.  If this word was corrected too, we
11438                      * need to check if a correct word follows. */
11439                     if (sp->ts_fidx - sp->ts_splitfidx
11440                                           == sp->ts_twordlen - sp->ts_splitoff
11441                             && STRNCMP(fword + sp->ts_splitfidx,
11442                                         tword + sp->ts_splitoff,
11443                                          sp->ts_fidx - sp->ts_splitfidx) == 0)
11444                     {
11445                         preword[sp->ts_prewordlen] = NUL;
11446                         newscore = score_wordcount_adj(slang, sp->ts_score,
11447                                                  preword + sp->ts_prewordlen,
11448                                                  sp->ts_prewordlen > 0);
11449                         /* Add the suggestion if the score isn't too bad. */
11450                         if (newscore <= su->su_maxscore)
11451                             add_suggestion(su, &su->su_ga, preword,
11452                                     sp->ts_splitfidx - repextra,
11453                                     newscore, 0, FALSE,
11454                                     lp->lp_sallang, FALSE);
11455                         break;
11456                     }
11457                 }
11458                 else
11459                 {
11460                     /* There was a compound word before this word.  If this
11461                      * word does not support compounding then give up
11462                      * (splitting is tried for the word without compound
11463                      * flag). */
11464                     if (((unsigned)flags >> 24) == 0
11465                             || sp->ts_twordlen - sp->ts_splitoff
11466                                                        < slang->sl_compminlen)
11467                         break;
11468 #ifdef FEAT_MBYTE
11469                     /* For multi-byte chars check character length against
11470                      * COMPOUNDMIN. */
11471                     if (has_mbyte
11472                             && slang->sl_compminlen > 0
11473                             && mb_charlen(tword + sp->ts_splitoff)
11474                                                        < slang->sl_compminlen)
11475                         break;
11476 #endif
11477
11478                     compflags[sp->ts_complen] = ((unsigned)flags >> 24);
11479                     compflags[sp->ts_complen + 1] = NUL;
11480                     vim_strncpy(preword + sp->ts_prewordlen,
11481                             tword + sp->ts_splitoff,
11482                             sp->ts_twordlen - sp->ts_splitoff);
11483                     p = preword;
11484                     while (*skiptowhite(p) != NUL)
11485                         p = skipwhite(skiptowhite(p));
11486                     if (fword_ends && !can_compound(slang, p,
11487                                                 compflags + sp->ts_compsplit))
11488                         /* Compound is not allowed.  But it may still be
11489                          * possible if we add another (short) word. */
11490                         compound_ok = FALSE;
11491
11492                     /* Get pointer to last char of previous word. */
11493                     p = preword + sp->ts_prewordlen;
11494                     mb_ptr_back(preword, p);
11495                 }
11496             }
11497
11498             /*
11499              * Form the word with proper case in preword.
11500              * If there is a word from a previous split, append.
11501              * For the soundfold tree don't change the case, simply append.
11502              */
11503             if (soundfold)
11504                 STRCPY(preword + sp->ts_prewordlen, tword + sp->ts_splitoff);
11505             else if (flags & WF_KEEPCAP)
11506                 /* Must find the word in the keep-case tree. */
11507                 find_keepcap_word(slang, tword + sp->ts_splitoff,
11508                                                  preword + sp->ts_prewordlen);
11509             else
11510             {
11511                 /* Include badflags: If the badword is onecap or allcap
11512                  * use that for the goodword too.  But if the badword is
11513                  * allcap and it's only one char long use onecap. */
11514                 c = su->su_badflags;
11515                 if ((c & WF_ALLCAP)
11516 #ifdef FEAT_MBYTE
11517                         && su->su_badlen == (*mb_ptr2len)(su->su_badptr)
11518 #else
11519                         && su->su_badlen == 1
11520 #endif
11521                         )
11522                     c = WF_ONECAP;
11523                 c |= flags;
11524
11525                 /* When appending a compound word after a word character don't
11526                  * use Onecap. */
11527                 if (p != NULL && spell_iswordp_nmw(p))
11528                     c &= ~WF_ONECAP;
11529                 make_case_word(tword + sp->ts_splitoff,
11530                                               preword + sp->ts_prewordlen, c);
11531             }
11532
11533             if (!soundfold)
11534             {
11535                 /* Don't use a banned word.  It may appear again as a good
11536                  * word, thus remember it. */
11537                 if (flags & WF_BANNED)
11538                 {
11539                     add_banned(su, preword + sp->ts_prewordlen);
11540                     break;
11541                 }
11542                 if ((sp->ts_complen == sp->ts_compsplit
11543                             && WAS_BANNED(su, preword + sp->ts_prewordlen))
11544                                                    || WAS_BANNED(su, preword))
11545                 {
11546                     if (slang->sl_compprog == NULL)
11547                         break;
11548                     /* the word so far was banned but we may try compounding */
11549                     goodword_ends = FALSE;
11550                 }
11551             }
11552
11553             newscore = 0;
11554             if (!soundfold)     /* soundfold words don't have flags */
11555             {
11556                 if ((flags & WF_REGION)
11557                             && (((unsigned)flags >> 16) & lp->lp_region) == 0)
11558                     newscore += SCORE_REGION;
11559                 if (flags & WF_RARE)
11560                     newscore += SCORE_RARE;
11561
11562                 if (!spell_valid_case(su->su_badflags,
11563                                   captype(preword + sp->ts_prewordlen, NULL)))
11564                     newscore += SCORE_ICASE;
11565             }
11566
11567             /* TODO: how about splitting in the soundfold tree? */
11568             if (fword_ends
11569                     && goodword_ends
11570                     && sp->ts_fidx >= sp->ts_fidxtry
11571                     && compound_ok)
11572             {
11573                 /* The badword also ends: add suggestions. */
11574 #ifdef DEBUG_TRIEWALK
11575                 if (soundfold && STRCMP(preword, "smwrd") == 0)
11576                 {
11577                     int     j;
11578
11579                     /* print the stack of changes that brought us here */
11580                     smsg("------ %s -------", fword);
11581                     for (j = 0; j < depth; ++j)
11582                         smsg("%s", changename[j]);
11583                 }
11584 #endif
11585                 if (soundfold)
11586                 {
11587                     /* For soundfolded words we need to find the original
11588                      * words, the edit distance and then add them. */
11589                     add_sound_suggest(su, preword, sp->ts_score, lp);
11590                 }
11591                 else
11592                 {
11593                     /* Give a penalty when changing non-word char to word
11594                      * char, e.g., "thes," -> "these". */
11595                     p = fword + sp->ts_fidx;
11596                     mb_ptr_back(fword, p);
11597                     if (!spell_iswordp(p, curbuf))
11598                     {
11599                         p = preword + STRLEN(preword);
11600                         mb_ptr_back(preword, p);
11601                         if (spell_iswordp(p, curbuf))
11602                             newscore += SCORE_NONWORD;
11603                     }
11604
11605                     /* Give a bonus to words seen before. */
11606                     score = score_wordcount_adj(slang,
11607                                                 sp->ts_score + newscore,
11608                                                 preword + sp->ts_prewordlen,
11609                                                 sp->ts_prewordlen > 0);
11610
11611                     /* Add the suggestion if the score isn't too bad. */
11612                     if (score <= su->su_maxscore)
11613                     {
11614                         add_suggestion(su, &su->su_ga, preword,
11615                                     sp->ts_fidx - repextra,
11616                                     score, 0, FALSE, lp->lp_sallang, FALSE);
11617
11618                         if (su->su_badflags & WF_MIXCAP)
11619                         {
11620                             /* We really don't know if the word should be
11621                              * upper or lower case, add both. */
11622                             c = captype(preword, NULL);
11623                             if (c == 0 || c == WF_ALLCAP)
11624                             {
11625                                 make_case_word(tword + sp->ts_splitoff,
11626                                               preword + sp->ts_prewordlen,
11627                                                       c == 0 ? WF_ALLCAP : 0);
11628
11629                                 add_suggestion(su, &su->su_ga, preword,
11630                                         sp->ts_fidx - repextra,
11631                                         score + SCORE_ICASE, 0, FALSE,
11632                                         lp->lp_sallang, FALSE);
11633                             }
11634                         }
11635                     }
11636                 }
11637             }
11638
11639             /*
11640              * Try word split and/or compounding.
11641              */
11642             if ((sp->ts_fidx >= sp->ts_fidxtry || fword_ends)
11643 #ifdef FEAT_MBYTE
11644                     /* Don't split halfway through a character. */
11645                     && (!has_mbyte || sp->ts_tcharlen == 0)
11646 #endif
11647                     )
11648             {
11649                 int     try_compound;
11650                 int     try_split;
11651
11652                 /* If past the end of the bad word don't try a split.
11653                  * Otherwise try changing the next word.  E.g., find
11654                  * suggestions for "the the" where the second "the" is
11655                  * different.  It's done like a split.
11656                  * TODO: word split for soundfold words */
11657                 try_split = (sp->ts_fidx - repextra < su->su_badlen)
11658                                                                 && !soundfold;
11659
11660                 /* Get here in several situations:
11661                  * 1. The word in the tree ends:
11662                  *    If the word allows compounding try that.  Otherwise try
11663                  *    a split by inserting a space.  For both check that a
11664                  *    valid word starts at fword[sp->ts_fidx].
11665                  *    For NOBREAK do like compounding to be able to check if
11666                  *    the next word is valid.
11667                  * 2. The badword does end, but it was due to a change (e.g.,
11668                  *    a swap).  No need to split, but do check that the
11669                  *    following word is valid.
11670                  * 3. The badword and the word in the tree end.  It may still
11671                  *    be possible to compound another (short) word.
11672                  */
11673                 try_compound = FALSE;
11674                 if (!soundfold
11675                         && slang->sl_compprog != NULL
11676                         && ((unsigned)flags >> 24) != 0
11677                         && sp->ts_twordlen - sp->ts_splitoff
11678                                                        >= slang->sl_compminlen
11679 #ifdef FEAT_MBYTE
11680                         && (!has_mbyte
11681                             || slang->sl_compminlen == 0
11682                             || mb_charlen(tword + sp->ts_splitoff)
11683                                                       >= slang->sl_compminlen)
11684 #endif
11685                         && (slang->sl_compsylmax < MAXWLEN
11686                             || sp->ts_complen + 1 - sp->ts_compsplit
11687                                                           < slang->sl_compmax)
11688                         && (byte_in_str(sp->ts_complen == sp->ts_compsplit
11689                                             ? slang->sl_compstartflags
11690                                             : slang->sl_compallflags,
11691                                                     ((unsigned)flags >> 24))))
11692                 {
11693                     try_compound = TRUE;
11694                     compflags[sp->ts_complen] = ((unsigned)flags >> 24);
11695                     compflags[sp->ts_complen + 1] = NUL;
11696                 }
11697
11698                 /* For NOBREAK we never try splitting, it won't make any word
11699                  * valid. */
11700                 if (slang->sl_nobreak)
11701                     try_compound = TRUE;
11702
11703                 /* If we could add a compound word, and it's also possible to
11704                  * split at this point, do the split first and set
11705                  * TSF_DIDSPLIT to avoid doing it again. */
11706                 else if (!fword_ends
11707                         && try_compound
11708                         && (sp->ts_flags & TSF_DIDSPLIT) == 0)
11709                 {
11710                     try_compound = FALSE;
11711                     sp->ts_flags |= TSF_DIDSPLIT;
11712                     --sp->ts_curi;          /* do the same NUL again */
11713                     compflags[sp->ts_complen] = NUL;
11714                 }
11715                 else
11716                     sp->ts_flags &= ~TSF_DIDSPLIT;
11717
11718                 if (try_split || try_compound)
11719                 {
11720                     if (!try_compound && (!fword_ends || !goodword_ends))
11721                     {
11722                         /* If we're going to split need to check that the
11723                          * words so far are valid for compounding.  If there
11724                          * is only one word it must not have the NEEDCOMPOUND
11725                          * flag. */
11726                         if (sp->ts_complen == sp->ts_compsplit
11727                                                      && (flags & WF_NEEDCOMP))
11728                             break;
11729                         p = preword;
11730                         while (*skiptowhite(p) != NUL)
11731                             p = skipwhite(skiptowhite(p));
11732                         if (sp->ts_complen > sp->ts_compsplit
11733                                 && !can_compound(slang, p,
11734                                                 compflags + sp->ts_compsplit))
11735                             break;
11736
11737                         if (slang->sl_nosplitsugs)
11738                             newscore += SCORE_SPLIT_NO;
11739                         else
11740                             newscore += SCORE_SPLIT;
11741
11742                         /* Give a bonus to words seen before. */
11743                         newscore = score_wordcount_adj(slang, newscore,
11744                                            preword + sp->ts_prewordlen, TRUE);
11745                     }
11746
11747                     if (TRY_DEEPER(su, stack, depth, newscore))
11748                     {
11749                         go_deeper(stack, depth, newscore);
11750 #ifdef DEBUG_TRIEWALK
11751                         if (!try_compound && !fword_ends)
11752                             sprintf(changename[depth], "%.*s-%s: split",
11753                                  sp->ts_twordlen, tword, fword + sp->ts_fidx);
11754                         else
11755                             sprintf(changename[depth], "%.*s-%s: compound",
11756                                  sp->ts_twordlen, tword, fword + sp->ts_fidx);
11757 #endif
11758                         /* Save things to be restored at STATE_SPLITUNDO. */
11759                         sp->ts_save_badflags = su->su_badflags;
11760                         sp->ts_state = STATE_SPLITUNDO;
11761
11762                         ++depth;
11763                         sp = &stack[depth];
11764
11765                         /* Append a space to preword when splitting. */
11766                         if (!try_compound && !fword_ends)
11767                             STRCAT(preword, " ");
11768                         sp->ts_prewordlen = (char_u)STRLEN(preword);
11769                         sp->ts_splitoff = sp->ts_twordlen;
11770                         sp->ts_splitfidx = sp->ts_fidx;
11771
11772                         /* If the badword has a non-word character at this
11773                          * position skip it.  That means replacing the
11774                          * non-word character with a space.  Always skip a
11775                          * character when the word ends.  But only when the
11776                          * good word can end. */
11777                         if (((!try_compound && !spell_iswordp_nmw(fword
11778                                                                + sp->ts_fidx))
11779                                     || fword_ends)
11780                                 && fword[sp->ts_fidx] != NUL
11781                                 && goodword_ends)
11782                         {
11783                             int     l;
11784
11785 #ifdef FEAT_MBYTE
11786                             if (has_mbyte)
11787                                 l = MB_BYTE2LEN(fword[sp->ts_fidx]);
11788                             else
11789 #endif
11790                                 l = 1;
11791                             if (fword_ends)
11792                             {
11793                                 /* Copy the skipped character to preword. */
11794                                 mch_memmove(preword + sp->ts_prewordlen,
11795                                                       fword + sp->ts_fidx, l);
11796                                 sp->ts_prewordlen += l;
11797                                 preword[sp->ts_prewordlen] = NUL;
11798                             }
11799                             else
11800                                 sp->ts_score -= SCORE_SPLIT - SCORE_SUBST;
11801                             sp->ts_fidx += l;
11802                         }
11803
11804                         /* When compounding include compound flag in
11805                          * compflags[] (already set above).  When splitting we
11806                          * may start compounding over again.  */
11807                         if (try_compound)
11808                             ++sp->ts_complen;
11809                         else
11810                             sp->ts_compsplit = sp->ts_complen;
11811                         sp->ts_prefixdepth = PFD_NOPREFIX;
11812
11813                         /* set su->su_badflags to the caps type at this
11814                          * position */
11815 #ifdef FEAT_MBYTE
11816                         if (has_mbyte)
11817                             n = nofold_len(fword, sp->ts_fidx, su->su_badptr);
11818                         else
11819 #endif
11820                             n = sp->ts_fidx;
11821                         su->su_badflags = badword_captype(su->su_badptr + n,
11822                                                su->su_badptr + su->su_badlen);
11823
11824                         /* Restart at top of the tree. */
11825                         sp->ts_arridx = 0;
11826
11827                         /* If there are postponed prefixes, try these too. */
11828                         if (pbyts != NULL)
11829                         {
11830                             byts = pbyts;
11831                             idxs = pidxs;
11832                             sp->ts_prefixdepth = PFD_PREFIXTREE;
11833                             sp->ts_state = STATE_NOPREFIX;
11834                         }
11835                     }
11836                 }
11837             }
11838             break;
11839
11840         case STATE_SPLITUNDO:
11841             /* Undo the changes done for word split or compound word. */
11842             su->su_badflags = sp->ts_save_badflags;
11843
11844             /* Continue looking for NUL bytes. */
11845             sp->ts_state = STATE_START;
11846
11847             /* In case we went into the prefix tree. */
11848             byts = fbyts;
11849             idxs = fidxs;
11850             break;
11851
11852         case STATE_ENDNUL:
11853             /* Past the NUL bytes in the node. */
11854             su->su_badflags = sp->ts_save_badflags;
11855             if (fword[sp->ts_fidx] == NUL
11856 #ifdef FEAT_MBYTE
11857                     && sp->ts_tcharlen == 0
11858 #endif
11859                )
11860             {
11861                 /* The badword ends, can't use STATE_PLAIN. */
11862                 sp->ts_state = STATE_DEL;
11863                 break;
11864             }
11865             sp->ts_state = STATE_PLAIN;
11866             /*FALLTHROUGH*/
11867
11868         case STATE_PLAIN:
11869             /*
11870              * Go over all possible bytes at this node, add each to tword[]
11871              * and use child node.  "ts_curi" is the index.
11872              */
11873             arridx = sp->ts_arridx;
11874             if (sp->ts_curi > byts[arridx])
11875             {
11876                 /* Done all bytes at this node, do next state.  When still at
11877                  * already changed bytes skip the other tricks. */
11878                 if (sp->ts_fidx >= sp->ts_fidxtry)
11879                     sp->ts_state = STATE_DEL;
11880                 else
11881                     sp->ts_state = STATE_FINAL;
11882             }
11883             else
11884             {
11885                 arridx += sp->ts_curi++;
11886                 c = byts[arridx];
11887
11888                 /* Normal byte, go one level deeper.  If it's not equal to the
11889                  * byte in the bad word adjust the score.  But don't even try
11890                  * when the byte was already changed.  And don't try when we
11891                  * just deleted this byte, accepting it is always cheaper than
11892                  * delete + substitute. */
11893                 if (c == fword[sp->ts_fidx]
11894 #ifdef FEAT_MBYTE
11895                         || (sp->ts_tcharlen > 0 && sp->ts_isdiff != DIFF_NONE)
11896 #endif
11897                         )
11898                     newscore = 0;
11899                 else
11900                     newscore = SCORE_SUBST;
11901                 if ((newscore == 0
11902                             || (sp->ts_fidx >= sp->ts_fidxtry
11903                                 && ((sp->ts_flags & TSF_DIDDEL) == 0
11904                                     || c != fword[sp->ts_delidx])))
11905                         && TRY_DEEPER(su, stack, depth, newscore))
11906                 {
11907                     go_deeper(stack, depth, newscore);
11908 #ifdef DEBUG_TRIEWALK
11909                     if (newscore > 0)
11910                         sprintf(changename[depth], "%.*s-%s: subst %c to %c",
11911                                 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11912                                 fword[sp->ts_fidx], c);
11913                     else
11914                         sprintf(changename[depth], "%.*s-%s: accept %c",
11915                                 sp->ts_twordlen, tword, fword + sp->ts_fidx,
11916                                 fword[sp->ts_fidx]);
11917 #endif
11918                     ++depth;
11919                     sp = &stack[depth];
11920                     ++sp->ts_fidx;
11921                     tword[sp->ts_twordlen++] = c;
11922                     sp->ts_arridx = idxs[arridx];
11923 #ifdef FEAT_MBYTE
11924                     if (newscore == SCORE_SUBST)
11925                         sp->ts_isdiff = DIFF_YES;
11926                     if (has_mbyte)
11927                     {
11928                         /* Multi-byte characters are a bit complicated to
11929                          * handle: They differ when any of the bytes differ
11930                          * and then their length may also differ. */
11931                         if (sp->ts_tcharlen == 0)
11932                         {
11933                             /* First byte. */
11934                             sp->ts_tcharidx = 0;
11935                             sp->ts_tcharlen = MB_BYTE2LEN(c);
11936                             sp->ts_fcharstart = sp->ts_fidx - 1;
11937                             sp->ts_isdiff = (newscore != 0)
11938                                                        ? DIFF_YES : DIFF_NONE;
11939                         }
11940                         else if (sp->ts_isdiff == DIFF_INSERT)
11941                             /* When inserting trail bytes don't advance in the
11942                              * bad word. */
11943                             --sp->ts_fidx;
11944                         if (++sp->ts_tcharidx == sp->ts_tcharlen)
11945                         {
11946                             /* Last byte of character. */
11947                             if (sp->ts_isdiff == DIFF_YES)
11948                             {
11949                                 /* Correct ts_fidx for the byte length of the
11950                                  * character (we didn't check that before). */
11951                                 sp->ts_fidx = sp->ts_fcharstart
11952                                             + MB_BYTE2LEN(
11953                                                     fword[sp->ts_fcharstart]);
11954
11955                                 /* For changing a composing character adjust
11956                                  * the score from SCORE_SUBST to
11957                                  * SCORE_SUBCOMP. */
11958                                 if (enc_utf8
11959                                         && utf_iscomposing(
11960                                             mb_ptr2char(tword
11961                                                 + sp->ts_twordlen
11962                                                            - sp->ts_tcharlen))
11963                                         && utf_iscomposing(
11964                                             mb_ptr2char(fword
11965                                                         + sp->ts_fcharstart)))
11966                                     sp->ts_score -=
11967                                                   SCORE_SUBST - SCORE_SUBCOMP;
11968
11969                                 /* For a similar character adjust score from
11970                                  * SCORE_SUBST to SCORE_SIMILAR. */
11971                                 else if (!soundfold
11972                                         && slang->sl_has_map
11973                                         && similar_chars(slang,
11974                                             mb_ptr2char(tword
11975                                                 + sp->ts_twordlen
11976                                                            - sp->ts_tcharlen),
11977                                             mb_ptr2char(fword
11978                                                         + sp->ts_fcharstart)))
11979                                     sp->ts_score -=
11980                                                   SCORE_SUBST - SCORE_SIMILAR;
11981                             }
11982                             else if (sp->ts_isdiff == DIFF_INSERT
11983                                          && sp->ts_twordlen > sp->ts_tcharlen)
11984                             {
11985                                 p = tword + sp->ts_twordlen - sp->ts_tcharlen;
11986                                 c = mb_ptr2char(p);
11987                                 if (enc_utf8 && utf_iscomposing(c))
11988                                 {
11989                                     /* Inserting a composing char doesn't
11990                                      * count that much. */
11991                                     sp->ts_score -= SCORE_INS - SCORE_INSCOMP;
11992                                 }
11993                                 else
11994                                 {
11995                                     /* If the previous character was the same,
11996                                      * thus doubling a character, give a bonus
11997                                      * to the score.  Also for the soundfold
11998                                      * tree (might seem illogical but does
11999                                      * give better scores). */
12000                                     mb_ptr_back(tword, p);
12001                                     if (c == mb_ptr2char(p))
12002                                         sp->ts_score -= SCORE_INS
12003                                                                - SCORE_INSDUP;
12004                                 }
12005                             }
12006
12007                             /* Starting a new char, reset the length. */
12008                             sp->ts_tcharlen = 0;
12009                         }
12010                     }
12011                     else
12012 #endif
12013                     {
12014                         /* If we found a similar char adjust the score.
12015                          * We do this after calling go_deeper() because
12016                          * it's slow. */
12017                         if (newscore != 0
12018                                 && !soundfold
12019                                 && slang->sl_has_map
12020                                 && similar_chars(slang,
12021                                                    c, fword[sp->ts_fidx - 1]))
12022                             sp->ts_score -= SCORE_SUBST - SCORE_SIMILAR;
12023                     }
12024                 }
12025             }
12026             break;
12027
12028         case STATE_DEL:
12029 #ifdef FEAT_MBYTE
12030             /* When past the first byte of a multi-byte char don't try
12031              * delete/insert/swap a character. */
12032             if (has_mbyte && sp->ts_tcharlen > 0)
12033             {
12034                 sp->ts_state = STATE_FINAL;
12035                 break;
12036             }
12037 #endif
12038             /*
12039              * Try skipping one character in the bad word (delete it).
12040              */
12041             sp->ts_state = STATE_INS_PREP;
12042             sp->ts_curi = 1;
12043             if (soundfold && sp->ts_fidx == 0 && fword[sp->ts_fidx] == '*')
12044                 /* Deleting a vowel at the start of a word counts less, see
12045                  * soundalike_score(). */
12046                 newscore = 2 * SCORE_DEL / 3;
12047             else
12048                 newscore = SCORE_DEL;
12049             if (fword[sp->ts_fidx] != NUL
12050                                     && TRY_DEEPER(su, stack, depth, newscore))
12051             {
12052                 go_deeper(stack, depth, newscore);
12053 #ifdef DEBUG_TRIEWALK
12054                 sprintf(changename[depth], "%.*s-%s: delete %c",
12055                         sp->ts_twordlen, tword, fword + sp->ts_fidx,
12056                         fword[sp->ts_fidx]);
12057 #endif
12058                 ++depth;
12059
12060                 /* Remember what character we deleted, so that we can avoid
12061                  * inserting it again. */
12062                 stack[depth].ts_flags |= TSF_DIDDEL;
12063                 stack[depth].ts_delidx = sp->ts_fidx;
12064
12065                 /* Advance over the character in fword[].  Give a bonus to the
12066                  * score if the same character is following "nn" -> "n".  It's
12067                  * a bit illogical for soundfold tree but it does give better
12068                  * results. */
12069 #ifdef FEAT_MBYTE
12070                 if (has_mbyte)
12071                 {
12072                     c = mb_ptr2char(fword + sp->ts_fidx);
12073                     stack[depth].ts_fidx += MB_BYTE2LEN(fword[sp->ts_fidx]);
12074                     if (enc_utf8 && utf_iscomposing(c))
12075                         stack[depth].ts_score -= SCORE_DEL - SCORE_DELCOMP;
12076                     else if (c == mb_ptr2char(fword + stack[depth].ts_fidx))
12077                         stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
12078                 }
12079                 else
12080 #endif
12081                 {
12082                     ++stack[depth].ts_fidx;
12083                     if (fword[sp->ts_fidx] == fword[sp->ts_fidx + 1])
12084                         stack[depth].ts_score -= SCORE_DEL - SCORE_DELDUP;
12085                 }
12086                 break;
12087             }
12088             /*FALLTHROUGH*/
12089
12090         case STATE_INS_PREP:
12091             if (sp->ts_flags & TSF_DIDDEL)
12092             {
12093                 /* If we just deleted a byte then inserting won't make sense,
12094                  * a substitute is always cheaper. */
12095                 sp->ts_state = STATE_SWAP;
12096                 break;
12097             }
12098
12099             /* skip over NUL bytes */
12100             n = sp->ts_arridx;
12101             for (;;)
12102             {
12103                 if (sp->ts_curi > byts[n])
12104                 {
12105                     /* Only NUL bytes at this node, go to next state. */
12106                     sp->ts_state = STATE_SWAP;
12107                     break;
12108                 }
12109                 if (byts[n + sp->ts_curi] != NUL)
12110                 {
12111                     /* Found a byte to insert. */
12112                     sp->ts_state = STATE_INS;
12113                     break;
12114                 }
12115                 ++sp->ts_curi;
12116             }
12117             break;
12118
12119             /* Not reached: the "break" above ends STATE_INS_PREP; no fallthrough. */
12120
12121         case STATE_INS:
12122             /* Insert one byte.  Repeat this for each possible byte at this
12123              * node. */
12124             n = sp->ts_arridx;
12125             if (sp->ts_curi > byts[n])
12126             {
12127                 /* Done all bytes at this node, go to next state. */
12128                 sp->ts_state = STATE_SWAP;
12129                 break;
12130             }
12131
12132             /* Do one more byte at this node, but:
12133              * - Skip NUL bytes.
12134              * - Skip the byte if it's equal to the byte in the word,
12135              *   accepting that byte is always better.
12136              */
12137             n += sp->ts_curi++;
12138             c = byts[n];
12139             if (soundfold && sp->ts_twordlen == 0 && c == '*')
12140                 /* Inserting a vowel at the start of a word counts less,
12141                  * see soundalike_score(). */
12142                 newscore = 2 * SCORE_INS / 3;
12143             else
12144                 newscore = SCORE_INS;
12145             if (c != fword[sp->ts_fidx]
12146                                     && TRY_DEEPER(su, stack, depth, newscore))
12147             {
12148                 go_deeper(stack, depth, newscore);
12149 #ifdef DEBUG_TRIEWALK
12150                 sprintf(changename[depth], "%.*s-%s: insert %c",
12151                         sp->ts_twordlen, tword, fword + sp->ts_fidx,
12152                         c);
12153 #endif
12154                 ++depth;
12155                 sp = &stack[depth];
12156                 tword[sp->ts_twordlen++] = c;
12157                 sp->ts_arridx = idxs[n];
12158 #ifdef FEAT_MBYTE
12159                 if (has_mbyte)
12160                 {
12161                     fl = MB_BYTE2LEN(c);
12162                     if (fl > 1)
12163                     {
12164                         /* There are following bytes for the same character.
12165                          * We must find all bytes before trying
12166                          * delete/insert/swap/etc. */
12167                         sp->ts_tcharlen = fl;
12168                         sp->ts_tcharidx = 1;
12169                         sp->ts_isdiff = DIFF_INSERT;
12170                     }
12171                 }
12172                 else
12173                     fl = 1;
12174                 if (fl == 1)
12175 #endif
12176                 {
12177                     /* If the previous character was the same, thus doubling a
12178                      * character, give a bonus to the score.  Also for
12179                      * soundfold words (illogical but does give a better
12180                      * score). */
12181                     if (sp->ts_twordlen >= 2
12182                                            && tword[sp->ts_twordlen - 2] == c)
12183                         sp->ts_score -= SCORE_INS - SCORE_INSDUP;
12184                 }
12185             }
12186             break;
12187
12188         case STATE_SWAP:
12189             /*
12190              * Swap two bytes in the bad word: "12" -> "21".
12191              * We change "fword" here, it's changed back afterwards at
12192              * STATE_UNSWAP.
12193              */
12194             p = fword + sp->ts_fidx;
12195             c = *p;
12196             if (c == NUL)
12197             {
12198                 /* End of word, can't swap or replace. */
12199                 sp->ts_state = STATE_FINAL;
12200                 break;
12201             }
12202
12203             /* Don't swap if the first character is not a word character.
12204              * SWAP3 etc. also don't make sense then. */
12205             if (!soundfold && !spell_iswordp(p, curbuf))
12206             {
12207                 sp->ts_state = STATE_REP_INI;
12208                 break;
12209             }
12210
12211 #ifdef FEAT_MBYTE
12212             if (has_mbyte)
12213             {
12214                 n = mb_cptr2len(p);
12215                 c = mb_ptr2char(p);
12216                 if (p[n] == NUL)
12217                     c2 = NUL;
12218                 else if (!soundfold && !spell_iswordp(p + n, curbuf))
12219                     c2 = c; /* don't swap non-word char */
12220                 else
12221                     c2 = mb_ptr2char(p + n);
12222             }
12223             else
12224 #endif
12225             {
12226                 if (p[1] == NUL)
12227                     c2 = NUL;
12228                 else if (!soundfold && !spell_iswordp(p + 1, curbuf))
12229                     c2 = c; /* don't swap non-word char */
12230                 else
12231                     c2 = p[1];
12232             }
12233
12234             /* When the second character is NUL we can't swap. */
12235             if (c2 == NUL)
12236             {
12237                 sp->ts_state = STATE_REP_INI;
12238                 break;
12239             }
12240
12241             /* When characters are identical, swap won't do anything.
12242              * Also get here if the second char is not a word character. */
12243             if (c == c2)
12244             {
12245                 sp->ts_state = STATE_SWAP3;
12246                 break;
12247             }
12248             if (c2 != NUL && TRY_DEEPER(su, stack, depth, SCORE_SWAP))
12249             {
12250                 go_deeper(stack, depth, SCORE_SWAP);
12251 #ifdef DEBUG_TRIEWALK
12252                 sprintf(changename[depth], "%.*s-%s: swap %c and %c",
12253                         sp->ts_twordlen, tword, fword + sp->ts_fidx,
12254                         c, c2);
12255 #endif
12256                 sp->ts_state = STATE_UNSWAP;
12257                 ++depth;
12258 #ifdef FEAT_MBYTE
12259                 if (has_mbyte)
12260                 {
12261                     fl = mb_char2len(c2);
12262                     mch_memmove(p, p + n, fl);
12263                     mb_char2bytes(c, p + fl);
12264                     stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
12265                 }
12266                 else
12267 #endif
12268                 {
12269                     p[0] = c2;
12270                     p[1] = c;
12271                     stack[depth].ts_fidxtry = sp->ts_fidx + 2;
12272                 }
12273             }
12274             else
12275                 /* If this swap doesn't work then SWAP3 won't either. */
12276                 sp->ts_state = STATE_REP_INI;
12277             break;
12278
12279         case STATE_UNSWAP:
12280             /* Undo the STATE_SWAP swap: "21" -> "12". */
12281             p = fword + sp->ts_fidx;
12282 #ifdef FEAT_MBYTE
12283             if (has_mbyte)
12284             {
12285                 n = MB_BYTE2LEN(*p);
12286                 c = mb_ptr2char(p + n);
12287                 mch_memmove(p + MB_BYTE2LEN(p[n]), p, n);
12288                 mb_char2bytes(c, p);
12289             }
12290             else
12291 #endif
12292             {
12293                 c = *p;
12294                 *p = p[1];
12295                 p[1] = c;
12296             }
12297             /*FALLTHROUGH*/
12298
12299         case STATE_SWAP3:
12300             /* Swap two bytes, skipping one: "123" -> "321".  We change
12301              * "fword" here, it's changed back afterwards at STATE_UNSWAP3. */
12302             p = fword + sp->ts_fidx;
12303 #ifdef FEAT_MBYTE
12304             if (has_mbyte)
12305             {
12306                 n = mb_cptr2len(p);
12307                 c = mb_ptr2char(p);
12308                 fl = mb_cptr2len(p + n);
12309                 c2 = mb_ptr2char(p + n);
12310                 if (!soundfold && !spell_iswordp(p + n + fl, curbuf))
12311                     c3 = c;     /* don't swap non-word char */
12312                 else
12313                     c3 = mb_ptr2char(p + n + fl);
12314             }
12315             else
12316 #endif
12317             {
12318                 c = *p;
12319                 c2 = p[1];
12320                 if (!soundfold && !spell_iswordp(p + 2, curbuf))
12321                     c3 = c;     /* don't swap non-word char */
12322                 else
12323                     c3 = p[2];
12324             }
12325
12326             /* When characters are identical: "121" then SWAP3 result is
12327              * identical, ROT3L result is same as SWAP: "211", ROT3L result is
12328              * same as SWAP on next char: "112".  Thus skip all swapping.
12329              * Also skip when c3 is NUL.
12330              * Also get here when the third character is not a word character.
12331              * Second character may any char: "a.b" -> "b.a" */
12332             if (c == c3 || c3 == NUL)
12333             {
12334                 sp->ts_state = STATE_REP_INI;
12335                 break;
12336             }
12337             if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
12338             {
12339                 go_deeper(stack, depth, SCORE_SWAP3);
12340 #ifdef DEBUG_TRIEWALK
12341                 sprintf(changename[depth], "%.*s-%s: swap3 %c and %c",
12342                         sp->ts_twordlen, tword, fword + sp->ts_fidx,
12343                         c, c3);
12344 #endif
12345                 sp->ts_state = STATE_UNSWAP3;
12346                 ++depth;
12347 #ifdef FEAT_MBYTE
12348                 if (has_mbyte)
12349                 {
12350                     tl = mb_char2len(c3);
12351                     mch_memmove(p, p + n + fl, tl);
12352                     mb_char2bytes(c2, p + tl);
12353                     mb_char2bytes(c, p + fl + tl);
12354                     stack[depth].ts_fidxtry = sp->ts_fidx + n + fl + tl;
12355                 }
12356                 else
12357 #endif
12358                 {
12359                     p[0] = p[2];
12360                     p[2] = c;
12361                     stack[depth].ts_fidxtry = sp->ts_fidx + 3;
12362                 }
12363             }
12364             else
12365                 sp->ts_state = STATE_REP_INI;
12366             break;
12367
12368         case STATE_UNSWAP3:
12369             /* Undo STATE_SWAP3: "321" -> "123" */
12370             p = fword + sp->ts_fidx;
12371 #ifdef FEAT_MBYTE
12372             if (has_mbyte)
12373             {
12374                 n = MB_BYTE2LEN(*p);
12375                 c2 = mb_ptr2char(p + n);
12376                 fl = MB_BYTE2LEN(p[n]);
12377                 c = mb_ptr2char(p + n + fl);
12378                 tl = MB_BYTE2LEN(p[n + fl]);
12379                 mch_memmove(p + fl + tl, p, n);
12380                 mb_char2bytes(c, p);
12381                 mb_char2bytes(c2, p + tl);
12382                 p = p + tl;
12383             }
12384             else
12385 #endif
12386             {
12387                 c = *p;
12388                 *p = p[2];
12389                 p[2] = c;
12390                 ++p;
12391             }
12392
12393             if (!soundfold && !spell_iswordp(p, curbuf))
12394             {
12395                 /* Middle char is not a word char, skip the rotate.  First and
12396                  * third char were already checked at swap and swap3. */
12397                 sp->ts_state = STATE_REP_INI;
12398                 break;
12399             }
12400
12401             /* Rotate three characters left: "123" -> "231".  We change
12402              * "fword" here, it's changed back afterwards at STATE_UNROT3L. */
12403             if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
12404             {
12405                 go_deeper(stack, depth, SCORE_SWAP3);
12406 #ifdef DEBUG_TRIEWALK
12407                 p = fword + sp->ts_fidx;
12408                 sprintf(changename[depth], "%.*s-%s: rotate left %c%c%c",
12409                         sp->ts_twordlen, tword, fword + sp->ts_fidx,
12410                         p[0], p[1], p[2]);
12411 #endif
12412                 sp->ts_state = STATE_UNROT3L;
12413                 ++depth;
12414                 p = fword + sp->ts_fidx;
12415 #ifdef FEAT_MBYTE
12416                 if (has_mbyte)
12417                 {
12418                     n = mb_cptr2len(p);
12419                     c = mb_ptr2char(p);
12420                     fl = mb_cptr2len(p + n);
12421                     fl += mb_cptr2len(p + n + fl);
12422                     mch_memmove(p, p + n, fl);
12423                     mb_char2bytes(c, p + fl);
12424                     stack[depth].ts_fidxtry = sp->ts_fidx + n + fl;
12425                 }
12426                 else
12427 #endif
12428                 {
12429                     c = *p;
12430                     *p = p[1];
12431                     p[1] = p[2];
12432                     p[2] = c;
12433                     stack[depth].ts_fidxtry = sp->ts_fidx + 3;
12434                 }
12435             }
12436             else
12437                 sp->ts_state = STATE_REP_INI;
12438             break;
12439
12440         case STATE_UNROT3L:
12441             /* Undo ROT3L: "231" -> "123" */
12442             p = fword + sp->ts_fidx;
12443 #ifdef FEAT_MBYTE
12444             if (has_mbyte)
12445             {
12446                 n = MB_BYTE2LEN(*p);
12447                 n += MB_BYTE2LEN(p[n]);
12448                 c = mb_ptr2char(p + n);
12449                 tl = MB_BYTE2LEN(p[n]);
12450                 mch_memmove(p + tl, p, n);
12451                 mb_char2bytes(c, p);
12452             }
12453             else
12454 #endif
12455             {
12456                 c = p[2];
12457                 p[2] = p[1];
12458                 p[1] = *p;
12459                 *p = c;
12460             }
12461
12462             /* Rotate three bytes right: "123" -> "312".  We change "fword"
12463              * here, it's changed back afterwards at STATE_UNROT3R. */
12464             if (TRY_DEEPER(su, stack, depth, SCORE_SWAP3))
12465             {
12466                 go_deeper(stack, depth, SCORE_SWAP3);
12467 #ifdef DEBUG_TRIEWALK
12468                 p = fword + sp->ts_fidx;
12469                 sprintf(changename[depth], "%.*s-%s: rotate right %c%c%c",
12470                         sp->ts_twordlen, tword, fword + sp->ts_fidx,
12471                         p[0], p[1], p[2]);
12472 #endif
12473                 sp->ts_state = STATE_UNROT3R;
12474                 ++depth;
12475                 p = fword + sp->ts_fidx;
12476 #ifdef FEAT_MBYTE
12477                 if (has_mbyte)
12478                 {
12479                     n = mb_cptr2len(p);
12480                     n += mb_cptr2len(p + n);
12481                     c = mb_ptr2char(p + n);
12482                     tl = mb_cptr2len(p + n);
12483                     mch_memmove(p + tl, p, n);
12484                     mb_char2bytes(c, p);
12485                     stack[depth].ts_fidxtry = sp->ts_fidx + n + tl;
12486                 }
12487                 else
12488 #endif
12489                 {
12490                     c = p[2];
12491                     p[2] = p[1];
12492                     p[1] = *p;
12493                     *p = c;
12494                     stack[depth].ts_fidxtry = sp->ts_fidx + 3;
12495                 }
12496             }
12497             else
12498                 sp->ts_state = STATE_REP_INI;
12499             break;
12500
12501         case STATE_UNROT3R:
12502             /* Undo ROT3R: "312" -> "123" */
12503             p = fword + sp->ts_fidx;
12504 #ifdef FEAT_MBYTE
12505             if (has_mbyte)
12506             {
12507                 c = mb_ptr2char(p);
12508                 tl = MB_BYTE2LEN(*p);
12509                 n = MB_BYTE2LEN(p[tl]);
12510                 n += MB_BYTE2LEN(p[tl + n]);
12511                 mch_memmove(p, p + tl, n);
12512                 mb_char2bytes(c, p + n);
12513             }
12514             else
12515 #endif
12516             {
12517                 c = *p;
12518                 *p = p[1];
12519                 p[1] = p[2];
12520                 p[2] = c;
12521             }
12522             /*FALLTHROUGH*/
12523
12524         case STATE_REP_INI:
12525             /* Check if matching with REP items from the .aff file would work.
12526              * Quickly skip if:
12527              * - there are no REP items and we are not in the soundfold trie
12528              * - the score is going to be too high anyway
12529              * - already applied a REP item or swapped here  */
12530             if ((lp->lp_replang == NULL && !soundfold)
12531                     || sp->ts_score + SCORE_REP >= su->su_maxscore
12532                     || sp->ts_fidx < sp->ts_fidxtry)
12533             {
12534                 sp->ts_state = STATE_FINAL;
12535                 break;
12536             }
12537
12538             /* Use the first byte to quickly find the first entry that may
12539              * match.  If the index is -1 there is none. */
12540             if (soundfold)
12541                 sp->ts_curi = slang->sl_repsal_first[fword[sp->ts_fidx]];
12542             else
12543                 sp->ts_curi = lp->lp_replang->sl_rep_first[fword[sp->ts_fidx]];
12544
12545             if (sp->ts_curi < 0)
12546             {
12547                 sp->ts_state = STATE_FINAL;
12548                 break;
12549             }
12550
12551             sp->ts_state = STATE_REP;
12552             /*FALLTHROUGH*/
12553
12554         case STATE_REP:
12555             /* Try matching with REP items from the .aff file.  For each match
12556              * replace the characters and check if the resulting word is
12557              * valid. */
12558             p = fword + sp->ts_fidx;
12559
12560             if (soundfold)
12561                 gap = &slang->sl_repsal;
12562             else
12563                 gap = &lp->lp_replang->sl_rep;
12564             while (sp->ts_curi < gap->ga_len)
12565             {
12566                 ftp = (fromto_T *)gap->ga_data + sp->ts_curi++;
12567                 if (*ftp->ft_from != *p)
12568                 {
12569                     /* past possible matching entries */
12570                     sp->ts_curi = gap->ga_len;
12571                     break;
12572                 }
12573                 if (STRNCMP(ftp->ft_from, p, STRLEN(ftp->ft_from)) == 0
12574                         && TRY_DEEPER(su, stack, depth, SCORE_REP))
12575                 {
12576                     go_deeper(stack, depth, SCORE_REP);
12577 #ifdef DEBUG_TRIEWALK
12578                     sprintf(changename[depth], "%.*s-%s: replace %s with %s",
12579                             sp->ts_twordlen, tword, fword + sp->ts_fidx,
12580                             ftp->ft_from, ftp->ft_to);
12581 #endif
12582                     /* Need to undo this afterwards. */
12583                     sp->ts_state = STATE_REP_UNDO;
12584
12585                     /* Change the "from" to the "to" string. */
12586                     ++depth;
12587                     fl = (int)STRLEN(ftp->ft_from);
12588                     tl = (int)STRLEN(ftp->ft_to);
12589                     if (fl != tl)
12590                     {
12591                         STRMOVE(p + tl, p + fl);
12592                         repextra += tl - fl;
12593                     }
12594                     mch_memmove(p, ftp->ft_to, tl);
12595                     stack[depth].ts_fidxtry = sp->ts_fidx + tl;
12596 #ifdef FEAT_MBYTE
12597                     stack[depth].ts_tcharlen = 0;
12598 #endif
12599                     break;
12600                 }
12601             }
12602
12603             if (sp->ts_curi >= gap->ga_len && sp->ts_state == STATE_REP)
12604                 /* No (more) matches. */
12605                 sp->ts_state = STATE_FINAL;
12606
12607             break;
12608
12609         case STATE_REP_UNDO:
12610             /* Undo a REP replacement and continue with the next one. */
12611             if (soundfold)
12612                 gap = &slang->sl_repsal;
12613             else
12614                 gap = &lp->lp_replang->sl_rep;
12615             ftp = (fromto_T *)gap->ga_data + sp->ts_curi - 1;
12616             fl = (int)STRLEN(ftp->ft_from);
12617             tl = (int)STRLEN(ftp->ft_to);
12618             p = fword + sp->ts_fidx;
12619             if (fl != tl)
12620             {
12621                 STRMOVE(p + fl, p + tl);
12622                 repextra -= tl - fl;
12623             }
12624             mch_memmove(p, ftp->ft_from, fl);
12625             sp->ts_state = STATE_REP;
12626             break;
12627
12628         default:
12629             /* Did all possible states at this level, go up one level. */
12630             --depth;
12631
12632             if (depth >= 0 && stack[depth].ts_prefixdepth == PFD_PREFIXTREE)
12633             {
12634                 /* Continue in or go back to the prefix tree. */
12635                 byts = pbyts;
12636                 idxs = pidxs;
12637             }
12638
12639             /* Don't check for CTRL-C too often, it takes time. */
12640             if (--breakcheckcount == 0)
12641             {
12642                 ui_breakcheck();
12643                 breakcheckcount = 1000;
12644             }
12645         }
12646     }
12647 }
12648
12649
12650 /*
12651  * Go one level deeper in the tree.
12652  */
12653     static void
12654 go_deeper(stack, depth, score_add)
12655     trystate_T  *stack;
12656     int         depth;
12657     int         score_add;
12658 {
12659     stack[depth + 1] = stack[depth];
12660     stack[depth + 1].ts_state = STATE_START;
12661     stack[depth + 1].ts_score = stack[depth].ts_score + score_add;
12662     stack[depth + 1].ts_curi = 1;       /* start just after length byte */
12663     stack[depth + 1].ts_flags = 0;
12664 }
12665
#ifdef FEAT_MBYTE
/*
 * Convert a byte length in the case-folded word into a byte length in the
 * word that was not case-folded.  Needed because case-folding may change the
 * number of bytes of a character.
 * Counts the characters in the first "flen" bytes of "fword" and returns the
 * number of bytes that the same number of characters occupy in "word".
 */
    static int
nofold_len(fword, flen, word)
    char_u      *fword;
    int         flen;
    char_u      *word;
{
    char_u      *fend = fword + flen;
    char_u      *s;
    int         nchars = 0;

    /* Count the characters in the folded part. */
    s = fword;
    while (s < fend)
    {
        mb_ptr_adv(s);
        ++nchars;
    }

    /* Skip over the same number of characters in "word". */
    s = word;
    while (nchars > 0)
    {
        mb_ptr_adv(s);
        --nchars;
    }

    return (int)(s - word);
}
#endif
12687
12688 /*
12689  * "fword" is a good word with case folded.  Find the matching keep-case
12690  * words and put it in "kword".
12691  * Theoretically there could be several keep-case words that result in the
12692  * same case-folded word, but we only find one...
12693  */
12694     static void
12695 find_keepcap_word(slang, fword, kword)
12696     slang_T     *slang;
12697     char_u      *fword;
12698     char_u      *kword;
12699 {
12700     char_u      uword[MAXWLEN];         /* "fword" in upper-case */
12701     int         depth;
12702     idx_T       tryidx;
12703
12704     /* The following arrays are used at each depth in the tree. */
12705     idx_T       arridx[MAXWLEN];
12706     int         round[MAXWLEN];
12707     int         fwordidx[MAXWLEN];
12708     int         uwordidx[MAXWLEN];
12709     int         kwordlen[MAXWLEN];
12710
12711     int         flen, ulen;
12712     int         l;
12713     int         len;
12714     int         c;
12715     idx_T       lo, hi, m;
12716     char_u      *p;
12717     char_u      *byts = slang->sl_kbyts;    /* array with bytes of the words */
12718     idx_T       *idxs = slang->sl_kidxs;    /* array with indexes */
12719
12720     if (byts == NULL)
12721     {
12722         /* array is empty: "cannot happen" */
12723         *kword = NUL;
12724         return;
12725     }
12726
12727     /* Make an all-cap version of "fword". */
12728     allcap_copy(fword, uword);
12729
12730     /*
12731      * Each character needs to be tried both case-folded and upper-case.
12732      * All this gets very complicated if we keep in mind that changing case
12733      * may change the byte length of a multi-byte character...
12734      */
12735     depth = 0;
12736     arridx[0] = 0;
12737     round[0] = 0;
12738     fwordidx[0] = 0;
12739     uwordidx[0] = 0;
12740     kwordlen[0] = 0;
12741     while (depth >= 0)
12742     {
12743         if (fword[fwordidx[depth]] == NUL)
12744         {
12745             /* We are at the end of "fword".  If the tree allows a word to end
12746              * here we have found a match. */
12747             if (byts[arridx[depth] + 1] == 0)
12748             {
12749                 kword[kwordlen[depth]] = NUL;
12750                 return;
12751             }
12752
12753             /* kword is getting too long, continue one level up */
12754             --depth;
12755         }
12756         else if (++round[depth] > 2)
12757         {
12758             /* tried both fold-case and upper-case character, continue one
12759              * level up */
12760             --depth;
12761         }
12762         else
12763         {
12764             /*
12765              * round[depth] == 1: Try using the folded-case character.
12766              * round[depth] == 2: Try using the upper-case character.
12767              */
12768 #ifdef FEAT_MBYTE
12769             if (has_mbyte)
12770             {
12771                 flen = mb_cptr2len(fword + fwordidx[depth]);
12772                 ulen = mb_cptr2len(uword + uwordidx[depth]);
12773             }
12774             else
12775 #endif
12776                 ulen = flen = 1;
12777             if (round[depth] == 1)
12778             {
12779                 p = fword + fwordidx[depth];
12780                 l = flen;
12781             }
12782             else
12783             {
12784                 p = uword + uwordidx[depth];
12785                 l = ulen;
12786             }
12787
12788             for (tryidx = arridx[depth]; l > 0; --l)
12789             {
12790                 /* Perform a binary search in the list of accepted bytes. */
12791                 len = byts[tryidx++];
12792                 c = *p++;
12793                 lo = tryidx;
12794                 hi = tryidx + len - 1;
12795                 while (lo < hi)
12796                 {
12797                     m = (lo + hi) / 2;
12798                     if (byts[m] > c)
12799                         hi = m - 1;
12800                     else if (byts[m] < c)
12801                         lo = m + 1;
12802                     else
12803                     {
12804                         lo = hi = m;
12805                         break;
12806                     }
12807                 }
12808
12809                 /* Stop if there is no matching byte. */
12810                 if (hi < lo || byts[lo] != c)
12811                     break;
12812
12813                 /* Continue at the child (if there is one). */
12814                 tryidx = idxs[lo];
12815             }
12816
12817             if (l == 0)
12818             {
12819                 /*
12820                  * Found the matching char.  Copy it to "kword" and go a
12821                  * level deeper.
12822                  */
12823                 if (round[depth] == 1)
12824                 {
12825                     STRNCPY(kword + kwordlen[depth], fword + fwordidx[depth],
12826                                                                         flen);
12827                     kwordlen[depth + 1] = kwordlen[depth] + flen;
12828                 }
12829                 else
12830                 {
12831                     STRNCPY(kword + kwordlen[depth], uword + uwordidx[depth],
12832                                                                         ulen);
12833                     kwordlen[depth + 1] = kwordlen[depth] + ulen;
12834                 }
12835                 fwordidx[depth + 1] = fwordidx[depth] + flen;
12836                 uwordidx[depth + 1] = uwordidx[depth] + ulen;
12837
12838                 ++depth;
12839                 arridx[depth] = tryidx;
12840                 round[depth] = 0;
12841             }
12842         }
12843     }
12844
12845     /* Didn't find it: "cannot happen". */
12846     *kword = NUL;
12847 }
12848
12849 /*
12850  * Compute the sound-a-like score for suggestions in su->su_ga and add them to
12851  * su->su_sga.
12852  */
12853     static void
12854 score_comp_sal(su)
12855     suginfo_T   *su;
12856 {
12857     langp_T     *lp;
12858     char_u      badsound[MAXWLEN];
12859     int         i;
12860     suggest_T   *stp;
12861     suggest_T   *sstp;
12862     int         score;
12863     int         lpi;
12864
12865     if (ga_grow(&su->su_sga, su->su_ga.ga_len) == FAIL)
12866         return;
12867
12868     /*  Use the sound-folding of the first language that supports it. */
12869     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
12870     {
12871         lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12872         if (lp->lp_slang->sl_sal.ga_len > 0)
12873         {
12874             /* soundfold the bad word */
12875             spell_soundfold(lp->lp_slang, su->su_fbadword, TRUE, badsound);
12876
12877             for (i = 0; i < su->su_ga.ga_len; ++i)
12878             {
12879                 stp = &SUG(su->su_ga, i);
12880
12881                 /* Case-fold the suggested word, sound-fold it and compute the
12882                  * sound-a-like score. */
12883                 score = stp_sal_score(stp, su, lp->lp_slang, badsound);
12884                 if (score < SCORE_MAXMAX)
12885                 {
12886                     /* Add the suggestion. */
12887                     sstp = &SUG(su->su_sga, su->su_sga.ga_len);
12888                     sstp->st_word = vim_strsave(stp->st_word);
12889                     if (sstp->st_word != NULL)
12890                     {
12891                         sstp->st_wordlen = stp->st_wordlen;
12892                         sstp->st_score = score;
12893                         sstp->st_altscore = 0;
12894                         sstp->st_orglen = stp->st_orglen;
12895                         ++su->su_sga.ga_len;
12896                     }
12897                 }
12898             }
12899             break;
12900         }
12901     }
12902 }
12903
12904 /*
12905  * Combine the list of suggestions in su->su_ga and su->su_sga.
12906  * They are intwined.
12907  */
12908     static void
12909 score_combine(su)
12910     suginfo_T   *su;
12911 {
12912     int         i;
12913     int         j;
12914     garray_T    ga;
12915     garray_T    *gap;
12916     langp_T     *lp;
12917     suggest_T   *stp;
12918     char_u      *p;
12919     char_u      badsound[MAXWLEN];
12920     int         round;
12921     int         lpi;
12922     slang_T     *slang = NULL;
12923
12924     /* Add the alternate score to su_ga. */
12925     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
12926     {
12927         lp = LANGP_ENTRY(curbuf->b_langp, lpi);
12928         if (lp->lp_slang->sl_sal.ga_len > 0)
12929         {
12930             /* soundfold the bad word */
12931             slang = lp->lp_slang;
12932             spell_soundfold(slang, su->su_fbadword, TRUE, badsound);
12933
12934             for (i = 0; i < su->su_ga.ga_len; ++i)
12935             {
12936                 stp = &SUG(su->su_ga, i);
12937                 stp->st_altscore = stp_sal_score(stp, su, slang, badsound);
12938                 if (stp->st_altscore == SCORE_MAXMAX)
12939                     stp->st_score = (stp->st_score * 3 + SCORE_BIG) / 4;
12940                 else
12941                     stp->st_score = (stp->st_score * 3
12942                                                   + stp->st_altscore) / 4;
12943                 stp->st_salscore = FALSE;
12944             }
12945             break;
12946         }
12947     }
12948
12949     if (slang == NULL)  /* Using "double" without sound folding. */
12950     {
12951         (void)cleanup_suggestions(&su->su_ga, su->su_maxscore,
12952                                                              su->su_maxcount);
12953         return;
12954     }
12955
12956     /* Add the alternate score to su_sga. */
12957     for (i = 0; i < su->su_sga.ga_len; ++i)
12958     {
12959         stp = &SUG(su->su_sga, i);
12960         stp->st_altscore = spell_edit_score(slang,
12961                                                 su->su_badword, stp->st_word);
12962         if (stp->st_score == SCORE_MAXMAX)
12963             stp->st_score = (SCORE_BIG * 7 + stp->st_altscore) / 8;
12964         else
12965             stp->st_score = (stp->st_score * 7 + stp->st_altscore) / 8;
12966         stp->st_salscore = TRUE;
12967     }
12968
12969     /* Remove bad suggestions, sort the suggestions and truncate at "maxcount"
12970      * for both lists. */
12971     check_suggestions(su, &su->su_ga);
12972     (void)cleanup_suggestions(&su->su_ga, su->su_maxscore, su->su_maxcount);
12973     check_suggestions(su, &su->su_sga);
12974     (void)cleanup_suggestions(&su->su_sga, su->su_maxscore, su->su_maxcount);
12975
12976     ga_init2(&ga, (int)sizeof(suginfo_T), 1);
12977     if (ga_grow(&ga, su->su_ga.ga_len + su->su_sga.ga_len) == FAIL)
12978         return;
12979
12980     stp = &SUG(ga, 0);
12981     for (i = 0; i < su->su_ga.ga_len || i < su->su_sga.ga_len; ++i)
12982     {
12983         /* round 1: get a suggestion from su_ga
12984          * round 2: get a suggestion from su_sga */
12985         for (round = 1; round <= 2; ++round)
12986         {
12987             gap = round == 1 ? &su->su_ga : &su->su_sga;
12988             if (i < gap->ga_len)
12989             {
12990                 /* Don't add a word if it's already there. */
12991                 p = SUG(*gap, i).st_word;
12992                 for (j = 0; j < ga.ga_len; ++j)
12993                     if (STRCMP(stp[j].st_word, p) == 0)
12994                         break;
12995                 if (j == ga.ga_len)
12996                     stp[ga.ga_len++] = SUG(*gap, i);
12997                 else
12998                     vim_free(p);
12999             }
13000         }
13001     }
13002
13003     ga_clear(&su->su_ga);
13004     ga_clear(&su->su_sga);
13005
13006     /* Truncate the list to the number of suggestions that will be displayed. */
13007     if (ga.ga_len > su->su_maxcount)
13008     {
13009         for (i = su->su_maxcount; i < ga.ga_len; ++i)
13010             vim_free(stp[i].st_word);
13011         ga.ga_len = su->su_maxcount;
13012     }
13013
13014     su->su_ga = ga;
13015 }
13016
13017 /*
13018  * For the goodword in "stp" compute the soundalike score compared to the
13019  * badword.
13020  */
13021     static int
13022 stp_sal_score(stp, su, slang, badsound)
13023     suggest_T   *stp;
13024     suginfo_T   *su;
13025     slang_T     *slang;
13026     char_u      *badsound;      /* sound-folded badword */
13027 {
13028     char_u      *p;
13029     char_u      *pbad;
13030     char_u      *pgood;
13031     char_u      badsound2[MAXWLEN];
13032     char_u      fword[MAXWLEN];
13033     char_u      goodsound[MAXWLEN];
13034     char_u      goodword[MAXWLEN];
13035     int         lendiff;
13036
13037     lendiff = (int)(su->su_badlen - stp->st_orglen);
13038     if (lendiff >= 0)
13039         pbad = badsound;
13040     else
13041     {
13042         /* soundfold the bad word with more characters following */
13043         (void)spell_casefold(su->su_badptr, stp->st_orglen, fword, MAXWLEN);
13044
13045         /* When joining two words the sound often changes a lot.  E.g., "t he"
13046          * sounds like "t h" while "the" sounds like "@".  Avoid that by
13047          * removing the space.  Don't do it when the good word also contains a
13048          * space. */
13049         if (vim_iswhite(su->su_badptr[su->su_badlen])
13050                                          && *skiptowhite(stp->st_word) == NUL)
13051             for (p = fword; *(p = skiptowhite(p)) != NUL; )
13052                 STRMOVE(p, p + 1);
13053
13054         spell_soundfold(slang, fword, TRUE, badsound2);
13055         pbad = badsound2;
13056     }
13057
13058     if (lendiff > 0)
13059     {
13060         /* Add part of the bad word to the good word, so that we soundfold
13061          * what replaces the bad word. */
13062         STRCPY(goodword, stp->st_word);
13063         vim_strncpy(goodword + stp->st_wordlen,
13064                             su->su_badptr + su->su_badlen - lendiff, lendiff);
13065         pgood = goodword;
13066     }
13067     else
13068         pgood = stp->st_word;
13069
13070     /* Sound-fold the word and compute the score for the difference. */
13071     spell_soundfold(slang, pgood, FALSE, goodsound);
13072
13073     return soundalike_score(goodsound, pbad);
13074 }
13075
13076 /* structure used to store soundfolded words that add_sound_suggest() has
13077  * handled already. */
13078 typedef struct
13079 {
13080     short       sft_score;      /* lowest score used */
13081     char_u      sft_word[1];    /* soundfolded word, actually longer */
13082 } sftword_T;
13083
13084 static sftword_T dumsft;
13085 #define HIKEY2SFT(p)  ((sftword_T *)(p - (dumsft.sft_word - (char_u *)&dumsft)))
13086 #define HI2SFT(hi)     HIKEY2SFT((hi)->hi_key)
13087
13088 /*
13089  * Prepare for calling suggest_try_soundalike().
13090  */
13091     static void
13092 suggest_try_soundalike_prep()
13093 {
13094     langp_T     *lp;
13095     int         lpi;
13096     slang_T     *slang;
13097
13098     /* Do this for all languages that support sound folding and for which a
13099      * .sug file has been loaded. */
13100     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
13101     {
13102         lp = LANGP_ENTRY(curbuf->b_langp, lpi);
13103         slang = lp->lp_slang;
13104         if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
13105             /* prepare the hashtable used by add_sound_suggest() */
13106             hash_init(&slang->sl_sounddone);
13107     }
13108 }
13109
13110 /*
13111  * Find suggestions by comparing the word in a sound-a-like form.
13112  * Note: This doesn't support postponed prefixes.
13113  */
13114     static void
13115 suggest_try_soundalike(su)
13116     suginfo_T   *su;
13117 {
13118     char_u      salword[MAXWLEN];
13119     langp_T     *lp;
13120     int         lpi;
13121     slang_T     *slang;
13122
13123     /* Do this for all languages that support sound folding and for which a
13124      * .sug file has been loaded. */
13125     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
13126     {
13127         lp = LANGP_ENTRY(curbuf->b_langp, lpi);
13128         slang = lp->lp_slang;
13129         if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
13130         {
13131             /* soundfold the bad word */
13132             spell_soundfold(slang, su->su_fbadword, TRUE, salword);
13133
13134             /* try all kinds of inserts/deletes/swaps/etc. */
13135             /* TODO: also soundfold the next words, so that we can try joining
13136              * and splitting */
13137             suggest_trie_walk(su, lp, salword, TRUE);
13138         }
13139     }
13140 }
13141
13142 /*
13143  * Finish up after calling suggest_try_soundalike().
13144  */
13145     static void
13146 suggest_try_soundalike_finish()
13147 {
13148     langp_T     *lp;
13149     int         lpi;
13150     slang_T     *slang;
13151     int         todo;
13152     hashitem_T  *hi;
13153
13154     /* Do this for all languages that support sound folding and for which a
13155      * .sug file has been loaded. */
13156     for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
13157     {
13158         lp = LANGP_ENTRY(curbuf->b_langp, lpi);
13159         slang = lp->lp_slang;
13160         if (slang->sl_sal.ga_len > 0 && slang->sl_sbyts != NULL)
13161         {
13162             /* Free the info about handled words. */
13163             todo = (int)slang->sl_sounddone.ht_used;
13164             for (hi = slang->sl_sounddone.ht_array; todo > 0; ++hi)
13165                 if (!HASHITEM_EMPTY(hi))
13166                 {
13167                     vim_free(HI2SFT(hi));
13168                     --todo;
13169                 }
13170
13171             /* Clear the hashtable, it may also be used by another region. */
13172             hash_clear(&slang->sl_sounddone);
13173             hash_init(&slang->sl_sounddone);
13174         }
13175     }
13176 }
13177
/*
 * A match with a soundfolded word is found.  Add the good word(s) that
 * produce this soundfolded word.
 *
 * "goodword" is the soundfolded word that was matched; it is the key used in
 * the "sl_sounddone" hashtable and the word looked up with soundfold_find().
 * "score" is the soundfold score of that match.
 * "lp" supplies the language (lp_slang) and the region (lp_region).
 *
 * With SPS_DOUBLE set in "sps_flags" suggestions are added to su->su_sga
 * with the soundfold score.  Otherwise an edit score is computed, combined
 * with the soundfold score and the suggestion is added to su->su_ga.
 */
    static void
add_sound_suggest(su, goodword, score, lp)
    suginfo_T   *su;
    char_u      *goodword;
    int         score;          /* soundfold score  */
    langp_T     *lp;
{
    slang_T     *slang = lp->lp_slang;  /* language for sound folding */
    int         sfwordnr;
    char_u      *nrline;
    int         orgnr;
    char_u      theword[MAXWLEN];
    int         i;
    int         wlen;
    char_u      *byts;
    idx_T       *idxs;
    int         n;
    int         wordcount;
    int         wc;
    int         goodscore;
    hash_T      hash;
    hashitem_T  *hi;
    sftword_T   *sft;
    int         bc, gc;
    int         limit;

    /*
     * It's very well possible that the same soundfold word is found several
     * times with different scores.  Since the following is quite slow only do
     * the words that have a better score than before.  Use a hashtable to
     * remember the words that have been done.
     */
    hash = hash_hash(goodword);
    hi = hash_lookup(&slang->sl_sounddone, goodword, hash);
    if (HASHITEM_EMPTY(hi))
    {
        /* First time this word is seen: remember it with its score.
         * sftword_T already has room for one byte of sft_word, which is used
         * for the terminating NUL. */
        sft = (sftword_T *)alloc((unsigned)(sizeof(sftword_T)
                                                         + STRLEN(goodword)));
        /* When out of memory the word is not remembered, but it is still
         * handled below. */
        if (sft != NULL)
        {
            sft->sft_score = score;
            STRCPY(sft->sft_word, goodword);
            hash_add_item(&slang->sl_sounddone, hi, sft->sft_word, hash);
        }
    }
    else
    {
        /* Seen before: only continue when the score is better (lower). */
        sft = HI2SFT(hi);
        if (score >= sft->sft_score)
            return;
        sft->sft_score = score;
    }

    /*
     * Find the word nr in the soundfold tree.
     */
    sfwordnr = soundfold_find(slang, goodword);
    if (sfwordnr < 0)
    {
        EMSG2(_(e_intern2), "add_sound_suggest()");
        return;
    }

    /*
     * go over the list of good words that produce this soundfold word
     */
    /* The line with number "sfwordnr + 1" in sl_sugbuf holds the word
     * numbers, terminated by a NUL. */
    nrline = ml_get_buf(slang->sl_sugbuf, (linenr_T)(sfwordnr + 1), FALSE);
    orgnr = 0;
    while (*nrline != NUL)
    {
        /* The wordnr was stored in a minimal nr of bytes as an offset to the
         * previous wordnr. */
        orgnr += bytes2offset(&nrline);

        byts = slang->sl_fbyts;
        idxs = slang->sl_fidxs;

        /* Lookup the word "orgnr" in the tree stored in sl_fbyts and
         * sl_fidxs.  "n" is the index of the current node, "wordcount" the
         * number of words passed so far and "theword" collects the bytes of
         * the word.
         * NOTE(review): "wlen" is not checked against MAXWLEN here; this
         * presumably relies on the tree not containing longer words --
         * confirm. */
        n = 0;
        wlen = 0;
        wordcount = 0;
        for (;;)
        {
            i = 1;
            if (wordcount == orgnr && byts[n + 1] == NUL)
                break;  /* found end of word */

            /* A word ends at this node, but it is not the one we want. */
            if (byts[n + 1] == NUL)
                ++wordcount;

            /* skip over the NUL bytes */
            for ( ; byts[n + i] == NUL; ++i)
                if (i > byts[n])        /* safety check */
                {
                    /* Note that the "BAD" text is cut off again by the NUL
                     * stored at theword[wlen] below the "badword" label. */
                    STRCPY(theword + wlen, "BAD");
                    goto badword;
                }

            /* One of the siblings must have the word. */
            for ( ; i < byts[n]; ++i)
            {
                wc = idxs[idxs[n + i]]; /* nr of words under this byte */
                if (wordcount + wc > orgnr)
                    break;
                wordcount += wc;
            }

            /* Descend into the sibling that contains word "orgnr". */
            theword[wlen++] = byts[n + i];
            n = idxs[n + i];
        }
badword:
        theword[wlen] = NUL;

        /* Go over the possible flags and regions. */
        /* Each NUL byte in this node has its flags in the matching idxs[]
         * entry. */
        for (; i <= byts[n] && byts[n + i] == NUL; ++i)
        {
            char_u      cword[MAXWLEN];
            char_u      *p;
            int         flags = (int)idxs[n + i];

            /* Skip words with the NOSUGGEST flag */
            if (flags & WF_NOSUGGEST)
                continue;

            if (flags & WF_KEEPCAP)
            {
                /* Must find the word in the keep-case tree. */
                find_keepcap_word(slang, theword, cword);
                p = cword;
            }
            else
            {
                flags |= su->su_badflags;
                if ((flags & WF_CAPMASK) != 0)
                {
                    /* Need to fix case according to "flags". */
                    make_case_word(theword, cword, flags);
                    p = cword;
                }
                else
                    p = theword;
            }

            /* Add the suggestion. */
            if (sps_flags & SPS_DOUBLE)
            {
                /* Add the suggestion if the score isn't too bad. */
                if (score <= su->su_maxscore)
                    add_suggestion(su, &su->su_sga, p, su->su_badlen,
                                               score, 0, FALSE, slang, FALSE);
            }
            else
            {
                /* Add a penalty for words in another region. */
                if ((flags & WF_REGION)
                            && (((unsigned)flags >> 16) & lp->lp_region) == 0)
                    goodscore = SCORE_REGION;
                else
                    goodscore = 0;

                /* Add a small penalty for changing the first letter from
                 * lower to upper case.  Helps for "tath" -> "Kath", which is
                 * less common than "tath" -> "path".  Don't do it when the
                 * letter is the same, that has already been counted. */
                gc = PTR2CHAR(p);
                if (SPELL_ISUPPER(gc))
                {
                    bc = PTR2CHAR(su->su_badword);
                    if (!SPELL_ISUPPER(bc)
                                      && SPELL_TOFOLD(bc) != SPELL_TOFOLD(gc))
                        goodscore += SCORE_ICASE / 2;
                }

                /* Compute the score for the good word.  This only does letter
                 * insert/delete/swap/replace.  REP items are not considered,
                 * which may make the score a bit higher.
                 * Use a limit for the score to make it work faster.  Use
                 * MAXSCORE(), because RESCORE() will change the score.
                 * If the limit is very high then the iterative method is
                 * inefficient, using an array is quicker. */
                limit = MAXSCORE(su->su_sfmaxscore - goodscore, score);
                if (limit > SCORE_LIMITMAX)
                    goodscore += spell_edit_score(slang, su->su_badword, p);
                else
                    goodscore += spell_edit_score_limit(slang, su->su_badword,
                                                                    p, limit);

                /* When going over the limit don't bother to do the rest. */
                if (goodscore < SCORE_MAXMAX)
                {
                    /* Give a bonus to words seen before. */
                    goodscore = score_wordcount_adj(slang, goodscore, p, FALSE);

                    /* Add the suggestion if the score isn't too bad. */
                    goodscore = RESCORE(goodscore, score);
                    if (goodscore <= su->su_sfmaxscore)
                        add_suggestion(su, &su->su_ga, p, su->su_badlen,
                                         goodscore, score, TRUE, slang, TRUE);
                }
            }
        }
        /* smsg("word %s (%d): %s (%d)", sftword, sftnr, theword, orgnr); */
    }
}
13386
13387 /*
13388  * Find word "word" in fold-case tree for "slang" and return the word number.
13389  */
13390     static int
13391 soundfold_find(slang, word)
13392     slang_T     *slang;
13393     char_u      *word;
13394 {
13395     idx_T       arridx = 0;
13396     int         len;
13397     int         wlen = 0;
13398     int         c;
13399     char_u      *ptr = word;
13400     char_u      *byts;
13401     idx_T       *idxs;
13402     int         wordnr = 0;
13403
13404     byts = slang->sl_sbyts;
13405     idxs = slang->sl_sidxs;
13406
13407     for (;;)
13408     {
13409         /* First byte is the number of possible bytes. */
13410         len = byts[arridx++];
13411
13412         /* If the first possible byte is a zero the word could end here.
13413          * If the word ends we found the word.  If not skip the NUL bytes. */
13414         c = ptr[wlen];
13415         if (byts[arridx] == NUL)
13416         {
13417             if (c == NUL)
13418                 break;
13419
13420             /* Skip over the zeros, there can be several. */
13421             while (len > 0 && byts[arridx] == NUL)
13422             {
13423                 ++arridx;
13424                 --len;
13425             }
13426             if (len == 0)
13427                 return -1;    /* no children, word should have ended here */
13428             ++wordnr;
13429         }
13430
13431         /* If the word ends we didn't find it. */
13432         if (c == NUL)
13433             return -1;
13434
13435         /* Perform a binary search in the list of accepted bytes. */
13436         if (c == TAB)       /* <Tab> is handled like <Space> */
13437             c = ' ';
13438         while (byts[arridx] < c)
13439         {
13440             /* The word count is in the first idxs[] entry of the child. */
13441             wordnr += idxs[idxs[arridx]];
13442             ++arridx;
13443             if (--len == 0)     /* end of the bytes, didn't find it */
13444                 return -1;
13445         }
13446         if (byts[arridx] != c)  /* didn't find the byte */
13447             return -1;
13448
13449         /* Continue at the child (if there is one). */
13450         arridx = idxs[arridx];
13451         ++wlen;
13452
13453         /* One space in the good word may stand for several spaces in the
13454          * checked word. */
13455         if (c == ' ')
13456             while (ptr[wlen] == ' ' || ptr[wlen] == TAB)
13457                 ++wlen;
13458     }
13459
13460     return wordnr;
13461 }
13462
13463 /*
13464  * Copy "fword" to "cword", fixing case according to "flags".
13465  */
13466     static void
13467 make_case_word(fword, cword, flags)
13468     char_u      *fword;
13469     char_u      *cword;
13470     int         flags;
13471 {
13472     if (flags & WF_ALLCAP)
13473         /* Make it all upper-case */
13474         allcap_copy(fword, cword);
13475     else if (flags & WF_ONECAP)
13476         /* Make the first letter upper-case */
13477         onecap_copy(fword, cword, TRUE);
13478     else
13479         /* Use goodword as-is. */
13480         STRCPY(cword, fword);
13481 }
13482
13483 /*
13484  * Use map string "map" for languages "lp".
13485  */
13486     static void
13487 set_map_str(lp, map)
13488     slang_T     *lp;
13489     char_u      *map;
13490 {
13491     char_u      *p;
13492     int         headc = 0;
13493     int         c;
13494     int         i;
13495
13496     if (*map == NUL)
13497     {
13498         lp->sl_has_map = FALSE;
13499         return;
13500     }
13501     lp->sl_has_map = TRUE;
13502
13503     /* Init the array and hash tables empty. */
13504     for (i = 0; i < 256; ++i)
13505         lp->sl_map_array[i] = 0;
13506 #ifdef FEAT_MBYTE
13507     hash_init(&lp->sl_map_hash);
13508 #endif
13509
13510     /*
13511      * The similar characters are stored separated with slashes:
13512      * "aaa/bbb/ccc/".  Fill sl_map_array[c] with the character before c and
13513      * before the same slash.  For characters above 255 sl_map_hash is used.
13514      */
13515     for (p = map; *p != NUL; )
13516     {
13517 #ifdef FEAT_MBYTE
13518         c = mb_cptr2char_adv(&p);
13519 #else
13520         c = *p++;
13521 #endif
13522         if (c == '/')
13523             headc = 0;
13524         else
13525         {
13526             if (headc == 0)
13527                  headc = c;
13528
13529 #ifdef FEAT_MBYTE
13530             /* Characters above 255 don't fit in sl_map_array[], put them in
13531              * the hash table.  Each entry is the char, a NUL the headchar and
13532              * a NUL. */
13533             if (c >= 256)
13534             {
13535                 int         cl = mb_char2len(c);
13536                 int         headcl = mb_char2len(headc);
13537                 char_u      *b;
13538                 hash_T      hash;
13539                 hashitem_T  *hi;
13540
13541                 b = alloc((unsigned)(cl + headcl + 2));
13542                 if (b == NULL)
13543                     return;
13544                 mb_char2bytes(c, b);
13545                 b[cl] = NUL;
13546                 mb_char2bytes(headc, b + cl + 1);
13547                 b[cl + 1 + headcl] = NUL;
13548                 hash = hash_hash(b);
13549                 hi = hash_lookup(&lp->sl_map_hash, b, hash);
13550                 if (HASHITEM_EMPTY(hi))
13551                     hash_add_item(&lp->sl_map_hash, hi, b, hash);
13552                 else
13553                 {
13554                     /* This should have been checked when generating the .spl
13555                      * file. */
13556                     EMSG(_("E783: duplicate char in MAP entry"));
13557                     vim_free(b);
13558                 }
13559             }
13560             else
13561 #endif
13562                 lp->sl_map_array[c] = headc;
13563         }
13564     }
13565 }
13566
13567 /*
13568  * Return TRUE if "c1" and "c2" are similar characters according to the MAP
13569  * lines in the .aff file.
13570  */
13571     static int
13572 similar_chars(slang, c1, c2)
13573     slang_T     *slang;
13574     int         c1;
13575     int         c2;
13576 {
13577     int         m1, m2;
13578 #ifdef FEAT_MBYTE
13579     char_u      buf[MB_MAXBYTES];
13580     hashitem_T  *hi;
13581
13582     if (c1 >= 256)
13583     {
13584         buf[mb_char2bytes(c1, buf)] = 0;
13585         hi = hash_find(&slang->sl_map_hash, buf);
13586         if (HASHITEM_EMPTY(hi))
13587             m1 = 0;
13588         else
13589             m1 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
13590     }
13591     else
13592 #endif
13593         m1 = slang->sl_map_array[c1];
13594     if (m1 == 0)
13595         return FALSE;
13596
13597
13598 #ifdef FEAT_MBYTE
13599     if (c2 >= 256)
13600     {
13601         buf[mb_char2bytes(c2, buf)] = 0;
13602         hi = hash_find(&slang->sl_map_hash, buf);
13603         if (HASHITEM_EMPTY(hi))
13604             m2 = 0;
13605         else
13606             m2 = mb_ptr2char(hi->hi_key + STRLEN(hi->hi_key) + 1);
13607     }
13608     else
13609 #endif
13610         m2 = slang->sl_map_array[c2];
13611
13612     return m1 == m2;
13613 }
13614
13615 /*
13616  * Add a suggestion to the list of suggestions.
13617  * For a suggestion that is already in the list the lowest score is remembered.
13618  */
13619     static void
13620 add_suggestion(su, gap, goodword, badlenarg, score, altscore, had_bonus,
13621                                                                  slang, maxsf)
13622     suginfo_T   *su;
13623     garray_T    *gap;           /* either su_ga or su_sga */
13624     char_u      *goodword;
13625     int         badlenarg;      /* len of bad word replaced with "goodword" */
13626     int         score;
13627     int         altscore;
13628     int         had_bonus;      /* value for st_had_bonus */
13629     slang_T     *slang;         /* language for sound folding */
13630     int         maxsf;          /* su_maxscore applies to soundfold score,
13631                                    su_sfmaxscore to the total score. */
13632 {
13633     int         goodlen;        /* len of goodword changed */
13634     int         badlen;         /* len of bad word changed */
13635     suggest_T   *stp;
13636     suggest_T   new_sug;
13637     int         i;
13638     char_u      *pgood, *pbad;
13639
13640     /* Minimize "badlen" for consistency.  Avoids that changing "the the" to
13641      * "thee the" is added next to changing the first "the" the "thee".  */
13642     pgood = goodword + STRLEN(goodword);
13643     pbad = su->su_badptr + badlenarg;
13644     for (;;)
13645     {
13646         goodlen = (int)(pgood - goodword);
13647         badlen = (int)(pbad - su->su_badptr);
13648         if (goodlen <= 0 || badlen <= 0)
13649             break;
13650         mb_ptr_back(goodword, pgood);
13651         mb_ptr_back(su->su_badptr, pbad);
13652 #ifdef FEAT_MBYTE
13653         if (has_mbyte)
13654         {
13655             if (mb_ptr2char(pgood) != mb_ptr2char(pbad))
13656                 break;
13657         }
13658         else
13659 #endif
13660             if (*pgood != *pbad)
13661                 break;
13662     }
13663
13664     if (badlen == 0 && goodlen == 0)
13665         /* goodword doesn't change anything; may happen for "the the" changing
13666          * the first "the" to itself. */
13667         return;
13668
13669     if (gap->ga_len == 0)
13670         i = -1;
13671     else
13672     {
13673         /* Check if the word is already there.  Also check the length that is
13674          * being replaced "thes," -> "these" is a different suggestion from
13675          * "thes" -> "these". */
13676         stp = &SUG(*gap, 0);
13677         for (i = gap->ga_len; --i >= 0; ++stp)
13678             if (stp->st_wordlen == goodlen
13679                     && stp->st_orglen == badlen
13680                     && STRNCMP(stp->st_word, goodword, goodlen) == 0)
13681             {
13682                 /*
13683                  * Found it.  Remember the word with the lowest score.
13684                  */
13685                 if (stp->st_slang == NULL)
13686                     stp->st_slang = slang;
13687
13688                 new_sug.st_score = score;
13689                 new_sug.st_altscore = altscore;
13690                 new_sug.st_had_bonus = had_bonus;
13691
13692                 if (stp->st_had_bonus != had_bonus)
13693                 {
13694                     /* Only one of the two had the soundalike score computed.
13695                      * Need to do that for the other one now, otherwise the
13696                      * scores can't be compared.  This happens because
13697                      * suggest_try_change() doesn't compute the soundalike
13698                      * word to keep it fast, while some special methods set
13699                      * the soundalike score to zero. */
13700                     if (had_bonus)
13701                         rescore_one(su, stp);
13702                     else
13703                     {
13704                         new_sug.st_word = stp->st_word;
13705                         new_sug.st_wordlen = stp->st_wordlen;
13706                         new_sug.st_slang = stp->st_slang;
13707                         new_sug.st_orglen = badlen;
13708                         rescore_one(su, &new_sug);
13709                     }
13710                 }
13711
13712                 if (stp->st_score > new_sug.st_score)
13713                 {
13714                     stp->st_score = new_sug.st_score;
13715                     stp->st_altscore = new_sug.st_altscore;
13716                     stp->st_had_bonus = new_sug.st_had_bonus;
13717                 }
13718                 break;
13719             }
13720     }
13721
13722     if (i < 0 && ga_grow(gap, 1) == OK)
13723     {
13724         /* Add a suggestion. */
13725         stp = &SUG(*gap, gap->ga_len);
13726         stp->st_word = vim_strnsave(goodword, goodlen);
13727         if (stp->st_word != NULL)
13728         {
13729             stp->st_wordlen = goodlen;
13730             stp->st_score = score;
13731             stp->st_altscore = altscore;
13732             stp->st_had_bonus = had_bonus;
13733             stp->st_orglen = badlen;
13734             stp->st_slang = slang;
13735             ++gap->ga_len;
13736
13737             /* If we have too many suggestions now, sort the list and keep
13738              * the best suggestions. */
13739             if (gap->ga_len > SUG_MAX_COUNT(su))
13740             {
13741                 if (maxsf)
13742                     su->su_sfmaxscore = cleanup_suggestions(gap,
13743                                       su->su_sfmaxscore, SUG_CLEAN_COUNT(su));
13744                 else
13745                 {
13746                     i = su->su_maxscore;
13747                     su->su_maxscore = cleanup_suggestions(gap,
13748                                         su->su_maxscore, SUG_CLEAN_COUNT(su));
13749                 }
13750             }
13751         }
13752     }
13753 }
13754
13755 /*
13756  * Suggestions may in fact be flagged as errors.  Esp. for banned words and
13757  * for split words, such as "the the".  Remove these from the list here.
13758  */
13759     static void
13760 check_suggestions(su, gap)
13761     suginfo_T   *su;
13762     garray_T    *gap;               /* either su_ga or su_sga */
13763 {
13764     suggest_T   *stp;
13765     int         i;
13766     char_u      longword[MAXWLEN + 1];
13767     int         len;
13768     hlf_T       attr;
13769
13770     stp = &SUG(*gap, 0);
13771     for (i = gap->ga_len - 1; i >= 0; --i)
13772     {
13773         /* Need to append what follows to check for "the the". */
13774         STRCPY(longword, stp[i].st_word);
13775         len = stp[i].st_wordlen;
13776         vim_strncpy(longword + len, su->su_badptr + stp[i].st_orglen,
13777                                                                MAXWLEN - len);
13778         attr = HLF_COUNT;
13779         (void)spell_check(curwin, longword, &attr, NULL, FALSE);
13780         if (attr != HLF_COUNT)
13781         {
13782             /* Remove this entry. */
13783             vim_free(stp[i].st_word);
13784             --gap->ga_len;
13785             if (i < gap->ga_len)
13786                 mch_memmove(stp + i, stp + i + 1,
13787                                        sizeof(suggest_T) * (gap->ga_len - i));
13788         }
13789     }
13790 }
13791
13792
13793 /*
13794  * Add a word to be banned.
13795  */
13796     static void
13797 add_banned(su, word)
13798     suginfo_T   *su;
13799     char_u      *word;
13800 {
13801     char_u      *s;
13802     hash_T      hash;
13803     hashitem_T  *hi;
13804
13805     hash = hash_hash(word);
13806     hi = hash_lookup(&su->su_banned, word, hash);
13807     if (HASHITEM_EMPTY(hi))
13808     {
13809         s = vim_strsave(word);
13810         if (s != NULL)
13811             hash_add_item(&su->su_banned, hi, s, hash);
13812     }
13813 }
13814
13815 /*
13816  * Recompute the score for all suggestions if sound-folding is possible.  This
13817  * is slow, thus only done for the final results.
13818  */
13819     static void
13820 rescore_suggestions(su)
13821     suginfo_T   *su;
13822 {
13823     int         i;
13824
13825     if (su->su_sallang != NULL)
13826         for (i = 0; i < su->su_ga.ga_len; ++i)
13827             rescore_one(su, &SUG(su->su_ga, i));
13828 }
13829
13830 /*
13831  * Recompute the score for one suggestion if sound-folding is possible.
13832  */
13833     static void
13834 rescore_one(su, stp)
13835     suginfo_T   *su;
13836     suggest_T   *stp;
13837 {
13838     slang_T     *slang = stp->st_slang;
13839     char_u      sal_badword[MAXWLEN];
13840     char_u      *p;
13841
13842     /* Only rescore suggestions that have no sal score yet and do have a
13843      * language. */
13844     if (slang != NULL && slang->sl_sal.ga_len > 0 && !stp->st_had_bonus)
13845     {
13846         if (slang == su->su_sallang)
13847             p = su->su_sal_badword;
13848         else
13849         {
13850             spell_soundfold(slang, su->su_fbadword, TRUE, sal_badword);
13851             p = sal_badword;
13852         }
13853
13854         stp->st_altscore = stp_sal_score(stp, su, slang, p);
13855         if (stp->st_altscore == SCORE_MAXMAX)
13856             stp->st_altscore = SCORE_BIG;
13857         stp->st_score = RESCORE(stp->st_score, stp->st_altscore);
13858         stp->st_had_bonus = TRUE;
13859     }
13860 }
13861
13862 static int
13863 #ifdef __BORLANDC__
13864 _RTLENTRYF
13865 #endif
13866 sug_compare __ARGS((const void *s1, const void *s2));
13867
13868 /*
13869  * Function given to qsort() to sort the suggestions on st_score.
13870  * First on "st_score", then "st_altscore" then alphabetically.
13871  */
13872     static int
13873 #ifdef __BORLANDC__
13874 _RTLENTRYF
13875 #endif
13876 sug_compare(s1, s2)
13877     const void  *s1;
13878     const void  *s2;
13879 {
13880     suggest_T   *p1 = (suggest_T *)s1;
13881     suggest_T   *p2 = (suggest_T *)s2;
13882     int         n = p1->st_score - p2->st_score;
13883
13884     if (n == 0)
13885     {
13886         n = p1->st_altscore - p2->st_altscore;
13887         if (n == 0)
13888             n = STRICMP(p1->st_word, p2->st_word);
13889     }
13890     return n;
13891 }
13892
13893 /*
13894  * Cleanup the suggestions:
13895  * - Sort on score.
13896  * - Remove words that won't be displayed.
13897  * Returns the maximum score in the list or "maxscore" unmodified.
13898  */
13899     static int
13900 cleanup_suggestions(gap, maxscore, keep)
13901     garray_T    *gap;
13902     int         maxscore;
13903     int         keep;           /* nr of suggestions to keep */
13904 {
13905     suggest_T   *stp = &SUG(*gap, 0);
13906     int         i;
13907
13908     /* Sort the list. */
13909     qsort(gap->ga_data, (size_t)gap->ga_len, sizeof(suggest_T), sug_compare);
13910
13911     /* Truncate the list to the number of suggestions that will be displayed. */
13912     if (gap->ga_len > keep)
13913     {
13914         for (i = keep; i < gap->ga_len; ++i)
13915             vim_free(stp[i].st_word);
13916         gap->ga_len = keep;
13917         return stp[keep - 1].st_score;
13918     }
13919     return maxscore;
13920 }
13921
13922 #if defined(FEAT_EVAL) || defined(PROTO)
13923 /*
13924  * Soundfold a string, for soundfold().
13925  * Result is in allocated memory, NULL for an error.
13926  */
13927     char_u *
13928 eval_soundfold(word)
13929     char_u      *word;
13930 {
13931     langp_T     *lp;
13932     char_u      sound[MAXWLEN];
13933     int         lpi;
13934
13935     if (curwin->w_p_spell && *curbuf->b_p_spl != NUL)
13936         /* Use the sound-folding of the first language that supports it. */
13937         for (lpi = 0; lpi < curbuf->b_langp.ga_len; ++lpi)
13938         {
13939             lp = LANGP_ENTRY(curbuf->b_langp, lpi);
13940             if (lp->lp_slang->sl_sal.ga_len > 0)
13941             {
13942                 /* soundfold the word */
13943                 spell_soundfold(lp->lp_slang, word, FALSE, sound);
13944                 return vim_strsave(sound);
13945             }
13946         }
13947
13948     /* No language with sound folding, return word as-is. */
13949     return vim_strsave(word);
13950 }
13951 #endif
13952
13953 /*
13954  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
13955  *
13956  * There are many ways to turn a word into a sound-a-like representation.  The
13957  * oldest is Soundex (1918!).   A nice overview can be found in "Approximate
13958  * swedish name matching - survey and test of different algorithms" by Klas
13959  * Erikson.
13960  *
13961  * We support two methods:
13962  * 1. SOFOFROM/SOFOTO do a simple character mapping.
13963  * 2. SAL items define a more advanced sound-folding (and much slower).
13964  */
13965     static void
13966 spell_soundfold(slang, inword, folded, res)
13967     slang_T     *slang;
13968     char_u      *inword;
13969     int         folded;     /* "inword" is already case-folded */
13970     char_u      *res;
13971 {
13972     char_u      fword[MAXWLEN];
13973     char_u      *word;
13974
13975     if (slang->sl_sofo)
13976         /* SOFOFROM and SOFOTO used */
13977         spell_soundfold_sofo(slang, inword, res);
13978     else
13979     {
13980         /* SAL items used.  Requires the word to be case-folded. */
13981         if (folded)
13982             word = inword;
13983         else
13984         {
13985             (void)spell_casefold(inword, (int)STRLEN(inword), fword, MAXWLEN);
13986             word = fword;
13987         }
13988
13989 #ifdef FEAT_MBYTE
13990         if (has_mbyte)
13991             spell_soundfold_wsal(slang, word, res);
13992         else
13993 #endif
13994             spell_soundfold_sal(slang, word, res);
13995     }
13996 }
13997
13998 /*
13999  * Perform sound folding of "inword" into "res" according to SOFOFROM and
14000  * SOFOTO lines.
14001  */
14002     static void
14003 spell_soundfold_sofo(slang, inword, res)
14004     slang_T     *slang;
14005     char_u      *inword;
14006     char_u      *res;
14007 {
14008     char_u      *s;
14009     int         ri = 0;
14010     int         c;
14011
14012 #ifdef FEAT_MBYTE
14013     if (has_mbyte)
14014     {
14015         int     prevc = 0;
14016         int     *ip;
14017
14018         /* The sl_sal_first[] table contains the translation for chars up to
14019          * 255, sl_sal the rest. */
14020         for (s = inword; *s != NUL; )
14021         {
14022             c = mb_cptr2char_adv(&s);
14023             if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
14024                 c = ' ';
14025             else if (c < 256)
14026                 c = slang->sl_sal_first[c];
14027             else
14028             {
14029                 ip = ((int **)slang->sl_sal.ga_data)[c & 0xff];
14030                 if (ip == NULL)         /* empty list, can't match */
14031                     c = NUL;
14032                 else
14033                     for (;;)            /* find "c" in the list */
14034                     {
14035                         if (*ip == 0)   /* not found */
14036                         {
14037                             c = NUL;
14038                             break;
14039                         }
14040                         if (*ip == c)   /* match! */
14041                         {
14042                             c = ip[1];
14043                             break;
14044                         }
14045                         ip += 2;
14046                     }
14047             }
14048
14049             if (c != NUL && c != prevc)
14050             {
14051                 ri += mb_char2bytes(c, res + ri);
14052                 if (ri + MB_MAXBYTES > MAXWLEN)
14053                     break;
14054                 prevc = c;
14055             }
14056         }
14057     }
14058     else
14059 #endif
14060     {
14061         /* The sl_sal_first[] table contains the translation. */
14062         for (s = inword; (c = *s) != NUL; ++s)
14063         {
14064             if (vim_iswhite(c))
14065                 c = ' ';
14066             else
14067                 c = slang->sl_sal_first[c];
14068             if (c != NUL && (ri == 0 || res[ri - 1] != c))
14069                 res[ri++] = c;
14070         }
14071     }
14072
14073     res[ri] = NUL;
14074 }
14075
14076     static void
14077 spell_soundfold_sal(slang, inword, res)
14078     slang_T     *slang;
14079     char_u      *inword;
14080     char_u      *res;
14081 {
14082     salitem_T   *smp;
14083     char_u      word[MAXWLEN];
14084     char_u      *s = inword;
14085     char_u      *t;
14086     char_u      *pf;
14087     int         i, j, z;
14088     int         reslen;
14089     int         n, k = 0;
14090     int         z0;
14091     int         k0;
14092     int         n0;
14093     int         c;
14094     int         pri;
14095     int         p0 = -333;
14096     int         c0;
14097
14098     /* Remove accents, if wanted.  We actually remove all non-word characters.
14099      * But keep white space.  We need a copy, the word may be changed here. */
14100     if (slang->sl_rem_accents)
14101     {
14102         t = word;
14103         while (*s != NUL)
14104         {
14105             if (vim_iswhite(*s))
14106             {
14107                 *t++ = ' ';
14108                 s = skipwhite(s);
14109             }
14110             else
14111             {
14112                 if (spell_iswordp_nmw(s))
14113                     *t++ = *s;
14114                 ++s;
14115             }
14116         }
14117         *t = NUL;
14118     }
14119     else
14120         STRCPY(word, s);
14121
14122     smp = (salitem_T *)slang->sl_sal.ga_data;
14123
14124     /*
14125      * This comes from Aspell phonet.cpp.  Converted from C++ to C.
14126      * Changed to keep spaces.
14127      */
14128     i = reslen = z = 0;
14129     while ((c = word[i]) != NUL)
14130     {
14131         /* Start with the first rule that has the character in the word. */
14132         n = slang->sl_sal_first[c];
14133         z0 = 0;
14134
14135         if (n >= 0)
14136         {
14137             /* check all rules for the same letter */
14138             for (; (s = smp[n].sm_lead)[0] == c; ++n)
14139             {
14140                 /* Quickly skip entries that don't match the word.  Most
14141                  * entries are less then three chars, optimize for that. */
14142                 k = smp[n].sm_leadlen;
14143                 if (k > 1)
14144                 {
14145                     if (word[i + 1] != s[1])
14146                         continue;
14147                     if (k > 2)
14148                     {
14149                         for (j = 2; j < k; ++j)
14150                             if (word[i + j] != s[j])
14151                                 break;
14152                         if (j < k)
14153                             continue;
14154                     }
14155                 }
14156
14157                 if ((pf = smp[n].sm_oneof) != NULL)
14158                 {
14159                     /* Check for match with one of the chars in "sm_oneof". */
14160                     while (*pf != NUL && *pf != word[i + k])
14161                         ++pf;
14162                     if (*pf == NUL)
14163                         continue;
14164                     ++k;
14165                 }
14166                 s = smp[n].sm_rules;
14167                 pri = 5;    /* default priority */
14168
14169                 p0 = *s;
14170                 k0 = k;
14171                 while (*s == '-' && k > 1)
14172                 {
14173                     k--;
14174                     s++;
14175                 }
14176                 if (*s == '<')
14177                     s++;
14178                 if (VIM_ISDIGIT(*s))
14179                 {
14180                     /* determine priority */
14181                     pri = *s - '0';
14182                     s++;
14183                 }
14184                 if (*s == '^' && *(s + 1) == '^')
14185                     s++;
14186
14187                 if (*s == NUL
14188                         || (*s == '^'
14189                             && (i == 0 || !(word[i - 1] == ' '
14190                                       || spell_iswordp(word + i - 1, curbuf)))
14191                             && (*(s + 1) != '$'
14192                                 || (!spell_iswordp(word + i + k0, curbuf))))
14193                         || (*s == '$' && i > 0
14194                             && spell_iswordp(word + i - 1, curbuf)
14195                             && (!spell_iswordp(word + i + k0, curbuf))))
14196                 {
14197                     /* search for followup rules, if:    */
14198                     /* followup and k > 1  and  NO '-' in searchstring */
14199                     c0 = word[i + k - 1];
14200                     n0 = slang->sl_sal_first[c0];
14201
14202                     if (slang->sl_followup && k > 1 && n0 >= 0
14203                                            && p0 != '-' && word[i + k] != NUL)
14204                     {
14205                         /* test follow-up rule for "word[i + k]" */
14206                         for ( ; (s = smp[n0].sm_lead)[0] == c0; ++n0)
14207                         {
14208                             /* Quickly skip entries that don't match the word.
14209                              * */
14210                             k0 = smp[n0].sm_leadlen;
14211                             if (k0 > 1)
14212                             {
14213                                 if (word[i + k] != s[1])
14214                                     continue;
14215                                 if (k0 > 2)
14216                                 {
14217                                     pf = word + i + k + 1;
14218                                     for (j = 2; j < k0; ++j)
14219                                         if (*pf++ != s[j])
14220                                             break;
14221                                     if (j < k0)
14222                                         continue;
14223                                 }
14224                             }
14225                             k0 += k - 1;
14226
14227                             if ((pf = smp[n0].sm_oneof) != NULL)
14228                             {
14229                                 /* Check for match with one of the chars in
14230                                  * "sm_oneof". */
14231                                 while (*pf != NUL && *pf != word[i + k0])
14232                                     ++pf;
14233                                 if (*pf == NUL)
14234                                     continue;
14235                                 ++k0;
14236                             }
14237
14238                             p0 = 5;
14239                             s = smp[n0].sm_rules;
14240                             while (*s == '-')
14241                             {
14242                                 /* "k0" gets NOT reduced because
14243                                  * "if (k0 == k)" */
14244                                 s++;
14245                             }
14246                             if (*s == '<')
14247                                 s++;
14248                             if (VIM_ISDIGIT(*s))
14249                             {
14250                                 p0 = *s - '0';
14251                                 s++;
14252                             }
14253
14254                             if (*s == NUL
14255                                     /* *s == '^' cuts */
14256                                     || (*s == '$'
14257                                             && !spell_iswordp(word + i + k0,
14258                                                                      curbuf)))
14259                             {
14260                                 if (k0 == k)
14261                                     /* this is just a piece of the string */
14262                                     continue;
14263
14264                                 if (p0 < pri)
14265                                     /* priority too low */
14266                                     continue;
14267                                 /* rule fits; stop search */
14268                                 break;
14269                             }
14270                         }
14271
14272                         if (p0 >= pri && smp[n0].sm_lead[0] == c0)
14273                             continue;
14274                     }
14275
14276                     /* replace string */
14277                     s = smp[n].sm_to;
14278                     if (s == NULL)
14279                         s = (char_u *)"";
14280                     pf = smp[n].sm_rules;
14281                     p0 = (vim_strchr(pf, '<') != NULL) ? 1 : 0;
14282                     if (p0 == 1 && z == 0)
14283                     {
14284                         /* rule with '<' is used */
14285                         if (reslen > 0 && *s != NUL && (res[reslen - 1] == c
14286                                                     || res[reslen - 1] == *s))
14287                             reslen--;
14288                         z0 = 1;
14289                         z = 1;
14290                         k0 = 0;
14291                         while (*s != NUL && word[i + k0] != NUL)
14292                         {
14293                             word[i + k0] = *s;
14294                             k0++;
14295                             s++;
14296                         }
14297                         if (k > k0)
14298                             STRMOVE(word + i + k0, word + i + k);
14299
14300                         /* new "actual letter" */
14301                         c = word[i];
14302                     }
14303                     else
14304                     {
14305                         /* no '<' rule used */
14306                         i += k - 1;
14307                         z = 0;
14308                         while (*s != NUL && s[1] != NUL && reslen < MAXWLEN)
14309                         {
14310                             if (reslen == 0 || res[reslen - 1] != *s)
14311                                 res[reslen++] = *s;
14312                             s++;
14313                         }
14314                         /* new "actual letter" */
14315                         c = *s;
14316                         if (strstr((char *)pf, "^^") != NULL)
14317                         {
14318                             if (c != NUL)
14319                                 res[reslen++] = c;
14320                             STRMOVE(word, word + i + 1);
14321                             i = 0;
14322                             z0 = 1;
14323                         }
14324                     }
14325                     break;
14326                 }
14327             }
14328         }
14329         else if (vim_iswhite(c))
14330         {
14331             c = ' ';
14332             k = 1;
14333         }
14334
14335         if (z0 == 0)
14336         {
14337             if (k && !p0 && reslen < MAXWLEN && c != NUL
14338                     && (!slang->sl_collapse || reslen == 0
14339                                                      || res[reslen - 1] != c))
14340                 /* condense only double letters */
14341                 res[reslen++] = c;
14342
14343             i++;
14344             z = 0;
14345             k = 0;
14346         }
14347     }
14348
14349     res[reslen] = NUL;
14350 }
14351
14352 #ifdef FEAT_MBYTE
14353 /*
14354  * Turn "inword" into its sound-a-like equivalent in "res[MAXWLEN]".
14355  * Multi-byte version of spell_soundfold().
14356  */
14357     static void
14358 spell_soundfold_wsal(slang, inword, res)
14359     slang_T     *slang;
14360     char_u      *inword;
14361     char_u      *res;
14362 {
14363     salitem_T   *smp = (salitem_T *)slang->sl_sal.ga_data;
14364     int         word[MAXWLEN];
14365     int         wres[MAXWLEN];
14366     int         l;
14367     char_u      *s;
14368     int         *ws;
14369     char_u      *t;
14370     int         *pf;
14371     int         i, j, z;
14372     int         reslen;
14373     int         n, k = 0;
14374     int         z0;
14375     int         k0;
14376     int         n0;
14377     int         c;
14378     int         pri;
14379     int         p0 = -333;
14380     int         c0;
14381     int         did_white = FALSE;
14382
14383     /*
14384      * Convert the multi-byte string to a wide-character string.
14385      * Remove accents, if wanted.  We actually remove all non-word characters.
14386      * But keep white space.
14387      */
14388     n = 0;
14389     for (s = inword; *s != NUL; )
14390     {
14391         t = s;
14392         c = mb_cptr2char_adv(&s);
14393         if (slang->sl_rem_accents)
14394         {
14395             if (enc_utf8 ? utf_class(c) == 0 : vim_iswhite(c))
14396             {
14397                 if (did_white)
14398                     continue;
14399                 c = ' ';
14400                 did_white = TRUE;
14401             }
14402             else
14403             {
14404                 did_white = FALSE;
14405                 if (!spell_iswordp_nmw(t))
14406                     continue;
14407             }
14408         }
14409         word[n++] = c;
14410     }
14411     word[n] = NUL;
14412
14413     /*
14414      * This comes from Aspell phonet.cpp.
14415      * Converted from C++ to C.  Added support for multi-byte chars.
14416      * Changed to keep spaces.
14417      */
14418     i = reslen = z = 0;
14419     while ((c = word[i]) != NUL)
14420     {
14421         /* Start with the first rule that has the character in the word. */
14422         n = slang->sl_sal_first[c & 0xff];
14423         z0 = 0;
14424
14425         if (n >= 0)
14426         {
14427             /* check all rules for the same index byte */
14428             for (; ((ws = smp[n].sm_lead_w)[0] & 0xff) == (c & 0xff); ++n)
14429             {
14430                 /* Quickly skip entries that don't match the word.  Most
14431                  * entries are less then three chars, optimize for that. */
14432                 if (c != ws[0])
14433                     continue;
14434                 k = smp[n].sm_leadlen;
14435                 if (k > 1)
14436                 {
14437                     if (word[i + 1] != ws[1])
14438                         continue;
14439                     if (k > 2)
14440                     {
14441                         for (j = 2; j < k; ++j)
14442                             if (word[i + j] != ws[j])
14443                                 break;
14444                         if (j < k)
14445                             continue;
14446                     }
14447                 }
14448
14449                 if ((pf = smp[n].sm_oneof_w) != NULL)
14450                 {
14451                     /* Check for match with one of the chars in "sm_oneof". */
14452                     while (*pf != NUL && *pf != word[i + k])
14453                         ++pf;
14454                     if (*pf == NUL)
14455                         continue;
14456                     ++k;
14457                 }
14458                 s = smp[n].sm_rules;
14459                 pri = 5;    /* default priority */
14460
14461                 p0 = *s;
14462                 k0 = k;
14463                 while (*s == '-' && k > 1)
14464                 {
14465                     k--;
14466                     s++;
14467                 }
14468                 if (*s == '<')
14469                     s++;
14470                 if (VIM_ISDIGIT(*s))
14471                 {
14472                     /* determine priority */
14473                     pri = *s - '0';
14474                     s++;
14475                 }
14476                 if (*s == '^' && *(s + 1) == '^')
14477                     s++;
14478
14479                 if (*s == NUL
14480                         || (*s == '^'
14481                             && (i == 0 || !(word[i - 1] == ' '
14482                                     || spell_iswordp_w(word + i - 1, curbuf)))
14483                             && (*(s + 1) != '$'
14484                                 || (!spell_iswordp_w(word + i + k0, curbuf))))
14485                         || (*s == '$' && i > 0
14486                             && spell_iswordp_w(word + i - 1, curbuf)
14487                             && (!spell_iswordp_w(word + i + k0, curbuf))))
14488                 {
14489                     /* search for followup rules, if:    */
14490                     /* followup and k > 1  and  NO '-' in searchstring */
14491                     c0 = word[i + k - 1];
14492                     n0 = slang->sl_sal_first[c0 & 0xff];
14493
14494                     if (slang->sl_followup && k > 1 && n0 >= 0
14495                                            && p0 != '-' && word[i + k] != NUL)
14496                     {
14497                         /* Test follow-up rule for "word[i + k]"; loop over
14498                          * all entries with the same index byte. */
14499                         for ( ; ((ws = smp[n0].sm_lead_w)[0] & 0xff)
14500                                                          == (c0 & 0xff); ++n0)
14501                         {
14502                             /* Quickly skip entries that don't match the word.
14503                              */
14504                             if (c0 != ws[0])
14505                                 continue;
14506                             k0 = smp[n0].sm_leadlen;
14507                             if (k0 > 1)
14508                             {
14509                                 if (word[i + k] != ws[1])
14510                                     continue;
14511                                 if (k0 > 2)
14512                                 {
14513                                     pf = word + i + k + 1;
14514                                     for (j = 2; j < k0; ++j)
14515                                         if (*pf++ != ws[j])
14516                                             break;
14517                                     if (j < k0)
14518                                         continue;
14519                                 }
14520                             }
14521                             k0 += k - 1;
14522
14523                             if ((pf = smp[n0].sm_oneof_w) != NULL)
14524                             {
14525                                 /* Check for match with one of the chars in
14526                                  * "sm_oneof". */
14527                                 while (*pf != NUL && *pf != word[i + k0])
14528                                     ++pf;
14529                                 if (*pf == NUL)
14530                                     continue;
14531                                 ++k0;
14532                             }
14533
14534                             p0 = 5;
14535                             s = smp[n0].sm_rules;
14536                             while (*s == '-')
14537                             {
14538                                 /* "k0" gets NOT reduced because
14539                                  * "if (k0 == k)" */
14540                                 s++;
14541                             }
14542                             if (*s == '<')
14543                                 s++;
14544                             if (VIM_ISDIGIT(*s))
14545                             {
14546                                 p0 = *s - '0';
14547                                 s++;
14548                             }
14549
14550                             if (*s == NUL
14551                                     /* *s == '^' cuts */
14552                                     || (*s == '$'
14553                                          && !spell_iswordp_w(word + i + k0,
14554                                                                      curbuf)))
14555                             {
14556                                 if (k0 == k)
14557                                     /* this is just a piece of the string */
14558                                     continue;
14559
14560                                 if (p0 < pri)
14561                                     /* priority too low */
14562                                     continue;
14563                                 /* rule fits; stop search */
14564                                 break;
14565                             }
14566                         }
14567
14568                         if (p0 >= pri && (smp[n0].sm_lead_w[0] & 0xff)
14569                                                                == (c0 & 0xff))
14570                             continue;
14571                     }
14572
14573                     /* replace string */
14574                     ws = smp[n].sm_to_w;
14575                     s = smp[n].sm_rules;
14576                     p0 = (vim_strchr(s, '<') != NULL) ? 1 : 0;
14577                     if (p0 == 1 && z == 0)
14578                     {
14579                         /* rule with '<' is used */
14580                         if (reslen > 0 && ws != NULL && *ws != NUL
14581                                 && (wres[reslen - 1] == c
14582                                                     || wres[reslen - 1] == *ws))
14583                             reslen--;
14584                         z0 = 1;
14585                         z = 1;
14586                         k0 = 0;
14587                         if (ws != NULL)
14588                             while (*ws != NUL && word[i + k0] != NUL)
14589                             {
14590                                 word[i + k0] = *ws;
14591                                 k0++;
14592                                 ws++;
14593                             }
14594                         if (k > k0)
14595                             mch_memmove(word + i + k0, word + i + k,
14596                                     sizeof(int) * (STRLEN(word + i + k) + 1));
14597
14598                         /* new "actual letter" */
14599                         c = word[i];
14600                     }
14601                     else
14602                     {
14603                         /* no '<' rule used */
14604                         i += k - 1;
14605                         z = 0;
14606                         if (ws != NULL)
14607                             while (*ws != NUL && ws[1] != NUL
14608                                                           && reslen < MAXWLEN)
14609                             {
14610                                 if (reslen == 0 || wres[reslen - 1] != *ws)
14611                                     wres[reslen++] = *ws;
14612                                 ws++;
14613                             }
14614                         /* new "actual letter" */
14615                         if (ws == NULL)
14616                             c = NUL;
14617                         else
14618                             c = *ws;
14619                         if (strstr((char *)s, "^^") != NULL)
14620                         {
14621                             if (c != NUL)
14622                                 wres[reslen++] = c;
14623                             mch_memmove(word, word + i + 1,
14624                                     sizeof(int) * (STRLEN(word + i + 1) + 1));
14625                             i = 0;
14626                             z0 = 1;
14627                         }
14628                     }
14629                     break;
14630                 }
14631             }
14632         }
14633         else if (vim_iswhite(c))
14634         {
14635             c = ' ';
14636             k = 1;
14637         }
14638
14639         if (z0 == 0)
14640         {
14641             if (k && !p0 && reslen < MAXWLEN && c != NUL
14642                     && (!slang->sl_collapse || reslen == 0
14643                                                      || wres[reslen - 1] != c))
14644                 /* condense only double letters */
14645                 wres[reslen++] = c;
14646
14647             i++;
14648             z = 0;
14649             k = 0;
14650         }
14651     }
14652
14653     /* Convert wide characters in "wres" to a multi-byte string in "res". */
14654     l = 0;
14655     for (n = 0; n < reslen; ++n)
14656     {
14657         l += mb_char2bytes(wres[n], res + l);
14658         if (l + MB_MAXBYTES > MAXWLEN)
14659             break;
14660     }
14661     res[l] = NUL;
14662 }
14663 #endif
14664
14665 /*
14666  * Compute a score for two sound-a-like words.
14667  * This permits up to two inserts/deletes/swaps/etc. to keep things fast.
14668  * Instead of a generic loop we write out the code.  That keeps it fast by
14669  * avoiding checks that will not be possible.
14670  */
14671     static int
14672 soundalike_score(goodstart, badstart)
14673     char_u      *goodstart;     /* sound-folded good word */
14674     char_u      *badstart;      /* sound-folded bad word */
14675 {
14676     char_u      *goodsound = goodstart;
14677     char_u      *badsound = badstart;
14678     int         goodlen;
14679     int         badlen;
14680     int         n;
14681     char_u      *pl, *ps;
14682     char_u      *pl2, *ps2;
14683     int         score = 0;
14684
14685     /* adding/inserting "*" at the start (word starts with vowel) shouldn't be
14686      * counted so much, vowels halfway the word aren't counted at all. */
14687     if ((*badsound == '*' || *goodsound == '*') && *badsound != *goodsound)
14688     {
14689         if (badsound[1] == goodsound[1]
14690                 || (badsound[1] != NUL
14691                     && goodsound[1] != NUL
14692                     && badsound[2] == goodsound[2]))
14693         {
14694             /* handle like a substitute */
14695         }
14696         else
14697         {
14698             score = 2 * SCORE_DEL / 3;
14699             if (*badsound == '*')
14700                 ++badsound;
14701             else
14702                 ++goodsound;
14703         }
14704     }
14705
14706     goodlen = (int)STRLEN(goodsound);
14707     badlen = (int)STRLEN(badsound);
14708
14709     /* Return quickly if the lengths are too different to be fixed by two
14710      * changes. */
14711     n = goodlen - badlen;
14712     if (n < -2 || n > 2)
14713         return SCORE_MAXMAX;
14714
14715     if (n > 0)
14716     {
14717         pl = goodsound;     /* goodsound is longest */
14718         ps = badsound;
14719     }
14720     else
14721     {
14722         pl = badsound;      /* badsound is longest */
14723         ps = goodsound;
14724     }
14725
14726     /* Skip over the identical part. */
14727     while (*pl == *ps && *pl != NUL)
14728     {
14729         ++pl;
14730         ++ps;
14731     }
14732
14733     switch (n)
14734     {
14735         case -2:
14736         case 2:
14737             /*
14738              * Must delete two characters from "pl".
14739              */
14740             ++pl;       /* first delete */
14741             while (*pl == *ps)
14742             {
14743                 ++pl;
14744                 ++ps;
14745             }
14746             /* strings must be equal after second delete */
14747             if (STRCMP(pl + 1, ps) == 0)
14748                 return score + SCORE_DEL * 2;
14749
14750             /* Failed to compare. */
14751             break;
14752
14753         case -1:
14754         case 1:
14755             /*
14756              * Minimal one delete from "pl" required.
14757              */
14758
14759             /* 1: delete */
14760             pl2 = pl + 1;
14761             ps2 = ps;
14762             while (*pl2 == *ps2)
14763             {
14764                 if (*pl2 == NUL)        /* reached the end */
14765                     return score + SCORE_DEL;
14766                 ++pl2;
14767                 ++ps2;
14768             }
14769
14770             /* 2: delete then swap, then rest must be equal */
14771             if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14772                                              && STRCMP(pl2 + 2, ps2 + 2) == 0)
14773                 return score + SCORE_DEL + SCORE_SWAP;
14774
14775             /* 3: delete then substitute, then the rest must be equal */
14776             if (STRCMP(pl2 + 1, ps2 + 1) == 0)
14777                 return score + SCORE_DEL + SCORE_SUBST;
14778
14779             /* 4: first swap then delete */
14780             if (pl[0] == ps[1] && pl[1] == ps[0])
14781             {
14782                 pl2 = pl + 2;       /* swap, skip two chars */
14783                 ps2 = ps + 2;
14784                 while (*pl2 == *ps2)
14785                 {
14786                     ++pl2;
14787                     ++ps2;
14788                 }
14789                 /* delete a char and then strings must be equal */
14790                 if (STRCMP(pl2 + 1, ps2) == 0)
14791                     return score + SCORE_SWAP + SCORE_DEL;
14792             }
14793
14794             /* 5: first substitute then delete */
14795             pl2 = pl + 1;           /* substitute, skip one char */
14796             ps2 = ps + 1;
14797             while (*pl2 == *ps2)
14798             {
14799                 ++pl2;
14800                 ++ps2;
14801             }
14802             /* delete a char and then strings must be equal */
14803             if (STRCMP(pl2 + 1, ps2) == 0)
14804                 return score + SCORE_SUBST + SCORE_DEL;
14805
14806             /* Failed to compare. */
14807             break;
14808
14809         case 0:
14810             /*
14811              * Lenghts are equal, thus changes must result in same length: An
14812              * insert is only possible in combination with a delete.
14813              * 1: check if for identical strings
14814              */
14815             if (*pl == NUL)
14816                 return score;
14817
14818             /* 2: swap */
14819             if (pl[0] == ps[1] && pl[1] == ps[0])
14820             {
14821                 pl2 = pl + 2;       /* swap, skip two chars */
14822                 ps2 = ps + 2;
14823                 while (*pl2 == *ps2)
14824                 {
14825                     if (*pl2 == NUL)    /* reached the end */
14826                         return score + SCORE_SWAP;
14827                     ++pl2;
14828                     ++ps2;
14829                 }
14830                 /* 3: swap and swap again */
14831                 if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14832                                              && STRCMP(pl2 + 2, ps2 + 2) == 0)
14833                     return score + SCORE_SWAP + SCORE_SWAP;
14834
14835                 /* 4: swap and substitute */
14836                 if (STRCMP(pl2 + 1, ps2 + 1) == 0)
14837                     return score + SCORE_SWAP + SCORE_SUBST;
14838             }
14839
14840             /* 5: substitute */
14841             pl2 = pl + 1;
14842             ps2 = ps + 1;
14843             while (*pl2 == *ps2)
14844             {
14845                 if (*pl2 == NUL)        /* reached the end */
14846                     return score + SCORE_SUBST;
14847                 ++pl2;
14848                 ++ps2;
14849             }
14850
14851             /* 6: substitute and swap */
14852             if (pl2[0] == ps2[1] && pl2[1] == ps2[0]
14853                                              && STRCMP(pl2 + 2, ps2 + 2) == 0)
14854                 return score + SCORE_SUBST + SCORE_SWAP;
14855
14856             /* 7: substitute and substitute */
14857             if (STRCMP(pl2 + 1, ps2 + 1) == 0)
14858                 return score + SCORE_SUBST + SCORE_SUBST;
14859
14860             /* 8: insert then delete */
14861             pl2 = pl;
14862             ps2 = ps + 1;
14863             while (*pl2 == *ps2)
14864             {
14865                 ++pl2;
14866                 ++ps2;
14867             }
14868             if (STRCMP(pl2 + 1, ps2) == 0)
14869                 return score + SCORE_INS + SCORE_DEL;
14870
14871             /* 9: delete then insert */
14872             pl2 = pl + 1;
14873             ps2 = ps;
14874             while (*pl2 == *ps2)
14875             {
14876                 ++pl2;
14877                 ++ps2;
14878             }
14879             if (STRCMP(pl2, ps2 + 1) == 0)
14880                 return score + SCORE_INS + SCORE_DEL;
14881
14882             /* Failed to compare. */
14883             break;
14884     }
14885
14886     return SCORE_MAXMAX;
14887 }
14888
14889 /*
14890  * Compute the "edit distance" to turn "badword" into "goodword".  The less
14891  * deletes/inserts/substitutes/swaps are required the lower the score.
14892  *
14893  * The algorithm is described by Du and Chang, 1992.
14894  * The implementation of the algorithm comes from Aspell editdist.cpp,
14895  * edit_distance().  It has been converted from C++ to C and modified to
14896  * support multi-byte characters.
14897  */
14898     static int
14899 spell_edit_score(slang, badword, goodword)
14900     slang_T     *slang;
14901     char_u      *badword;
14902     char_u      *goodword;
14903 {
14904     int         *cnt;
14905     int         badlen, goodlen;        /* lengths including NUL */
14906     int         j, i;
14907     int         t;
14908     int         bc, gc;
14909     int         pbc, pgc;
14910 #ifdef FEAT_MBYTE
14911     char_u      *p;
14912     int         wbadword[MAXWLEN];
14913     int         wgoodword[MAXWLEN];
14914
14915     if (has_mbyte)
14916     {
14917         /* Get the characters from the multi-byte strings and put them in an
14918          * int array for easy access. */
14919         for (p = badword, badlen = 0; *p != NUL; )
14920             wbadword[badlen++] = mb_cptr2char_adv(&p);
14921         wbadword[badlen++] = 0;
14922         for (p = goodword, goodlen = 0; *p != NUL; )
14923             wgoodword[goodlen++] = mb_cptr2char_adv(&p);
14924         wgoodword[goodlen++] = 0;
14925     }
14926     else
14927 #endif
14928     {
14929         badlen = (int)STRLEN(badword) + 1;
14930         goodlen = (int)STRLEN(goodword) + 1;
14931     }
14932
14933     /* We use "cnt" as an array: CNT(badword_idx, goodword_idx). */
14934 #define CNT(a, b)   cnt[(a) + (b) * (badlen + 1)]
14935     cnt = (int *)lalloc((long_u)(sizeof(int) * (badlen + 1) * (goodlen + 1)),
14936                                                                         TRUE);
14937     if (cnt == NULL)
14938         return 0;       /* out of memory */
14939
14940     CNT(0, 0) = 0;
14941     for (j = 1; j <= goodlen; ++j)
14942         CNT(0, j) = CNT(0, j - 1) + SCORE_INS;
14943
14944     for (i = 1; i <= badlen; ++i)
14945     {
14946         CNT(i, 0) = CNT(i - 1, 0) + SCORE_DEL;
14947         for (j = 1; j <= goodlen; ++j)
14948         {
14949 #ifdef FEAT_MBYTE
14950             if (has_mbyte)
14951             {
14952                 bc = wbadword[i - 1];
14953                 gc = wgoodword[j - 1];
14954             }
14955             else
14956 #endif
14957             {
14958                 bc = badword[i - 1];
14959                 gc = goodword[j - 1];
14960             }
14961             if (bc == gc)
14962                 CNT(i, j) = CNT(i - 1, j - 1);
14963             else
14964             {
14965                 /* Use a better score when there is only a case difference. */
14966                 if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
14967                     CNT(i, j) = SCORE_ICASE + CNT(i - 1, j - 1);
14968                 else
14969                 {
14970                     /* For a similar character use SCORE_SIMILAR. */
14971                     if (slang != NULL
14972                             && slang->sl_has_map
14973                             && similar_chars(slang, gc, bc))
14974                         CNT(i, j) = SCORE_SIMILAR + CNT(i - 1, j - 1);
14975                     else
14976                         CNT(i, j) = SCORE_SUBST + CNT(i - 1, j - 1);
14977                 }
14978
14979                 if (i > 1 && j > 1)
14980                 {
14981 #ifdef FEAT_MBYTE
14982                     if (has_mbyte)
14983                     {
14984                         pbc = wbadword[i - 2];
14985                         pgc = wgoodword[j - 2];
14986                     }
14987                     else
14988 #endif
14989                     {
14990                         pbc = badword[i - 2];
14991                         pgc = goodword[j - 2];
14992                     }
14993                     if (bc == pgc && pbc == gc)
14994                     {
14995                         t = SCORE_SWAP + CNT(i - 2, j - 2);
14996                         if (t < CNT(i, j))
14997                             CNT(i, j) = t;
14998                     }
14999                 }
15000                 t = SCORE_DEL + CNT(i - 1, j);
15001                 if (t < CNT(i, j))
15002                     CNT(i, j) = t;
15003                 t = SCORE_INS + CNT(i, j - 1);
15004                 if (t < CNT(i, j))
15005                     CNT(i, j) = t;
15006             }
15007         }
15008     }
15009
15010     i = CNT(badlen - 1, goodlen - 1);
15011     vim_free(cnt);
15012     return i;
15013 }
15014
15015 typedef struct
15016 {
15017     int         badi;
15018     int         goodi;
15019     int         score;
15020 } limitscore_T;
15021
15022 /*
15023  * Like spell_edit_score(), but with a limit on the score to make it faster.
15024  * May return SCORE_MAXMAX when the score is higher than "limit".
15025  *
15026  * This uses a stack for the edits still to be tried.
15027  * The idea comes from Aspell leditdist.cpp.  Rewritten in C and added support
15028  * for multi-byte characters.
15029  */
15030     static int
15031 spell_edit_score_limit(slang, badword, goodword, limit)
15032     slang_T     *slang;
15033     char_u      *badword;
15034     char_u      *goodword;
15035     int         limit;
15036 {
15037     limitscore_T    stack[10];          /* allow for over 3 * 2 edits */
15038     int             stackidx;
15039     int             bi, gi;
15040     int             bi2, gi2;
15041     int             bc, gc;
15042     int             score;
15043     int             score_off;
15044     int             minscore;
15045     int             round;
15046
15047 #ifdef FEAT_MBYTE
15048     /* Multi-byte characters require a bit more work, use a different function
15049      * to avoid testing "has_mbyte" quite often. */
15050     if (has_mbyte)
15051         return spell_edit_score_limit_w(slang, badword, goodword, limit);
15052 #endif
15053
15054     /*
15055      * The idea is to go from start to end over the words.  So long as
15056      * characters are equal just continue, this always gives the lowest score.
15057      * When there is a difference try several alternatives.  Each alternative
15058      * increases "score" for the edit distance.  Some of the alternatives are
15059      * pushed unto a stack and tried later, some are tried right away.  At the
15060      * end of the word the score for one alternative is known.  The lowest
15061      * possible score is stored in "minscore".
15062      */
15063     stackidx = 0;
15064     bi = 0;
15065     gi = 0;
15066     score = 0;
15067     minscore = limit + 1;
15068
15069     for (;;)
15070     {
15071         /* Skip over an equal part, score remains the same. */
15072         for (;;)
15073         {
15074             bc = badword[bi];
15075             gc = goodword[gi];
15076             if (bc != gc)       /* stop at a char that's different */
15077                 break;
15078             if (bc == NUL)      /* both words end */
15079             {
15080                 if (score < minscore)
15081                     minscore = score;
15082                 goto pop;       /* do next alternative */
15083             }
15084             ++bi;
15085             ++gi;
15086         }
15087
15088         if (gc == NUL)    /* goodword ends, delete badword chars */
15089         {
15090             do
15091             {
15092                 if ((score += SCORE_DEL) >= minscore)
15093                     goto pop;       /* do next alternative */
15094             } while (badword[++bi] != NUL);
15095             minscore = score;
15096         }
15097         else if (bc == NUL) /* badword ends, insert badword chars */
15098         {
15099             do
15100             {
15101                 if ((score += SCORE_INS) >= minscore)
15102                     goto pop;       /* do next alternative */
15103             } while (goodword[++gi] != NUL);
15104             minscore = score;
15105         }
15106         else                    /* both words continue */
15107         {
15108             /* If not close to the limit, perform a change.  Only try changes
15109              * that may lead to a lower score than "minscore".
15110              * round 0: try deleting a char from badword
15111              * round 1: try inserting a char in badword */
15112             for (round = 0; round <= 1; ++round)
15113             {
15114                 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
15115                 if (score_off < minscore)
15116                 {
15117                     if (score_off + SCORE_EDIT_MIN >= minscore)
15118                     {
15119                         /* Near the limit, rest of the words must match.  We
15120                          * can check that right now, no need to push an item
15121                          * onto the stack. */
15122                         bi2 = bi + 1 - round;
15123                         gi2 = gi + round;
15124                         while (goodword[gi2] == badword[bi2])
15125                         {
15126                             if (goodword[gi2] == NUL)
15127                             {
15128                                 minscore = score_off;
15129                                 break;
15130                             }
15131                             ++bi2;
15132                             ++gi2;
15133                         }
15134                     }
15135                     else
15136                     {
15137                         /* try deleting/inserting a character later */
15138                         stack[stackidx].badi = bi + 1 - round;
15139                         stack[stackidx].goodi = gi + round;
15140                         stack[stackidx].score = score_off;
15141                         ++stackidx;
15142                     }
15143                 }
15144             }
15145
15146             if (score + SCORE_SWAP < minscore)
15147             {
15148                 /* If swapping two characters makes a match then the
15149                  * substitution is more expensive, thus there is no need to
15150                  * try both. */
15151                 if (gc == badword[bi + 1] && bc == goodword[gi + 1])
15152                 {
15153                     /* Swap two characters, that is: skip them. */
15154                     gi += 2;
15155                     bi += 2;
15156                     score += SCORE_SWAP;
15157                     continue;
15158                 }
15159             }
15160
15161             /* Substitute one character for another which is the same
15162              * thing as deleting a character from both goodword and badword.
15163              * Use a better score when there is only a case difference. */
15164             if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
15165                 score += SCORE_ICASE;
15166             else
15167             {
15168                 /* For a similar character use SCORE_SIMILAR. */
15169                 if (slang != NULL
15170                         && slang->sl_has_map
15171                         && similar_chars(slang, gc, bc))
15172                     score += SCORE_SIMILAR;
15173                 else
15174                     score += SCORE_SUBST;
15175             }
15176
15177             if (score < minscore)
15178             {
15179                 /* Do the substitution. */
15180                 ++gi;
15181                 ++bi;
15182                 continue;
15183             }
15184         }
15185 pop:
15186         /*
15187          * Get here to try the next alternative, pop it from the stack.
15188          */
15189         if (stackidx == 0)              /* stack is empty, finished */
15190             break;
15191
15192         /* pop an item from the stack */
15193         --stackidx;
15194         gi = stack[stackidx].goodi;
15195         bi = stack[stackidx].badi;
15196         score = stack[stackidx].score;
15197     }
15198
15199     /* When the score goes over "limit" it may actually be much higher.
15200      * Return a very large number to avoid going below the limit when giving a
15201      * bonus. */
15202     if (minscore > limit)
15203         return SCORE_MAXMAX;
15204     return minscore;
15205 }
15206
15207 #ifdef FEAT_MBYTE
15208 /*
15209  * Multi-byte version of spell_edit_score_limit().
15210  * Keep it in sync with the above!
15211  */
15212     static int
15213 spell_edit_score_limit_w(slang, badword, goodword, limit)
15214     slang_T     *slang;
15215     char_u      *badword;
15216     char_u      *goodword;
15217     int         limit;
15218 {
15219     limitscore_T    stack[10];          /* allow for over 3 * 2 edits */
15220     int             stackidx;
15221     int             bi, gi;
15222     int             bi2, gi2;
15223     int             bc, gc;
15224     int             score;
15225     int             score_off;
15226     int             minscore;
15227     int             round;
15228     char_u          *p;
15229     int             wbadword[MAXWLEN];
15230     int             wgoodword[MAXWLEN];
15231
15232     /* Get the characters from the multi-byte strings and put them in an
15233      * int array for easy access. */
15234     bi = 0;
15235     for (p = badword; *p != NUL; )
15236         wbadword[bi++] = mb_cptr2char_adv(&p);
15237     wbadword[bi++] = 0;
15238     gi = 0;
15239     for (p = goodword; *p != NUL; )
15240         wgoodword[gi++] = mb_cptr2char_adv(&p);
15241     wgoodword[gi++] = 0;
15242
15243     /*
15244      * The idea is to go from start to end over the words.  So long as
15245      * characters are equal just continue, this always gives the lowest score.
15246      * When there is a difference try several alternatives.  Each alternative
15247      * increases "score" for the edit distance.  Some of the alternatives are
15248      * pushed unto a stack and tried later, some are tried right away.  At the
15249      * end of the word the score for one alternative is known.  The lowest
15250      * possible score is stored in "minscore".
15251      */
15252     stackidx = 0;
15253     bi = 0;
15254     gi = 0;
15255     score = 0;
15256     minscore = limit + 1;
15257
15258     for (;;)
15259     {
15260         /* Skip over an equal part, score remains the same. */
15261         for (;;)
15262         {
15263             bc = wbadword[bi];
15264             gc = wgoodword[gi];
15265
15266             if (bc != gc)       /* stop at a char that's different */
15267                 break;
15268             if (bc == NUL)      /* both words end */
15269             {
15270                 if (score < minscore)
15271                     minscore = score;
15272                 goto pop;       /* do next alternative */
15273             }
15274             ++bi;
15275             ++gi;
15276         }
15277
15278         if (gc == NUL)    /* goodword ends, delete badword chars */
15279         {
15280             do
15281             {
15282                 if ((score += SCORE_DEL) >= minscore)
15283                     goto pop;       /* do next alternative */
15284             } while (wbadword[++bi] != NUL);
15285             minscore = score;
15286         }
15287         else if (bc == NUL) /* badword ends, insert badword chars */
15288         {
15289             do
15290             {
15291                 if ((score += SCORE_INS) >= minscore)
15292                     goto pop;       /* do next alternative */
15293             } while (wgoodword[++gi] != NUL);
15294             minscore = score;
15295         }
15296         else                    /* both words continue */
15297         {
15298             /* If not close to the limit, perform a change.  Only try changes
15299              * that may lead to a lower score than "minscore".
15300              * round 0: try deleting a char from badword
15301              * round 1: try inserting a char in badword */
15302             for (round = 0; round <= 1; ++round)
15303             {
15304                 score_off = score + (round == 0 ? SCORE_DEL : SCORE_INS);
15305                 if (score_off < minscore)
15306                 {
15307                     if (score_off + SCORE_EDIT_MIN >= minscore)
15308                     {
15309                         /* Near the limit, rest of the words must match.  We
15310                          * can check that right now, no need to push an item
15311                          * onto the stack. */
15312                         bi2 = bi + 1 - round;
15313                         gi2 = gi + round;
15314                         while (wgoodword[gi2] == wbadword[bi2])
15315                         {
15316                             if (wgoodword[gi2] == NUL)
15317                             {
15318                                 minscore = score_off;
15319                                 break;
15320                             }
15321                             ++bi2;
15322                             ++gi2;
15323                         }
15324                     }
15325                     else
15326                     {
15327                         /* try deleting a character from badword later */
15328                         stack[stackidx].badi = bi + 1 - round;
15329                         stack[stackidx].goodi = gi + round;
15330                         stack[stackidx].score = score_off;
15331                         ++stackidx;
15332                     }
15333                 }
15334             }
15335
15336             if (score + SCORE_SWAP < minscore)
15337             {
15338                 /* If swapping two characters makes a match then the
15339                  * substitution is more expensive, thus there is no need to
15340                  * try both. */
15341                 if (gc == wbadword[bi + 1] && bc == wgoodword[gi + 1])
15342                 {
15343                     /* Swap two characters, that is: skip them. */
15344                     gi += 2;
15345                     bi += 2;
15346                     score += SCORE_SWAP;
15347                     continue;
15348                 }
15349             }
15350
15351             /* Substitute one character for another which is the same
15352              * thing as deleting a character from both goodword and badword.
15353              * Use a better score when there is only a case difference. */
15354             if (SPELL_TOFOLD(bc) == SPELL_TOFOLD(gc))
15355                 score += SCORE_ICASE;
15356             else
15357             {
15358                 /* For a similar character use SCORE_SIMILAR. */
15359                 if (slang != NULL
15360                         && slang->sl_has_map
15361                         && similar_chars(slang, gc, bc))
15362                     score += SCORE_SIMILAR;
15363                 else
15364                     score += SCORE_SUBST;
15365             }
15366
15367             if (score < minscore)
15368             {
15369                 /* Do the substitution. */
15370                 ++gi;
15371                 ++bi;
15372                 continue;
15373             }
15374         }
15375 pop:
15376         /*
15377          * Get here to try the next alternative, pop it from the stack.
15378          */
15379         if (stackidx == 0)              /* stack is empty, finished */
15380             break;
15381
15382         /* pop an item from the stack */
15383         --stackidx;
15384         gi = stack[stackidx].goodi;
15385         bi = stack[stackidx].badi;
15386         score = stack[stackidx].score;
15387     }
15388
15389     /* When the score goes over "limit" it may actually be much higher.
15390      * Return a very large number to avoid going below the limit when giving a
15391      * bonus. */
15392     if (minscore > limit)
15393         return SCORE_MAXMAX;
15394     return minscore;
15395 }
15396 #endif
15397
15398 /*
15399  * ":spellinfo"
15400  */
15401 /*ARGSUSED*/
15402     void
15403 ex_spellinfo(eap)
15404     exarg_T *eap;
15405 {
15406     int         lpi;
15407     langp_T     *lp;
15408     char_u      *p;
15409
15410     if (no_spell_checking(curwin))
15411         return;
15412
15413     msg_start();
15414     for (lpi = 0; lpi < curbuf->b_langp.ga_len && !got_int; ++lpi)
15415     {
15416         lp = LANGP_ENTRY(curbuf->b_langp, lpi);
15417         msg_puts((char_u *)"file: ");
15418         msg_puts(lp->lp_slang->sl_fname);
15419         msg_putchar('\n');
15420         p = lp->lp_slang->sl_info;
15421         if (p != NULL)
15422         {
15423             msg_puts(p);
15424             msg_putchar('\n');
15425         }
15426     }
15427     msg_end();
15428 }
15429
15430 #define DUMPFLAG_KEEPCASE   1   /* round 2: keep-case tree */
15431 #define DUMPFLAG_COUNT      2   /* include word count */
15432 #define DUMPFLAG_ICASE      4   /* ignore case when finding matches */
15433 #define DUMPFLAG_ONECAP     8   /* pattern starts with capital */
15434 #define DUMPFLAG_ALLCAP     16  /* pattern is all capitals */
15435
15436 /*
15437  * ":spelldump"
15438  */
15439     void
15440 ex_spelldump(eap)
15441     exarg_T *eap;
15442 {
15443     buf_T       *buf = curbuf;
15444
15445     if (no_spell_checking(curwin))
15446         return;
15447
15448     /* Create a new empty buffer by splitting the window. */
15449     do_cmdline_cmd((char_u *)"new");
15450     if (!bufempty() || !buf_valid(buf))
15451         return;
15452
15453     spell_dump_compl(buf, NULL, 0, NULL, eap->forceit ? DUMPFLAG_COUNT : 0);
15454
15455     /* Delete the empty line that we started with. */
15456     if (curbuf->b_ml.ml_line_count > 1)
15457         ml_delete(curbuf->b_ml.ml_line_count, FALSE);
15458
15459     redraw_later(NOT_VALID);
15460 }
15461
15462 /*
15463  * Go through all possible words and:
15464  * 1. When "pat" is NULL: dump a list of all words in the current buffer.
15465  *      "ic" and "dir" are not used.
15466  * 2. When "pat" is not NULL: add matching words to insert mode completion.
15467  */
15468     void
15469 spell_dump_compl(buf, pat, ic, dir, dumpflags_arg)
15470     buf_T       *buf;       /* buffer with spell checking */
15471     char_u      *pat;       /* leading part of the word */
15472     int         ic;         /* ignore case */
15473     int         *dir;       /* direction for adding matches */
15474     int         dumpflags_arg;  /* DUMPFLAG_* */
15475 {
15476     langp_T     *lp;
15477     slang_T     *slang;
15478     idx_T       arridx[MAXWLEN];
15479     int         curi[MAXWLEN];
15480     char_u      word[MAXWLEN];
15481     int         c;
15482     char_u      *byts;
15483     idx_T       *idxs;
15484     linenr_T    lnum = 0;
15485     int         round;
15486     int         depth;
15487     int         n;
15488     int         flags;
15489     char_u      *region_names = NULL;       /* region names being used */
15490     int         do_region = TRUE;           /* dump region names and numbers */
15491     char_u      *p;
15492     int         lpi;
15493     int         dumpflags = dumpflags_arg;
15494     int         patlen;
15495
15496     /* When ignoring case or when the pattern starts with capital pass this on
15497      * to dump_word(). */
15498     if (pat != NULL)
15499     {
15500         if (ic)
15501             dumpflags |= DUMPFLAG_ICASE;
15502         else
15503         {
15504             n = captype(pat, NULL);
15505             if (n == WF_ONECAP)
15506                 dumpflags |= DUMPFLAG_ONECAP;
15507             else if (n == WF_ALLCAP
15508 #ifdef FEAT_MBYTE
15509                     && (int)STRLEN(pat) > mb_ptr2len(pat)
15510 #else
15511                     && (int)STRLEN(pat) > 1
15512 #endif
15513                     )
15514                 dumpflags |= DUMPFLAG_ALLCAP;
15515         }
15516     }
15517
15518     /* Find out if we can support regions: All languages must support the same
15519      * regions or none at all. */
15520     for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
15521     {
15522         lp = LANGP_ENTRY(buf->b_langp, lpi);
15523         p = lp->lp_slang->sl_regions;
15524         if (p[0] != 0)
15525         {
15526             if (region_names == NULL)       /* first language with regions */
15527                 region_names = p;
15528             else if (STRCMP(region_names, p) != 0)
15529             {
15530                 do_region = FALSE;          /* region names are different */
15531                 break;
15532             }
15533         }
15534     }
15535
15536     if (do_region && region_names != NULL)
15537     {
15538         if (pat == NULL)
15539         {
15540             vim_snprintf((char *)IObuff, IOSIZE, "/regions=%s", region_names);
15541             ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
15542         }
15543     }
15544     else
15545         do_region = FALSE;
15546
15547     /*
15548      * Loop over all files loaded for the entries in 'spelllang'.
15549      */
15550     for (lpi = 0; lpi < buf->b_langp.ga_len; ++lpi)
15551     {
15552         lp = LANGP_ENTRY(buf->b_langp, lpi);
15553         slang = lp->lp_slang;
15554         if (slang->sl_fbyts == NULL)        /* reloading failed */
15555             continue;
15556
15557         if (pat == NULL)
15558         {
15559             vim_snprintf((char *)IObuff, IOSIZE, "# file: %s", slang->sl_fname);
15560             ml_append(lnum++, IObuff, (colnr_T)0, FALSE);
15561         }
15562
15563         /* When matching with a pattern and there are no prefixes only use
15564          * parts of the tree that match "pat". */
15565         if (pat != NULL && slang->sl_pbyts == NULL)
15566             patlen = (int)STRLEN(pat);
15567         else
15568             patlen = -1;
15569
15570         /* round 1: case-folded tree
15571          * round 2: keep-case tree */
15572         for (round = 1; round <= 2; ++round)
15573         {
15574             if (round == 1)
15575             {
15576                 dumpflags &= ~DUMPFLAG_KEEPCASE;
15577                 byts = slang->sl_fbyts;
15578                 idxs = slang->sl_fidxs;
15579             }
15580             else
15581             {
15582                 dumpflags |= DUMPFLAG_KEEPCASE;
15583                 byts = slang->sl_kbyts;
15584                 idxs = slang->sl_kidxs;
15585             }
15586             if (byts == NULL)
15587                 continue;               /* array is empty */
15588
15589             depth = 0;
15590             arridx[0] = 0;
15591             curi[0] = 1;
15592             while (depth >= 0 && !got_int
15593                                        && (pat == NULL || !compl_interrupted))
15594             {
15595                 if (curi[depth] > byts[arridx[depth]])
15596                 {
15597                     /* Done all bytes at this node, go up one level. */
15598                     --depth;
15599                     line_breakcheck();
15600                     ins_compl_check_keys(50);
15601                 }
15602                 else
15603                 {
15604                     /* Do one more byte at this node. */
15605                     n = arridx[depth] + curi[depth];
15606                     ++curi[depth];
15607                     c = byts[n];
15608                     if (c == 0)
15609                     {
15610                         /* End of word, deal with the word.
15611                          * Don't use keep-case words in the fold-case tree,
15612                          * they will appear in the keep-case tree.
15613                          * Only use the word when the region matches. */
15614                         flags = (int)idxs[n];
15615                         if ((round == 2 || (flags & WF_KEEPCAP) == 0)
15616                                 && (flags & WF_NEEDCOMP) == 0
15617                                 && (do_region
15618                                     || (flags & WF_REGION) == 0
15619                                     || (((unsigned)flags >> 16)
15620                                                        & lp->lp_region) != 0))
15621                         {
15622                             word[depth] = NUL;
15623                             if (!do_region)
15624                                 flags &= ~WF_REGION;
15625
15626                             /* Dump the basic word if there is no prefix or
15627                              * when it's the first one. */
15628                             c = (unsigned)flags >> 24;
15629                             if (c == 0 || curi[depth] == 2)
15630                             {
15631                                 dump_word(slang, word, pat, dir,
15632                                                       dumpflags, flags, lnum);
15633                                 if (pat == NULL)
15634                                     ++lnum;
15635                             }
15636
15637                             /* Apply the prefix, if there is one. */
15638                             if (c != 0)
15639                                 lnum = dump_prefixes(slang, word, pat, dir,
15640                                                       dumpflags, flags, lnum);
15641                         }
15642                     }
15643                     else
15644                     {
15645                         /* Normal char, go one level deeper. */
15646                         word[depth++] = c;
15647                         arridx[depth] = idxs[n];
15648                         curi[depth] = 1;
15649
15650                         /* Check if this characters matches with the pattern.
15651                          * If not skip the whole tree below it.
15652                          * Always ignore case here, dump_word() will check
15653                          * proper case later.  This isn't exactly right when
15654                          * length changes for multi-byte characters with
15655                          * ignore case... */
15656                         if (depth <= patlen
15657                                         && MB_STRNICMP(word, pat, depth) != 0)
15658                             --depth;
15659                     }
15660                 }
15661             }
15662         }
15663     }
15664 }
15665
15666 /*
15667  * Dump one word: apply case modifications and append a line to the buffer.
15668  * When "lnum" is zero add insert mode completion.
15669  */
15670     static void
15671 dump_word(slang, word, pat, dir, dumpflags, wordflags, lnum)
15672     slang_T     *slang;
15673     char_u      *word;
15674     char_u      *pat;
15675     int         *dir;
15676     int         dumpflags;
15677     int         wordflags;
15678     linenr_T    lnum;
15679 {
15680     int         keepcap = FALSE;
15681     char_u      *p;
15682     char_u      *tw;
15683     char_u      cword[MAXWLEN];
15684     char_u      badword[MAXWLEN + 10];
15685     int         i;
15686     int         flags = wordflags;
15687
15688     if (dumpflags & DUMPFLAG_ONECAP)
15689         flags |= WF_ONECAP;
15690     if (dumpflags & DUMPFLAG_ALLCAP)
15691         flags |= WF_ALLCAP;
15692
15693     if ((dumpflags & DUMPFLAG_KEEPCASE) == 0 && (flags & WF_CAPMASK) != 0)
15694     {
15695         /* Need to fix case according to "flags". */
15696         make_case_word(word, cword, flags);
15697         p = cword;
15698     }
15699     else
15700     {
15701         p = word;
15702         if ((dumpflags & DUMPFLAG_KEEPCASE)
15703                 && ((captype(word, NULL) & WF_KEEPCAP) == 0
15704                                                  || (flags & WF_FIXCAP) != 0))
15705             keepcap = TRUE;
15706     }
15707     tw = p;
15708
15709     if (pat == NULL)
15710     {
15711         /* Add flags and regions after a slash. */
15712         if ((flags & (WF_BANNED | WF_RARE | WF_REGION)) || keepcap)
15713         {
15714             STRCPY(badword, p);
15715             STRCAT(badword, "/");
15716             if (keepcap)
15717                 STRCAT(badword, "=");
15718             if (flags & WF_BANNED)
15719                 STRCAT(badword, "!");
15720             else if (flags & WF_RARE)
15721                 STRCAT(badword, "?");
15722             if (flags & WF_REGION)
15723                 for (i = 0; i < 7; ++i)
15724                     if (flags & (0x10000 << i))
15725                         sprintf((char *)badword + STRLEN(badword), "%d", i + 1);
15726             p = badword;
15727         }
15728
15729         if (dumpflags & DUMPFLAG_COUNT)
15730         {
15731             hashitem_T  *hi;
15732
15733             /* Include the word count for ":spelldump!". */
15734             hi = hash_find(&slang->sl_wordcount, tw);
15735             if (!HASHITEM_EMPTY(hi))
15736             {
15737                 vim_snprintf((char *)IObuff, IOSIZE, "%s\t%d",
15738                                                      tw, HI2WC(hi)->wc_count);
15739                 p = IObuff;
15740             }
15741         }
15742
15743         ml_append(lnum, p, (colnr_T)0, FALSE);
15744     }
15745     else if (((dumpflags & DUMPFLAG_ICASE)
15746                     ? MB_STRNICMP(p, pat, STRLEN(pat)) == 0
15747                     : STRNCMP(p, pat, STRLEN(pat)) == 0)
15748                 && ins_compl_add_infercase(p, (int)STRLEN(p),
15749                                           p_ic, NULL, *dir, 0) == OK)
15750         /* if dir was BACKWARD then honor it just once */
15751         *dir = FORWARD;
15752 }
15753
15754 /*
15755  * For ":spelldump": Find matching prefixes for "word".  Prepend each to
15756  * "word" and append a line to the buffer.
15757  * When "lnum" is zero add insert mode completion.
15758  * Return the updated line number.
15759  */
15760     static linenr_T
15761 dump_prefixes(slang, word, pat, dir, dumpflags, flags, startlnum)
15762     slang_T     *slang;
15763     char_u      *word;      /* case-folded word */
15764     char_u      *pat;
15765     int         *dir;
15766     int         dumpflags;
15767     int         flags;      /* flags with prefix ID */
15768     linenr_T    startlnum;
15769 {
15770     idx_T       arridx[MAXWLEN];
15771     int         curi[MAXWLEN];
15772     char_u      prefix[MAXWLEN];
15773     char_u      word_up[MAXWLEN];
15774     int         has_word_up = FALSE;
15775     int         c;
15776     char_u      *byts;
15777     idx_T       *idxs;
15778     linenr_T    lnum = startlnum;
15779     int         depth;
15780     int         n;
15781     int         len;
15782     int         i;
15783
15784     /* If the word starts with a lower-case letter make the word with an
15785      * upper-case letter in word_up[]. */
15786     c = PTR2CHAR(word);
15787     if (SPELL_TOUPPER(c) != c)
15788     {
15789         onecap_copy(word, word_up, TRUE);
15790         has_word_up = TRUE;
15791     }
15792
15793     byts = slang->sl_pbyts;
15794     idxs = slang->sl_pidxs;
15795     if (byts != NULL)           /* array not is empty */
15796     {
15797         /*
15798          * Loop over all prefixes, building them byte-by-byte in prefix[].
15799          * When at the end of a prefix check that it supports "flags".
15800          */
15801         depth = 0;
15802         arridx[0] = 0;
15803         curi[0] = 1;
15804         while (depth >= 0 && !got_int)
15805         {
15806             n = arridx[depth];
15807             len = byts[n];
15808             if (curi[depth] > len)
15809             {
15810                 /* Done all bytes at this node, go up one level. */
15811                 --depth;
15812                 line_breakcheck();
15813             }
15814             else
15815             {
15816                 /* Do one more byte at this node. */
15817                 n += curi[depth];
15818                 ++curi[depth];
15819                 c = byts[n];
15820                 if (c == 0)
15821                 {
15822                     /* End of prefix, find out how many IDs there are. */
15823                     for (i = 1; i < len; ++i)
15824                         if (byts[n + i] != 0)
15825                             break;
15826                     curi[depth] += i - 1;
15827
15828                     c = valid_word_prefix(i, n, flags, word, slang, FALSE);
15829                     if (c != 0)
15830                     {
15831                         vim_strncpy(prefix + depth, word, MAXWLEN - depth - 1);
15832                         dump_word(slang, prefix, pat, dir, dumpflags,
15833                                 (c & WF_RAREPFX) ? (flags | WF_RARE)
15834                                                                : flags, lnum);
15835                         if (lnum != 0)
15836                             ++lnum;
15837                     }
15838
15839                     /* Check for prefix that matches the word when the
15840                      * first letter is upper-case, but only if the prefix has
15841                      * a condition. */
15842                     if (has_word_up)
15843                     {
15844                         c = valid_word_prefix(i, n, flags, word_up, slang,
15845                                                                         TRUE);
15846                         if (c != 0)
15847                         {
15848                             vim_strncpy(prefix + depth, word_up,
15849                                                          MAXWLEN - depth - 1);
15850                             dump_word(slang, prefix, pat, dir, dumpflags,
15851                                     (c & WF_RAREPFX) ? (flags | WF_RARE)
15852                                                                : flags, lnum);
15853                             if (lnum != 0)
15854                                 ++lnum;
15855                         }
15856                     }
15857                 }
15858                 else
15859                 {
15860                     /* Normal char, go one level deeper. */
15861                     prefix[depth++] = c;
15862                     arridx[depth] = idxs[n];
15863                     curi[depth] = 1;
15864                 }
15865             }
15866         }
15867     }
15868
15869     return lnum;
15870 }
15871
15872 /*
15873  * Move "p" to the end of word "start".
15874  * Uses the spell-checking word characters.
15875  */
15876     char_u *
15877 spell_to_word_end(start, buf)
15878     char_u  *start;
15879     buf_T   *buf;
15880 {
15881     char_u  *p = start;
15882
15883     while (*p != NUL && spell_iswordp(p, buf))
15884         mb_ptr_adv(p);
15885     return p;
15886 }
15887
15888 #if defined(FEAT_INS_EXPAND) || defined(PROTO)
15889 /*
15890  * For Insert mode completion CTRL-X s:
15891  * Find start of the word in front of column "startcol".
15892  * We don't check if it is badly spelled, with completion we can only change
15893  * the word in front of the cursor.
15894  * Returns the column number of the word.
15895  */
15896     int
15897 spell_word_start(startcol)
15898     int         startcol;
15899 {
15900     char_u      *line;
15901     char_u      *p;
15902     int         col = 0;
15903
15904     if (no_spell_checking(curwin))
15905         return startcol;
15906
15907     /* Find a word character before "startcol". */
15908     line = ml_get_curline();
15909     for (p = line + startcol; p > line; )
15910     {
15911         mb_ptr_back(line, p);
15912         if (spell_iswordp_nmw(p))
15913             break;
15914     }
15915
15916     /* Go back to start of the word. */
15917     while (p > line)
15918     {
15919         col = (int)(p - line);
15920         mb_ptr_back(line, p);
15921         if (!spell_iswordp(p, curbuf))
15922             break;
15923         col = 0;
15924     }
15925
15926     return col;
15927 }
15928
15929 /*
15930  * Need to check for 'spellcapcheck' now, the word is removed before
15931  * expand_spelling() is called.  Therefore the ugly global variable.
15932  */
15933 static int spell_expand_need_cap;
15934
15935     void
15936 spell_expand_check_cap(col)
15937     colnr_T col;
15938 {
15939     spell_expand_need_cap = check_need_cap(curwin->w_cursor.lnum, col);
15940 }
15941
15942 /*
15943  * Get list of spelling suggestions.
15944  * Used for Insert mode completion CTRL-X ?.
15945  * Returns the number of matches.  The matches are in "matchp[]", array of
15946  * allocated strings.
15947  */
15948 /*ARGSUSED*/
15949     int
15950 expand_spelling(lnum, col, pat, matchp)
15951     linenr_T    lnum;
15952     int         col;
15953     char_u      *pat;
15954     char_u      ***matchp;
15955 {
15956     garray_T    ga;
15957
15958     spell_suggest_list(&ga, pat, 100, spell_expand_need_cap, TRUE);
15959     *matchp = ga.ga_data;
15960     return ga.ga_len;
15961 }
15962 #endif
15963
15964 #endif  /* FEAT_SPELL */